From 0d39f88996ec5d55d6aae0cf3bccde548adad3da Mon Sep 17 00:00:00 2001 From: Pariksheet Date: Fri, 11 May 2018 15:11:33 +0530 Subject: [PATCH 01/12] CPP implementation of L2Norm and LRN ops --- topi/include/topi/cuda/nn.h | 107 +++++++++++++++++++++ topi/include/topi/nn/l2_norm.h | 46 +++++++++ topi/include/topi/nn/local_response_norm.h | 76 +++++++++++++++ topi/include/topi/rocm/nn.h | 41 ++++++++ topi/python/topi/cuda/nn.py | 60 ++---------- topi/python/topi/generic/nn.py | 11 ++- topi/python/topi/nn/l2_norm.py | 10 +- topi/python/topi/nn/local_response_norm.py | 28 +----- topi/python/topi/rocm/nn.py | 11 ++- topi/src/topi.cc | 39 ++++++++ topi/tests/python/test_topi_l2norm.py | 7 +- topi/tests/python/test_topi_lrn.py | 15 +-- topi/tests/python_cpp/test_topi_l2norm.py | 75 +++++++++++++++ topi/tests/python_cpp/test_topi_lrn.py | 101 +++++++++++++++++++ 14 files changed, 525 insertions(+), 102 deletions(-) create mode 100644 topi/include/topi/cuda/nn.h create mode 100644 topi/include/topi/nn/l2_norm.h create mode 100644 topi/include/topi/nn/local_response_norm.h create mode 100644 topi/include/topi/rocm/nn.h create mode 100644 topi/tests/python_cpp/test_topi_l2norm.py create mode 100644 topi/tests/python_cpp/test_topi_lrn.py diff --git a/topi/include/topi/cuda/nn.h b/topi/include/topi/cuda/nn.h new file mode 100644 index 000000000000..a425a47b5ce7 --- /dev/null +++ b/topi/include/topi/cuda/nn.h @@ -0,0 +1,107 @@ +/*! +* Copyright (c) 2018 by Contributors +* \file cuda/nn.h +* \brief CUDA schedule for lrn and l2 normalization operations +*/ +#ifndef TOPI_CUDA_NN_H_ +#define TOPI_CUDA_NN_H_ + +#include "tvm/tvm.h" +#include "tvm/build_module.h" +#include "topi/tags.h" + +namespace topi { +using namespace tvm; +namespace cuda { +/*! +* \brief Create a CUDA schedule for LRN +* +* \param target The target to generate a schedule for. +* \param outs The output tensors. +* +* \return A schedule for the given ops. +*/ +inline Schedule schedule_lrn(const Target &target, const Array& outs) { + Array out_ops; + for (auto t : outs) { + out_ops.push_back(t->op); + } + auto s = create_schedule(out_ops); + auto num_thread = 64; + auto block_x = tvm::thread_axis(Range(), "blockIdx.x"); + auto thread_x = tvm::thread_axis(Range(0, num_thread), "threadIdx.x"); + auto lrn = outs[0]; + auto sqr_sum_up = lrn->op->InputTensors()[1]; + auto sqr_sum = sqr_sum_up->op->InputTensors()[0]; + auto set_pad = sqr_sum->op->InputTensors()[0]; + s[set_pad].bind(set_pad->op.as()->axis[0], block_x); + auto rxk = sqr_sum->op.as()->reduce_axis[0]; + IterVar xko, xki; + s[sqr_sum].split(rxk, num_thread, &xko, &xki); + auto srf = s.rfactor(sqr_sum, xki)[0]; + s[sqr_sum].bind(s[sqr_sum]->op.as()->axis[0], block_x); + s[sqr_sum].bind(s[sqr_sum]->op.as()->reduce_axis[0], thread_x); + s[srf].compute_at(s[sqr_sum], s[sqr_sum]->op.as()->reduce_axis[0]); + s[sqr_sum_up].bind(sqr_sum_up->op.as()->axis[0], block_x); + IterVar xto, xti; + s[lrn].split_by_nparts(lrn->op.as()->axis[1], num_thread, &xto, &xti); + s[lrn].bind(lrn->op.as()->axis[0], block_x); + s[lrn].bind(xto, thread_x); + + return s; +} + +/*! +* \brief Create a CUDA schedule for L2 normalization +* +* \param target The target to generate a schedule for. +* \param outs The output tensors. +* +* \return A schedule for the given ops. 
+*/ +inline Schedule schedule_l2norm(const Target &target, const Array& outs) { + Array out_ops; + for (auto t : outs) { + out_ops.push_back(t->op); + } + auto s = create_schedule(out_ops); + + std::function traverse; + traverse = [&](const Operation& op) { + // Inline all one-to-one-mapping operators except the last stage (output) + if (is_injective(op->tag) || op->tag == "l2norm") { + if (!detail::contains(s->outputs, op)) { + s[op].compute_inline(); + } + for (auto tensor : op->InputTensors()) { + if (tensor->op->InputTensors().size() > 0) { + traverse(tensor->op); + } + } + } else if (op->tag == "comm_reduce") { + ScheduleReduce(target, op, s, false); + for (auto tensor : op->InputTensors()) { + traverse(tensor->op); + } + } else { + LOG(ERROR) << "Unsupported operator " << op->tag; + } + }; + + traverse(outs[0]->op); + auto num_thread = 64; + auto l2norm = outs[0]; + auto block_x = tvm::thread_axis(Range(), "blockIdx.x"); + auto thread_x = tvm::thread_axis(Range(0, num_thread), "threadIdx.x"); + IterVar xto, xti; + s[l2norm].split_by_nparts(l2norm->op.as()->axis[1], num_thread, &xto, &xti); + s[l2norm].bind(l2norm->op.as()->axis[0], block_x); + s[l2norm].bind(xto, thread_x); + return s; +} +} // namespace cuda +} // namespace topi +#endif // TOPI_CUDA_NN_H_ + + + diff --git a/topi/include/topi/nn/l2_norm.h b/topi/include/topi/nn/l2_norm.h new file mode 100644 index 000000000000..e1fe25cb6040 --- /dev/null +++ b/topi/include/topi/nn/l2_norm.h @@ -0,0 +1,46 @@ +/*! + * Copyright (c) 2018 by Contributors + * \brief l2 normalization op constructions + * \file nn/l2_norm.h + */ +#ifndef TOPI_NN_L2_NORM_H_ +#define TOPI_NN_L2_NORM_H_ + +#include +#include +#include "topi/tags.h" +#include "tvm/tvm.h" +namespace topi { +namespace nn { +using namespace tvm; + +/*! +* \brief L2 normalization inference operator +* +* \param data The input tensor. 4-D with shape [batch, channel, height, width] +* \param eps Epsilon to prevent div by 0 +* \param axis Axes over the normalization applied +* \param name The name of the operation +* \param tag The tag to mark the operation +* +* \return A Tensor whose op member is the l2 normalization operation +*/ +inline Tensor l2norm_instance(const Tensor& data, + float eps, + const Array& axis, + std::string name = "tensor", + std::string tag = "l2norm") { + CHECK_EQ(data->shape.size(), 4) << "L2 norm requires 4-D input"; + auto input_shape = data->shape; + Tensor dot_value = pow(data, static_cast(2.0)); + Tensor sum_value = topi::sum(dot_value, axis, true); + Tensor expand_sum = topi::broadcast_to(sum_value, input_shape); + return topi::broadcast_div(data, + topi::sqrt(tvm::compute(expand_sum->shape, + [&](const Array& i){ + return (max(expand_sum(i), eps)); + }, name = name, tag = tag))); +} +} // namespace nn +} // namespace topi +#endif // TOPI_NN_L2_NORM_H_ diff --git a/topi/include/topi/nn/local_response_norm.h b/topi/include/topi/nn/local_response_norm.h new file mode 100644 index 000000000000..0e31c7c0515d --- /dev/null +++ b/topi/include/topi/nn/local_response_norm.h @@ -0,0 +1,76 @@ +/*! + * Copyright (c) 2018 by Contributors + * \brief local response normalization op constructions + * \file nn/local_response_normalization.h + */ +#ifndef TOPI_NN_LOCAL_RESPONSE_NORM_H_ +#define TOPI_NN_LOCAL_RESPONSE_NORM_H_ + +#include + +#include "topi/tags.h" +#include "tvm/tvm.h" + +namespace topi { +namespace nn { +using namespace tvm; + +/*! +* \brief Local response normalization inference operator +* +* \param data The input tensor. 
4-D shape NCHW or NHWC +* \param size Integer to define normalisation window size +* \param axis Input data layout channel axis +* \param alpha Float scaling factor +* \param beta Exponent value +* \param bias Offset to avoid dividing by zero +* \param name The name of the operation +* \param tag The tag to mark the operation +* +* \return A Tensor whose op member is the Local response normalization operation +*/ +inline Tensor lrn(const Tensor& data, + int size, + int axis = 1, + float alpha = 0.0001, + float beta = 0.75, + float bias = 2, + std::string name = "tensor", + std::string tag = kBroadcast) { + CHECK_EQ(data->shape.size(), 4) << "LRN requires 4-D input"; + assert(size % 2 == 1); + assert(axis == 1 || axis == 3); + auto input_shape = data->shape; + Array pad_before{ 0, 0, 0, 0}; + Array pad_after{ 0, 0, 0, 0}; + pad_before.Set(axis, static_cast(size/2)); + pad_after.Set(axis, static_cast(size/2)); + auto pad_data = pad(data, pad_before, pad_after, 0, "pad_data"); + auto rxs = tvm::reduce_axis(Range(0, size), "rxs"); + Tensor sqr_sum; + if (axis == 1) { + sqr_sum = tvm::compute(input_shape, + [&](Var i, Var l, Var j, Var k) { + return tvm::sum(pad_data(i, l + rxs, j, k) * + pad_data(i, l + rxs, j, k), + {rxs}); + }); + } else if (axis == 3) { + sqr_sum = tvm::compute(input_shape, + [&](Var i, Var l, Var j, Var k) { + return tvm::sum(pad_data(i, l, j, k + rxs) * + pad_data(i, l, j, k + rxs), + {rxs}); + }); + } + auto sqrt_sum_up = tvm::compute(input_shape, + [&](Var i, Var j, Var k, Var l) { + return tvm::pow(bias + + (alpha * sqr_sum(i, j, k, l) / size), + beta); + }); + return topi::broadcast_div(data, sqrt_sum_up); +} +} // namespace nn +} // namespace topi +#endif // TOPI_NN_LOCAL_RESPONSE_NORM_H_ diff --git a/topi/include/topi/rocm/nn.h b/topi/include/topi/rocm/nn.h new file mode 100644 index 000000000000..98b74ad80593 --- /dev/null +++ b/topi/include/topi/rocm/nn.h @@ -0,0 +1,41 @@ +/*! +* Copyright (c) 2018 by Contributors +* \file rocm/nn.h +* \brief rocm schedule for lrn and l2 normalization operations +*/ +#ifndef TOPI_ROCM_NN_H_ +#define TOPI_ROCM_NN_H_ + +#include "tvm/tvm.h" +#include "tvm/build_module.h" +#include "topi/tags.h" + +namespace topi { +using namespace tvm; +namespace rocm { +/*! +* \brief Create a rocm schedule for LRN +* +* \param target The target to generate a schedule for. +* \param outs The output tensors. +* +* \return A schedule for the given ops. +*/ +inline Schedule schedule_lrn(const Target &target, const Array& outs) { + return topi::cuda::schedule_lrn(target, outs); +} + +/*! +* \brief Create a rocm schedule for L2 Normalization +* +* \param target The target to generate a schedule for. +* \param outs The output tensors. +* +* \return A schedule for the given ops. +*/ +inline Schedule schedule_l2norm(const Target &target, const Array& outs) { + return topi::cuda::schedule_l2norm(target, outs); +} +} // namespace rocm +} // namespace topi +#endif // TOPI_ROCM_NN_H_ diff --git a/topi/python/topi/cuda/nn.py b/topi/python/topi/cuda/nn.py index e8757970505b..c565698001fe 100644 --- a/topi/python/topi/cuda/nn.py +++ b/topi/python/topi/cuda/nn.py @@ -4,8 +4,7 @@ import tvm from .. import generic -from .. import tag -from .reduction import _schedule_reduce +from .. import cpp @generic.schedule_lrn.register(["cuda"]) def schedule_lrn(outs): @@ -22,28 +21,9 @@ def schedule_lrn(outs): sch: Schedule The computation schedule for the op. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - num_thread = 64 - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") - - lrn = outs[0] - sqr_sum_up = lrn.op.input_tensors[1] - sqr_sum = sqr_sum_up.op.input_tensors[0] - set_pad = sqr_sum.op.input_tensors[0] - s[set_pad].bind(set_pad.op.axis[0], block_x) - rxk = sqr_sum.op.reduce_axis[0] - _, xki = s[sqr_sum].split(rxk, factor=num_thread) - srf = s.rfactor(sqr_sum, xki) - s[sqr_sum].bind(s[sqr_sum].op.axis[0], block_x) - s[sqr_sum].bind(s[sqr_sum].op.reduce_axis[0], thread_x) - s[srf].compute_at(s[sqr_sum], s[sqr_sum].op.reduce_axis[0]) - s[sqr_sum_up].bind(sqr_sum_up.op.axis[0], block_x) - xto, _ = s[lrn].split(lrn.op.axis[1], nparts=num_thread) - s[lrn].bind(lrn.op.axis[0], block_x) - s[lrn].bind(xto, thread_x) - return s + target = tvm.target.current_target(allow_none=False) + cpp_target = cpp.TEST_create_target(target.target_name) + return cpp.cuda.schedule_lrn(cpp_target, outs) @generic.schedule_l2norm.register(["cuda"]) def schedule_l2norm(outs): @@ -60,32 +40,6 @@ def schedule_l2norm(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - - def traverse(OP): - '''inline all one-to-one-mapping operators - except the last stage (output)''' - if tag.is_injective(OP.tag) or OP.tag == 'l2norm': - if OP not in s.outputs: - s[OP].compute_inline() - for tensor in OP.input_tensors: - if tensor.op.input_tensors: - traverse(tensor.op) - elif OP.tag == 'comm_reduce': - _schedule_reduce(OP, s, is_idx_reduce=False) - for tensor in OP.input_tensors: - traverse(tensor.op) - else: - raise RuntimeError("Unsupported operator tag: %s" % OP.tag) - traverse(outs[0].op) - - num_thread = 64 - l2norm = outs[0] - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") - xto, _ = s[l2norm].split(l2norm.op.axis[1], nparts=num_thread) - s[l2norm].bind(l2norm.op.axis[0], block_x) - s[l2norm].bind(xto, thread_x) - - return s + target = tvm.target.current_target(allow_none=False) + cpp_target = cpp.TEST_create_target(target.target_name) + return cpp.cuda.schedule_l2norm(cpp_target, outs) diff --git a/topi/python/topi/generic/nn.py b/topi/python/topi/generic/nn.py index 5a16d12206a3..7f67f8093856 100644 --- a/topi/python/topi/generic/nn.py +++ b/topi/python/topi/generic/nn.py @@ -2,7 +2,7 @@ """Generic nn operators""" from __future__ import absolute_import as _abs import tvm - +from .. import cpp def _default_schedule(outs, auto_inline): """Default schedule for llvm.""" @@ -273,8 +273,9 @@ def schedule_lrn(outs): sch: Schedule The computation schedule for the op. """ - return _default_schedule(outs, False) - + target = tvm.target.current_target(allow_none=False) + cpp_target = cpp.TEST_create_target(target.target_name) + return cpp.generic.default_schedule(cpp_target, outs, False) @tvm.target.generic_func def schedule_l2norm(outs): @@ -291,4 +292,6 @@ def schedule_l2norm(outs): sch: Schedule The computation schedule for the op. 
""" - return _default_schedule(outs, False) + target = tvm.target.current_target(allow_none=False) + cpp_target = cpp.TEST_create_target(target.target_name) + return cpp.generic.default_schedule(cpp_target, outs, False) diff --git a/topi/python/topi/nn/l2_norm.py b/topi/python/topi/nn/l2_norm.py index 6b5381a85599..964c8f8d264a 100644 --- a/topi/python/topi/nn/l2_norm.py +++ b/topi/python/topi/nn/l2_norm.py @@ -2,7 +2,7 @@ """TVM operator for l2norm""" from __future__ import absolute_import import tvm -import topi +from .. import cpp @tvm.target.generic_func def l2norm_instance(data, eps, axis=None): @@ -26,10 +26,4 @@ def l2norm_instance(data, eps, axis=None): output : tvm.Tensor 4-D output with same shape """ - assert len(data.shape) == 4, "only support 4-dim lrn" - dot_value = topi.cpp.pow(data, 2.0) - sum_value = topi.sum(dot_value, axis=axis, keepdims=True) - expand_sum = topi.broadcast_to(sum_value, data.shape) - return topi.broadcast_div(data, topi.sqrt(\ - tvm.compute(expand_sum.shape, lambda i, j, k, l:\ - tvm.max(expand_sum[i, j, k, l], eps), tag='l2norm'))) + return cpp.nn.l2norm_instance(data, eps, axis) diff --git a/topi/python/topi/nn/local_response_norm.py b/topi/python/topi/nn/local_response_norm.py index b44e02214acc..73eb41242513 100644 --- a/topi/python/topi/nn/local_response_norm.py +++ b/topi/python/topi/nn/local_response_norm.py @@ -2,8 +2,7 @@ """TVM operator for local response norm compute.""" from __future__ import absolute_import import tvm -import topi -from .pad import pad +from .. import cpp @tvm.target.generic_func def lrn(data, size, axis=1, alpha=0.0001, beta=0.75, bias=2): @@ -42,27 +41,4 @@ def lrn(data, size, axis=1, alpha=0.0001, beta=0.75, bias=2): output : tvm.Tensor 4-D output with same shape """ - assert len(data.shape) == 4, "only support 4-dim lrn" - assert (size % 2) == 1, "size should be odd number" - assert (axis == 1) or (axis == 3), "axis should 1 or 3 for NCHW and NHWC" - ##Add padding on left & right of size radius first - pad_after = pad_before = [0, 0, 0, 0] - pad_after[axis] = pad_before[axis] = (size//2) - pad_data = pad(data, pad_before, pad_after, name="pad_data") - - rxs = tvm.reduce_axis((0, size), name='rxs') - if axis == 1: - #NCHW layout - sqr_sum = tvm.compute(data.shape, lambda i, j, k, l: tvm.sum( - pad_data[i, j + rxs, k, l] * pad_data[i, j + rxs, k, l], - axis=rxs)) - elif axis == 3: - #NHWC layout - sqr_sum = tvm.compute(data.shape, lambda i, j, k, l: tvm.sum( - pad_data[i, j, k, l + rxs] * pad_data[i, j, k, l + rxs], - axis=rxs)) - - sqr_sum_up = tvm.compute(data.shape, lambda i, j, k, l: tvm.power( - (bias + (alpha * sqr_sum[i, j, k, l] / size)), beta)) - - return topi.broadcast_div(data, sqr_sum_up) + return cpp.nn.lrn(data, size, axis, alpha, beta, bias) diff --git a/topi/python/topi/rocm/nn.py b/topi/python/topi/rocm/nn.py index d9c529155f7b..ca935740f2e2 100644 --- a/topi/python/topi/rocm/nn.py +++ b/topi/python/topi/rocm/nn.py @@ -1,13 +1,18 @@ """scheduler for normalization functions on rocm backend""" from __future__ import absolute_import as _abs -import topi +import tvm from .. import generic +from .. 
import cpp @generic.schedule_lrn.register(["rocm", "gpu"]) def schedule_lrn(outs): - return topi.cuda.schedule_lrn(outs) + target = tvm.target.current_target(allow_none=False) + cpp_target = cpp.TEST_create_target(target.target_name) + return cpp.rocm.schedule_lrn(cpp_target, outs) @generic.schedule_l2norm.register(["rocm", "gpu"]) def schedule_l2norm(outs): - return topi.cuda.schedule_l2norm(outs) + target = tvm.target.current_target(allow_none=False) + cpp_target = cpp.TEST_create_target(target.target_name) + return cpp.rocm.schedule_l2norm(cpp_target, outs) diff --git a/topi/src/topi.cc b/topi/src/topi.cc index 4169f5f563ad..cd3ad66bb45a 100644 --- a/topi/src/topi.cc +++ b/topi/src/topi.cc @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include #include @@ -39,6 +41,7 @@ #include #include #include +#include #include #include @@ -46,6 +49,7 @@ #include #include +#include namespace topi { @@ -359,6 +363,20 @@ TVM_REGISTER_GLOBAL("topi.nn.log_softmax") *rv = nn::log_softmax(args[0]); }); +/* Ops from nn/l2_norm.h */ +TVM_REGISTER_GLOBAL("topi.nn.l2norm_instance") +.set_body([](TVMArgs args, TVMRetValue *rv) { + *rv = nn::l2norm_instance(args[0], static_cast(args[1]), args[2]); + }); + +TVM_REGISTER_GLOBAL("topi.nn.lrn") +.set_body([](TVMArgs args, TVMRetValue *rv) { + *rv = nn::lrn(args[0], args[1], args[2], + static_cast(args[3]), + static_cast(args[4]), + static_cast(args[5])); + }); + TVM_REGISTER_GLOBAL("topi.vision.reorg") .set_body([](TVMArgs args, TVMRetValue *rv) { *rv = vision::reorg(args[0], args[1]); @@ -435,6 +453,17 @@ TVM_REGISTER_GLOBAL("topi.rocm.schedule_region") .set_body([](TVMArgs args, TVMRetValue *rv) { *rv = topi::rocm::schedule_region(args[0], args[1]); }); + +TVM_REGISTER_GLOBAL("topi.rocm.schedule_lrn") +.set_body([](TVMArgs args, TVMRetValue *rv) { + *rv = topi::rocm::schedule_lrn(args[0], args[1]); + }); + +TVM_REGISTER_GLOBAL("topi.rocm.schedule_l2norm") +.set_body([](TVMArgs args, TVMRetValue *rv) { + *rv = topi::rocm::schedule_l2norm(args[0], args[1]); + }); + /* CUDA schedules */ TVM_REGISTER_GLOBAL("topi.cuda.dense_cuda") .set_body([](TVMArgs args, TVMRetValue *rv) { @@ -481,6 +510,16 @@ TVM_REGISTER_GLOBAL("topi.cuda.schedule_region") *rv = topi::cuda::schedule_region(args[0], args[1]); }); +TVM_REGISTER_GLOBAL("topi.cuda.schedule_lrn") +.set_body([](TVMArgs args, TVMRetValue *rv) { + *rv = topi::cuda::schedule_lrn(args[0], args[1]); + }); + +TVM_REGISTER_GLOBAL("topi.cuda.schedule_l2norm") +.set_body([](TVMArgs args, TVMRetValue *rv) { + *rv = topi::cuda::schedule_l2norm(args[0], args[1]); + }); + /*! \brief Builder function for instantiating schedules. 
*/ using FTVMScheduleBuilder = std::function< tvm::Schedule(const tvm::Target& target, const tvm::Array& outs)>; diff --git a/topi/tests/python/test_topi_l2norm.py b/topi/tests/python/test_topi_l2norm.py index 182099ff9367..999b82169149 100644 --- a/topi/tests/python/test_topi_l2norm.py +++ b/topi/tests/python/test_topi_l2norm.py @@ -47,7 +47,10 @@ def check_device(device): return print("Running on target: %s" % device) with tvm.target.create(device): - s = topi.generic.schedule_l2norm(B) + if device == 'llvm': + s = topi.generic.schedule_l2norm([B]) + else: + s = topi.cuda.schedule_l2norm([B]) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx) f = tvm.build(s, [A, B], device) @@ -59,7 +62,7 @@ def check_device(device): def test_l2norm(): verify_l2norm(1, 3, 20, 20, 0.001) - verify_l2norm(1, 3, 20, 20, 0.001, 1) + verify_l2norm(1, 3, 20, 20, 0.001, (1,)) verify_l2norm(1, 3, 20, 20, 0.001, (1, 2)) verify_l2norm(1, 3, 20, 20, 0.001, (2, 3)) verify_l2norm(1, 3, 20, 20, 0.001, (0, 3)) diff --git a/topi/tests/python/test_topi_lrn.py b/topi/tests/python/test_topi_lrn.py index 596e5747a6c5..69c3fa404b71 100644 --- a/topi/tests/python/test_topi_lrn.py +++ b/topi/tests/python/test_topi_lrn.py @@ -70,13 +70,16 @@ def verify_lrn(shape, size, axis, bias, alpha, beta): b_np = lrn_python(a_np, size, axis, bias, alpha, beta) def check_device(device): - ctx = tvm.context(device, 0) - if not ctx.exist: + if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) with tvm.target.create(device): - s = topi.generic.schedule_lrn(B) + if device == 'llvm': + s = topi.generic.schedule_lrn([B]) + else: + s = topi.cuda.schedule_lrn([B]) + ctx = tvm.context(device, 0) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx) f = tvm.build(s, [A, B], device) @@ -87,9 +90,9 @@ def check_device(device): check_device(device) def test_lrn(): - verify_lrn((1, 3, 5, 5), 3, 1, 1, 1, 0.5) - verify_lrn((1, 3, 5, 5), 3, 3, 1, 1, 0.5) - verify_lrn((1, 3, 20, 20), 3, 1, 2, 1, 0.75) + verify_lrn((1, 3, 5, 5), 3, 1, 1.0, 1.0, 0.5) + verify_lrn((1, 3, 5, 5), 3, 3, 1.0, 1.0, 0.5) + verify_lrn((1, 3, 20, 20), 3, 1, 2.0, 1.0, 0.75) if __name__ == "__main__": test_lrn() diff --git a/topi/tests/python_cpp/test_topi_l2norm.py b/topi/tests/python_cpp/test_topi_l2norm.py new file mode 100644 index 000000000000..5e81406d2177 --- /dev/null +++ b/topi/tests/python_cpp/test_topi_l2norm.py @@ -0,0 +1,75 @@ +"""Test code for l2 normalization""" +import os +import numpy as np +import tvm +import topi +import logging +from topi.util import get_const_tuple + +def l2norm_instance_python(a_np, eps, axis=None): + """L2 norm operator in NCHW layout. 
+ + Parameters + ---------- + a_np : numpy.ndarray + 4-D with shape [batch, in_channel, in_height, in_width] + + eps : float + epsilon constant value + axis : list of int + axis over the normalization applied + + Returns + ------- + l2norm_out : np.ndarray + 4-D with shape [batch, out_channel, out_height, out_width] + """ + batch, axis1, axis2, axis3 = a_np.shape + sqr_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) + sqrt_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) + l2norm_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) + dot_value = np.power(a_np, 2.0) + sqr_sum = np.sum(dot_value, axis, keepdims=True) + sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) + return np.divide(a_np, sqrt_sum) + +def verify_l2norm(n, c, h, w, eps, axis=None): + '''Verify l2 normalization operator by comparing outputs from tvm and numpy implementation''' + A = tvm.placeholder((n, c, h, w), name='A') + B = topi.cpp.nn.l2norm_instance(A, eps, axis) + dtype = A.dtype + + a_np = np.random.uniform(size=(n, c, h, w)).astype(dtype) + b_np = l2norm_instance_python(a_np, eps, axis) + + def check_device(device): + if not tvm.module.enabled(device): + print("Skip because %s is not enabled" % device) + return + print("Running on target: %s" % device) + target = topi.cpp.TEST_create_target(device) + if device == "llvm": + s = topi.cpp.generic.default_schedule(target, [B], False) + else: + s = topi.cpp.cuda.schedule_l2norm(target, [B]) + ctx = tvm.context(device, 0) + a = tvm.nd.array(a_np, ctx) + b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) + func = tvm.build(s, [A, B], device, name="l2_norm") + func(a, b) + np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) + + for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm']: + check_device(device) + +def test_l2_norm(): + verify_l2norm(1, 3, 20, 20, 0.001) + verify_l2norm(1, 3, 20, 20, 0.001, (1,)) + verify_l2norm(1, 3, 20, 20, 0.001, (1, 2)) + verify_l2norm(1, 3, 20, 20, 0.001, (2, 3)) + verify_l2norm(1, 3, 20, 20, 0.001, (0, 3)) + verify_l2norm(1, 3, 20, 20, 0.001, (0, 2, 3)) + +if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) + test_l2_norm() diff --git a/topi/tests/python_cpp/test_topi_lrn.py b/topi/tests/python_cpp/test_topi_lrn.py new file mode 100644 index 000000000000..69c66c028c69 --- /dev/null +++ b/topi/tests/python_cpp/test_topi_lrn.py @@ -0,0 +1,101 @@ +"""Test code for LRN""" +import os +import numpy as np +import tvm +import topi +import logging +from topi.util import get_const_tuple + +def lrn_python(a_np, size, axis, bias, alpha, beta): + """Local response norm operator in NCHW layout. + + Parameters + ---------- + a_np : numpy.ndarray + 4-D with shape [batch, in_channel, in_height, in_width] + + size : int + normalisation window size + + axis : int + input data layout channel axis + + bias : float + offset to avoid dividing by 0. 
constant value + + alpha : float + contant valie + + beta : float + exponent constant value + + Returns + ------- + b_np : np.ndarray + 4-D with shape [batch, out_channel, out_height, out_width] + """ + axis0, axis1, axis2, axis3 = a_np.shape + radius = size // 2 + sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) + sqr_sum_up = np.zeros(shape=a_np.shape).astype(a_np.dtype) + lrn_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) + def sum_dot_values(i, j, k, l): + axis_size = a_np.shape[axis] + if (axis == 1): + #NCHW layout + sum_start = j-radius if j-radius >= 0 else 0 + sum_end = j+radius+1 if j+radius+1 < axis_size else axis_size + sqr_sum[i, j, k, l] = sum(a_np[i, sum_start:sum_end, k, l] * \ + a_np[i, sum_start:sum_end, k, l]) + elif (axis == 3): + #NHWC layout + sum_start = l-radius if l-radius >= 0 else 0 + sum_end = l+radius+1 if l+radius+1 < axis_size else axis_size + sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ + a_np[i, j, k, sum_start:sum_end]) + + for i in range(axis0): + for j in range(axis1): + for k in range(axis2): + for l in range(axis3): + sum_dot_values(i, j, k, l) + + sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) + return np.divide(a_np, sqr_sum_up) + +def verify_lrn(shape, size, axis, bias, alpha, beta): + '''Verify Local response normalization operator by comparing outputs from tvm and numpy implementation''' + A = tvm.placeholder(shape, name='A') + B = topi.cpp.nn.lrn(A, size, axis, alpha, beta, bias) + dtype = A.dtype + + a_np = np.random.uniform(size=shape).astype(dtype) + b_np = lrn_python(a_np, size, axis, bias, alpha, beta) + def check_device(device): + if not tvm.module.enabled(device): + print("Skip because %s is not enabled" % device) + return + print("Running on target: %s" % device) + target = topi.cpp.TEST_create_target(device) + if device == "llvm": + s = topi.cpp.generic.default_schedule(target, [B], False) + else: + s = topi.cpp.cuda.schedule_lrn(target, [B]) + ctx = tvm.context(device, 0) + a = tvm.nd.array(a_np, ctx) + b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx) + f = tvm.build(s, [A, B], device) + f(a, b) + np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-1) + + for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm']: + check_device(device) + +def test_lrn(): + verify_lrn((1, 3, 5, 5), 3, 3, 1.0, 1.0, 0.5) + verify_lrn((1, 3, 5, 5), 3, 3, 1.0, 1.0, 0.5) + verify_lrn((1, 3, 20, 20), 3, 1, 2.0, 1.0, 0.75) + +if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) + test_lrn() From d9e5e779dcd8f14c094fdc471ce2ffd3e38e54ea Mon Sep 17 00:00:00 2001 From: Pariksheet Date: Fri, 11 May 2018 15:42:25 +0530 Subject: [PATCH 02/12] Sanity check issue fixed --- topi/include/topi/nn.h | 2 +- topi/include/topi/nn/local_response_norm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/topi/include/topi/nn.h b/topi/include/topi/nn.h index 2459eb515707..f893e3bdaa6f 100644 --- a/topi/include/topi/nn.h +++ b/topi/include/topi/nn.h @@ -1,7 +1,7 @@ /*! * Copyright (c) 2017 by Contributors * \brief NN op constructions - * \file topi/nn.h + * \file */ #ifndef TOPI_NN_H_ #define TOPI_NN_H_ diff --git a/topi/include/topi/nn/local_response_norm.h b/topi/include/topi/nn/local_response_norm.h index 0e31c7c0515d..c6b044b78280 100644 --- a/topi/include/topi/nn/local_response_norm.h +++ b/topi/include/topi/nn/local_response_norm.h @@ -1,7 +1,7 @@ /*! 
* Copyright (c) 2018 by Contributors * \brief local response normalization op constructions - * \file nn/local_response_normalization.h + * \file nn/local_response_norm.h */ #ifndef TOPI_NN_LOCAL_RESPONSE_NORM_H_ #define TOPI_NN_LOCAL_RESPONSE_NORM_H_ From 9dfab018f10f18989941f31a226cbff7f636ad1b Mon Sep 17 00:00:00 2001 From: Pariksheet Date: Wed, 30 May 2018 14:06:11 +0530 Subject: [PATCH 03/12] nnvm support for lrn and l2norm ops added --- nnvm/include/nnvm/top/nn.h | 35 +++++ nnvm/python/nnvm/top/nn.py | 33 +++++ nnvm/src/top/nn/nn.cc | 46 +++++++ nnvm/tests/python/compiler/test_top_level1.py | 125 ++++++++++++++++++ 4 files changed, 239 insertions(+) diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h index bbdb3b9c4f12..a1659cc3b0d3 100644 --- a/nnvm/include/nnvm/top/nn.h +++ b/nnvm/include/nnvm/top/nn.h @@ -368,6 +368,41 @@ struct NMSParam : public dmlc::Parameter { } }; +struct LrnParam : public dmlc::Parameter { + int size; + int axis; + float alpha; + float beta; + float bias; + + DMLC_DECLARE_PARAMETER(LrnParam) { + DMLC_DECLARE_FIELD(size) + .describe("The size of the local region to be considered for normalization."); + DMLC_DECLARE_FIELD(axis) + .describe("input data layout channel axis"); + DMLC_DECLARE_FIELD(alpha) + .describe("alpha constant."); + DMLC_DECLARE_FIELD(beta) + .describe("beta constant."); + DMLC_DECLARE_FIELD(bias) + .describe("bias constant."); + } + // constants + static const constexpr int kData = 0; +}; + +struct L2normParam : public dmlc::Parameter { + float eps; + Tuple axis; + + DMLC_DECLARE_PARAMETER(L2normParam) { + DMLC_DECLARE_FIELD(eps) + .describe("float type epsilon value."); + DMLC_DECLARE_FIELD(axis) + .describe("axis over the normalization applied"); + } +}; + } // namespace top } // namespace nnvm diff --git a/nnvm/python/nnvm/top/nn.py b/nnvm/python/nnvm/top/nn.py index b7e0d0952888..614d4982f715 100644 --- a/nnvm/python/nnvm/top/nn.py +++ b/nnvm/python/nnvm/top/nn.py @@ -243,3 +243,36 @@ def schedule_upsampling(_, outs, target): return topi.generic.schedule_injective(outs) reg.register_pattern("upsampling", OpPattern.INJECTIVE) + +@reg.register_compute("lrn") +def compute_lrn(attrs, inputs, _): + """Compute definition of lrn""" + size = attrs.get_int("size") + axis = attrs.get_int("axis") + alpha = attrs.get_float("alpha") + beta = attrs.get_float("beta") + bias = attrs.get_float("bias") + return topi.nn.lrn(inputs[0], size, axis, alpha, beta, bias) + +@reg.register_schedule("lrn") +def schedule_lrn(attrs, outs, target): + """Schedule definition of lrn""" + with tvm.target.create(target): + return topi.generic.schedule_lrn(outs) + +reg.register_pattern("lrn", OpPattern.OUT_ELEMWISE_FUSABLE) + +@reg.register_compute("l2norm") +def compute_l2norm(attrs, inputs, _): + """Compute definition of l2norm""" + eps = attrs.get_float("eps") + axis = attrs.get_int_tuple("axis") + return topi.nn.l2norm_instance(inputs[0], eps, axis) + +@reg.register_schedule("l2norm") +def schedule_l2norm(attrs, outs, target): + """Schedule definition of l2norm""" + with tvm.target.create(target): + return topi.generic.schedule_l2norm(outs) + +reg.register_pattern("l2norm", OpPattern.OUT_ELEMWISE_FUSABLE) \ No newline at end of file diff --git a/nnvm/src/top/nn/nn.cc b/nnvm/src/top/nn/nn.cc index cedfb210855e..b343ab695c05 100644 --- a/nnvm/src/top/nn/nn.cc +++ b/nnvm/src/top/nn/nn.cc @@ -712,5 +712,51 @@ the input array by output[n, c, h, w, C] = data[n, C*16+c, h, w] }) .set_support_level(1); +DMLC_REGISTER_PARAMETER(LrnParam); + +inline bool 
LrnInferShape(const nnvm::NodeAttrs& attrs, + std::vector* in_shape, + std::vector* out_shape) { + TShape dshape = (*in_shape)[0]; + TShape oshape = dshape; + + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); + return true; +} + +NNVM_REGISTER_OP(lrn) +.describe(R"code(LRN layer)code" NNVM_ADD_FILELINE) +.add_argument("data", "4D Tesndor", "Input data.") +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FInferShape", LrnInferShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_support_level(1); + +DMLC_REGISTER_PARAMETER(L2normParam); + +inline bool L2normInferShape(const nnvm::NodeAttrs& attrs, + std::vector* in_shape, + std::vector* out_shape) { + TShape dshape = (*in_shape)[0]; + TShape oshape = dshape; + + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); + return true; +} + +NNVM_REGISTER_OP(l2norm) +.describe(R"code(L2NORM layer)code" NNVM_ADD_FILELINE) +.add_argument("data", "4D Tesndor", "Input data.") +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FInferShape", L2normInferShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_support_level(1); + } // namespace top } // namespace nnvm diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index 3058d6ccfc7b..5313ce3c601f 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -365,6 +365,129 @@ def forward(x): inputs = [('x', (1, 3, 28, 28), x)] helper(y, inputs, dtype, forward) +def verify_lrn(n, c, h, w, size, axis, bias, alpha, beta): + x = sym.Variable("x") + y = sym.lrn(x, size=size, axis=axis, bias=bias, alpha=alpha, beta=beta) + dtype = "float32" + dshape = (n, c, h, w) + x_np = np.random.uniform(size=dshape).astype(dtype) + + def lrn_python(a_np, size, axis, bias, alpha, beta): + """Local response norm operator in NCHW layout. + + Parameters + ---------- + a_np : numpy.ndarray + 4-D with shape [batch, in_channel, in_height, in_width] + + size : int + normalisation window size + + axis : int + input data layout channel axis + + bias : float + offset to avoid dividing by 0. 
constant value + + alpha : float + contant valie + + beta : float + exponent constant value + + Returns + ------- + b_np : np.ndarray + 4-D with shape [batch, out_channel, out_height, out_width] + """ + axis0, axis1, axis2, axis3 = a_np.shape + radius = size // 2 + sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) + sqr_sum_up = np.zeros(shape=a_np.shape).astype(a_np.dtype) + lrn_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) + def sum_dot_values(i, j, k, l): + axis_size = a_np.shape[axis] + if (axis == 1): + #NCHW layout + sum_start = j-radius if j-radius >= 0 else 0 + sum_end = j+radius+1 if j+radius+1 < axis_size else axis_size + sqr_sum[i, j, k, l] = sum(a_np[i, sum_start:sum_end, k, l] * \ + a_np[i, sum_start:sum_end, k, l]) + elif (axis == 3): + #NHWC layout + sum_start = l-radius if l-radius >= 0 else 0 + sum_end = l+radius+1 if l+radius+1 < axis_size else axis_size + sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ + a_np[i, j, k, sum_start:sum_end]) + + for i in range(axis0): + for j in range(axis1): + for k in range(axis2): + for l in range(axis3): + sum_dot_values(i, j, k, l) + + sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) + return np.divide(a_np, sqr_sum_up) + + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape}) + m = graph_runtime.create(graph, lib, ctx) + m.run(x=x_np) + out = m.get_output(0, tvm.nd.empty(dshape)) + out_np = np.zeros(shape=(n, c, h, w)).astype(dtype) + out_np = lrn_python(x_np, size, axis, bias, alpha, beta) + np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) + +def verify_l2norm(batch, channel, height, width, eps, axis): + x = sym.Variable("x") + y = sym.l2norm(x, eps=eps, axis=axis) + dtype = "float32" + dshape = (batch, channel, height, width) + x_np = np.random.uniform(size=dshape).astype(dtype) + + def l2norm_instance_python(a_np, eps, axis=None): + """L2 norm operator in NCHW layout. 
+ + Parameters + ---------- + a_np : numpy.ndarray + 4-D with shape [batch, in_channel, in_height, in_width] + + eps : float + epsilon constant value + axis : list of int + axis over the normalization applied + + Returns + ------- + l2norm_out : np.ndarray + 4-D with shape [batch, out_channel, out_height, out_width] + """ + batch, axis1, axis2, axis3 = a_np.shape + sqr_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) + sqrt_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) + l2norm_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) + dot_value = np.power(a_np, 2.0) + sqr_sum = np.sum(dot_value, axis, keepdims=True) + sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) + return np.divide(a_np, sqrt_sum) + + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape}) + m = graph_runtime.create(graph, lib, ctx) + m.run(x=x_np) + out = m.get_output(0, tvm.nd.empty(dshape)) + out_np = np.zeros(shape=(batch, channel, height, width)).astype(dtype) + out_np = l2norm_instance_python(x_np, eps, axis) + np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) + +def test_lrn(): + verify_lrn(1, 3, 20, 20, 3, 1, 1.0, 1.0, 0.5) + verify_lrn(1, 3, 20, 20, 3, 1, 2.0, 1.0, 0.75) + +def test_l2norm(): + verify_l2norm(1, 3, 20, 20, 0.001, (1,)) + verify_l2norm(1, 3, 20, 20, 0.001, (1, 2)) if __name__ == "__main__": test_split() @@ -384,3 +507,5 @@ def forward(x): test_softmax() test_squeeze() test_pad() + test_lrn() + test_l2norm() From 620f9efc1a7e37d150f9e46e373f5be2aa5b8874 Mon Sep 17 00:00:00 2001 From: Pariksheet Date: Wed, 30 May 2018 14:17:30 +0530 Subject: [PATCH 04/12] lint error fixed --- nnvm/python/nnvm/top/nn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nnvm/python/nnvm/top/nn.py b/nnvm/python/nnvm/top/nn.py index 614d4982f715..d6b413e1204d 100644 --- a/nnvm/python/nnvm/top/nn.py +++ b/nnvm/python/nnvm/top/nn.py @@ -275,4 +275,4 @@ def schedule_l2norm(attrs, outs, target): with tvm.target.create(target): return topi.generic.schedule_l2norm(outs) -reg.register_pattern("l2norm", OpPattern.OUT_ELEMWISE_FUSABLE) \ No newline at end of file +reg.register_pattern("l2norm", OpPattern.OUT_ELEMWISE_FUSABLE) From 8886fb91795b01c0f353e30b9b10a3e33d9005e6 Mon Sep 17 00:00:00 2001 From: Pariksheet Date: Mon, 4 Jun 2018 10:44:22 +0530 Subject: [PATCH 05/12] Build check --- nnvm/tests/python/compiler/test_top_level1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index 5313ce3c601f..a79345f63e24 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -373,7 +373,7 @@ def verify_lrn(n, c, h, w, size, axis, bias, alpha, beta): x_np = np.random.uniform(size=dshape).astype(dtype) def lrn_python(a_np, size, axis, bias, alpha, beta): - """Local response norm operator in NCHW layout. + """Local response norm operator numpy implementation. 
Parameters ---------- From c2dcc600cad9db780e44cf06f774b0df782d0e01 Mon Sep 17 00:00:00 2001 From: Pariksheet Date: Tue, 5 Jun 2018 18:48:25 +0530 Subject: [PATCH 06/12] build recheck --- nnvm/tests/python/compiler/test_top_level1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index a79345f63e24..fff7381a6f46 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -373,7 +373,7 @@ def verify_lrn(n, c, h, w, size, axis, bias, alpha, beta): x_np = np.random.uniform(size=dshape).astype(dtype) def lrn_python(a_np, size, axis, bias, alpha, beta): - """Local response norm operator numpy implementation. + """Local response normalization operator numpy implementation. Parameters ---------- From c41f9980b2d9260e0b8d7275d5b23f628e74556b Mon Sep 17 00:00:00 2001 From: Pariksheet Date: Thu, 7 Jun 2018 11:38:06 +0530 Subject: [PATCH 07/12] Review comments updated --- nnvm/include/nnvm/top/nn.h | 6 +-- nnvm/tests/python/compiler/test_top_level1.py | 2 +- .../topi/cuda/{nn.h => normalization.h} | 40 +++++++++---------- topi/include/topi/nn.h | 2 +- topi/include/topi/nn/l2_norm.h | 2 +- topi/include/topi/nn/local_response_norm.h | 4 +- .../topi/rocm/{nn.h => normalization.h} | 8 ++-- topi/src/topi.cc | 4 +- 8 files changed, 33 insertions(+), 35 deletions(-) rename topi/include/topi/cuda/{nn.h => normalization.h} (75%) rename topi/include/topi/rocm/{nn.h => normalization.h} (87%) diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h index a1659cc3b0d3..ef2f3b2a829b 100644 --- a/nnvm/include/nnvm/top/nn.h +++ b/nnvm/include/nnvm/top/nn.h @@ -381,11 +381,11 @@ struct LrnParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(axis) .describe("input data layout channel axis"); DMLC_DECLARE_FIELD(alpha) - .describe("alpha constant."); + .describe("The scaling parameter."); DMLC_DECLARE_FIELD(beta) - .describe("beta constant."); + .describe("The exponent parameter."); DMLC_DECLARE_FIELD(bias) - .describe("bias constant."); + .describe("The offset parameter."); } // constants static const constexpr int kData = 0; diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index fff7381a6f46..683a2ec5dc35 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -390,7 +390,7 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): offset to avoid dividing by 0. constant value alpha : float - contant valie + contant value beta : float exponent constant value diff --git a/topi/include/topi/cuda/nn.h b/topi/include/topi/cuda/normalization.h similarity index 75% rename from topi/include/topi/cuda/nn.h rename to topi/include/topi/cuda/normalization.h index a425a47b5ce7..2816c99f8e25 100644 --- a/topi/include/topi/cuda/nn.h +++ b/topi/include/topi/cuda/normalization.h @@ -1,10 +1,10 @@ /*! 
* Copyright (c) 2018 by Contributors -* \file cuda/nn.h +* \file cuda/normalization.h * \brief CUDA schedule for lrn and l2 normalization operations */ -#ifndef TOPI_CUDA_NN_H_ -#define TOPI_CUDA_NN_H_ +#ifndef TOPI_CUDA_NORMALIZATION_H_ +#define TOPI_CUDA_NORMALIZATION_H_ #include "tvm/tvm.h" #include "tvm/build_module.h" @@ -26,19 +26,19 @@ inline Schedule schedule_lrn(const Target &target, const Array& outs) { for (auto t : outs) { out_ops.push_back(t->op); } - auto s = create_schedule(out_ops); - auto num_thread = 64; - auto block_x = tvm::thread_axis(Range(), "blockIdx.x"); - auto thread_x = tvm::thread_axis(Range(0, num_thread), "threadIdx.x"); - auto lrn = outs[0]; - auto sqr_sum_up = lrn->op->InputTensors()[1]; - auto sqr_sum = sqr_sum_up->op->InputTensors()[0]; - auto set_pad = sqr_sum->op->InputTensors()[0]; + Schedule s = create_schedule(out_ops); + int num_thread = 64; + IterVar block_x = tvm::thread_axis(Range(), "blockIdx.x"); + IterVar thread_x = tvm::thread_axis(Range(0, num_thread), "threadIdx.x"); + Tensor lrn = outs[0]; + Tensor sqr_sum_up = lrn->op->InputTensors()[1]; + Tensor sqr_sum = sqr_sum_up->op->InputTensors()[0]; + Tensor set_pad = sqr_sum->op->InputTensors()[0]; s[set_pad].bind(set_pad->op.as()->axis[0], block_x); - auto rxk = sqr_sum->op.as()->reduce_axis[0]; + IterVar rxk = sqr_sum->op.as()->reduce_axis[0]; IterVar xko, xki; s[sqr_sum].split(rxk, num_thread, &xko, &xki); - auto srf = s.rfactor(sqr_sum, xki)[0]; + Tensor srf = s.rfactor(sqr_sum, xki)[0]; s[sqr_sum].bind(s[sqr_sum]->op.as()->axis[0], block_x); s[sqr_sum].bind(s[sqr_sum]->op.as()->reduce_axis[0], thread_x); s[srf].compute_at(s[sqr_sum], s[sqr_sum]->op.as()->reduce_axis[0]); @@ -64,7 +64,7 @@ inline Schedule schedule_l2norm(const Target &target, const Array& outs) for (auto t : outs) { out_ops.push_back(t->op); } - auto s = create_schedule(out_ops); + Schedule s = create_schedule(out_ops); std::function traverse; traverse = [&](const Operation& op) { @@ -89,10 +89,10 @@ inline Schedule schedule_l2norm(const Target &target, const Array& outs) }; traverse(outs[0]->op); - auto num_thread = 64; - auto l2norm = outs[0]; - auto block_x = tvm::thread_axis(Range(), "blockIdx.x"); - auto thread_x = tvm::thread_axis(Range(0, num_thread), "threadIdx.x"); + int num_thread = 64; + Tensor l2norm = outs[0]; + IterVar block_x = tvm::thread_axis(Range(), "blockIdx.x"); + IterVar thread_x = tvm::thread_axis(Range(0, num_thread), "threadIdx.x"); IterVar xto, xti; s[l2norm].split_by_nparts(l2norm->op.as()->axis[1], num_thread, &xto, &xti); s[l2norm].bind(l2norm->op.as()->axis[0], block_x); @@ -101,7 +101,5 @@ inline Schedule schedule_l2norm(const Target &target, const Array& outs) } } // namespace cuda } // namespace topi -#endif // TOPI_CUDA_NN_H_ - - +#endif // TOPI_CUDA_NORMALIZATION_H_ diff --git a/topi/include/topi/nn.h b/topi/include/topi/nn.h index f893e3bdaa6f..2459eb515707 100644 --- a/topi/include/topi/nn.h +++ b/topi/include/topi/nn.h @@ -1,7 +1,7 @@ /*! * Copyright (c) 2017 by Contributors * \brief NN op constructions - * \file + * \file topi/nn.h */ #ifndef TOPI_NN_H_ #define TOPI_NN_H_ diff --git a/topi/include/topi/nn/l2_norm.h b/topi/include/topi/nn/l2_norm.h index e1fe25cb6040..c8f8be455591 100644 --- a/topi/include/topi/nn/l2_norm.h +++ b/topi/include/topi/nn/l2_norm.h @@ -15,7 +15,7 @@ namespace nn { using namespace tvm; /*! -* \brief L2 normalization inference operator +* \brief L2 normalization inference operator * * \param data The input tensor. 
4-D with shape [batch, channel, height, width] * \param eps Epsilon to prevent div by 0 diff --git a/topi/include/topi/nn/local_response_norm.h b/topi/include/topi/nn/local_response_norm.h index c6b044b78280..339fb6dae879 100644 --- a/topi/include/topi/nn/local_response_norm.h +++ b/topi/include/topi/nn/local_response_norm.h @@ -38,8 +38,8 @@ inline Tensor lrn(const Tensor& data, std::string name = "tensor", std::string tag = kBroadcast) { CHECK_EQ(data->shape.size(), 4) << "LRN requires 4-D input"; - assert(size % 2 == 1); - assert(axis == 1 || axis == 3); + CHECK_EQ(size % 2, 1) << "size should be odd number"; + CHECK_EQ((axis - 1) && (axis - 3), 0) << "axis should be 1 or 3 for NCHW and NHWC"; auto input_shape = data->shape; Array pad_before{ 0, 0, 0, 0}; Array pad_after{ 0, 0, 0, 0}; diff --git a/topi/include/topi/rocm/nn.h b/topi/include/topi/rocm/normalization.h similarity index 87% rename from topi/include/topi/rocm/nn.h rename to topi/include/topi/rocm/normalization.h index 98b74ad80593..8c6c31d462c4 100644 --- a/topi/include/topi/rocm/nn.h +++ b/topi/include/topi/rocm/normalization.h @@ -1,10 +1,10 @@ /*! * Copyright (c) 2018 by Contributors -* \file rocm/nn.h +* \file rocm/normalization.h * \brief rocm schedule for lrn and l2 normalization operations */ -#ifndef TOPI_ROCM_NN_H_ -#define TOPI_ROCM_NN_H_ +#ifndef TOPI_ROCM_NORMALIZATION_H_ +#define TOPI_ROCM_NORMALIZATION_H_ #include "tvm/tvm.h" #include "tvm/build_module.h" @@ -38,4 +38,4 @@ inline Schedule schedule_l2norm(const Target &target, const Array& outs) } } // namespace rocm } // namespace topi -#endif // TOPI_ROCM_NN_H_ +#endif // TOPI_ROCM_NORMALIZATION_H_ diff --git a/topi/src/topi.cc b/topi/src/topi.cc index cd3ad66bb45a..930d9d16260d 100644 --- a/topi/src/topi.cc +++ b/topi/src/topi.cc @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include @@ -49,7 +49,7 @@ #include #include -#include +#include namespace topi { From 5a44f63a1497d8709509b3162299f824bdec3fbf Mon Sep 17 00:00:00 2001 From: pariksheet Date: Fri, 15 Jun 2018 10:03:52 +0530 Subject: [PATCH 08/12] Review comments reworked --- nnvm/include/nnvm/top/nn.h | 12 ++-- nnvm/python/nnvm/top/nn.py | 18 +++--- nnvm/src/top/nn/nn.cc | 28 ++++----- nnvm/tests/python/compiler/test_top_level1.py | 63 +++++++++---------- topi/include/topi/cuda/normalization.h | 15 ++--- .../topi/nn/{l2_norm.h => l2_normalize.h} | 14 ++--- topi/include/topi/nn/local_response_norm.h | 2 +- topi/include/topi/rocm/normalization.h | 6 +- topi/python/topi/cuda/__init__.py | 2 +- topi/python/topi/cuda/nn.py | 10 +-- topi/python/topi/generic/nn.py | 6 +- topi/python/topi/nn/__init__.py | 2 +- .../topi/nn/{l2_norm.py => l2_normalize.py} | 8 +-- topi/python/topi/rocm/nn.py | 6 +- topi/src/topi.cc | 16 ++--- topi/tests/python/test_topi_l2norm.py | 46 +++++++------- topi/tests/python/test_topi_lrn.py | 11 ++-- topi/tests/python_cpp/test_topi_l2norm.py | 45 +++++++------ topi/tests/python_cpp/test_topi_lrn.py | 14 ++--- 19 files changed, 155 insertions(+), 169 deletions(-) rename topi/include/topi/nn/{l2_norm.h => l2_normalize.h} (80%) rename topi/python/topi/nn/{l2_norm.py => l2_normalize.py} (71%) diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h index ef2f3b2a829b..6687535c4f85 100644 --- a/nnvm/include/nnvm/top/nn.h +++ b/nnvm/include/nnvm/top/nn.h @@ -368,14 +368,14 @@ struct NMSParam : public dmlc::Parameter { } }; -struct LrnParam : public dmlc::Parameter { +struct LRNParam : public dmlc::Parameter { int size; int axis; float alpha; float beta; 
float bias; - DMLC_DECLARE_PARAMETER(LrnParam) { + DMLC_DECLARE_PARAMETER(LRNParam) { DMLC_DECLARE_FIELD(size) .describe("The size of the local region to be considered for normalization."); DMLC_DECLARE_FIELD(axis) @@ -391,11 +391,11 @@ struct LrnParam : public dmlc::Parameter { static const constexpr int kData = 0; }; -struct L2normParam : public dmlc::Parameter { - float eps; - Tuple axis; +struct L2normalizeParam : public dmlc::Parameter { + float eps; + Tuple axis; - DMLC_DECLARE_PARAMETER(L2normParam) { + DMLC_DECLARE_PARAMETER(L2normalizeParam) { DMLC_DECLARE_FIELD(eps) .describe("float type epsilon value."); DMLC_DECLARE_FIELD(axis) diff --git a/nnvm/python/nnvm/top/nn.py b/nnvm/python/nnvm/top/nn.py index d6b413e1204d..37c014e705d9 100644 --- a/nnvm/python/nnvm/top/nn.py +++ b/nnvm/python/nnvm/top/nn.py @@ -262,17 +262,17 @@ def schedule_lrn(attrs, outs, target): reg.register_pattern("lrn", OpPattern.OUT_ELEMWISE_FUSABLE) -@reg.register_compute("l2norm") -def compute_l2norm(attrs, inputs, _): - """Compute definition of l2norm""" +@reg.register_compute("l2normalize") +def compute_l2normalize(attrs, inputs, _): + """Compute definition of l2normalize""" eps = attrs.get_float("eps") axis = attrs.get_int_tuple("axis") - return topi.nn.l2norm_instance(inputs[0], eps, axis) + return topi.nn.l2normalize_instance(inputs[0], eps, axis) -@reg.register_schedule("l2norm") -def schedule_l2norm(attrs, outs, target): - """Schedule definition of l2norm""" +@reg.register_schedule("l2normalize") +def schedule_l2normalize(attrs, outs, target): + """Schedule definition of l2normalize""" with tvm.target.create(target): - return topi.generic.schedule_l2norm(outs) + return topi.generic.schedule_l2normalize(outs) -reg.register_pattern("l2norm", OpPattern.OUT_ELEMWISE_FUSABLE) +reg.register_pattern("l2normalize", OpPattern.OUT_ELEMWISE_FUSABLE) diff --git a/nnvm/src/top/nn/nn.cc b/nnvm/src/top/nn/nn.cc index b343ab695c05..7f50f2b1f58a 100644 --- a/nnvm/src/top/nn/nn.cc +++ b/nnvm/src/top/nn/nn.cc @@ -712,9 +712,9 @@ the input array by output[n, c, h, w, C] = data[n, C*16+c, h, w] }) .set_support_level(1); -DMLC_REGISTER_PARAMETER(LrnParam); +DMLC_REGISTER_PARAMETER(LRNParam); -inline bool LrnInferShape(const nnvm::NodeAttrs& attrs, +inline bool LRNInferShape(const nnvm::NodeAttrs& attrs, std::vector* in_shape, std::vector* out_shape) { TShape dshape = (*in_shape)[0]; @@ -727,19 +727,19 @@ inline bool LrnInferShape(const nnvm::NodeAttrs& attrs, NNVM_REGISTER_OP(lrn) .describe(R"code(LRN layer)code" NNVM_ADD_FILELINE) .add_argument("data", "4D Tesndor", "Input data.") -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) .set_num_inputs(1) .set_num_outputs(1) -.set_attr("FInferShape", LrnInferShape) +.set_attr("FInferShape", LRNInferShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_support_level(1); -DMLC_REGISTER_PARAMETER(L2normParam); +DMLC_REGISTER_PARAMETER(L2normalizeParam); -inline bool L2normInferShape(const nnvm::NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { +inline bool L2normalizeInferShape(const nnvm::NodeAttrs& attrs, + std::vector* in_shape, + std::vector* out_shape) { TShape dshape = (*in_shape)[0]; TShape oshape = dshape; @@ -747,14 +747,14 @@ inline bool L2normInferShape(const nnvm::NodeAttrs& attrs, return true; } -NNVM_REGISTER_OP(l2norm) -.describe(R"code(L2NORM layer)code" NNVM_ADD_FILELINE) +NNVM_REGISTER_OP(l2normalize) +.describe(R"code(L2NORMALIZE 
layer)code" NNVM_ADD_FILELINE) .add_argument("data", "4D Tesndor", "Input data.") -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) .set_num_inputs(1) .set_num_outputs(1) -.set_attr("FInferShape", L2normInferShape) +.set_attr("FInferShape", L2normalizeInferShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_support_level(1); diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index 683a2ec5dc35..5cf661b412c4 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -365,15 +365,14 @@ def forward(x): inputs = [('x', (1, 3, 28, 28), x)] helper(y, inputs, dtype, forward) -def verify_lrn(n, c, h, w, size, axis, bias, alpha, beta): +def verify_lrn(ishape, size, axis, bias, alpha, beta): x = sym.Variable("x") y = sym.lrn(x, size=size, axis=axis, bias=bias, alpha=alpha, beta=beta) dtype = "float32" - dshape = (n, c, h, w) - x_np = np.random.uniform(size=dshape).astype(dtype) + x_np = np.random.uniform(size=ishape).astype(dtype) def lrn_python(a_np, size, axis, bias, alpha, beta): - """Local response normalization operator numpy implementation. + """Local response normalization operator in NCHW layout. Parameters ---------- @@ -381,7 +380,7 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): 4-D with shape [batch, in_channel, in_height, in_width] size : int - normalisation window size + normalization window size axis : int input data layout channel axis @@ -390,21 +389,19 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): offset to avoid dividing by 0. constant value alpha : float - contant value + constant value beta : float exponent constant value Returns ------- - b_np : np.ndarray + lrn_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ axis0, axis1, axis2, axis3 = a_np.shape radius = size // 2 sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - sqr_sum_up = np.zeros(shape=a_np.shape).astype(a_np.dtype) - lrn_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) def sum_dot_values(i, j, k, l): axis_size = a_np.shape[axis] if (axis == 1): @@ -427,26 +424,25 @@ def sum_dot_values(i, j, k, l): sum_dot_values(i, j, k, l) sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) - return np.divide(a_np, sqr_sum_up) + lrn_out = np.divide(a_np, sqr_sum_up) + return lrn_out for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape}) + graph, lib, _ = nnvm.compiler.build(y, target, {"x": ishape}) m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) - out = m.get_output(0, tvm.nd.empty(dshape)) - out_np = np.zeros(shape=(n, c, h, w)).astype(dtype) + out = m.get_output(0, tvm.nd.empty(ishape)) out_np = lrn_python(x_np, size, axis, bias, alpha, beta) np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) -def verify_l2norm(batch, channel, height, width, eps, axis): +def verify_l2normalize(ishape, eps, axis): x = sym.Variable("x") - y = sym.l2norm(x, eps=eps, axis=axis) + y = sym.l2normalize(x, eps=eps, axis=axis) dtype = "float32" - dshape = (batch, channel, height, width) - x_np = np.random.uniform(size=dshape).astype(dtype) + x_np = np.random.uniform(size=ishape).astype(dtype) - def l2norm_instance_python(a_np, eps, axis=None): - """L2 norm operator in NCHW layout. + def l2normalize_instance_python(a_np, eps, axis=None): + """L2 normalize operator in NCHW layout. 
Parameters ---------- @@ -460,34 +456,31 @@ def l2norm_instance_python(a_np, eps, axis=None): Returns ------- - l2norm_out : np.ndarray + l2normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - batch, axis1, axis2, axis3 = a_np.shape - sqr_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) - sqrt_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) - l2norm_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) + batch = a_np.shape[0] dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - return np.divide(a_np, sqrt_sum) + l2normalize_out = np.divide(a_np, sqrt_sum) + return l2normalize_out for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape}) + graph, lib, _ = nnvm.compiler.build(y, target, {"x": ishape}) m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) - out = m.get_output(0, tvm.nd.empty(dshape)) - out_np = np.zeros(shape=(batch, channel, height, width)).astype(dtype) - out_np = l2norm_instance_python(x_np, eps, axis) + out = m.get_output(0, tvm.nd.empty(ishape)) + out_np = l2normalize_instance_python(x_np, eps, axis) np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) def test_lrn(): - verify_lrn(1, 3, 20, 20, 3, 1, 1.0, 1.0, 0.5) - verify_lrn(1, 3, 20, 20, 3, 1, 2.0, 1.0, 0.75) + verify_lrn((1, 3, 20, 20), 3, 1, 1.0, 1.0, 0.5) + verify_lrn((1, 3, 20, 20), 3, 1, 2.0, 1.0, 0.75) -def test_l2norm(): - verify_l2norm(1, 3, 20, 20, 0.001, (1,)) - verify_l2norm(1, 3, 20, 20, 0.001, (1, 2)) +def test_l2normalize(): + verify_l2normalize((1, 3, 20, 20), 0.001, (1,)) + verify_l2normalize((1, 3, 20, 20), 0.001, (1, 2)) if __name__ == "__main__": test_split() @@ -508,4 +501,4 @@ def test_l2norm(): test_squeeze() test_pad() test_lrn() - test_l2norm() + test_l2normalize() diff --git a/topi/include/topi/cuda/normalization.h b/topi/include/topi/cuda/normalization.h index 2816c99f8e25..d189893450e0 100644 --- a/topi/include/topi/cuda/normalization.h +++ b/topi/include/topi/cuda/normalization.h @@ -1,7 +1,7 @@ /*! * Copyright (c) 2018 by Contributors * \file cuda/normalization.h -* \brief CUDA schedule for lrn and l2 normalization operations +* \brief CUDA schedule for LRN and l2 normalization operations */ #ifndef TOPI_CUDA_NORMALIZATION_H_ #define TOPI_CUDA_NORMALIZATION_H_ @@ -59,7 +59,7 @@ inline Schedule schedule_lrn(const Target &target, const Array& outs) { * * \return A schedule for the given ops. 
*/ -inline Schedule schedule_l2norm(const Target &target, const Array& outs) { +inline Schedule schedule_l2normalize(const Target &target, const Array& outs) { Array out_ops; for (auto t : outs) { out_ops.push_back(t->op); @@ -69,7 +69,7 @@ inline Schedule schedule_l2norm(const Target &target, const Array& outs) std::function traverse; traverse = [&](const Operation& op) { // Inline all one-to-one-mapping operators except the last stage (output) - if (is_injective(op->tag) || op->tag == "l2norm") { + if (is_injective(op->tag) || op->tag == "l2normalize") { if (!detail::contains(s->outputs, op)) { s[op].compute_inline(); } @@ -90,13 +90,14 @@ inline Schedule schedule_l2norm(const Target &target, const Array& outs) traverse(outs[0]->op); int num_thread = 64; - Tensor l2norm = outs[0]; + Tensor l2normalize = outs[0]; IterVar block_x = tvm::thread_axis(Range(), "blockIdx.x"); IterVar thread_x = tvm::thread_axis(Range(0, num_thread), "threadIdx.x"); IterVar xto, xti; - s[l2norm].split_by_nparts(l2norm->op.as()->axis[1], num_thread, &xto, &xti); - s[l2norm].bind(l2norm->op.as()->axis[0], block_x); - s[l2norm].bind(xto, thread_x); + s[l2normalize].split_by_nparts(l2normalize->op.as()->axis[1], + num_thread, &xto, &xti); + s[l2normalize].bind(l2normalize->op.as()->axis[0], block_x); + s[l2normalize].bind(xto, thread_x); return s; } } // namespace cuda diff --git a/topi/include/topi/nn/l2_norm.h b/topi/include/topi/nn/l2_normalize.h similarity index 80% rename from topi/include/topi/nn/l2_norm.h rename to topi/include/topi/nn/l2_normalize.h index c8f8be455591..96f31782ff88 100644 --- a/topi/include/topi/nn/l2_norm.h +++ b/topi/include/topi/nn/l2_normalize.h @@ -1,10 +1,10 @@ /*! * Copyright (c) 2018 by Contributors * \brief l2 normalization op constructions - * \file nn/l2_norm.h + * \file nn/l2_normalize.h */ -#ifndef TOPI_NN_L2_NORM_H_ -#define TOPI_NN_L2_NORM_H_ +#ifndef TOPI_NN_L2_NORMALIZE_H_ +#define TOPI_NN_L2_NORMALIZE_H_ #include #include @@ -25,12 +25,12 @@ using namespace tvm; * * \return A Tensor whose op member is the l2 normalization operation */ -inline Tensor l2norm_instance(const Tensor& data, +inline Tensor l2normalize_instance(const Tensor& data, float eps, const Array& axis, std::string name = "tensor", - std::string tag = "l2norm") { - CHECK_EQ(data->shape.size(), 4) << "L2 norm requires 4-D input"; + std::string tag = "l2normalize") { + CHECK_EQ(data->shape.size(), 4) << "L2 normalization requires 4-D input"; auto input_shape = data->shape; Tensor dot_value = pow(data, static_cast(2.0)); Tensor sum_value = topi::sum(dot_value, axis, true); @@ -43,4 +43,4 @@ inline Tensor l2norm_instance(const Tensor& data, } } // namespace nn } // namespace topi -#endif // TOPI_NN_L2_NORM_H_ +#endif // TOPI_NN_L2_NORMALIZE_H_ diff --git a/topi/include/topi/nn/local_response_norm.h b/topi/include/topi/nn/local_response_norm.h index 339fb6dae879..c956a9c253dc 100644 --- a/topi/include/topi/nn/local_response_norm.h +++ b/topi/include/topi/nn/local_response_norm.h @@ -39,7 +39,7 @@ inline Tensor lrn(const Tensor& data, std::string tag = kBroadcast) { CHECK_EQ(data->shape.size(), 4) << "LRN requires 4-D input"; CHECK_EQ(size % 2, 1) << "size should be odd number"; - CHECK_EQ((axis - 1) && (axis - 3), 0) << "axis should be 1 or 3 for NCHW and NHWC"; + CHECK(axis == 1 || axis == 3) << "axis should be 1 or 3 for NCHW and NHWC"; auto input_shape = data->shape; Array pad_before{ 0, 0, 0, 0}; Array pad_after{ 0, 0, 0, 0}; diff --git a/topi/include/topi/rocm/normalization.h 
b/topi/include/topi/rocm/normalization.h index 8c6c31d462c4..609177f4b8ef 100644 --- a/topi/include/topi/rocm/normalization.h +++ b/topi/include/topi/rocm/normalization.h @@ -1,7 +1,7 @@ /*! * Copyright (c) 2018 by Contributors * \file rocm/normalization.h -* \brief rocm schedule for lrn and l2 normalization operations +* \brief rocm schedule for LRN and l2 normalization operations */ #ifndef TOPI_ROCM_NORMALIZATION_H_ #define TOPI_ROCM_NORMALIZATION_H_ @@ -33,8 +33,8 @@ inline Schedule schedule_lrn(const Target &target, const Array& outs) { * * \return A schedule for the given ops. */ -inline Schedule schedule_l2norm(const Target &target, const Array& outs) { - return topi::cuda::schedule_l2norm(target, outs); +inline Schedule schedule_l2normalize(const Target &target, const Array& outs) { + return topi::cuda::schedule_l2normalize(target, outs); } } // namespace rocm } // namespace topi diff --git a/topi/python/topi/cuda/__init__.py b/topi/python/topi/cuda/__init__.py index 3b0e38c4d3f4..a6bab999ccc6 100644 --- a/topi/python/topi/cuda/__init__.py +++ b/topi/python/topi/cuda/__init__.py @@ -17,4 +17,4 @@ from .extern import schedule_extern from .vision import schedule_region from .vision import schedule_reorg -from .nn import schedule_lrn, schedule_l2norm +from .nn import schedule_lrn, schedule_l2normalize diff --git a/topi/python/topi/cuda/nn.py b/topi/python/topi/cuda/nn.py index c565698001fe..39486afbf2cf 100644 --- a/topi/python/topi/cuda/nn.py +++ b/topi/python/topi/cuda/nn.py @@ -25,14 +25,14 @@ def schedule_lrn(outs): cpp_target = cpp.TEST_create_target(target.target_name) return cpp.cuda.schedule_lrn(cpp_target, outs) -@generic.schedule_l2norm.register(["cuda"]) -def schedule_l2norm(outs): - """Schedule for L2norm +@generic.schedule_l2normalize.register(["cuda"]) +def schedule_l2normalize(outs): + """Schedule for L2normalize Parameters ---------- outs: Array of Tensor - The computation graph description of L2norm + The computation graph description of L2normalize in the format of an array of tensors. Returns @@ -42,4 +42,4 @@ def schedule_l2norm(outs): """ target = tvm.target.current_target(allow_none=False) cpp_target = cpp.TEST_create_target(target.target_name) - return cpp.cuda.schedule_l2norm(cpp_target, outs) + return cpp.cuda.schedule_l2normalize(cpp_target, outs) diff --git a/topi/python/topi/generic/nn.py b/topi/python/topi/generic/nn.py index 7f67f8093856..892c90af7062 100644 --- a/topi/python/topi/generic/nn.py +++ b/topi/python/topi/generic/nn.py @@ -278,13 +278,13 @@ def schedule_lrn(outs): return cpp.generic.default_schedule(cpp_target, outs, False) @tvm.target.generic_func -def schedule_l2norm(outs): - """Schedule for l2norm +def schedule_l2normalize(outs): + """Schedule for l2normalize Parameters ---------- outs: Array of Tensor - The computation graph description of l2norm + The computation graph description of l2normalize in the format of an array of tensors. 
Returns diff --git a/topi/python/topi/nn/__init__.py b/topi/python/topi/nn/__init__.py index 056d1a76339a..7b6ee4a86836 100644 --- a/topi/python/topi/nn/__init__.py +++ b/topi/python/topi/nn/__init__.py @@ -16,4 +16,4 @@ from .bnn import * from .upsampling import * from .local_response_norm import * -from .l2_norm import * +from .l2_normalize import * diff --git a/topi/python/topi/nn/l2_norm.py b/topi/python/topi/nn/l2_normalize.py similarity index 71% rename from topi/python/topi/nn/l2_norm.py rename to topi/python/topi/nn/l2_normalize.py index 964c8f8d264a..8ac51c908841 100644 --- a/topi/python/topi/nn/l2_norm.py +++ b/topi/python/topi/nn/l2_normalize.py @@ -1,12 +1,12 @@ # pylint: disable=invalid-name -"""TVM operator for l2norm""" +"""TVM operator for l2normalize""" from __future__ import absolute_import import tvm from .. import cpp @tvm.target.generic_func -def l2norm_instance(data, eps, axis=None): - """Perform L2norm on the input data +def l2normalize_instance(data, eps, axis=None): + """Perform L2 normalization on the input data For axis=None, y(i, j) = x(i, j) / sqrt(max(sum(x^2), eps)) @@ -26,4 +26,4 @@ def l2norm_instance(data, eps, axis=None): output : tvm.Tensor 4-D output with same shape """ - return cpp.nn.l2norm_instance(data, eps, axis) + return cpp.nn.l2normalize_instance(data, eps, axis) diff --git a/topi/python/topi/rocm/nn.py b/topi/python/topi/rocm/nn.py index ca935740f2e2..994be565ff87 100644 --- a/topi/python/topi/rocm/nn.py +++ b/topi/python/topi/rocm/nn.py @@ -11,8 +11,8 @@ def schedule_lrn(outs): cpp_target = cpp.TEST_create_target(target.target_name) return cpp.rocm.schedule_lrn(cpp_target, outs) -@generic.schedule_l2norm.register(["rocm", "gpu"]) -def schedule_l2norm(outs): +@generic.schedule_l2normalize.register(["rocm", "gpu"]) +def schedule_l2normalize(outs): target = tvm.target.current_target(allow_none=False) cpp_target = cpp.TEST_create_target(target.target_name) - return cpp.rocm.schedule_l2norm(cpp_target, outs) + return cpp.rocm.schedule_l2normalize(cpp_target, outs) diff --git a/topi/src/topi.cc b/topi/src/topi.cc index 930d9d16260d..5a5c4f537d17 100644 --- a/topi/src/topi.cc +++ b/topi/src/topi.cc @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include @@ -363,10 +363,10 @@ TVM_REGISTER_GLOBAL("topi.nn.log_softmax") *rv = nn::log_softmax(args[0]); }); -/* Ops from nn/l2_norm.h */ -TVM_REGISTER_GLOBAL("topi.nn.l2norm_instance") +/* Ops from nn/l2_normalize.h */ +TVM_REGISTER_GLOBAL("topi.nn.l2normalize_instance") .set_body([](TVMArgs args, TVMRetValue *rv) { - *rv = nn::l2norm_instance(args[0], static_cast(args[1]), args[2]); + *rv = nn::l2normalize_instance(args[0], static_cast(args[1]), args[2]); }); TVM_REGISTER_GLOBAL("topi.nn.lrn") @@ -459,9 +459,9 @@ TVM_REGISTER_GLOBAL("topi.rocm.schedule_lrn") *rv = topi::rocm::schedule_lrn(args[0], args[1]); }); -TVM_REGISTER_GLOBAL("topi.rocm.schedule_l2norm") +TVM_REGISTER_GLOBAL("topi.rocm.schedule_l2normalize") .set_body([](TVMArgs args, TVMRetValue *rv) { - *rv = topi::rocm::schedule_l2norm(args[0], args[1]); + *rv = topi::rocm::schedule_l2normalize(args[0], args[1]); }); /* CUDA schedules */ @@ -515,9 +515,9 @@ TVM_REGISTER_GLOBAL("topi.cuda.schedule_lrn") *rv = topi::cuda::schedule_lrn(args[0], args[1]); }); -TVM_REGISTER_GLOBAL("topi.cuda.schedule_l2norm") +TVM_REGISTER_GLOBAL("topi.cuda.schedule_l2normalize") .set_body([](TVMArgs args, TVMRetValue *rv) { - *rv = topi::cuda::schedule_l2norm(args[0], args[1]); + *rv = topi::cuda::schedule_l2normalize(args[0], args[1]); }); 
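The TVM_REGISTER_GLOBAL entries above are what the Python wrappers in topi/python/topi/cuda/nn.py and topi/python/topi/rocm/nn.py reach through the topi.cpp FFI namespace. As a minimal sketch only, not part of this patch, the renamed schedules can also be resolved directly by their registered names; tvm.get_global_func and the library-loading behaviour of "import topi" are assumptions about this TVM build rather than anything introduced here:

    # Minimal sketch, not part of this patch: resolve the schedules
    # registered above by their global names.
    import tvm
    import topi  # assumed to load the TOPI library carrying these globals

    sched_lrn = tvm.get_global_func("topi.cuda.schedule_lrn")
    sched_l2 = tvm.get_global_func("topi.cuda.schedule_l2normalize")
    print(type(sched_lrn), type(sched_l2))  # both resolve to packed function handles

This is the same path the wrappers take when they call cpp.cuda.schedule_lrn(cpp_target, outs).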
/*! \brief Builder function for instantiating schedules. */ diff --git a/topi/tests/python/test_topi_l2norm.py b/topi/tests/python/test_topi_l2norm.py index 999b82169149..4f9a503bce7d 100644 --- a/topi/tests/python/test_topi_l2norm.py +++ b/topi/tests/python/test_topi_l2norm.py @@ -1,11 +1,11 @@ -"""Test code for L2 norm""" +"""Test code for L2 normalization""" import numpy as np import tvm import topi from topi.util import get_const_tuple -def l2norm_instance_python(a_np, eps, axis=None): - """L2 norm operator in NCHW layout. +def l2normalize_instance_python(a_np, eps, axis=None): + """L2 normalize operator in NCHW layout. Parameters ---------- @@ -19,26 +19,24 @@ def l2norm_instance_python(a_np, eps, axis=None): Returns ------- - l2norm_out : np.ndarray + l2normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - batch, axis1, axis2, axis3 = a_np.shape - sqr_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) - sqrt_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) - l2norm_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) + batch = a_np.shape[0] dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - return np.divide(a_np, sqrt_sum) + l2normalize_out = np.divide(a_np, sqrt_sum) + return l2normalize_out -def verify_l2norm(n, c, h, w, eps, axis=None): +def verify_l2normalize(ishape, eps, axis=None): - A = tvm.placeholder((n, c, h, w), name='A') - B = topi.nn.l2norm_instance(A, eps, axis) + A = tvm.placeholder(ishape, name='A') + B = topi.nn.l2normalize_instance(A, eps, axis) dtype = A.dtype - a_np = np.random.uniform(size=(n, c, h, w)).astype(dtype) - b_np = l2norm_instance_python(a_np, eps, axis) + a_np = np.random.uniform(size=ishape).astype(dtype) + b_np = l2normalize_instance_python(a_np, eps, axis) def check_device(device): ctx = tvm.context(device, 0) @@ -48,9 +46,9 @@ def check_device(device): print("Running on target: %s" % device) with tvm.target.create(device): if device == 'llvm': - s = topi.generic.schedule_l2norm([B]) + s = topi.generic.schedule_l2normalize([B]) else: - s = topi.cuda.schedule_l2norm([B]) + s = topi.cuda.schedule_l2normalize([B]) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx) f = tvm.build(s, [A, B], device) @@ -60,14 +58,14 @@ def check_device(device): for device in ['llvm', 'cuda', 'opencl', 'metal', 'rocm', 'vulkan']: check_device(device) -def test_l2norm(): - verify_l2norm(1, 3, 20, 20, 0.001) - verify_l2norm(1, 3, 20, 20, 0.001, (1,)) - verify_l2norm(1, 3, 20, 20, 0.001, (1, 2)) - verify_l2norm(1, 3, 20, 20, 0.001, (2, 3)) - verify_l2norm(1, 3, 20, 20, 0.001, (0, 3)) - verify_l2norm(1, 3, 20, 20, 0.001, (0, 2, 3)) +def test_l2normalize(): + verify_l2normalize((1, 3, 20, 20), 0.001) + verify_l2normalize((1, 3, 20, 20), 0.001, (1,)) + verify_l2normalize((1, 3, 20, 20), 0.001, (1, 2)) + verify_l2normalize((1, 3, 20, 20), 0.001, (2, 3)) + verify_l2normalize((1, 3, 20, 20), 0.001, (0, 3)) + verify_l2normalize((1, 3, 20, 20), 0.001, (0, 2, 3)) if __name__ == "__main__": - test_l2norm() + test_l2normalize() diff --git a/topi/tests/python/test_topi_lrn.py b/topi/tests/python/test_topi_lrn.py index 69c3fa404b71..5e49fe739103 100644 --- a/topi/tests/python/test_topi_lrn.py +++ b/topi/tests/python/test_topi_lrn.py @@ -13,7 +13,7 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): 4-D with shape [batch, in_channel, in_height, in_width] size : int - normalisation window size + 
normalization window size axis : int input data layout channel axis @@ -22,21 +22,19 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): offset to avoid dividing by 0. constant value alpha : float - contant valie + constant value beta : float exponent constant value Returns ------- - b_np : np.ndarray + lrn_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ axis0, axis1, axis2, axis3 = a_np.shape radius = size // 2 sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - sqr_sum_up = np.zeros(shape=a_np.shape).astype(a_np.dtype) - lrn_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) def sum_dot_values(i, j, k, l): axis_size = a_np.shape[axis] if (axis == 1): @@ -59,7 +57,8 @@ def sum_dot_values(i, j, k, l): sum_dot_values(i, j, k, l) sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) - return np.divide(a_np, sqr_sum_up) + lrn_out = np.divide(a_np, sqr_sum_up) + return lrn_out def verify_lrn(shape, size, axis, bias, alpha, beta): A = tvm.placeholder(shape, name='A') diff --git a/topi/tests/python_cpp/test_topi_l2norm.py b/topi/tests/python_cpp/test_topi_l2norm.py index 5e81406d2177..22ef06b90487 100644 --- a/topi/tests/python_cpp/test_topi_l2norm.py +++ b/topi/tests/python_cpp/test_topi_l2norm.py @@ -1,13 +1,12 @@ """Test code for l2 normalization""" -import os import numpy as np import tvm import topi import logging from topi.util import get_const_tuple -def l2norm_instance_python(a_np, eps, axis=None): - """L2 norm operator in NCHW layout. +def l2normalize_instance_python(a_np, eps, axis=None): + """L2 normalize operator in NCHW layout. Parameters ---------- @@ -21,26 +20,24 @@ def l2norm_instance_python(a_np, eps, axis=None): Returns ------- - l2norm_out : np.ndarray + l2normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - batch, axis1, axis2, axis3 = a_np.shape - sqr_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) - sqrt_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) - l2norm_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) + batch = a_np.shape[0] dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - return np.divide(a_np, sqrt_sum) + l2normalize_out = np.divide(a_np, sqrt_sum) + return l2normalize_out -def verify_l2norm(n, c, h, w, eps, axis=None): +def verify_l2normalize(shape, eps, axis=None): '''Verify l2 normalization operator by comparing outputs from tvm and numpy implementation''' - A = tvm.placeholder((n, c, h, w), name='A') - B = topi.cpp.nn.l2norm_instance(A, eps, axis) + A = tvm.placeholder(shape, name='A') + B = topi.cpp.nn.l2normalize_instance(A, eps, axis) dtype = A.dtype - a_np = np.random.uniform(size=(n, c, h, w)).astype(dtype) - b_np = l2norm_instance_python(a_np, eps, axis) + a_np = np.random.uniform(size=shape).astype(dtype) + b_np = l2normalize_instance_python(a_np, eps, axis) def check_device(device): if not tvm.module.enabled(device): @@ -51,25 +48,25 @@ def check_device(device): if device == "llvm": s = topi.cpp.generic.default_schedule(target, [B], False) else: - s = topi.cpp.cuda.schedule_l2norm(target, [B]) + s = topi.cpp.cuda.schedule_l2normalize(target, [B]) ctx = tvm.context(device, 0) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) - func = tvm.build(s, [A, B], device, name="l2_norm") + func = tvm.build(s, [A, B], device, name="l2_normalize") func(a, b) np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) 
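The reference l2norm_instance_python compared against above computes a / sqrt(max(sum(a*a, axis), eps)), so whenever the squared sum along the chosen axes exceeds eps each normalized slice comes out with unit L2 norm. A small numpy sketch of that property, using an illustrative tensor and axis that are not taken from the test:

    # Sketch only: same formula as the numpy reference, on a tiny tensor.
    import numpy as np

    a = np.arange(24, dtype="float32").reshape(1, 2, 3, 4)  # illustrative values
    eps, axis = 0.001, (1,)

    sqr_sum = np.sum(a * a, axis=axis, keepdims=True)
    out = a / np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a.shape), eps))

    # Every (batch, height, width) position now has unit norm over channels.
    assert np.allclose(np.sqrt(np.sum(out * out, axis=axis)), 1.0, atol=1e-5)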
for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm']: check_device(device) -def test_l2_norm(): - verify_l2norm(1, 3, 20, 20, 0.001) - verify_l2norm(1, 3, 20, 20, 0.001, (1,)) - verify_l2norm(1, 3, 20, 20, 0.001, (1, 2)) - verify_l2norm(1, 3, 20, 20, 0.001, (2, 3)) - verify_l2norm(1, 3, 20, 20, 0.001, (0, 3)) - verify_l2norm(1, 3, 20, 20, 0.001, (0, 2, 3)) +def test_l2_normalize(): + verify_l2normalize((1, 3, 20, 20), 0.001) + verify_l2normalize((1, 3, 20, 20), 0.001, (1,)) + verify_l2normalize((1, 3, 20, 20), 0.001, (1, 2)) + verify_l2normalize((1, 3, 20, 20), 0.001, (2, 3)) + verify_l2normalize((1, 3, 20, 20), 0.001, (0, 3)) + verify_l2normalize((1, 3, 20, 20), 0.001, (0, 2, 3)) if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) - test_l2_norm() + test_l2_normalize() diff --git a/topi/tests/python_cpp/test_topi_lrn.py b/topi/tests/python_cpp/test_topi_lrn.py index 69c66c028c69..0af9ca9b8ac4 100644 --- a/topi/tests/python_cpp/test_topi_lrn.py +++ b/topi/tests/python_cpp/test_topi_lrn.py @@ -1,5 +1,4 @@ """Test code for LRN""" -import os import numpy as np import tvm import topi @@ -7,7 +6,7 @@ from topi.util import get_const_tuple def lrn_python(a_np, size, axis, bias, alpha, beta): - """Local response norm operator in NCHW layout. + """Local response normalization operator in NCHW layout. Parameters ---------- @@ -15,7 +14,7 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): 4-D with shape [batch, in_channel, in_height, in_width] size : int - normalisation window size + normalization window size axis : int input data layout channel axis @@ -24,21 +23,19 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): offset to avoid dividing by 0. constant value alpha : float - contant valie + constant value beta : float exponent constant value Returns ------- - b_np : np.ndarray + lrn_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ axis0, axis1, axis2, axis3 = a_np.shape radius = size // 2 sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - sqr_sum_up = np.zeros(shape=a_np.shape).astype(a_np.dtype) - lrn_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) def sum_dot_values(i, j, k, l): axis_size = a_np.shape[axis] if (axis == 1): @@ -61,7 +58,8 @@ def sum_dot_values(i, j, k, l): sum_dot_values(i, j, k, l) sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) - return np.divide(a_np, sqr_sum_up) + lrn_out = np.divide(a_np, sqr_sum_up) + return lrn_out def verify_lrn(shape, size, axis, bias, alpha, beta): '''Verify Local response normalization operator by comparing outputs from tvm and numpy implementation''' From 88a12f5cf8bd5300fba77bb2293145a8b59518ae Mon Sep 17 00:00:00 2001 From: pariksheet Date: Wed, 20 Jun 2018 15:17:26 +0530 Subject: [PATCH 09/12] Review comments addressed --- nnvm/python/nnvm/top/nn.py | 16 +++++----- nnvm/src/top/nn/nn.cc | 3 +- nnvm/tests/python/compiler/test_top_level1.py | 31 ++++++++++++++++--- topi/include/topi/cuda/normalization.h | 4 +-- topi/include/topi/nn/l2_normalize.h | 4 +-- topi/include/topi/rocm/normalization.h | 4 +-- topi/python/topi/cuda/__init__.py | 2 +- topi/python/topi/cuda/nn.py | 6 ++-- topi/python/topi/generic/nn.py | 2 +- topi/python/topi/nn/l2_normalize.py | 4 +-- topi/python/topi/rocm/nn.py | 6 ++-- topi/src/topi.cc | 12 +++---- topi/tests/python/test_topi_l2norm.py | 11 +++---- topi/tests/python_cpp/test_topi_l2norm.py | 9 +++--- 14 files changed, 68 insertions(+), 46 deletions(-) diff --git a/nnvm/python/nnvm/top/nn.py b/nnvm/python/nnvm/top/nn.py index 
37c014e705d9..b0678a1edfb3 100644 --- a/nnvm/python/nnvm/top/nn.py +++ b/nnvm/python/nnvm/top/nn.py @@ -260,19 +260,19 @@ def schedule_lrn(attrs, outs, target): with tvm.target.create(target): return topi.generic.schedule_lrn(outs) -reg.register_pattern("lrn", OpPattern.OUT_ELEMWISE_FUSABLE) +reg.register_pattern("lrn", OpPattern.OPAQUE) -@reg.register_compute("l2normalize") -def compute_l2normalize(attrs, inputs, _): +@reg.register_compute("l2_normalize") +def compute_l2_normalize(attrs, inputs, _): """Compute definition of l2normalize""" eps = attrs.get_float("eps") axis = attrs.get_int_tuple("axis") - return topi.nn.l2normalize_instance(inputs[0], eps, axis) + return topi.nn.l2_normalize(inputs[0], eps, axis) -@reg.register_schedule("l2normalize") -def schedule_l2normalize(attrs, outs, target): +@reg.register_schedule("l2_normalize") +def schedule_l2_normalize(attrs, outs, target): """Schedule definition of l2normalize""" with tvm.target.create(target): - return topi.generic.schedule_l2normalize(outs) + return topi.generic.schedule_l2_normalize(outs) -reg.register_pattern("l2normalize", OpPattern.OUT_ELEMWISE_FUSABLE) +reg.register_pattern("l2_normalize", OpPattern.OUT_ELEMWISE_FUSABLE) diff --git a/nnvm/src/top/nn/nn.cc b/nnvm/src/top/nn/nn.cc index 7f50f2b1f58a..e78e22edf272 100644 --- a/nnvm/src/top/nn/nn.cc +++ b/nnvm/src/top/nn/nn.cc @@ -747,7 +747,7 @@ inline bool L2normalizeInferShape(const nnvm::NodeAttrs& attrs, return true; } -NNVM_REGISTER_OP(l2normalize) +NNVM_REGISTER_OP(l2_normalize) .describe(R"code(L2NORMALIZE layer)code" NNVM_ADD_FILELINE) .add_argument("data", "4D Tesndor", "Input data.") .set_attr_parser(ParamParser) @@ -756,6 +756,7 @@ NNVM_REGISTER_OP(l2normalize) .set_num_outputs(1) .set_attr("FInferShape", L2normalizeInferShape) .set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FCorrectLayout", ElemwiseArbitraryLayout<1, 1>) .set_support_level(1); } // namespace top diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index 5cf661b412c4..a6cc475920f7 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -435,13 +435,25 @@ def sum_dot_values(i, j, k, l): out_np = lrn_python(x_np, size, axis, bias, alpha, beta) np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) + #Checking LRN op followed by elementwise op relu + z = sym.relu(y) + x_np = np.random.uniform(low=-10.0, high=10.0, size=ishape).astype(dtype) + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(z, target, {"x": ishape}) + m = graph_runtime.create(graph, lib, ctx) + m.run(x=x_np) + out = m.get_output(0, tvm.nd.empty(ishape)) + out_np = lrn_python(x_np, size, axis, bias, alpha, beta) + out_np = (out_np > 0) * out_np + np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) + def verify_l2normalize(ishape, eps, axis): x = sym.Variable("x") - y = sym.l2normalize(x, eps=eps, axis=axis) + y = sym.l2_normalize(x, eps=eps, axis=axis) dtype = "float32" x_np = np.random.uniform(size=ishape).astype(dtype) - def l2normalize_instance_python(a_np, eps, axis=None): + def l2normalize_python(a_np, eps, axis=None): """L2 normalize operator in NCHW layout. 
Parameters @@ -459,7 +471,6 @@ def l2normalize_instance_python(a_np, eps, axis=None): l2normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - batch = a_np.shape[0] dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) @@ -471,7 +482,19 @@ def l2normalize_instance_python(a_np, eps, axis=None): m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) out = m.get_output(0, tvm.nd.empty(ishape)) - out_np = l2normalize_instance_python(x_np, eps, axis) + out_np = l2normalize_python(x_np, eps, axis) + np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) + + #Checking L2 normalization op followed by elementwise op relu + z = sym.relu(y) + x_np = np.random.uniform(low=-10.0, high=10.0, size=ishape).astype(dtype) + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(z, target, {"x": ishape}) + m = graph_runtime.create(graph, lib, ctx) + m.run(x=x_np) + out = m.get_output(0, tvm.nd.empty(ishape)) + out_np = l2normalize_python(x_np, eps, axis) + out_np = (out_np > 0) * out_np np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) def test_lrn(): diff --git a/topi/include/topi/cuda/normalization.h b/topi/include/topi/cuda/normalization.h index d189893450e0..4ffbf521379f 100644 --- a/topi/include/topi/cuda/normalization.h +++ b/topi/include/topi/cuda/normalization.h @@ -59,7 +59,7 @@ inline Schedule schedule_lrn(const Target &target, const Array& outs) { * * \return A schedule for the given ops. */ -inline Schedule schedule_l2normalize(const Target &target, const Array& outs) { +inline Schedule schedule_l2_normalize(const Target &target, const Array& outs) { Array out_ops; for (auto t : outs) { out_ops.push_back(t->op); @@ -69,7 +69,7 @@ inline Schedule schedule_l2normalize(const Target &target, const Array& std::function traverse; traverse = [&](const Operation& op) { // Inline all one-to-one-mapping operators except the last stage (output) - if (is_injective(op->tag) || op->tag == "l2normalize") { + if (is_injective(op->tag) || op->tag == "l2_normalize") { if (!detail::contains(s->outputs, op)) { s[op].compute_inline(); } diff --git a/topi/include/topi/nn/l2_normalize.h b/topi/include/topi/nn/l2_normalize.h index 96f31782ff88..079c6d467561 100644 --- a/topi/include/topi/nn/l2_normalize.h +++ b/topi/include/topi/nn/l2_normalize.h @@ -25,11 +25,11 @@ using namespace tvm; * * \return A Tensor whose op member is the l2 normalization operation */ -inline Tensor l2normalize_instance(const Tensor& data, +inline Tensor l2_normalize(const Tensor& data, float eps, const Array& axis, std::string name = "tensor", - std::string tag = "l2normalize") { + std::string tag = "l2_normalize") { CHECK_EQ(data->shape.size(), 4) << "L2 normalization requires 4-D input"; auto input_shape = data->shape; Tensor dot_value = pow(data, static_cast(2.0)); diff --git a/topi/include/topi/rocm/normalization.h b/topi/include/topi/rocm/normalization.h index 609177f4b8ef..b12e64aba963 100644 --- a/topi/include/topi/rocm/normalization.h +++ b/topi/include/topi/rocm/normalization.h @@ -33,8 +33,8 @@ inline Schedule schedule_lrn(const Target &target, const Array& outs) { * * \return A schedule for the given ops. 
*/ -inline Schedule schedule_l2normalize(const Target &target, const Array& outs) { - return topi::cuda::schedule_l2normalize(target, outs); +inline Schedule schedule_l2_normalize(const Target &target, const Array& outs) { + return topi::cuda::schedule_l2_normalize(target, outs); } } // namespace rocm } // namespace topi diff --git a/topi/python/topi/cuda/__init__.py b/topi/python/topi/cuda/__init__.py index a6bab999ccc6..dbf00ebeb52b 100644 --- a/topi/python/topi/cuda/__init__.py +++ b/topi/python/topi/cuda/__init__.py @@ -17,4 +17,4 @@ from .extern import schedule_extern from .vision import schedule_region from .vision import schedule_reorg -from .nn import schedule_lrn, schedule_l2normalize +from .nn import schedule_lrn, schedule_l2_normalize diff --git a/topi/python/topi/cuda/nn.py b/topi/python/topi/cuda/nn.py index 39486afbf2cf..da88235084b6 100644 --- a/topi/python/topi/cuda/nn.py +++ b/topi/python/topi/cuda/nn.py @@ -25,8 +25,8 @@ def schedule_lrn(outs): cpp_target = cpp.TEST_create_target(target.target_name) return cpp.cuda.schedule_lrn(cpp_target, outs) -@generic.schedule_l2normalize.register(["cuda"]) -def schedule_l2normalize(outs): +@generic.schedule_l2_normalize.register(["cuda"]) +def schedule_l2_normalize(outs): """Schedule for L2normalize Parameters @@ -42,4 +42,4 @@ def schedule_l2normalize(outs): """ target = tvm.target.current_target(allow_none=False) cpp_target = cpp.TEST_create_target(target.target_name) - return cpp.cuda.schedule_l2normalize(cpp_target, outs) + return cpp.cuda.schedule_l2_normalize(cpp_target, outs) diff --git a/topi/python/topi/generic/nn.py b/topi/python/topi/generic/nn.py index 892c90af7062..ff61deac13d5 100644 --- a/topi/python/topi/generic/nn.py +++ b/topi/python/topi/generic/nn.py @@ -278,7 +278,7 @@ def schedule_lrn(outs): return cpp.generic.default_schedule(cpp_target, outs, False) @tvm.target.generic_func -def schedule_l2normalize(outs): +def schedule_l2_normalize(outs): """Schedule for l2normalize Parameters diff --git a/topi/python/topi/nn/l2_normalize.py b/topi/python/topi/nn/l2_normalize.py index 8ac51c908841..0ae104ce715f 100644 --- a/topi/python/topi/nn/l2_normalize.py +++ b/topi/python/topi/nn/l2_normalize.py @@ -5,7 +5,7 @@ from .. 
import cpp @tvm.target.generic_func -def l2normalize_instance(data, eps, axis=None): +def l2_normalize(data, eps, axis=None): """Perform L2 normalization on the input data For axis=None, y(i, j) = x(i, j) / sqrt(max(sum(x^2), eps)) @@ -26,4 +26,4 @@ def l2normalize_instance(data, eps, axis=None): output : tvm.Tensor 4-D output with same shape """ - return cpp.nn.l2normalize_instance(data, eps, axis) + return cpp.nn.l2_normalize(data, eps, axis) diff --git a/topi/python/topi/rocm/nn.py b/topi/python/topi/rocm/nn.py index 994be565ff87..5a9b2ad84db0 100644 --- a/topi/python/topi/rocm/nn.py +++ b/topi/python/topi/rocm/nn.py @@ -11,8 +11,8 @@ def schedule_lrn(outs): cpp_target = cpp.TEST_create_target(target.target_name) return cpp.rocm.schedule_lrn(cpp_target, outs) -@generic.schedule_l2normalize.register(["rocm", "gpu"]) -def schedule_l2normalize(outs): +@generic.schedule_l2_normalize.register(["rocm", "gpu"]) +def schedule_l2_normalize(outs): target = tvm.target.current_target(allow_none=False) cpp_target = cpp.TEST_create_target(target.target_name) - return cpp.rocm.schedule_l2normalize(cpp_target, outs) + return cpp.rocm.schedule_l2_normalize(cpp_target, outs) diff --git a/topi/src/topi.cc b/topi/src/topi.cc index 5a5c4f537d17..9f2ecacd11a4 100644 --- a/topi/src/topi.cc +++ b/topi/src/topi.cc @@ -364,9 +364,9 @@ TVM_REGISTER_GLOBAL("topi.nn.log_softmax") }); /* Ops from nn/l2_normalize.h */ -TVM_REGISTER_GLOBAL("topi.nn.l2normalize_instance") +TVM_REGISTER_GLOBAL("topi.nn.l2_normalize") .set_body([](TVMArgs args, TVMRetValue *rv) { - *rv = nn::l2normalize_instance(args[0], static_cast(args[1]), args[2]); + *rv = nn::l2_normalize(args[0], static_cast(args[1]), args[2]); }); TVM_REGISTER_GLOBAL("topi.nn.lrn") @@ -459,9 +459,9 @@ TVM_REGISTER_GLOBAL("topi.rocm.schedule_lrn") *rv = topi::rocm::schedule_lrn(args[0], args[1]); }); -TVM_REGISTER_GLOBAL("topi.rocm.schedule_l2normalize") +TVM_REGISTER_GLOBAL("topi.rocm.schedule_l2_normalize") .set_body([](TVMArgs args, TVMRetValue *rv) { - *rv = topi::rocm::schedule_l2normalize(args[0], args[1]); + *rv = topi::rocm::schedule_l2_normalize(args[0], args[1]); }); /* CUDA schedules */ @@ -515,9 +515,9 @@ TVM_REGISTER_GLOBAL("topi.cuda.schedule_lrn") *rv = topi::cuda::schedule_lrn(args[0], args[1]); }); -TVM_REGISTER_GLOBAL("topi.cuda.schedule_l2normalize") +TVM_REGISTER_GLOBAL("topi.cuda.schedule_l2_normalize") .set_body([](TVMArgs args, TVMRetValue *rv) { - *rv = topi::cuda::schedule_l2normalize(args[0], args[1]); + *rv = topi::cuda::schedule_l2_normalize(args[0], args[1]); }); /*! \brief Builder function for instantiating schedules. */ diff --git a/topi/tests/python/test_topi_l2norm.py b/topi/tests/python/test_topi_l2norm.py index 4f9a503bce7d..f86bc7c5f324 100644 --- a/topi/tests/python/test_topi_l2norm.py +++ b/topi/tests/python/test_topi_l2norm.py @@ -4,7 +4,7 @@ import topi from topi.util import get_const_tuple -def l2normalize_instance_python(a_np, eps, axis=None): +def l2normalize_python(a_np, eps, axis=None): """L2 normalize operator in NCHW layout. 
Parameters @@ -22,7 +22,6 @@ def l2normalize_instance_python(a_np, eps, axis=None): l2normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - batch = a_np.shape[0] dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) @@ -32,11 +31,11 @@ def l2normalize_instance_python(a_np, eps, axis=None): def verify_l2normalize(ishape, eps, axis=None): A = tvm.placeholder(ishape, name='A') - B = topi.nn.l2normalize_instance(A, eps, axis) + B = topi.nn.l2_normalize(A, eps, axis) dtype = A.dtype a_np = np.random.uniform(size=ishape).astype(dtype) - b_np = l2normalize_instance_python(a_np, eps, axis) + b_np = l2normalize_python(a_np, eps, axis) def check_device(device): ctx = tvm.context(device, 0) @@ -46,9 +45,9 @@ def check_device(device): print("Running on target: %s" % device) with tvm.target.create(device): if device == 'llvm': - s = topi.generic.schedule_l2normalize([B]) + s = topi.generic.schedule_l2_normalize([B]) else: - s = topi.cuda.schedule_l2normalize([B]) + s = topi.cuda.schedule_l2_normalize([B]) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx) f = tvm.build(s, [A, B], device) diff --git a/topi/tests/python_cpp/test_topi_l2norm.py b/topi/tests/python_cpp/test_topi_l2norm.py index 22ef06b90487..b1ede2d8085e 100644 --- a/topi/tests/python_cpp/test_topi_l2norm.py +++ b/topi/tests/python_cpp/test_topi_l2norm.py @@ -5,7 +5,7 @@ import logging from topi.util import get_const_tuple -def l2normalize_instance_python(a_np, eps, axis=None): +def l2normalize_python(a_np, eps, axis=None): """L2 normalize operator in NCHW layout. Parameters @@ -23,7 +23,6 @@ def l2normalize_instance_python(a_np, eps, axis=None): l2normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - batch = a_np.shape[0] dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) @@ -33,11 +32,11 @@ def l2normalize_instance_python(a_np, eps, axis=None): def verify_l2normalize(shape, eps, axis=None): '''Verify l2 normalization operator by comparing outputs from tvm and numpy implementation''' A = tvm.placeholder(shape, name='A') - B = topi.cpp.nn.l2normalize_instance(A, eps, axis) + B = topi.cpp.nn.l2_normalize(A, eps, axis) dtype = A.dtype a_np = np.random.uniform(size=shape).astype(dtype) - b_np = l2normalize_instance_python(a_np, eps, axis) + b_np = l2normalize_python(a_np, eps, axis) def check_device(device): if not tvm.module.enabled(device): @@ -48,7 +47,7 @@ def check_device(device): if device == "llvm": s = topi.cpp.generic.default_schedule(target, [B], False) else: - s = topi.cpp.cuda.schedule_l2normalize(target, [B]) + s = topi.cpp.cuda.schedule_l2_normalize(target, [B]) ctx = tvm.context(device, 0) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) From be94a2edc91c1e5ef8507afa2252aec5f813aa4e Mon Sep 17 00:00:00 2001 From: pariksheet Date: Thu, 21 Jun 2018 09:59:05 +0530 Subject: [PATCH 10/12] Consistent l2_normalize name --- nnvm/include/nnvm/top/nn.h | 4 +-- nnvm/python/nnvm/top/nn.py | 4 +-- nnvm/src/top/nn/nn.cc | 10 +++---- nnvm/tests/python/compiler/test_top_level1.py | 22 +++++++-------- topi/include/topi/cuda/normalization.h | 8 +++--- topi/python/topi/cuda/nn.py | 4 +-- topi/python/topi/generic/nn.py | 4 +-- topi/python/topi/nn/l2_normalize.py | 2 +- 
topi/tests/python/test_topi_l2norm.py | 28 +++++++++---------- topi/tests/python_cpp/test_topi_l2norm.py | 24 ++++++++-------- 10 files changed, 55 insertions(+), 55 deletions(-) diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h index 6687535c4f85..0e0b03a72ebb 100644 --- a/nnvm/include/nnvm/top/nn.h +++ b/nnvm/include/nnvm/top/nn.h @@ -391,11 +391,11 @@ struct LRNParam : public dmlc::Parameter { static const constexpr int kData = 0; }; -struct L2normalizeParam : public dmlc::Parameter { +struct L2NormalizeParam : public dmlc::Parameter { float eps; Tuple axis; - DMLC_DECLARE_PARAMETER(L2normalizeParam) { + DMLC_DECLARE_PARAMETER(L2NormalizeParam) { DMLC_DECLARE_FIELD(eps) .describe("float type epsilon value."); DMLC_DECLARE_FIELD(axis) diff --git a/nnvm/python/nnvm/top/nn.py b/nnvm/python/nnvm/top/nn.py index b0678a1edfb3..5bfabdac2c8d 100644 --- a/nnvm/python/nnvm/top/nn.py +++ b/nnvm/python/nnvm/top/nn.py @@ -264,14 +264,14 @@ def schedule_lrn(attrs, outs, target): @reg.register_compute("l2_normalize") def compute_l2_normalize(attrs, inputs, _): - """Compute definition of l2normalize""" + """Compute definition of l2 normalize""" eps = attrs.get_float("eps") axis = attrs.get_int_tuple("axis") return topi.nn.l2_normalize(inputs[0], eps, axis) @reg.register_schedule("l2_normalize") def schedule_l2_normalize(attrs, outs, target): - """Schedule definition of l2normalize""" + """Schedule definition of l2 normalize""" with tvm.target.create(target): return topi.generic.schedule_l2_normalize(outs) diff --git a/nnvm/src/top/nn/nn.cc b/nnvm/src/top/nn/nn.cc index e78e22edf272..432f6e7a0b41 100644 --- a/nnvm/src/top/nn/nn.cc +++ b/nnvm/src/top/nn/nn.cc @@ -735,9 +735,9 @@ NNVM_REGISTER_OP(lrn) .set_attr("FInferType", ElemwiseType<1, 1>) .set_support_level(1); -DMLC_REGISTER_PARAMETER(L2normalizeParam); +DMLC_REGISTER_PARAMETER(L2NormalizeParam); -inline bool L2normalizeInferShape(const nnvm::NodeAttrs& attrs, +inline bool L2NormalizeInferShape(const nnvm::NodeAttrs& attrs, std::vector* in_shape, std::vector* out_shape) { TShape dshape = (*in_shape)[0]; @@ -750,11 +750,11 @@ inline bool L2normalizeInferShape(const nnvm::NodeAttrs& attrs, NNVM_REGISTER_OP(l2_normalize) .describe(R"code(L2NORMALIZE layer)code" NNVM_ADD_FILELINE) .add_argument("data", "4D Tesndor", "Input data.") -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) .set_num_inputs(1) .set_num_outputs(1) -.set_attr("FInferShape", L2normalizeInferShape) +.set_attr("FInferShape", L2NormalizeInferShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FCorrectLayout", ElemwiseArbitraryLayout<1, 1>) .set_support_level(1); diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index a6cc475920f7..1fa7f5d87675 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -447,13 +447,13 @@ def sum_dot_values(i, j, k, l): out_np = (out_np > 0) * out_np np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) -def verify_l2normalize(ishape, eps, axis): +def verify_l2_normalize(ishape, eps, axis): x = sym.Variable("x") y = sym.l2_normalize(x, eps=eps, axis=axis) dtype = "float32" x_np = np.random.uniform(size=ishape).astype(dtype) - def l2normalize_python(a_np, eps, axis=None): + def l2_normalize_python(a_np, eps, axis=None): """L2 normalize operator in NCHW layout. 
Parameters @@ -468,21 +468,21 @@ def l2normalize_python(a_np, eps, axis=None): Returns ------- - l2normalize_out : np.ndarray + l2_normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - l2normalize_out = np.divide(a_np, sqrt_sum) - return l2normalize_out + l2_normalize_out = np.divide(a_np, sqrt_sum) + return l2_normalize_out for target, ctx in ctx_list(): graph, lib, _ = nnvm.compiler.build(y, target, {"x": ishape}) m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) out = m.get_output(0, tvm.nd.empty(ishape)) - out_np = l2normalize_python(x_np, eps, axis) + out_np = l2_normalize_python(x_np, eps, axis) np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) #Checking L2 normalization op followed by elementwise op relu @@ -493,7 +493,7 @@ def l2normalize_python(a_np, eps, axis=None): m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) out = m.get_output(0, tvm.nd.empty(ishape)) - out_np = l2normalize_python(x_np, eps, axis) + out_np = l2_normalize_python(x_np, eps, axis) out_np = (out_np > 0) * out_np np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) @@ -501,9 +501,9 @@ def test_lrn(): verify_lrn((1, 3, 20, 20), 3, 1, 1.0, 1.0, 0.5) verify_lrn((1, 3, 20, 20), 3, 1, 2.0, 1.0, 0.75) -def test_l2normalize(): - verify_l2normalize((1, 3, 20, 20), 0.001, (1,)) - verify_l2normalize((1, 3, 20, 20), 0.001, (1, 2)) +def test_l2_normalize(): + verify_l2_normalize((1, 3, 20, 20), 0.001, (1,)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (1, 2)) if __name__ == "__main__": test_split() @@ -524,4 +524,4 @@ def test_l2normalize(): test_squeeze() test_pad() test_lrn() - test_l2normalize() + test_l2_normalize() diff --git a/topi/include/topi/cuda/normalization.h b/topi/include/topi/cuda/normalization.h index 4ffbf521379f..91578c46d266 100644 --- a/topi/include/topi/cuda/normalization.h +++ b/topi/include/topi/cuda/normalization.h @@ -90,14 +90,14 @@ inline Schedule schedule_l2_normalize(const Target &target, const Array& traverse(outs[0]->op); int num_thread = 64; - Tensor l2normalize = outs[0]; + Tensor l2_normalize = outs[0]; IterVar block_x = tvm::thread_axis(Range(), "blockIdx.x"); IterVar thread_x = tvm::thread_axis(Range(0, num_thread), "threadIdx.x"); IterVar xto, xti; - s[l2normalize].split_by_nparts(l2normalize->op.as()->axis[1], + s[l2_normalize].split_by_nparts(l2_normalize->op.as()->axis[1], num_thread, &xto, &xti); - s[l2normalize].bind(l2normalize->op.as()->axis[0], block_x); - s[l2normalize].bind(xto, thread_x); + s[l2_normalize].bind(l2_normalize->op.as()->axis[0], block_x); + s[l2_normalize].bind(xto, thread_x); return s; } } // namespace cuda diff --git a/topi/python/topi/cuda/nn.py b/topi/python/topi/cuda/nn.py index da88235084b6..b503b2dad50f 100644 --- a/topi/python/topi/cuda/nn.py +++ b/topi/python/topi/cuda/nn.py @@ -27,12 +27,12 @@ def schedule_lrn(outs): @generic.schedule_l2_normalize.register(["cuda"]) def schedule_l2_normalize(outs): - """Schedule for L2normalize + """Schedule for L2 normalize Parameters ---------- outs: Array of Tensor - The computation graph description of L2normalize + The computation graph description of L2 normalize in the format of an array of tensors. 
Returns diff --git a/topi/python/topi/generic/nn.py b/topi/python/topi/generic/nn.py index ff61deac13d5..8f2f8612c7fa 100644 --- a/topi/python/topi/generic/nn.py +++ b/topi/python/topi/generic/nn.py @@ -279,12 +279,12 @@ def schedule_lrn(outs): @tvm.target.generic_func def schedule_l2_normalize(outs): - """Schedule for l2normalize + """Schedule for l2 normalize Parameters ---------- outs: Array of Tensor - The computation graph description of l2normalize + The computation graph description of l2 normalize in the format of an array of tensors. Returns diff --git a/topi/python/topi/nn/l2_normalize.py b/topi/python/topi/nn/l2_normalize.py index 0ae104ce715f..951084379eec 100644 --- a/topi/python/topi/nn/l2_normalize.py +++ b/topi/python/topi/nn/l2_normalize.py @@ -1,5 +1,5 @@ # pylint: disable=invalid-name -"""TVM operator for l2normalize""" +"""TVM operator for l2 normalize""" from __future__ import absolute_import import tvm from .. import cpp diff --git a/topi/tests/python/test_topi_l2norm.py b/topi/tests/python/test_topi_l2norm.py index f86bc7c5f324..3c17005a56e1 100644 --- a/topi/tests/python/test_topi_l2norm.py +++ b/topi/tests/python/test_topi_l2norm.py @@ -4,7 +4,7 @@ import topi from topi.util import get_const_tuple -def l2normalize_python(a_np, eps, axis=None): +def l2_normalize_python(a_np, eps, axis=None): """L2 normalize operator in NCHW layout. Parameters @@ -19,23 +19,23 @@ def l2normalize_python(a_np, eps, axis=None): Returns ------- - l2normalize_out : np.ndarray + l2_normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - l2normalize_out = np.divide(a_np, sqrt_sum) - return l2normalize_out + l2_normalize_out = np.divide(a_np, sqrt_sum) + return l2_normalize_out -def verify_l2normalize(ishape, eps, axis=None): +def verify_l2_normalize(ishape, eps, axis=None): A = tvm.placeholder(ishape, name='A') B = topi.nn.l2_normalize(A, eps, axis) dtype = A.dtype a_np = np.random.uniform(size=ishape).astype(dtype) - b_np = l2normalize_python(a_np, eps, axis) + b_np = l2_normalize_python(a_np, eps, axis) def check_device(device): ctx = tvm.context(device, 0) @@ -57,14 +57,14 @@ def check_device(device): for device in ['llvm', 'cuda', 'opencl', 'metal', 'rocm', 'vulkan']: check_device(device) -def test_l2normalize(): - verify_l2normalize((1, 3, 20, 20), 0.001) - verify_l2normalize((1, 3, 20, 20), 0.001, (1,)) - verify_l2normalize((1, 3, 20, 20), 0.001, (1, 2)) - verify_l2normalize((1, 3, 20, 20), 0.001, (2, 3)) - verify_l2normalize((1, 3, 20, 20), 0.001, (0, 3)) - verify_l2normalize((1, 3, 20, 20), 0.001, (0, 2, 3)) +def test_l2_normalize(): + verify_l2_normalize((1, 3, 20, 20), 0.001) + verify_l2_normalize((1, 3, 20, 20), 0.001, (1,)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (1, 2)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (2, 3)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (0, 3)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (0, 2, 3)) if __name__ == "__main__": - test_l2normalize() + test_l2_normalize() diff --git a/topi/tests/python_cpp/test_topi_l2norm.py b/topi/tests/python_cpp/test_topi_l2norm.py index b1ede2d8085e..74396def5d76 100644 --- a/topi/tests/python_cpp/test_topi_l2norm.py +++ b/topi/tests/python_cpp/test_topi_l2norm.py @@ -5,7 +5,7 @@ import logging from topi.util import get_const_tuple -def l2normalize_python(a_np, eps, axis=None): +def l2_normalize_python(a_np, eps, 
axis=None): """L2 normalize operator in NCHW layout. Parameters @@ -20,23 +20,23 @@ def l2normalize_python(a_np, eps, axis=None): Returns ------- - l2normalize_out : np.ndarray + l2_normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - l2normalize_out = np.divide(a_np, sqrt_sum) - return l2normalize_out + l2_normalize_out = np.divide(a_np, sqrt_sum) + return l2_normalize_out -def verify_l2normalize(shape, eps, axis=None): +def verify_l2_normalize(shape, eps, axis=None): '''Verify l2 normalization operator by comparing outputs from tvm and numpy implementation''' A = tvm.placeholder(shape, name='A') B = topi.cpp.nn.l2_normalize(A, eps, axis) dtype = A.dtype a_np = np.random.uniform(size=shape).astype(dtype) - b_np = l2normalize_python(a_np, eps, axis) + b_np = l2_normalize_python(a_np, eps, axis) def check_device(device): if not tvm.module.enabled(device): @@ -59,12 +59,12 @@ def check_device(device): check_device(device) def test_l2_normalize(): - verify_l2normalize((1, 3, 20, 20), 0.001) - verify_l2normalize((1, 3, 20, 20), 0.001, (1,)) - verify_l2normalize((1, 3, 20, 20), 0.001, (1, 2)) - verify_l2normalize((1, 3, 20, 20), 0.001, (2, 3)) - verify_l2normalize((1, 3, 20, 20), 0.001, (0, 3)) - verify_l2normalize((1, 3, 20, 20), 0.001, (0, 2, 3)) + verify_l2_normalize((1, 3, 20, 20), 0.001) + verify_l2_normalize((1, 3, 20, 20), 0.001, (1,)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (1, 2)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (2, 3)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (0, 3)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (0, 2, 3)) if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) From e5bc18bb2eea68f58201feea56020bf77b5265ab Mon Sep 17 00:00:00 2001 From: pariksheet Date: Thu, 21 Jun 2018 16:19:05 +0530 Subject: [PATCH 11/12] Modified lrn_python function --- nnvm/src/top/nn/nn.cc | 4 ++-- nnvm/tests/python/compiler/test_top_level1.py | 11 ++--------- topi/tests/python/test_topi_lrn.py | 10 ++-------- topi/tests/python_cpp/test_topi_lrn.py | 10 ++-------- 4 files changed, 8 insertions(+), 27 deletions(-) diff --git a/nnvm/src/top/nn/nn.cc b/nnvm/src/top/nn/nn.cc index 432f6e7a0b41..ab47ae521224 100644 --- a/nnvm/src/top/nn/nn.cc +++ b/nnvm/src/top/nn/nn.cc @@ -726,7 +726,7 @@ inline bool LRNInferShape(const nnvm::NodeAttrs& attrs, NNVM_REGISTER_OP(lrn) .describe(R"code(LRN layer)code" NNVM_ADD_FILELINE) -.add_argument("data", "4D Tesndor", "Input data.") +.add_argument("data", "4D Tensor", "Input data.") .set_attr_parser(ParamParser) .set_attr("FGetAttrDict", ParamGetAttrDict) .set_num_inputs(1) @@ -749,7 +749,7 @@ inline bool L2NormalizeInferShape(const nnvm::NodeAttrs& attrs, NNVM_REGISTER_OP(l2_normalize) .describe(R"code(L2NORMALIZE layer)code" NNVM_ADD_FILELINE) -.add_argument("data", "4D Tesndor", "Input data.") +.add_argument("data", "4D Tensor", "Input data.") .set_attr_parser(ParamParser) .set_attr("FGetAttrDict", ParamGetAttrDict) .set_num_inputs(1) diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index 1fa7f5d87675..678f8a8ccc46 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -5,7 +5,7 @@ import nnvm.symbol as sym import nnvm.compiler from nnvm.testing.config import ctx_list - +from itertools import product def helper(symbol, 
inputs, dtype, np_forward, np_backward=None, need_input=True, need_head_grads=True): @@ -399,10 +399,9 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): lrn_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - axis0, axis1, axis2, axis3 = a_np.shape radius = size // 2 sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - def sum_dot_values(i, j, k, l): + for i, j, k, l in product(*[range(_axis) for _axis in a_np.shape]): axis_size = a_np.shape[axis] if (axis == 1): #NCHW layout @@ -417,12 +416,6 @@ def sum_dot_values(i, j, k, l): sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ a_np[i, j, k, sum_start:sum_end]) - for i in range(axis0): - for j in range(axis1): - for k in range(axis2): - for l in range(axis3): - sum_dot_values(i, j, k, l) - sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) lrn_out = np.divide(a_np, sqr_sum_up) return lrn_out diff --git a/topi/tests/python/test_topi_lrn.py b/topi/tests/python/test_topi_lrn.py index 5e49fe739103..51446ebf712d 100644 --- a/topi/tests/python/test_topi_lrn.py +++ b/topi/tests/python/test_topi_lrn.py @@ -3,6 +3,7 @@ import tvm import topi from topi.util import get_const_tuple +from itertools import product def lrn_python(a_np, size, axis, bias, alpha, beta): """Local response norm operator in NCHW layout. @@ -32,10 +33,9 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): lrn_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - axis0, axis1, axis2, axis3 = a_np.shape radius = size // 2 sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - def sum_dot_values(i, j, k, l): + for i, j, k, l in product(*[range(_axis) for _axis in a_np.shape]): axis_size = a_np.shape[axis] if (axis == 1): #NCHW layout @@ -50,12 +50,6 @@ def sum_dot_values(i, j, k, l): sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ a_np[i, j, k, sum_start:sum_end]) - for i in range(axis0): - for j in range(axis1): - for k in range(axis2): - for l in range(axis3): - sum_dot_values(i, j, k, l) - sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) lrn_out = np.divide(a_np, sqr_sum_up) return lrn_out diff --git a/topi/tests/python_cpp/test_topi_lrn.py b/topi/tests/python_cpp/test_topi_lrn.py index 0af9ca9b8ac4..6f62e5143e27 100644 --- a/topi/tests/python_cpp/test_topi_lrn.py +++ b/topi/tests/python_cpp/test_topi_lrn.py @@ -4,6 +4,7 @@ import topi import logging from topi.util import get_const_tuple +from itertools import product def lrn_python(a_np, size, axis, bias, alpha, beta): """Local response normalization operator in NCHW layout. 
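The rewrite above, mirrored in test_top_level1.py and topi/tests/python/test_topi_lrn.py, folds the four explicit nested loops of the reference lrn_python into a single loop over itertools.product, leaving the per-element sqr_sum computation unchanged. A self-contained sketch of why the two forms are equivalent, with an illustrative shape that is not taken from the patch:

    # Sketch: product over the per-axis ranges visits exactly the same
    # (i, j, k, l) tuples, in the same order, as four nested for-loops.
    from itertools import product

    shape = (1, 3, 2, 2)  # illustrative only

    nested = [(i, j, k, l)
              for i in range(shape[0])
              for j in range(shape[1])
              for k in range(shape[2])
              for l in range(shape[3])]
    flattened = list(product(*[range(n) for n in shape]))

    assert nested == flattened  # identical index tuples and iteration order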
@@ -33,10 +34,9 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): lrn_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - axis0, axis1, axis2, axis3 = a_np.shape radius = size // 2 sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - def sum_dot_values(i, j, k, l): + for i, j, k, l in product(*[range(_axis) for _axis in a_np.shape]): axis_size = a_np.shape[axis] if (axis == 1): #NCHW layout @@ -51,12 +51,6 @@ def sum_dot_values(i, j, k, l): sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ a_np[i, j, k, sum_start:sum_end]) - for i in range(axis0): - for j in range(axis1): - for k in range(axis2): - for l in range(axis3): - sum_dot_values(i, j, k, l) - sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) lrn_out = np.divide(a_np, sqr_sum_up) return lrn_out From c2ca9c25b330c44bd4280920694bce681d98be77 Mon Sep 17 00:00:00 2001 From: pariksheet Date: Fri, 22 Jun 2018 09:53:13 +0530 Subject: [PATCH 12/12] Moved lrn_python and l2_normalize_python to topi.testing --- nnvm/tests/python/compiler/test_top_level1.py | 82 +------------------ topi/python/topi/testing/__init__.py | 2 + .../topi/testing/l2_normalize_python.py | 27 ++++++ topi/python/topi/testing/lrn_python.py | 53 ++++++++++++ topi/tests/python/test_topi_l2norm.py | 27 +----- topi/tests/python/test_topi_lrn.py | 53 +----------- topi/tests/python_cpp/test_topi_l2norm.py | 27 +----- topi/tests/python_cpp/test_topi_lrn.py | 53 +----------- 8 files changed, 94 insertions(+), 230 deletions(-) create mode 100644 topi/python/topi/testing/l2_normalize_python.py create mode 100644 topi/python/topi/testing/lrn_python.py diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index 678f8a8ccc46..37798d37f400 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -5,7 +5,6 @@ import nnvm.symbol as sym import nnvm.compiler from nnvm.testing.config import ctx_list -from itertools import product def helper(symbol, inputs, dtype, np_forward, np_backward=None, need_input=True, need_head_grads=True): @@ -371,61 +370,12 @@ def verify_lrn(ishape, size, axis, bias, alpha, beta): dtype = "float32" x_np = np.random.uniform(size=ishape).astype(dtype) - def lrn_python(a_np, size, axis, bias, alpha, beta): - """Local response normalization operator in NCHW layout. - - Parameters - ---------- - a_np : numpy.ndarray - 4-D with shape [batch, in_channel, in_height, in_width] - - size : int - normalization window size - - axis : int - input data layout channel axis - - bias : float - offset to avoid dividing by 0. 
constant value - - alpha : float - constant value - - beta : float - exponent constant value - - Returns - ------- - lrn_out : np.ndarray - 4-D with shape [batch, out_channel, out_height, out_width] - """ - radius = size // 2 - sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - for i, j, k, l in product(*[range(_axis) for _axis in a_np.shape]): - axis_size = a_np.shape[axis] - if (axis == 1): - #NCHW layout - sum_start = j-radius if j-radius >= 0 else 0 - sum_end = j+radius+1 if j+radius+1 < axis_size else axis_size - sqr_sum[i, j, k, l] = sum(a_np[i, sum_start:sum_end, k, l] * \ - a_np[i, sum_start:sum_end, k, l]) - elif (axis == 3): - #NHWC layout - sum_start = l-radius if l-radius >= 0 else 0 - sum_end = l+radius+1 if l+radius+1 < axis_size else axis_size - sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ - a_np[i, j, k, sum_start:sum_end]) - - sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) - lrn_out = np.divide(a_np, sqr_sum_up) - return lrn_out - for target, ctx in ctx_list(): graph, lib, _ = nnvm.compiler.build(y, target, {"x": ishape}) m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) out = m.get_output(0, tvm.nd.empty(ishape)) - out_np = lrn_python(x_np, size, axis, bias, alpha, beta) + out_np = topi.testing.lrn_python(x_np, size, axis, bias, alpha, beta) np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) #Checking LRN op followed by elementwise op relu @@ -436,7 +386,7 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) out = m.get_output(0, tvm.nd.empty(ishape)) - out_np = lrn_python(x_np, size, axis, bias, alpha, beta) + out_np = topi.testing.lrn_python(x_np, size, axis, bias, alpha, beta) out_np = (out_np > 0) * out_np np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) @@ -446,36 +396,12 @@ def verify_l2_normalize(ishape, eps, axis): dtype = "float32" x_np = np.random.uniform(size=ishape).astype(dtype) - def l2_normalize_python(a_np, eps, axis=None): - """L2 normalize operator in NCHW layout. 
- - Parameters - ---------- - a_np : numpy.ndarray - 4-D with shape [batch, in_channel, in_height, in_width] - - eps : float - epsilon constant value - axis : list of int - axis over the normalization applied - - Returns - ------- - l2_normalize_out : np.ndarray - 4-D with shape [batch, out_channel, out_height, out_width] - """ - dot_value = np.power(a_np, 2.0) - sqr_sum = np.sum(dot_value, axis, keepdims=True) - sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - l2_normalize_out = np.divide(a_np, sqrt_sum) - return l2_normalize_out - for target, ctx in ctx_list(): graph, lib, _ = nnvm.compiler.build(y, target, {"x": ishape}) m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) out = m.get_output(0, tvm.nd.empty(ishape)) - out_np = l2_normalize_python(x_np, eps, axis) + out_np = topi.testing.l2_normalize_python(x_np, eps, axis) np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) #Checking L2 normalization op followed by elementwise op relu @@ -486,7 +412,7 @@ def l2_normalize_python(a_np, eps, axis=None): m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) out = m.get_output(0, tvm.nd.empty(ishape)) - out_np = l2_normalize_python(x_np, eps, axis) + out_np = topi.testing.l2_normalize_python(x_np, eps, axis) out_np = (out_np > 0) * out_np np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) diff --git a/topi/python/topi/testing/__init__.py b/topi/python/topi/testing/__init__.py index 3731040e3a85..c91eea7958ea 100644 --- a/topi/python/topi/testing/__init__.py +++ b/topi/python/topi/testing/__init__.py @@ -16,3 +16,5 @@ from .reorg_python import reorg_python from .region_python import region_python from .shortcut_python import shortcut_python +from .lrn_python import lrn_python +from .l2_normalize_python import l2_normalize_python diff --git a/topi/python/topi/testing/l2_normalize_python.py b/topi/python/topi/testing/l2_normalize_python.py new file mode 100644 index 000000000000..98f1843233a7 --- /dev/null +++ b/topi/python/topi/testing/l2_normalize_python.py @@ -0,0 +1,27 @@ +# pylint: disable=invalid-name, line-too-long, unused-variable, too-many-locals +"""L2 normalize in python""" +import numpy as np + +def l2_normalize_python(a_np, eps, axis=None): + """L2 normalize operator in NCHW layout. + + Parameters + ---------- + a_np : numpy.ndarray + 4-D with shape [batch, in_channel, in_height, in_width] + + eps : float + epsilon constant value + axis : list of int + axis over the normalization applied + + Returns + ------- + l2_normalize_out : np.ndarray + 4-D with shape [batch, out_channel, out_height, out_width] + """ + dot_value = np.power(a_np, 2.0) + sqr_sum = np.sum(dot_value, axis, keepdims=True) + sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) + l2_normalize_out = np.divide(a_np, sqrt_sum) + return l2_normalize_out diff --git a/topi/python/topi/testing/lrn_python.py b/topi/python/topi/testing/lrn_python.py new file mode 100644 index 000000000000..4e44e8bcb635 --- /dev/null +++ b/topi/python/topi/testing/lrn_python.py @@ -0,0 +1,53 @@ +# pylint: disable=invalid-name, line-too-long, unused-variable, too-many-locals +"""LRN in python""" +from itertools import product +import numpy as np + +def lrn_python(a_np, size, axis, bias, alpha, beta): + """Local response normalization operator in NCHW layout. 
+ + Parameters + ---------- + a_np : numpy.ndarray + 4-D with shape [batch, in_channel, in_height, in_width] + + size : int + normalization window size + + axis : int + input data layout channel axis + + bias : float + offset to avoid dividing by 0. constant value + + alpha : float + constant value + + beta : float + exponent constant value + + Returns + ------- + lrn_out : np.ndarray + 4-D with shape [batch, out_channel, out_height, out_width] + """ + radius = size // 2 + sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) + for i, j, k, l in product(*[range(_axis) for _axis in a_np.shape]): + axis_size = a_np.shape[axis] + if axis == 1: + #NCHW layout + sum_start = j-radius if j-radius >= 0 else 0 + sum_end = j+radius+1 if j+radius+1 < axis_size else axis_size + sqr_sum[i, j, k, l] = sum(a_np[i, sum_start:sum_end, k, l] * \ + a_np[i, sum_start:sum_end, k, l]) + elif axis == 3: + #NHWC layout + sum_start = l-radius if l-radius >= 0 else 0 + sum_end = l+radius+1 if l+radius+1 < axis_size else axis_size + sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ + a_np[i, j, k, sum_start:sum_end]) + + sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) + lrn_out = np.divide(a_np, sqr_sum_up) + return lrn_out diff --git a/topi/tests/python/test_topi_l2norm.py b/topi/tests/python/test_topi_l2norm.py index 3c17005a56e1..b27a1dc27e72 100644 --- a/topi/tests/python/test_topi_l2norm.py +++ b/topi/tests/python/test_topi_l2norm.py @@ -3,30 +3,7 @@ import tvm import topi from topi.util import get_const_tuple - -def l2_normalize_python(a_np, eps, axis=None): - """L2 normalize operator in NCHW layout. - - Parameters - ---------- - a_np : numpy.ndarray - 4-D with shape [batch, in_channel, in_height, in_width] - - eps : float - epsilon constant value - axis : list of int - axis over the normalization applied - - Returns - ------- - l2_normalize_out : np.ndarray - 4-D with shape [batch, out_channel, out_height, out_width] - """ - dot_value = np.power(a_np, 2.0) - sqr_sum = np.sum(dot_value, axis, keepdims=True) - sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - l2_normalize_out = np.divide(a_np, sqrt_sum) - return l2_normalize_out +import topi.testing def verify_l2_normalize(ishape, eps, axis=None): @@ -35,7 +12,7 @@ def verify_l2_normalize(ishape, eps, axis=None): dtype = A.dtype a_np = np.random.uniform(size=ishape).astype(dtype) - b_np = l2_normalize_python(a_np, eps, axis) + b_np = topi.testing.l2_normalize_python(a_np, eps, axis) def check_device(device): ctx = tvm.context(device, 0) diff --git a/topi/tests/python/test_topi_lrn.py b/topi/tests/python/test_topi_lrn.py index 51446ebf712d..7d62aefe5f55 100644 --- a/topi/tests/python/test_topi_lrn.py +++ b/topi/tests/python/test_topi_lrn.py @@ -3,56 +3,7 @@ import tvm import topi from topi.util import get_const_tuple -from itertools import product - -def lrn_python(a_np, size, axis, bias, alpha, beta): - """Local response norm operator in NCHW layout. - - Parameters - ---------- - a_np : numpy.ndarray - 4-D with shape [batch, in_channel, in_height, in_width] - - size : int - normalization window size - - axis : int - input data layout channel axis - - bias : float - offset to avoid dividing by 0. 
constant value - - alpha : float - constant value - - beta : float - exponent constant value - - Returns - ------- - lrn_out : np.ndarray - 4-D with shape [batch, out_channel, out_height, out_width] - """ - radius = size // 2 - sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - for i, j, k, l in product(*[range(_axis) for _axis in a_np.shape]): - axis_size = a_np.shape[axis] - if (axis == 1): - #NCHW layout - sum_start = j-radius if j-radius >= 0 else 0 - sum_end = j+radius+1 if j+radius+1 < axis_size else axis_size - sqr_sum[i, j, k, l] = sum(a_np[i, sum_start:sum_end, k, l] * \ - a_np[i, sum_start:sum_end, k, l]) - elif (axis == 3): - #NHWC layout - sum_start = l-radius if l-radius >= 0 else 0 - sum_end = l+radius+1 if l+radius+1 < axis_size else axis_size - sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ - a_np[i, j, k, sum_start:sum_end]) - - sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) - lrn_out = np.divide(a_np, sqr_sum_up) - return lrn_out +import topi.testing def verify_lrn(shape, size, axis, bias, alpha, beta): A = tvm.placeholder(shape, name='A') @@ -60,7 +11,7 @@ def verify_lrn(shape, size, axis, bias, alpha, beta): dtype = A.dtype a_np = np.random.uniform(size=shape).astype(dtype) - b_np = lrn_python(a_np, size, axis, bias, alpha, beta) + b_np = topi.testing.lrn_python(a_np, size, axis, bias, alpha, beta) def check_device(device): if not tvm.module.enabled(device): diff --git a/topi/tests/python_cpp/test_topi_l2norm.py b/topi/tests/python_cpp/test_topi_l2norm.py index 74396def5d76..08799f76c5c3 100644 --- a/topi/tests/python_cpp/test_topi_l2norm.py +++ b/topi/tests/python_cpp/test_topi_l2norm.py @@ -4,30 +4,7 @@ import topi import logging from topi.util import get_const_tuple - -def l2_normalize_python(a_np, eps, axis=None): - """L2 normalize operator in NCHW layout. - - Parameters - ---------- - a_np : numpy.ndarray - 4-D with shape [batch, in_channel, in_height, in_width] - - eps : float - epsilon constant value - axis : list of int - axis over the normalization applied - - Returns - ------- - l2_normalize_out : np.ndarray - 4-D with shape [batch, out_channel, out_height, out_width] - """ - dot_value = np.power(a_np, 2.0) - sqr_sum = np.sum(dot_value, axis, keepdims=True) - sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - l2_normalize_out = np.divide(a_np, sqrt_sum) - return l2_normalize_out +import topi.testing def verify_l2_normalize(shape, eps, axis=None): '''Verify l2 normalization operator by comparing outputs from tvm and numpy implementation''' @@ -36,7 +13,7 @@ def verify_l2_normalize(shape, eps, axis=None): dtype = A.dtype a_np = np.random.uniform(size=shape).astype(dtype) - b_np = l2_normalize_python(a_np, eps, axis) + b_np = topi.testing.l2_normalize_python(a_np, eps, axis) def check_device(device): if not tvm.module.enabled(device): diff --git a/topi/tests/python_cpp/test_topi_lrn.py b/topi/tests/python_cpp/test_topi_lrn.py index 6f62e5143e27..d685643a9406 100644 --- a/topi/tests/python_cpp/test_topi_lrn.py +++ b/topi/tests/python_cpp/test_topi_lrn.py @@ -4,56 +4,7 @@ import topi import logging from topi.util import get_const_tuple -from itertools import product - -def lrn_python(a_np, size, axis, bias, alpha, beta): - """Local response normalization operator in NCHW layout. 
- - Parameters - ---------- - a_np : numpy.ndarray - 4-D with shape [batch, in_channel, in_height, in_width] - - size : int - normalization window size - - axis : int - input data layout channel axis - - bias : float - offset to avoid dividing by 0. constant value - - alpha : float - constant value - - beta : float - exponent constant value - - Returns - ------- - lrn_out : np.ndarray - 4-D with shape [batch, out_channel, out_height, out_width] - """ - radius = size // 2 - sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - for i, j, k, l in product(*[range(_axis) for _axis in a_np.shape]): - axis_size = a_np.shape[axis] - if (axis == 1): - #NCHW layout - sum_start = j-radius if j-radius >= 0 else 0 - sum_end = j+radius+1 if j+radius+1 < axis_size else axis_size - sqr_sum[i, j, k, l] = sum(a_np[i, sum_start:sum_end, k, l] * \ - a_np[i, sum_start:sum_end, k, l]) - elif (axis == 3): - #NHWC layout - sum_start = l-radius if l-radius >= 0 else 0 - sum_end = l+radius+1 if l+radius+1 < axis_size else axis_size - sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ - a_np[i, j, k, sum_start:sum_end]) - - sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) - lrn_out = np.divide(a_np, sqr_sum_up) - return lrn_out +import topi.testing def verify_lrn(shape, size, axis, bias, alpha, beta): '''Verify Local response normalization operator by comparing outputs from tvm and numpy implementation''' @@ -62,7 +13,7 @@ def verify_lrn(shape, size, axis, bias, alpha, beta): dtype = A.dtype a_np = np.random.uniform(size=shape).astype(dtype) - b_np = lrn_python(a_np, size, axis, bias, alpha, beta) + b_np = topi.testing.lrn_python(a_np, size, axis, bias, alpha, beta) def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device)
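A minimal standalone sketch of the relocated reference helpers (illustrative only, not part of any patch above): it relies solely on the topi.testing exports and call signatures introduced by PATCH 12/12, while the concrete tensor shape and LRN/L2 parameter values are arbitrary examples chosen for the check.

    import numpy as np
    import topi.testing

    # Arbitrary NCHW input for the reference implementations.
    a_np = np.random.uniform(size=(1, 3, 5, 5)).astype("float32")

    # L2 normalization over the channel axis: with a negligible eps, every
    # (h, w) position should end up with unit L2 norm across channels.
    b_np = topi.testing.l2_normalize_python(a_np, 1e-20, axis=(1,))
    np.testing.assert_allclose(np.sum(b_np * b_np, axis=1),
                               np.ones((1, 5, 5)),
                               rtol=1e-5, atol=1e-5)

    # LRN with a window of size 1 collapses to the closed form
    # a_np / (bias + alpha * a_np**2 / size)**beta, which gives a
    # convenient cross-check of lrn_python.
    size, axis, bias, alpha, beta = 1, 1, 2.0, 0.0001, 0.75
    c_np = topi.testing.lrn_python(a_np, size, axis, bias, alpha, beta)
    expected = a_np / np.power(bias + alpha * a_np * a_np / size, beta)
    np.testing.assert_allclose(c_np, expected, rtol=1e-5, atol=1e-5)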