From 3efe962a67632138648f0bb8c2c4bfd98b1f5240 Mon Sep 17 00:00:00 2001
From: tcd
Date: Mon, 3 Sep 2018 20:47:23 +0800
Subject: [PATCH 1/5] Add stable nrm2 for L2 normalization

---
 src/operator/l2_normalization-inl.h | 42 ++++++++++++++---------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/src/operator/l2_normalization-inl.h b/src/operator/l2_normalization-inl.h
index d53e0c5caf98..6cece040c213 100644
--- a/src/operator/l2_normalization-inl.h
+++ b/src/operator/l2_normalization-inl.h
@@ -34,6 +34,7 @@
 #include <utility>
 #include "./operator_common.h"
 #include "./mshadow_op.h"
+#include "./tensor/broadcast_reduce_op.h"
 
 namespace mxnet {
 namespace op {
@@ -87,6 +88,10 @@ class L2NormalizationOp : public Operator {
     Stream<xpu> *s = ctx.get_stream<xpu>();
     TShape orig_shape = in_data[l2_normalization::kData].shape_;
     if (param_.mode == l2_normalization::kInstance) {
+      TShape small = out_data[1].shape_;
+      ReduceAxesComputeImpl<xpu, mshadow_op::nrm2, false,
+        mxnet::op::mshadow_op::identity>(ctx, in_data, req,
+        { out_data[l2_normalization::kNorm] }, small);
       Shape<2> dshape = Shape2(orig_shape[0],
         orig_shape.ProdShape(1, orig_shape.ndim()));
       Tensor<xpu, 2, DType> data = in_data[l2_normalization::kData]
@@ -94,15 +99,13 @@
         .get_with_shape<xpu, 2, DType>(dshape, s);
       Tensor<xpu, 2, DType> out = out_data[l2_normalization::kOut]
         .get_with_shape<xpu, 2, DType>(dshape, s);
       Tensor<xpu, 1, DType> norm = out_data[l2_normalization::kNorm].get<xpu, 1, DType>(s);
-      norm = sumall_except_dim<0>(F<mxnet::op::mshadow_op::square>(data));
-      MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
-        mxnet_op::Kernel<mxnet_op::op_with_req<mshadow_op::plus, Req>, xpu>::Launch(
-          s, norm.size(0), norm.dptr_, norm.dptr_, DType(param_.eps));
-      });
-      norm = F<mxnet::op::mshadow_op::square_root>(norm);
-      out = data / broadcast<0>(norm, out.shape_);
+      out = data / mshadow::expr::broadcast<0>(norm, out.shape_);
     } else if (param_.mode == l2_normalization::kChannel) {
       CHECK_GE(orig_shape.ndim(), 3U);
+      TShape small = out_data[1].shape_;
+      ReduceAxesComputeImpl<xpu, mshadow_op::nrm2, false,
+      mxnet::op::mshadow_op::identity>(ctx, in_data, req,
+        { out_data[l2_normalization::kNorm] }, small);
       Shape<3> dshape = Shape3(orig_shape[0], orig_shape[1],
         orig_shape.ProdShape(2, orig_shape.ndim()));
       Tensor<xpu, 3, DType> data = in_data[l2_normalization::kData]
@@ -112,15 +115,13 @@
         .get_with_shape<xpu, 3, DType>(dshape, s);
       Tensor<xpu, 3, DType> out = out_data[l2_normalization::kOut]
         .get_with_shape<xpu, 3, DType>(dshape, s);
       Shape<2> norm_shape = Shape2(dshape[0], dshape[2]);
       Tensor<xpu, 2, DType> norm = out_data[l2_normalization::kNorm]
         .get_with_shape<xpu, 2, DType>(norm_shape, s);
-      norm = reduce_with_axis<mshadow::red::sum, false>(F<mxnet::op::mshadow_op::square>(data), 1);
-      MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
-        mxnet_op::Kernel<mxnet_op::op_with_req<mshadow_op::plus, Req>, xpu>::Launch(
-          s, norm.size(0) * norm.size(1), norm.dptr_, norm.dptr_, DType(param_.eps));
-      });
-      norm = F<mxnet::op::mshadow_op::square_root>(norm);
       out = data / broadcast_with_axis(norm, 0, orig_shape[1]);
     } else if (param_.mode == l2_normalization::kSpatial) {
       CHECK_GE(orig_shape.ndim(), 3U);
+      TShape small = out_data[1].shape_;
+      ReduceAxesComputeImpl<xpu, mshadow_op::nrm2, false,
+        mxnet::op::mshadow_op::identity>(ctx, in_data, req,
+        { out_data[l2_normalization::kNorm] }, small);
       Shape<3> dshape = Shape3(orig_shape[0], orig_shape[1],
         orig_shape.ProdShape(2, orig_shape.ndim()));
       Tensor<xpu, 3, DType> data = in_data[l2_normalization::kData]
@@ -130,12 +131,6 @@
       Shape<2> norm_shape = Shape2(dshape[0], dshape[1]);
       Tensor<xpu, 2, DType> norm = out_data[l2_normalization::kNorm]
         .get_with_shape<xpu, 2, DType>(norm_shape, s);
-      norm = reduce_with_axis<mshadow::red::sum, false>(F<mxnet::op::mshadow_op::square>(data), 2);
-      MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
-        mxnet_op::Kernel<mxnet_op::op_with_req<mshadow_op::plus, Req>, xpu>::Launch(
-          s, norm.size(0) * norm.size(1), norm.dptr_, norm.dptr_, DType(param_.eps));
-      });
-      norm = F<mxnet::op::mshadow_op::square_root>(norm);
       out = data / broadcast_with_axis(norm, 1, dshape[2]);
     } else {
       LOG(FATAL) << "Unexpected mode in l2 normalization";
@@ -171,8 +166,8 @@ class L2NormalizationOp : public Operator {
         .get_space_typed<xpu, 1, DType>(mshadow::Shape1(data.shape_[0]), s);
       temp = sumall_except_dim<0>(grad_out * data);
       Assign(grad_in, req[l2_normalization::kData],
-             (grad_out - data * broadcast<0>(temp, data.shape_)) /
-             broadcast<0>(norm, data.shape_));
+             (grad_out - data * mshadow::expr::broadcast<0>(temp, data.shape_)) /
+             mshadow::expr::broadcast<0>(norm, data.shape_));
     } else if (param_.mode == l2_normalization::kChannel) {
       CHECK_GE(orig_shape.ndim(), 3U);
       Shape<3> dshape = Shape3(orig_shape[0], orig_shape[1],
@@ -314,6 +309,11 @@ class L2NormalizationProp : public OperatorProperty {
     return {{out_grad[l2_normalization::kOut], in_grad[l2_normalization::kData]}};
   }
 
+  std::vector<ResourceRequest> ForwardResource( 
+    const std::vector<TShape> &in_shape) const override {
+    return{ ResourceRequest::kTempSpace };
+  }
+
   std::vector<ResourceRequest> BackwardResource(
     const std::vector<TShape> &in_shape) const override {
     return {ResourceRequest::kTempSpace};

From e7b84802aebc35845f5653f87f7d88e705dd02e6 Mon Sep 17 00:00:00 2001
From: tcd
Date: Mon, 3 Sep 2018 21:04:53 +0800
Subject: [PATCH 2/5] Add stable nrm2 for L2 normalization, fix whitespace

---
 src/operator/l2_normalization-inl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/operator/l2_normalization-inl.h b/src/operator/l2_normalization-inl.h
index 6cece040c213..47e37c766ea4 100644
--- a/src/operator/l2_normalization-inl.h
+++ b/src/operator/l2_normalization-inl.h
@@ -120,7 +120,7 @@ class L2NormalizationOp : public Operator {
       CHECK_GE(orig_shape.ndim(), 3U);
       TShape small = out_data[1].shape_;
       ReduceAxesComputeImpl<xpu, mshadow_op::nrm2, false,
-      mxnet::op::mshadow_op::identity>(ctx, in_data, req,
+        mxnet::op::mshadow_op::identity>(ctx, in_data, req,
         { out_data[l2_normalization::kNorm] }, small);
       Shape<3> dshape = Shape3(orig_shape[0], orig_shape[1],
         orig_shape.ProdShape(2, orig_shape.ndim()));
@@ -309,7 +309,7 @@ class L2NormalizationProp : public OperatorProperty {
     return {{out_grad[l2_normalization::kOut], in_grad[l2_normalization::kData]}};
   }
 
-  std::vector<ResourceRequest> ForwardResource( 
+  std::vector<ResourceRequest> ForwardResource(
     const std::vector<TShape> &in_shape) const override {
     return{ ResourceRequest::kTempSpace };
   }

From 84b3643723d640299f0349d75a4e789322c18844 Mon Sep 17 00:00:00 2001
From: TccccD <1213978582@qq.com>
Date: Wed, 14 Nov 2018 22:24:26 +0800
Subject: [PATCH 3/5] test 1

---
 src/operator/tensor/broadcast_reduce_op.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h
index 1edcb5a74a77..996d3f9046f7 100644
--- a/src/operator/tensor/broadcast_reduce_op.h
+++ b/src/operator/tensor/broadcast_reduce_op.h
@@ -536,6 +536,9 @@ void ReduceAxesComputeImpl(const OpContext& ctx,
   TShape src_shape, dst_shape;
   BroadcastReduceShapeCompact(inputs[0].shape_, small, &src_shape, &dst_shape);
   Stream<xpu> *s = ctx.get_stream<xpu>();
+  LOG(INFO) << "small:" << small;
+  LOG(INFO) << "inputs:" << inputs[0].shape_ << "src_shape" << src_shape;
+  LOG(INFO) << "outputs:" << outputs[0].shape_ << "dst_shape" << dst_shape;
   MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
     const TBlob in_data = inputs[0].reshape(src_shape);
     const TBlob out_data = outputs[0].reshape(dst_shape);

From a689ace035baffdde4cce405c07320d4ecb9119b Mon Sep 17 00:00:00 2001
From: TccccD <1213978582@qq.com>
Date: Thu, 15 Nov 2018 00:13:47 +0800
Subject: [PATCH 4/5] test 2

---
 src/operator/tensor/broadcast_reduce_op.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h
index 996d3f9046f7..15a9d8c76d13 100644
--- a/src/operator/tensor/broadcast_reduce_op.h
+++ b/src/operator/tensor/broadcast_reduce_op.h
@@ -536,7 +536,6 @@ void ReduceAxesComputeImpl(const OpContext& ctx,
   TShape src_shape, dst_shape;
   BroadcastReduceShapeCompact(inputs[0].shape_, small, &src_shape, &dst_shape);
   Stream<xpu> *s = ctx.get_stream<xpu>();
-  LOG(INFO) << "small:" << small;
   LOG(INFO) << "inputs:" << inputs[0].shape_ << "src_shape" << src_shape;
   LOG(INFO) << "outputs:" << outputs[0].shape_ << "dst_shape" << dst_shape;
   MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
     const TBlob in_data = inputs[0].reshape(src_shape);
     const TBlob out_data = outputs[0].reshape(dst_shape);

From b7914845ae292dc2a8f9eee2e3255cbeec7913c7 Mon Sep 17 00:00:00 2001
From: TcD
Date: Thu, 15 Nov 2018 09:44:52 +0800
Subject: [PATCH 5/5] no change, but it is ok.

---
 src/operator/tensor/broadcast_reduce_op.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h
index 15a9d8c76d13..1edcb5a74a77 100644
--- a/src/operator/tensor/broadcast_reduce_op.h
+++ b/src/operator/tensor/broadcast_reduce_op.h
@@ -536,8 +536,6 @@ void ReduceAxesComputeImpl(const OpContext& ctx,
   TShape src_shape, dst_shape;
   BroadcastReduceShapeCompact(inputs[0].shape_, small, &src_shape, &dst_shape);
   Stream<xpu> *s = ctx.get_stream<xpu>();
-  LOG(INFO) << "inputs:" << inputs[0].shape_ << "src_shape" << src_shape;
-  LOG(INFO) << "outputs:" << outputs[0].shape_ << "dst_shape" << dst_shape;
   MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
     const TBlob in_data = inputs[0].reshape(src_shape);
     const TBlob out_data = outputs[0].reshape(dst_shape);