diff --git a/src/operator/l2_normalization-inl.h b/src/operator/l2_normalization-inl.h
index d53e0c5caf98..c7e71424ada9 100644
--- a/src/operator/l2_normalization-inl.h
+++ b/src/operator/l2_normalization-inl.h
@@ -216,7 +216,7 @@ class L2NormalizationOp : public Operator {
     }
   }
 
- private:
+ protected:
   L2NormalizationParam param_;
 };  // class L2NormalizationOp
 
diff --git a/src/operator/l2_normalization.cc b/src/operator/l2_normalization.cc
index f2f485ae6d1b..6801a0a20576 100644
--- a/src/operator/l2_normalization.cc
+++ b/src/operator/l2_normalization.cc
@@ -23,13 +23,111 @@
  * \brief l2 normalization operator
 */
 #include "./l2_normalization-inl.h"
+
+/* VisualStudio only supports openmp 2.0 */
+#ifdef _MSC_VER
+#define collapse(x)
+#endif
+
 namespace mxnet {
 namespace op {
+
+template<typename DType>
+class L2NormalizationOpCPU : public L2NormalizationOp<cpu, DType> {
+ public:
+  explicit L2NormalizationOpCPU(L2NormalizationParam p)
+    : L2NormalizationOp<cpu, DType>(p) {}
+  void Forward(const OpContext &ctx, const std::vector<TBlob> &in_data,
+               const std::vector<OpReqType> &req,
+               const std::vector<TBlob> &out_data,
+               const std::vector<TBlob> &aux_args) override {
+    using namespace mshadow;
+    using namespace mshadow::expr;
+    if (req[l2_normalization::kOut] == kNullOp) return;
+    CHECK_EQ(req[l2_normalization::kOut], kWriteTo);
+    CHECK_EQ(in_data.size(), 1U);
+    CHECK_EQ(out_data.size(), 2U);
+    Stream<cpu> *s = ctx.get_stream<cpu>();
+    TShape orig_shape = in_data[l2_normalization::kData].shape_;
+    auto omp_threads = engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
+    if (this->param_.mode == l2_normalization::kInstance) {
+      Shape<2> dshape = Shape2(orig_shape[0],
+        orig_shape.ProdShape(1, orig_shape.ndim()));
+      Tensor<cpu, 2, DType> data = in_data[l2_normalization::kData]
+        .get_with_shape<cpu, 2, DType>(dshape, s);
+      Tensor<cpu, 2, DType> out = out_data[l2_normalization::kOut]
+        .get_with_shape<cpu, 2, DType>(dshape, s);
+      Tensor<cpu, 1, DType> norm = out_data[l2_normalization::kNorm].get<cpu, 1, DType>(s);
+#pragma omp parallel for num_threads(omp_threads)
+      for (int shape0 = 0; shape0 < static_cast<int>(dshape[0]); shape0++) {
+        norm[shape0] = DType(this->param_.eps);
+        for (int shape1 = 0; shape1 < static_cast<int>(dshape[1]); shape1++) {
+          norm[shape0] += data[shape0][shape1] * data[shape0][shape1];
+        }
+        norm[shape0] = std::sqrt(norm[shape0]);
+        for (int shape1 = 0; shape1 < static_cast<int>(dshape[1]); shape1++) {
+          out[shape0][shape1] = data[shape0][shape1] / norm[shape0];
+        }
+      }
+    } else if (this->param_.mode == l2_normalization::kChannel) {
+      CHECK_GE(orig_shape.ndim(), 3U);
+      Shape<3> dshape = Shape3(orig_shape[0], orig_shape[1],
+        orig_shape.ProdShape(2, orig_shape.ndim()));
+      Tensor<cpu, 3, DType> data = in_data[l2_normalization::kData]
+        .get_with_shape<cpu, 3, DType>(dshape, s);
+      Tensor<cpu, 3, DType> out = out_data[l2_normalization::kOut]
+        .get_with_shape<cpu, 3, DType>(dshape, s);
+      Shape<2> norm_shape = Shape2(dshape[0], dshape[2]);
+      Tensor<cpu, 2, DType> norm = out_data[l2_normalization::kNorm]
+        .get_with_shape<cpu, 2, DType>(norm_shape, s);
+#pragma omp parallel for num_threads(omp_threads) collapse(2)
+      for (int shape0 = 0; shape0 < static_cast<int>(dshape[0]); shape0++) {
+        for (int shape2 = 0; shape2 < static_cast<int>(dshape[2]); shape2++) {
+          norm[shape0][shape2] = DType(this->param_.eps);
+          for (int shape1 = 0; shape1 < static_cast<int>(dshape[1]); shape1++) {
+            norm[shape0][shape2] += data[shape0][shape1][shape2] * data[shape0][shape1][shape2];
+          }
+          norm[shape0][shape2] = std::sqrt(norm[shape0][shape2]);
+          for (int shape1 = 0; shape1 < static_cast<int>(dshape[1]); shape1++) {
+            out[shape0][shape1][shape2] = data[shape0][shape1][shape2] / norm[shape0][shape2];
+          }
+        }
+      }
+    } else if (this->param_.mode == l2_normalization::kSpatial) {
+      CHECK_GE(orig_shape.ndim(), 3U);
+      Shape<3> dshape = Shape3(orig_shape[0], orig_shape[1],
+        orig_shape.ProdShape(2, orig_shape.ndim()));
+      Tensor<cpu, 3, DType> data = in_data[l2_normalization::kData]
+        .get_with_shape<cpu, 3, DType>(dshape, s);
+      Tensor<cpu, 3, DType> out = out_data[l2_normalization::kOut]
+        .get_with_shape<cpu, 3, DType>(dshape, s);
+      Shape<2> norm_shape = Shape2(dshape[0], dshape[1]);
+      Tensor<cpu, 2, DType> norm = out_data[l2_normalization::kNorm]
+        .get_with_shape<cpu, 2, DType>(norm_shape, s);
+#pragma omp parallel for num_threads(omp_threads) collapse(2)
+      for (int shape0 = 0; shape0 < static_cast<int>(dshape[0]); shape0++) {
+        for (int shape1 = 0; shape1 < static_cast<int>(dshape[1]); shape1++) {
+          norm[shape0][shape1] = DType(this->param_.eps);
+          for (int shape2 = 0; shape2 < static_cast<int>(dshape[2]); shape2++) {
+            norm[shape0][shape1] += data[shape0][shape1][shape2] * data[shape0][shape1][shape2];
+          }
+          norm[shape0][shape1] = std::sqrt(norm[shape0][shape1]);
+          for (int shape2 = 0; shape2 < static_cast<int>(dshape[2]); shape2++) {
+            out[shape0][shape1][shape2] = data[shape0][shape1][shape2] / norm[shape0][shape1];
+          }
+        }
+      }
+    } else {
+      LOG(FATAL) << "Unexpected mode in l2 normalization";
+    }
+  }
+};
+
 template<>
 Operator* CreateOp<cpu>(L2NormalizationParam param, int dtype) {
   Operator* op = nullptr;
   MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
-    op = new L2NormalizationOp<cpu, DType>(param);
+    op = new L2NormalizationOpCPU<DType>(param);
   });
   return op;
 }
@@ -37,7 +135,7 @@ Operator* CreateOp<cpu>(L2NormalizationParam param, int dtype) {
 // DO_BIND_DISPATCH comes from static_operator_common.h
 Operator* L2NormalizationProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
                                                 std::vector<int> *in_type) const {
-  DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0));
+  DO_BIND_DISPATCH(CreateOp, this->param_, in_type->at(0));
 }
 
 DMLC_REGISTER_PARAMETER(L2NormalizationParam);