This repository was archived by the owner on Nov 17, 2023. It is now read-only.
Merged
26 commits (changes shown from all commits):
3bf48b8  CPU optimization for ActivationOp (Oct 16, 2017)
40b13f1  lint (Oct 16, 2017)
6d4a2bb  Trigger build (cjolivier01, Oct 17, 2017)
df49eae  Merge remote-tracking branch 'apache/master' into activation_opt_pr (cjolivier01, Oct 17, 2017)
5c92218  Merge remote-tracking branch 'apache/master' into activation_opt_pr (cjolivier01, Oct 17, 2017)
db2767d  Trigger build (cjolivier01, Oct 18, 2017)
bf58bee  Negative begin and end support for csr slice (#8241) (ZiyueHuang, Oct 17, 2017)
4ecb763  Preparing for 0.12.0.rc0: Final changes before RC (#8301) (mbaijal, Oct 17, 2017)
618c2cc  Enable smoothing in softmax operator (#8125) (KellenSunderland, Oct 17, 2017)
cc93069  v0.12 regression: Fix registration of children for Block (#8277) (leezu, Oct 17, 2017)
8730f7a  Revert "[CMAKE] Fix windows cmake build" (#8311) (cjolivier01, Oct 17, 2017)
252227e  fixed broken links. https was pointing to http for mxnet.io (#8300) (thinksanky, Oct 17, 2017)
310bbeb  Update rnn.md (#8320) (szha, Oct 18, 2017)
83e96a9  fluent methods for missed ops (#8329) (szha, Oct 18, 2017)
dc4c3c8  update ps lite (#8327) (piiswrong, Oct 18, 2017)
28b76e3  Fix unused type warning (#8316) (cjolivier01, Oct 18, 2017)
55068f7  Trigger build (Oct 20, 2017)
4065639  Trigger build (cjolivier01, Oct 21, 2017)
2cf83cb  Misc fixes for sparse distributed training (#8345) (eric-haibin-lin, Oct 21, 2017)
f4c57aa  Fix the Readme (#8369) (mbaijal, Oct 21, 2017)
68ea95f  Allow test to converge (#8351) (cjolivier01, Oct 21, 2017)
2bb9e94  Update cudnn_algoreg-inl.h (#7988) (solin319, Oct 21, 2017)
52adc56  [Perl] emulate Python zip() for Perl (#8192) (tlby, Oct 21, 2017)
fa80a31  add profile option for frontend profiling to image script (#8171) (szha, Oct 21, 2017)
9795461  Fix Typo (classification) (#8376) (0x6a62, Oct 21, 2017)
d60707c  Merge remote-tracking branch 'apache/master' into activation_opt_pr (cjolivier01, Oct 22, 2017)
35 changes: 27 additions & 8 deletions src/operator/activation-inl.h
@@ -22,6 +22,7 @@
* \brief Activation operator
* \author Bing Xu
*/

#ifndef MXNET_OPERATOR_ACTIVATION_INL_H_
#define MXNET_OPERATOR_ACTIVATION_INL_H_

@@ -34,6 +35,7 @@
#include <vector>
#include <utility>
#include "./operator_common.h"
#include "./mxnet_op.h"

namespace mxnet {
namespace op {
@@ -75,9 +77,16 @@ class ActivationOp : public Operator {
CHECK_EQ(in_data.size(), 1U);
CHECK_EQ(out_data.size(), 1U);
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 2, DType> data = in_data[activation::kData].FlatTo2D<xpu, DType>(s);
Tensor<xpu, 2, DType> out = out_data[activation::kOut].FlatTo2D<xpu, DType>(s);
Assign(out, req[activation::kOut], F<ForwardOp>(data));
const TBlob& input = in_data[activation::kData];
const size_t sz = input.shape_.Size();
if (sz) {
MXNET_ASSIGN_REQ_SWITCH(req[activation::kOut], Req, {
mxnet_op::Kernel<mxnet_op::op_with_req<ForwardOp, Req>, xpu>::Launch(
s, sz,
out_data[activation::kOut].dptr<DType>(),
input.dptr<DType>());
});
}
}

virtual void Backward(const OpContext &ctx,
@@ -93,14 +102,24 @@
CHECK(in_data.size() == 1 && in_grad.size() == 1);
CHECK_EQ(req.size(), 1U);
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 2, DType> m_out_grad = out_grad[activation::kOut].FlatTo2D<xpu, DType>(s);
Tensor<xpu, 2, DType> m_out_data = out_data[activation::kOut].FlatTo2D<xpu, DType>(s);
Tensor<xpu, 2, DType> m_in_grad = in_grad[activation::kData].FlatTo2D<xpu, DType>(s);
Assign(m_in_grad, req[activation::kData], F<BackwardOp>(m_out_data) * m_out_grad);
const TBlob& m_out_grad = out_grad[activation::kOut];
const TBlob& m_out_data = out_data[activation::kOut];
const TBlob& m_in_grad = in_grad[activation::kData];
const size_t sz = m_out_data.shape_.Size();
if (sz) {
MXNET_ASSIGN_REQ_SWITCH(req[activation::kData], Req, {
mxnet_op::Kernel<mxnet_op::op_with_req<
mxnet::op::mxnet_op::backward_grad<BackwardOp>, Req>, xpu>::Launch(
s, sz,
m_in_grad.dptr<DType>(),
m_out_grad.dptr<DType>(),
m_out_data.dptr<DType>());
});
}
}
}; // class ActivationOp

// Decalre Factory function, used for dispatch specialization
// Declare Factory function, used for dispatch specialization
template<typename xpu>
Operator* CreateOp(ActivationParam type, int dtype, const TShape& dshape);

14 changes: 14 additions & 0 deletions src/operator/mxnet_op.h
@@ -215,6 +215,20 @@ struct set_zero {
}
};

/*! \brief Binary op backward gradient OP wrapper */
template<typename GRAD_OP>
struct backward_grad {
/* \brief Backward calc with grad
* \param a - output grad
* \param args... - data to grad calculation op (what this is -- input, output, etc. -- varies)
* \return input grad
*/
template<typename DType, typename ...Args>
MSHADOW_XINLINE static DType Map(DType a, Args... args) {
return DType(a * GRAD_OP::Map(args...));
}
};
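
For reference, this wrapper is what the new ActivationOp::Backward launches: the kernel passes the output gradient as the first argument and the saved output as the remaining argument, so backward_grad<GRAD_OP> yields in_grad = out_grad * GRAD_OP(out_data) element-wise. A minimal standalone sketch follows; sigmoid_grad is only an example op, and the MSHADOW_XINLINE stand-in is a plain inline.

#include <iostream>

#define MSHADOW_XINLINE inline  // stand-in for the real mshadow macro

// Example element-wise gradient op: for sigmoid, d(out)/d(in) = out * (1 - out).
struct sigmoid_grad {
  template <typename DType>
  MSHADOW_XINLINE static DType Map(DType out) { return out * (DType(1) - out); }
};

// Copy of the wrapper added above, repeated here so the sketch stands alone.
template <typename GRAD_OP>
struct backward_grad {
  template <typename DType, typename... Args>
  MSHADOW_XINLINE static DType Map(DType a, Args... args) {
    return DType(a * GRAD_OP::Map(args...));
  }
};

int main() {
  double out_grad = 0.5, out_data = 0.8;
  // in_grad = out_grad * sigmoid_grad(out_data) = 0.5 * 0.8 * 0.2 = 0.08
  std::cout << backward_grad<sigmoid_grad>::Map(out_grad, out_data) << '\n';
}
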

/*! \brief Select assignment operation based upon the req value
* Also useful for mapping mshadow Compute (F<OP>) to Kernel<OP>::Launch
*/
48 changes: 38 additions & 10 deletions tests/cpp/include/test_op.h
@@ -100,7 +100,8 @@ class BasicOperatorData {
#endif
, initializeForward_(0) // unit testing may call inits in any order based
, initializeBackward_(0) // upon its use-case (ie may not want to run forward pass first)
, initializeCallback_(0) {
, initializeCallback_(0)
, generator_(new std::mt19937()) {
opContext_.is_train = true;
opContext_.run_ctx.stream = nullptr;

@@ -123,10 +124,14 @@
shape_input_vec_.resize(opProp.ListArguments().size());
op_.reset(opProp.CreateOperatorEx(getContext(), &shape_input_vec_, in_type));
if (op_) {
const size_t output_count = opProp.ListOutputs().size();
const size_t aux_count = opProp.ListAuxiliaryStates().size();
// Figure out what sort of blobs we need to allocate
std::vector<TShape> out_shape, aux_shape;
out_shape.resize(output_count);
aux_shape.resize(aux_count);
opProp.InferShape(&shape_input_vec_, &out_shape, &aux_shape);
std::vector<int> out_type, aux_type;
std::vector<int> out_type(output_count, -1), aux_type(aux_count, -1);
opProp.InferType(in_type, &out_type, &aux_type);

// Allocate top blobs (input)
@@ -174,9 +179,9 @@ class BasicOperatorData {
initForward(opProp, in_type);
if (!initializeBackward_++) {
for (size_t x = 0, n = static_cast<size_t>(opProp.NumVisibleOutputs()); x < n; ++x) {
CHECK_LT(x, c_.blob_input_vec_.size());
allocateBlob(&c_.blob_out_grad_, c_.blob_input_vec_[x].shape_,
false, c_.blob_input_vec_[x].type_flag_);
CHECK_LT(x, c_.blob_output_vec_.size());
allocateBlob(&c_.blob_out_grad_, c_.blob_output_vec_[x].shape_,
false, c_.blob_output_vec_[x].type_flag_);
}

for (size_t x = 0, n = c_.blob_input_vec_.size(); x < n; ++x) {
@@ -197,6 +202,7 @@

/*! \brief Run operator forward */
void forward(const size_t count = 1) {
const std::vector<OpReqType> req(c_.blob_output_vec_.size(), kWriteTo);
// Possibly move data to/from CPU and GPU (outside of timing scope)
MXNET_CUDA_ONLY(std::unique_ptr<GPUOpData> gpuData(isGPU_ ?
new GPUOpData(c_, &opContext_) : nullptr));
@@ -206,15 +212,15 @@
for (size_t x = 0; x < count; ++x) {
op()->Forward(opContext_,
c_.blob_input_vec_,
{kWriteTo, kWriteTo, kWriteTo},
req,
c_.blob_output_vec_,
c_.blob_aux_states_);
}
} else {
for (size_t x = 0; x < count; ++x) {
MXNET_CUDA_ONLY(op()->Forward(opContext_,
gpuData->blob_input_vec_,
{kWriteTo, kWriteTo, kWriteTo},
req,
gpuData->blob_output_vec_,
gpuData->blob_aux_states_));
}
@@ -223,6 +229,7 @@

/*! \brief Run operator backwards */
void backward(const size_t count = 1) {
const std::vector<OpReqType> req(c_.blob_output_vec_.size(), kWriteTo);
// Possibly move data to/from CPU and GPU (outside of timing scope)
MXNET_CUDA_ONLY(std::unique_ptr<GPUOpData> gpuData(isGPU_ ?
new GPUOpData(c_, &opContext_) : nullptr));
@@ -234,7 +241,7 @@
c_.blob_out_grad_,
c_.blob_input_vec_,
c_.blob_output_vec_,
{kWriteTo, kWriteTo, kWriteTo},
req,
c_.blob_in_grad_,
c_.blob_aux_states_);
}
@@ -244,7 +251,7 @@
gpuData->blob_out_grad_,
gpuData->blob_input_vec_,
gpuData->blob_output_vec_,
{kWriteTo, kWriteTo, kWriteTo},
req,
gpuData->blob_in_grad_,
gpuData->blob_aux_states_));
}
@@ -386,6 +393,21 @@ class BasicOperatorData {
copy(blob, sourceData, 0, sourceDataSize);
}

void FillRandom() {
std::uniform_real_distribution<DType> distribution(-1.0, 1.0);
for (size_t j = 0, jn = this->c_.all_blob_vects_.size(); j < jn; ++j) {
std::vector<TBlob> *data_vect = this->c_.all_blob_vects_[j];
if (data_vect) {
for (size_t i = 0, n = data_vect->size(); i < n; ++i) {
TBlob &blob = (*data_vect)[i];
test::patternFill<DType>(&blob, [this, &distribution]() -> DType {
return distribution(generator());
});
}
}
}
}
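
A self-contained sketch of the fill pattern FillRandom relies on, under the assumption that test::patternFill simply invokes the supplied callback once per element (a plain loop stands in for it below); the per-test mt19937 corresponds to the generator_ member added in this change.

#include <random>
#include <vector>
#include <functional>
#include <iostream>

// Stand-in for test::patternFill: fill a buffer from a value-producing callback.
template <typename DType>
void patternFill(std::vector<DType>* blob, const std::function<DType()>& gen) {
  for (DType& v : *blob) v = gen();
}

int main() {
  std::mt19937 generator;                                    // per-test generator, as in generator_
  std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
  std::vector<float> blob(8);
  patternFill<float>(&blob, [&]() -> float { return distribution(generator); });
  for (float v : blob) std::cout << v << ' ';
  std::cout << '\n';
}
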

/*! \brief Input and output blobs */
OpContext opContext_;

@@ -520,6 +542,9 @@ class BasicOperatorData {
return allocateBlob(&standalone_blobs_, dest, shape, isGPU, dtype);
}

/*! \brief mt19937 generator for random number generator */
std::mt19937& generator() { return *generator_; }

/*! \brief Performance timing categories */
enum TimingId {
Forward,
@@ -539,6 +564,9 @@
/*! \brief scoped lifecycle management of allocated blobs */
std::list<std::unique_ptr<test::StandaloneBlob>> standalone_blobs_;

/*! \brief Per-test generator */
std::unique_ptr<std::mt19937> generator_;

public:
/*! Timing instrumentation */
test::perf::TimingInstrument timing_;
@@ -675,7 +703,7 @@ class Validator {
}
const TBlob& b1 = bv1[idx];
const TBlob& b2 = bv2[idx];
if (print && test::debugOutput) {
if (print && test::debug_output) {
test::print(RunContext(), &(std::cout << "Blob 1:"), b1, true, true);
test::print(RunContext(), &(std::cout << "Blob 2:"), b2, true, true);
}