From 5e18d933ff842c07a1f86aae92beea190b030ad4 Mon Sep 17 00:00:00 2001 From: eric-haibin-lin Date: Thu, 14 Jun 2018 17:44:07 +0000 Subject: [PATCH 1/8] copy paste --- src/imperative/cached_op.cc | 80 +++++++++++++++++++++++++++++++++- src/imperative/cached_op.h | 8 ++++ src/operator/operator_common.h | 4 +- 3 files changed, 89 insertions(+), 3 deletions(-) diff --git a/src/imperative/cached_op.cc b/src/imperative/cached_op.cc index b40605bd25e2..2fdec8f9737f 100644 --- a/src/imperative/cached_op.cc +++ b/src/imperative/cached_op.cc @@ -22,6 +22,7 @@ #include "./cached_op.h" #include "../executor/exec_pass.h" #include "../profiler/profiler.h" +#include "../operator/operator_common.h" namespace mxnet { @@ -95,7 +96,8 @@ CachedOp::CachedOp( using namespace imperative; static const std::vector zero_ops{Op::Get("zeros_like"), Op::Get("_zeros")}; static const auto _copy = Op::Get("_copy"); - + // TODO no need to save sym + sym_ = sym; config_.Init(flags); // construct forward graph @@ -1015,6 +1017,74 @@ void CachedOp::Backward( Engine::Get()->set_bulk_size(prev_bulk_size); } +bool CachedOp::BackwardStorageType(const nnvm::NodeAttrs& attrs, + //const nnvm::Symbol &subgraph, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector *in_attrs, + std::vector *out_attrs) { + using namespace nnvm; + // construct backward graph + nnvm::Graph grad_graph; + nnvm::Graph fwd_graph; + std::vector potential_nodes; + { + fwd_graph.outputs = subgraph.outputs; + std::vector ograd_entries; + ograd_entries.reserve(fwd_graph.outputs.size()); + for (size_t i = 0; i < fwd_graph.outputs.size(); ++i) { + ograd_entries.emplace_back(NodeEntry{Node::Create(), 0, 0}); + } + + // TODO what happens when some arguments are mutable. + std::vector xs; + std::vector args = subgraph.ListInputs(nnvm::Symbol::kReadOnlyArgs); + xs.reserve(args.size()); + for (const auto& i : args) + xs.emplace_back(NodeEntry{i, 0, 0}); + CHECK_GT(xs.size(), 0) + << "There are no inputs in computation graph that require gradients."; + + static const std::vector zero_ops{Op::Get("zeros_like"), Op::Get("_zeros")}; + grad_graph = pass::Gradient( + fwd_graph, fwd_graph.outputs, xs, ograd_entries, + exec::AggregateGradient, nullptr, nullptr, + zero_ops, "_copy"); + potential_nodes.reserve(fwd_graph.outputs.size() + xs.size() + ograd_entries.size()); + for (auto e : ograd_entries) + potential_nodes.push_back(e.node.get()); + for (auto e : xs) + potential_nodes.push_back(e.node.get()); + for (auto e : fwd_graph.outputs) + potential_nodes.push_back(e.node.get()); + } + + const auto& idx = grad_graph.indexed_graph(); + auto input_nodes = idx.input_nodes(); + StorageTypeVector storage_type_inputs(input_nodes.size()); + for (size_t i = 0; i < input_nodes.size(); i++) { + auto node_id = input_nodes[i]; + const nnvm::IndexedGraph::Node &n = idx[node_id]; + auto it = std::find(potential_nodes.begin(), potential_nodes.end(), n.source); + CHECK(it != potential_nodes.end()); + size_t idx = it - potential_nodes.begin(); + CHECK_LT(idx, in_attrs->size()); + storage_type_inputs[i] = in_attrs->at(idx); + } + CHECK_EQ(idx.outputs().size(), out_attrs->size()); + exec::DevMaskVector dev_masks(idx.num_node_entries(), dev_mask); + imperative::CheckAndInferStorageType(&grad_graph, std::move(dev_masks), + std::move(storage_type_inputs), true); + + const auto& stypes = grad_graph.GetAttr("storage_type"); + DISPATCH_MODE_ASSIGN_CHECK(dispatch_mode, 0, DispatchMode::kFComputeEx); + auto &outputs = idx.outputs(); + CHECK(outputs.size() == out_attrs->size()); + for (size_t i = 0; i < out_attrs->size(); i++) + STORAGE_TYPE_ASSIGN_CHECK(*out_attrs, i, stypes[idx.entry_id(outputs[i])]); + return true; +} + NNVM_REGISTER_OP(_CachedOp) .set_num_inputs([](const NodeAttrs& attrs) { @@ -1040,6 +1110,14 @@ NNVM_REGISTER_OP(_backward_CachedOp) const CachedOpPtr& op = nnvm::get(attrs.parsed); return op->num_inputs() - op->mutable_input_nodes().size(); }) +.set_attr("FInferStorageType", [](const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector *in_attrs, + std::vector *out_attrs) { + const CachedOpPtr& op = nnvm::get(attrs.parsed); + return op->BackwardStorageType(attrs, dev_mask, dispatch_mode, in_attrs, out_attrs); +}) .set_attr("TIsLayerOpBackward", true) .set_attr("TIsBackward", true); diff --git a/src/imperative/cached_op.h b/src/imperative/cached_op.h index 60a40c5e4a52..77562f0599d1 100644 --- a/src/imperative/cached_op.h +++ b/src/imperative/cached_op.h @@ -102,6 +102,13 @@ class CachedOp { const std::vector& inputs, const std::vector& reqs, const std::vector& outputs); + // backward storage type inference + bool BackwardStorageType( + const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector *in_attrs, + std::vector *out_attrs); private: struct GraphInfo; @@ -166,6 +173,7 @@ class CachedOp { std::mutex mutex_; std::unordered_map > cached_op_states_; + nnvm::Symbol sym_; }; using CachedOpPtr = std::shared_ptr; diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h index 0a9cd08db81b..02130eb32e51 100644 --- a/src/operator/operator_common.h +++ b/src/operator/operator_common.h @@ -256,7 +256,7 @@ inline bool dispatch_mode_assign(DispatchMode *y, const DispatchMode& x) { */ #define STORAGE_TYPE_ASSIGN_CHECK(type_array, index, type) \ { \ - if (!type_assign(&(type_array)[index], type)) { \ + if (!::mxnet::op::type_assign(&(type_array)[index], type)) { \ std::ostringstream os; \ os << "Storage type inconsistent, Provided = " \ << common::stype_string((type_array)[index]) << ',' \ @@ -274,7 +274,7 @@ inline bool dispatch_mode_assign(DispatchMode *y, const DispatchMode& x) { */ #define DISPATCH_MODE_ASSIGN_CHECK(type_array, index, type) \ { \ - if (!dispatch_mode_assign(&(type_array)[index], type)) { \ + if (!::mxnet::op::dispatch_mode_assign(&(type_array)[index], type)) { \ std::ostringstream os; \ os << "Dispatch mode inconsistent, Provided = " \ << common::dispatch_mode_string((type_array)[index]) << ',' \ From d1f16e58fb294234799a8e5a13add370543067e3 Mon Sep 17 00:00:00 2001 From: eric-haibin-lin Date: Thu, 14 Jun 2018 21:51:10 +0000 Subject: [PATCH 2/8] pass unit test --- src/imperative/cached_op.cc | 127 +++++++++++++--------------- tests/python/unittest/test_gluon.py | 27 ++++++ 2 files changed, 85 insertions(+), 69 deletions(-) diff --git a/src/imperative/cached_op.cc b/src/imperative/cached_op.cc index 2fdec8f9737f..2f0869b5af21 100644 --- a/src/imperative/cached_op.cc +++ b/src/imperative/cached_op.cc @@ -326,6 +326,27 @@ bool CachedOp::SetForwardGraph( return false; } +// Utility function to set backward input eids +void SetBackwardInputEid(const std::vector& bwd_in_dep, + const std::vector& bwd_out_dep, + const std::vector& bwd_ograd_dep, + const std::vector& ograd_entries, + const nnvm::IndexedGraph& idx, + std::vector *bwd_input_eid) { + for (const auto& i : bwd_ograd_dep) { + auto eid = idx.entry_id(ograd_entries[i]); + bwd_input_eid->push_back(eid); + } + for (const auto& i : bwd_in_dep) { + auto eid = idx.entry_id(idx.input_nodes()[i], 0); + bwd_input_eid->push_back(eid); + } + for (const auto& i : bwd_out_dep) { + auto eid = idx.entry_id(idx.outputs()[i]); + bwd_input_eid->push_back(eid); + } +} + bool CachedOp::SetBackwardGraph( GraphInfo* info, const std::vector& reqs, @@ -354,18 +375,8 @@ bool CachedOp::SetBackwardGraph( if (info->bwd_input_eid.size() != inputs.size()) { info->bwd_input_eid.clear(); - for (const auto& i : bwd_ograd_dep_) { - auto eid = idx.entry_id(ograd_entries_[i]); - info->bwd_input_eid.push_back(eid); - } - for (const auto& i : bwd_in_dep_) { - auto eid = idx.entry_id(idx.input_nodes()[i], 0); - info->bwd_input_eid.push_back(eid); - } - for (const auto& i : bwd_out_dep_) { - auto eid = idx.entry_id(idx.outputs()[i]); - info->bwd_input_eid.push_back(eid); - } + SetBackwardInputEid(bwd_in_dep_, bwd_out_dep_, bwd_ograd_dep_, + ograd_entries_, idx, &info->bwd_input_eid); CHECK_EQ(inputs.size(), info->bwd_input_eid.size()); } @@ -1018,70 +1029,48 @@ void CachedOp::Backward( } bool CachedOp::BackwardStorageType(const nnvm::NodeAttrs& attrs, - //const nnvm::Symbol &subgraph, const int dev_mask, DispatchMode* dispatch_mode, std::vector *in_attrs, std::vector *out_attrs) { - using namespace nnvm; - // construct backward graph - nnvm::Graph grad_graph; - nnvm::Graph fwd_graph; - std::vector potential_nodes; - { - fwd_graph.outputs = subgraph.outputs; - std::vector ograd_entries; - ograd_entries.reserve(fwd_graph.outputs.size()); - for (size_t i = 0; i < fwd_graph.outputs.size(); ++i) { - ograd_entries.emplace_back(NodeEntry{Node::Create(), 0, 0}); - } - - // TODO what happens when some arguments are mutable. - std::vector xs; - std::vector args = subgraph.ListInputs(nnvm::Symbol::kReadOnlyArgs); - xs.reserve(args.size()); - for (const auto& i : args) - xs.emplace_back(NodeEntry{i, 0, 0}); - CHECK_GT(xs.size(), 0) - << "There are no inputs in computation graph that require gradients."; - - static const std::vector zero_ops{Op::Get("zeros_like"), Op::Get("_zeros")}; - grad_graph = pass::Gradient( - fwd_graph, fwd_graph.outputs, xs, ograd_entries, - exec::AggregateGradient, nullptr, nullptr, - zero_ops, "_copy"); - potential_nodes.reserve(fwd_graph.outputs.size() + xs.size() + ograd_entries.size()); - for (auto e : ograd_entries) - potential_nodes.push_back(e.node.get()); - for (auto e : xs) - potential_nodes.push_back(e.node.get()); - for (auto e : fwd_graph.outputs) - potential_nodes.push_back(e.node.get()); - } - - const auto& idx = grad_graph.indexed_graph(); - auto input_nodes = idx.input_nodes(); - StorageTypeVector storage_type_inputs(input_nodes.size()); - for (size_t i = 0; i < input_nodes.size(); i++) { - auto node_id = input_nodes[i]; - const nnvm::IndexedGraph::Node &n = idx[node_id]; - auto it = std::find(potential_nodes.begin(), potential_nodes.end(), n.source); - CHECK(it != potential_nodes.end()); - size_t idx = it - potential_nodes.begin(); - CHECK_LT(idx, in_attrs->size()); - storage_type_inputs[i] = in_attrs->at(idx); + using namespace imperative; + std::lock_guard lock(mutex_); + nnvm::Graph g = full_graph_; + const auto& idx = g.indexed_graph(); + // Construct bwd_input_eid + std::vector bwd_input_eid; + SetBackwardInputEid(bwd_in_dep_, bwd_out_dep_, bwd_ograd_dep_, + ograd_entries_, idx, &bwd_input_eid); + CHECK_EQ(in_attrs->size(), bwd_input_eid.size()); + // Prepare node and entry ranges + const size_t num_forward_nodes = fwd_graph_.indexed_graph().num_nodes(); + const size_t num_forward_entries = fwd_graph_.indexed_graph().num_node_entries(); + const size_t num_forward_outputs = fwd_graph_.outputs.size(); + std::pair node_range, entry_range; + node_range = {num_forward_nodes, idx.num_nodes()}; + entry_range = {num_forward_entries, idx.num_node_entries()}; + g.attrs["node_range"] = std::make_shared(node_range); + g.attrs["entry_range"] = std::make_shared(entry_range); + // Prepare stypes based on inputs + StorageTypeVector stypes(idx.num_node_entries(), -1); + for (size_t i = 0; i < in_attrs->size(); ++i) { + stypes[bwd_input_eid[i]] = in_attrs->at(i); } - CHECK_EQ(idx.outputs().size(), out_attrs->size()); - exec::DevMaskVector dev_masks(idx.num_node_entries(), dev_mask); - imperative::CheckAndInferStorageType(&grad_graph, std::move(dev_masks), - std::move(storage_type_inputs), true); - - const auto& stypes = grad_graph.GetAttr("storage_type"); + // Prepare contexts + exec::DevMaskVector dev_masks(idx.num_nodes(), dev_mask); + // Subgraph storage type inference + CheckAndInferStorageType(&g, std::move(dev_masks), std::move(stypes), + false, node_range, entry_range); + // Retrieve result and set outputs + const auto& inferred_stypes = g.GetAttr("storage_type"); DISPATCH_MODE_ASSIGN_CHECK(dispatch_mode, 0, DispatchMode::kFComputeEx); auto &outputs = idx.outputs(); - CHECK(outputs.size() == out_attrs->size()); - for (size_t i = 0; i < out_attrs->size(); i++) - STORAGE_TYPE_ASSIGN_CHECK(*out_attrs, i, stypes[idx.entry_id(outputs[i])]); + CHECK_EQ(outputs.size(), num_forward_outputs + out_attrs->size()); + // Assign output stypes + for (size_t i = 0; i < out_attrs->size(); i++) { + const auto eid = idx.entry_id(outputs[i + num_forward_outputs]); + STORAGE_TYPE_ASSIGN_CHECK(*out_attrs, i, inferred_stypes[eid]); + } return true; } diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py index bb61af127240..db3bc09dca5a 100644 --- a/tests/python/unittest/test_gluon.py +++ b/tests/python/unittest/test_gluon.py @@ -1285,6 +1285,33 @@ def test_legacy_save_params(): model.load_params('test.params', ctx=mx.cpu()) +def test_sparse_hybrid(): + class Embedding(mx.gluon.HybridBlock): + def __init__(self, num_tokens, embedding_size): + super(Embedding, self).__init__() + self.num_tokens = num_tokens + + with self.name_scope(): + self.embedding = mx.gluon.nn.Embedding( + num_tokens, embedding_size, sparse_grad=True) + + def hybrid_forward(self, F, words): + emb = self.embedding(words) + return emb + F.ones_like(emb) + + ctx = mx.cpu() + embedding = Embedding(1000, 300) + embedding.initialize(ctx=ctx) + embedding.hybridize() + + loss_function = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss() + with mx.autograd.record(): + emb_in = embedding(mx.nd.arange(10, ctx=ctx)) + emb_in_2 = embedding(mx.nd.arange(10, ctx=ctx)) + loss = emb_in.sum() + emb_in_2.sum() + loss.backward() + print(embedding.embedding.weight.grad().data) + if __name__ == '__main__': import nose nose.runmodule() From f82ed288161d880deef17b07ddb0f3e66bfeff57 Mon Sep 17 00:00:00 2001 From: eric-haibin-lin Date: Thu, 14 Jun 2018 22:10:35 +0000 Subject: [PATCH 3/8] remove lock --- src/imperative/cached_op.cc | 18 +++++++++--------- src/imperative/cached_op.h | 10 +++++----- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/imperative/cached_op.cc b/src/imperative/cached_op.cc index 2f0869b5af21..6a01c4e316c0 100644 --- a/src/imperative/cached_op.cc +++ b/src/imperative/cached_op.cc @@ -96,8 +96,6 @@ CachedOp::CachedOp( using namespace imperative; static const std::vector zero_ops{Op::Get("zeros_like"), Op::Get("_zeros")}; static const auto _copy = Op::Get("_copy"); - // TODO no need to save sym - sym_ = sym; config_.Init(flags); // construct forward graph @@ -231,7 +229,7 @@ CachedOp::~CachedOp() { std::vector CachedOp::Gradient( const nnvm::NodePtr& node, - const std::vector& ograds) { + const std::vector& ograds) const { using namespace nnvm; static const auto _backward_CachedOp = Op::Get("_backward_CachedOp"); static const auto _NoGrad = Op::Get("_NoGradient"); @@ -1034,14 +1032,15 @@ bool CachedOp::BackwardStorageType(const nnvm::NodeAttrs& attrs, std::vector *in_attrs, std::vector *out_attrs) { using namespace imperative; - std::lock_guard lock(mutex_); - nnvm::Graph g = full_graph_; + nnvm::Graph g(full_graph_); const auto& idx = g.indexed_graph(); + // Construct bwd_input_eid std::vector bwd_input_eid; SetBackwardInputEid(bwd_in_dep_, bwd_out_dep_, bwd_ograd_dep_, ograd_entries_, idx, &bwd_input_eid); CHECK_EQ(in_attrs->size(), bwd_input_eid.size()); + // Prepare node and entry ranges const size_t num_forward_nodes = fwd_graph_.indexed_graph().num_nodes(); const size_t num_forward_entries = fwd_graph_.indexed_graph().num_node_entries(); @@ -1051,26 +1050,27 @@ bool CachedOp::BackwardStorageType(const nnvm::NodeAttrs& attrs, entry_range = {num_forward_entries, idx.num_node_entries()}; g.attrs["node_range"] = std::make_shared(node_range); g.attrs["entry_range"] = std::make_shared(entry_range); - // Prepare stypes based on inputs + + // Prepare stypes and contexts based on inputs StorageTypeVector stypes(idx.num_node_entries(), -1); for (size_t i = 0; i < in_attrs->size(); ++i) { stypes[bwd_input_eid[i]] = in_attrs->at(i); } - // Prepare contexts exec::DevMaskVector dev_masks(idx.num_nodes(), dev_mask); + // Subgraph storage type inference CheckAndInferStorageType(&g, std::move(dev_masks), std::move(stypes), false, node_range, entry_range); // Retrieve result and set outputs const auto& inferred_stypes = g.GetAttr("storage_type"); - DISPATCH_MODE_ASSIGN_CHECK(dispatch_mode, 0, DispatchMode::kFComputeEx); - auto &outputs = idx.outputs(); + const auto &outputs = idx.outputs(); CHECK_EQ(outputs.size(), num_forward_outputs + out_attrs->size()); // Assign output stypes for (size_t i = 0; i < out_attrs->size(); i++) { const auto eid = idx.entry_id(outputs[i + num_forward_outputs]); STORAGE_TYPE_ASSIGN_CHECK(*out_attrs, i, inferred_stypes[eid]); } + DISPATCH_MODE_ASSIGN_CHECK(dispatch_mode, 0, DispatchMode::kFComputeEx); return true; } diff --git a/src/imperative/cached_op.h b/src/imperative/cached_op.h index 77562f0599d1..f0535c37d565 100644 --- a/src/imperative/cached_op.h +++ b/src/imperative/cached_op.h @@ -71,13 +71,13 @@ class CachedOp { const nnvm::Symbol& sym, const std::vector >& flags); ~CachedOp(); - uint32_t num_inputs() { + uint32_t num_inputs() const { return fwd_graph_.indexed_graph().input_nodes().size(); } - uint32_t num_outputs() { + uint32_t num_outputs() const { return fwd_graph_.outputs.size(); } - uint32_t num_backward_inputs() { + uint32_t num_backward_inputs() const { return bwd_ograd_dep_.size() + bwd_in_dep_.size() + bwd_out_dep_.size(); } std::vector& save_inputs() { @@ -86,12 +86,12 @@ class CachedOp { std::vector& save_outputs() { return save_outputs_; } - const std::unordered_set& mutable_input_nodes() { + const std::unordered_set& mutable_input_nodes() const { return fwd_graph_.indexed_graph().mutable_input_nodes(); } std::vector Gradient( const nnvm::NodePtr& node, - const std::vector& ograds); + const std::vector& ograds) const; void Forward( const std::shared_ptr& op_ptr, const std::vector& inputs, From c94dd2aa301d2fb7f4964ca4f0d78ac11d5c97e2 Mon Sep 17 00:00:00 2001 From: eric-haibin-lin Date: Fri, 15 Jun 2018 18:45:08 +0000 Subject: [PATCH 4/8] save all inputs and outputs --- src/imperative/cached_op.cc | 32 +++++++------------------------ src/imperative/imperative_utils.h | 1 - 2 files changed, 7 insertions(+), 26 deletions(-) diff --git a/src/imperative/cached_op.cc b/src/imperative/cached_op.cc index 6a01c4e316c0..e58af1e5d880 100644 --- a/src/imperative/cached_op.cc +++ b/src/imperative/cached_op.cc @@ -200,26 +200,17 @@ CachedOp::CachedOp( size_t num_forward_outputs = num_outputs(); for (uint32_t i = 0; i < ograd_entries_.size(); ++i) { if (!idx.exist(ograd_entries_[i].node.get())) continue; - auto eid = idx.entry_id(ograd_entries_[i]); - if (ref_count[eid] > 0) { - bwd_ograd_dep_.push_back(i); - } + bwd_ograd_dep_.push_back(i); } save_inputs_.resize(num_forward_inputs, false); for (uint32_t i = 0; i < num_forward_inputs; ++i) { - auto eid = idx.entry_id(idx.input_nodes()[i], 0); - if (ref_count[eid] > 0) { - save_inputs_[i] = true; - bwd_in_dep_.push_back(i); - } + save_inputs_[i] = true; + bwd_in_dep_.push_back(i); } save_outputs_.resize(idx.outputs().size(), false); for (uint32_t i = 0; i < num_forward_outputs; ++i) { - auto eid = idx.entry_id(idx.outputs()[i]); - if (ref_count[eid] > 0) { - save_outputs_[i] = true; - bwd_out_dep_.push_back(i); - } + save_outputs_[i] = true; + bwd_out_dep_.push_back(i); } } } @@ -1042,14 +1033,7 @@ bool CachedOp::BackwardStorageType(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), bwd_input_eid.size()); // Prepare node and entry ranges - const size_t num_forward_nodes = fwd_graph_.indexed_graph().num_nodes(); - const size_t num_forward_entries = fwd_graph_.indexed_graph().num_node_entries(); const size_t num_forward_outputs = fwd_graph_.outputs.size(); - std::pair node_range, entry_range; - node_range = {num_forward_nodes, idx.num_nodes()}; - entry_range = {num_forward_entries, idx.num_node_entries()}; - g.attrs["node_range"] = std::make_shared(node_range); - g.attrs["entry_range"] = std::make_shared(entry_range); // Prepare stypes and contexts based on inputs StorageTypeVector stypes(idx.num_node_entries(), -1); @@ -1058,14 +1042,12 @@ bool CachedOp::BackwardStorageType(const nnvm::NodeAttrs& attrs, } exec::DevMaskVector dev_masks(idx.num_nodes(), dev_mask); - // Subgraph storage type inference - CheckAndInferStorageType(&g, std::move(dev_masks), std::move(stypes), - false, node_range, entry_range); + // Full graph storage type inference + CheckAndInferStorageType(&g, std::move(dev_masks), std::move(stypes), false); // Retrieve result and set outputs const auto& inferred_stypes = g.GetAttr("storage_type"); const auto &outputs = idx.outputs(); CHECK_EQ(outputs.size(), num_forward_outputs + out_attrs->size()); - // Assign output stypes for (size_t i = 0; i < out_attrs->size(); i++) { const auto eid = idx.entry_id(outputs[i + num_forward_outputs]); STORAGE_TYPE_ASSIGN_CHECK(*out_attrs, i, inferred_stypes[eid]); diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h index 726531d02994..faff5f173fe1 100644 --- a/src/imperative/imperative_utils.h +++ b/src/imperative/imperative_utils.h @@ -668,7 +668,6 @@ inline bool CheckAndInferStorageType(nnvm::Graph* p_g, exec::DevMaskVector&& dev g.attrs["storage_type"] = std::make_shared(std::move(storage_types)); g = exec::InferStorageType(std::move(g)); } - CHECK_EQ(g.GetAttr("storage_type_num_unknown_nodes"), 0U); return false; } From adf5faae6bbe7d44299d14d6821f38ee905dfce7 Mon Sep 17 00:00:00 2001 From: eric-haibin-lin Date: Fri, 15 Jun 2018 20:41:59 +0000 Subject: [PATCH 5/8] add one more test --- src/imperative/cached_op.cc | 3 +- src/imperative/cached_op.h | 1 - tests/python/unittest/test_gluon.py | 48 +++++++++++++++++++++++------ 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/src/imperative/cached_op.cc b/src/imperative/cached_op.cc index e58af1e5d880..25f1a7afb71f 100644 --- a/src/imperative/cached_op.cc +++ b/src/imperative/cached_op.cc @@ -1032,8 +1032,6 @@ bool CachedOp::BackwardStorageType(const nnvm::NodeAttrs& attrs, ograd_entries_, idx, &bwd_input_eid); CHECK_EQ(in_attrs->size(), bwd_input_eid.size()); - // Prepare node and entry ranges - const size_t num_forward_outputs = fwd_graph_.outputs.size(); // Prepare stypes and contexts based on inputs StorageTypeVector stypes(idx.num_node_entries(), -1); @@ -1047,6 +1045,7 @@ bool CachedOp::BackwardStorageType(const nnvm::NodeAttrs& attrs, // Retrieve result and set outputs const auto& inferred_stypes = g.GetAttr("storage_type"); const auto &outputs = idx.outputs(); + const size_t num_forward_outputs = fwd_graph_.outputs.size(); CHECK_EQ(outputs.size(), num_forward_outputs + out_attrs->size()); for (size_t i = 0; i < out_attrs->size(); i++) { const auto eid = idx.entry_id(outputs[i + num_forward_outputs]); diff --git a/src/imperative/cached_op.h b/src/imperative/cached_op.h index f0535c37d565..c1f5308feb9a 100644 --- a/src/imperative/cached_op.h +++ b/src/imperative/cached_op.h @@ -173,7 +173,6 @@ class CachedOp { std::mutex mutex_; std::unordered_map > cached_op_states_; - nnvm::Symbol sym_; }; using CachedOpPtr = std::shared_ptr; diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py index db3bc09dca5a..0fcbfffd4b72 100644 --- a/tests/python/unittest/test_gluon.py +++ b/tests/python/unittest/test_gluon.py @@ -1285,7 +1285,8 @@ def test_legacy_save_params(): model.load_params('test.params', ctx=mx.cpu()) -def test_sparse_hybrid(): +@with_seed() +def test_sparse_hybrid_block_grad(): class Embedding(mx.gluon.HybridBlock): def __init__(self, num_tokens, embedding_size): super(Embedding, self).__init__() @@ -1299,18 +1300,47 @@ def hybrid_forward(self, F, words): emb = self.embedding(words) return emb + F.ones_like(emb) - ctx = mx.cpu() - embedding = Embedding(1000, 300) - embedding.initialize(ctx=ctx) + embedding = Embedding(20, 3) + embedding.initialize() embedding.hybridize() - loss_function = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss() with mx.autograd.record(): - emb_in = embedding(mx.nd.arange(10, ctx=ctx)) - emb_in_2 = embedding(mx.nd.arange(10, ctx=ctx)) - loss = emb_in.sum() + emb_in_2.sum() + emb0 = embedding(mx.nd.arange(10)).sum() + emb1 = embedding(mx.nd.arange(10)).sum() + loss = emb0 + emb1 loss.backward() - print(embedding.embedding.weight.grad().data) + grad = embedding.embedding.weight.grad().asnumpy() + assert (grad[:10] == 2).all() + assert (grad[10:] == 0).all() + +@with_seed() +def test_sparse_hybrid_block(): + class Linear(mx.gluon.HybridBlock): + def __init__(self, units): + super(Linear, self).__init__() + with self.name_scope(): + self.w = self.params.get('w', shape=(units, units), grad_stype='row_sparse') + + def hybrid_forward(self, F, x, w): + return F.dot(x, w) + + class SparseBlock(mx.gluon.HybridBlock): + def __init__(self, units): + super(SparseBlock, self).__init__() + with self.name_scope(): + self.net = Linear(units) + + def hybrid_forward(self, F, x): + return self.net(x) + 1 + + block = SparseBlock(2) + block.initialize() + block.hybridize() + x = mx.nd.ones((2,2)).tostype('csr') + with mx.autograd.record(): + z = block(x) + block(x) + z.backward() + assert (block.net.w.grad().asnumpy() == 4).all() if __name__ == '__main__': import nose From 96863e600a563ba3043c4af21518c0d448c32ed3 Mon Sep 17 00:00:00 2001 From: eric-haibin-lin Date: Fri, 15 Jun 2018 20:46:56 +0000 Subject: [PATCH 6/8] update test --- src/imperative/cached_op.cc | 1 - tests/python/unittest/test_gluon.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/imperative/cached_op.cc b/src/imperative/cached_op.cc index 25f1a7afb71f..ca3b9ba03537 100644 --- a/src/imperative/cached_op.cc +++ b/src/imperative/cached_op.cc @@ -1032,7 +1032,6 @@ bool CachedOp::BackwardStorageType(const nnvm::NodeAttrs& attrs, ograd_entries_, idx, &bwd_input_eid); CHECK_EQ(in_attrs->size(), bwd_input_eid.size()); - // Prepare stypes and contexts based on inputs StorageTypeVector stypes(idx.num_node_entries(), -1); for (size_t i = 0; i < in_attrs->size(); ++i) { diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py index 0fcbfffd4b72..f20e714d0d29 100644 --- a/tests/python/unittest/test_gluon.py +++ b/tests/python/unittest/test_gluon.py @@ -1322,7 +1322,7 @@ def __init__(self, units): self.w = self.params.get('w', shape=(units, units), grad_stype='row_sparse') def hybrid_forward(self, F, x, w): - return F.dot(x, w) + return F.dot(x * 2, w) class SparseBlock(mx.gluon.HybridBlock): def __init__(self, units): @@ -1340,7 +1340,7 @@ def hybrid_forward(self, F, x): with mx.autograd.record(): z = block(x) + block(x) z.backward() - assert (block.net.w.grad().asnumpy() == 4).all() + assert (block.net.w.grad().asnumpy() == 8).all() if __name__ == '__main__': import nose From 9c5bf6033e56c65b22ee21cd92c2304cddd06ee9 Mon Sep 17 00:00:00 2001 From: eric-haibin-lin Date: Fri, 15 Jun 2018 21:10:25 +0000 Subject: [PATCH 7/8] update backward stype inference --- src/imperative/cached_op.cc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/imperative/cached_op.cc b/src/imperative/cached_op.cc index ca3b9ba03537..27b7384ecc06 100644 --- a/src/imperative/cached_op.cc +++ b/src/imperative/cached_op.cc @@ -1025,6 +1025,9 @@ bool CachedOp::BackwardStorageType(const nnvm::NodeAttrs& attrs, using namespace imperative; nnvm::Graph g(full_graph_); const auto& idx = g.indexed_graph(); + const auto &outputs = idx.outputs(); + const size_t num_forward_outputs = fwd_graph_.outputs.size(); + CHECK_EQ(outputs.size(), num_forward_outputs + out_attrs->size()); // Construct bwd_input_eid std::vector bwd_input_eid; @@ -1037,15 +1040,18 @@ bool CachedOp::BackwardStorageType(const nnvm::NodeAttrs& attrs, for (size_t i = 0; i < in_attrs->size(); ++i) { stypes[bwd_input_eid[i]] = in_attrs->at(i); } + // Some out_attr is known ahead of time (e.g. the grad stype is given by users). + // Prepare these to before invoking infer storage on the subgraph + for (size_t i = 0; i < out_attrs->size(); i++) { + const auto eid = idx.entry_id(outputs[i + num_forward_outputs]); + stypes[eid] = out_attrs->at(i); + } exec::DevMaskVector dev_masks(idx.num_nodes(), dev_mask); // Full graph storage type inference CheckAndInferStorageType(&g, std::move(dev_masks), std::move(stypes), false); // Retrieve result and set outputs const auto& inferred_stypes = g.GetAttr("storage_type"); - const auto &outputs = idx.outputs(); - const size_t num_forward_outputs = fwd_graph_.outputs.size(); - CHECK_EQ(outputs.size(), num_forward_outputs + out_attrs->size()); for (size_t i = 0; i < out_attrs->size(); i++) { const auto eid = idx.entry_id(outputs[i + num_forward_outputs]); STORAGE_TYPE_ASSIGN_CHECK(*out_attrs, i, inferred_stypes[eid]); From a124cc707ae12f7ea199e535e3bb5637dc975b41 Mon Sep 17 00:00:00 2001 From: eric-haibin-lin Date: Sun, 17 Jun 2018 21:38:41 +0000 Subject: [PATCH 8/8] + fwd inference --- src/imperative/cached_op.cc | 44 +++++++++++++++++++++++++++-- src/imperative/cached_op.h | 7 +++++ tests/python/unittest/test_gluon.py | 8 +++--- 3 files changed, 52 insertions(+), 7 deletions(-) diff --git a/src/imperative/cached_op.cc b/src/imperative/cached_op.cc index 27b7384ecc06..ec85cee8371d 100644 --- a/src/imperative/cached_op.cc +++ b/src/imperative/cached_op.cc @@ -1017,6 +1017,36 @@ void CachedOp::Backward( Engine::Get()->set_bulk_size(prev_bulk_size); } +bool CachedOp::ForwardStorageType(const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector *in_attrs, + std::vector *out_attrs) { + using namespace imperative; + nnvm::Graph g(fwd_graph_); + const auto& idx = g.indexed_graph(); + const auto &outputs = idx.outputs(); + + // Prepare stypes and contexts based on inputs + StorageTypeVector storage_type_inputs; + storage_type_inputs.reserve(in_attrs->size()); + for (size_t i = 0; i < in_attrs->size(); ++i) { + storage_type_inputs.emplace_back(in_attrs->at(i)); + } + exec::DevMaskVector dev_masks(idx.num_nodes(), dev_mask); + + // Forward graph storage type inference + CheckAndInferStorageType(&g, std::move(dev_masks), std::move(storage_type_inputs), true); + // Retrieve result and set outputs + const auto& inferred_stypes = g.GetAttr("storage_type"); + for (size_t i = 0; i < out_attrs->size(); i++) { + const auto eid = idx.entry_id(outputs[i]); + STORAGE_TYPE_ASSIGN_CHECK(*out_attrs, i, inferred_stypes[eid]); + } + DISPATCH_MODE_ASSIGN_CHECK(dispatch_mode, 0, DispatchMode::kFComputeEx); + return true; +} + bool CachedOp::BackwardStorageType(const nnvm::NodeAttrs& attrs, const int dev_mask, DispatchMode* dispatch_mode, @@ -1070,6 +1100,14 @@ NNVM_REGISTER_OP(_CachedOp) const CachedOpPtr& op = nnvm::get(attrs.parsed); return op->num_outputs(); }) +.set_attr("FInferStorageType", [](const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector *in_attrs, + std::vector *out_attrs) { + const CachedOpPtr& op = nnvm::get(attrs.parsed); + return op->ForwardStorageType(attrs, dev_mask, dispatch_mode, in_attrs, out_attrs); + }) .set_attr("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { const CachedOpPtr& op = nnvm::get(n->attrs.parsed); @@ -1090,9 +1128,9 @@ NNVM_REGISTER_OP(_backward_CachedOp) DispatchMode* dispatch_mode, std::vector *in_attrs, std::vector *out_attrs) { - const CachedOpPtr& op = nnvm::get(attrs.parsed); - return op->BackwardStorageType(attrs, dev_mask, dispatch_mode, in_attrs, out_attrs); -}) + const CachedOpPtr& op = nnvm::get(attrs.parsed); + return op->BackwardStorageType(attrs, dev_mask, dispatch_mode, in_attrs, out_attrs); + }) .set_attr("TIsLayerOpBackward", true) .set_attr("TIsBackward", true); diff --git a/src/imperative/cached_op.h b/src/imperative/cached_op.h index c1f5308feb9a..6b94c67a94e2 100644 --- a/src/imperative/cached_op.h +++ b/src/imperative/cached_op.h @@ -102,6 +102,13 @@ class CachedOp { const std::vector& inputs, const std::vector& reqs, const std::vector& outputs); + // forward storage type inference + bool ForwardStorageType( + const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector *in_attrs, + std::vector *out_attrs); // backward storage type inference bool BackwardStorageType( const nnvm::NodeAttrs& attrs, diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py index f20e714d0d29..cf414649b86f 100644 --- a/tests/python/unittest/test_gluon.py +++ b/tests/python/unittest/test_gluon.py @@ -1319,10 +1319,10 @@ class Linear(mx.gluon.HybridBlock): def __init__(self, units): super(Linear, self).__init__() with self.name_scope(): - self.w = self.params.get('w', shape=(units, units), grad_stype='row_sparse') + self.w = self.params.get('w', shape=(units, units)) def hybrid_forward(self, F, x, w): - return F.dot(x * 2, w) + return F.dot(x, w) class SparseBlock(mx.gluon.HybridBlock): def __init__(self, units): @@ -1331,7 +1331,7 @@ def __init__(self, units): self.net = Linear(units) def hybrid_forward(self, F, x): - return self.net(x) + 1 + return self.net(x) * x block = SparseBlock(2) block.initialize() @@ -1340,7 +1340,7 @@ def hybrid_forward(self, F, x): with mx.autograd.record(): z = block(x) + block(x) z.backward() - assert (block.net.w.grad().asnumpy() == 8).all() + assert (block.net.w.grad().asnumpy() == 4).all() if __name__ == '__main__': import nose