Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions include/mxnet/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,18 @@ struct Context {
*/
Context(int dev_mask, int dev_id)
: dev_mask(dev_mask), dev_id(dev_id) {}
/*!
 * \brief Strict weak ordering so Context can serve as a std::map key.
 *        Orders first by device mask, then by device id within a mask.
 * \param b another context to compare against
 * \return true if *this orders before b
 */
inline bool operator<(const Context &b) const {
  if (dev_mask != b.dev_mask) return dev_mask < b.dev_mask;
  return dev_id < b.dev_id;
}
/*!
* \brief check if current context equals another one
* \param b another context to compare
Expand Down
18 changes: 7 additions & 11 deletions src/symbol/graph_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <mxnet/resource.h>
#include <mxnet/symbolic.h>
#include <memory>
#include <map>
#include "./graph_executor.h"
#include "./graph_algorithm.h"

Expand Down Expand Up @@ -361,7 +362,7 @@ void GraphExecutor::InitDataEntryMemory() {
}

// use allocator to allocate memory.
GraphStorageAllocator allocator(&graph_);
GraphStorageAllocator allocator(&graph_, topo_order_);
for (size_t i = 0; i < topo_order_.size(); ++i) {
uint32_t nid = topo_order_[i];
if (!op_nodes_[nid].activated) continue;
Expand Down Expand Up @@ -453,13 +454,9 @@ void GraphExecutor::InitDataEntryMemory() {
}
}

// Map a context to a small unique index: CPU is always slot 0,
// GPU devices occupy slot dev_id + 1.
inline uint32_t UniqueContextIndex(const Context &ctx) {
  return (ctx.dev_mask == cpu::kDevMask) ? 0 : ctx.dev_id + 1;
}

void GraphExecutor::InitResources() {
// maximum amount of color allowed in coloring algorithm
const uint32_t kMaxNumColor = 8;
// prepare for temp space allocation
std::vector<uint32_t> req_temp_cnt(topo_order_.size(), 0);
for (size_t i = 0; i < topo_order_.size(); ++i) {
Expand All @@ -473,14 +470,14 @@ void GraphExecutor::InitResources() {
CHECK_LE(cnt, 1) << "Node can only have one temp space request";
req_temp_cnt[nid] = cnt;
}
uint32_t num_color = 16;
uint32_t num_color = kMaxNumColor;
std::vector<uint32_t> req_temp_color;
// use graph coloring to find node that won't run in parallel
num_color = graph::ColorNodeGroup(graph_, topo_order_, req_temp_cnt,
num_color, &req_temp_color);

// cached resources temp space
std::map<uint32_t, std::map<uint32_t, Resource> > cached_temp;
std::map<Context, std::map<uint32_t, Resource> > cached_temp;
total_allocated_temp_ = 0;

// Resource allocation
Expand All @@ -496,9 +493,8 @@ void GraphExecutor::InitResources() {
const Context &ctx = op_nodes_[nid].ctx;
if (req.type == ResourceRequest::kTempSpace) {
uint32_t color = req_temp_color[nid];
uint32_t ctx_id = UniqueContextIndex(ctx);
// try to reuse graph in same color
std::map<uint32_t, Resource> &cmap = cached_temp[ctx_id];
std::map<uint32_t, Resource> &cmap = cached_temp[ctx];
if (cmap.count(color) != 0) {
requested.push_back(cmap.at(color));
} else {
Expand Down
50 changes: 45 additions & 5 deletions src/symbol/graph_memory_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <map>
#include <vector>
#include <algorithm>
#include "./graph_algorithm.h"

namespace mxnet {
/*!
Expand All @@ -34,7 +35,9 @@ class GraphStorageAllocator {
/*! \brief bad storage id */
static const StorageID kBadStorageID = -1;
/*! \brief constructor to the graph memory allocator */
explicit GraphStorageAllocator(StaticGraph *graph);
explicit GraphStorageAllocator(
StaticGraph *graph,
const std::vector<uint32_t>& topo_order) noexcept(false);
/*!
* \brief Request a memory.
* \param ctx the context of the graph
Expand Down Expand Up @@ -69,18 +72,24 @@ class GraphStorageAllocator {
Context ctx;
/*! \brief maximum size of the storage that is requested */
size_t max_size;
/*! \brief node index that released it last time */
uint32_t released_by_node;
/*! \brief the actual NDArray to hold the data */
NDArray data;
/*! \brief constructor */
StorageEntry() : max_size(0) {}
StorageEntry() : max_size(0), released_by_node(0) {}
};
/*!
* \brief Allocate a StorageID when Request cannot found existing ones.
* \param ctx the context of the graph
* \param shape shape of the NDArray we want
*/
StorageID Alloc(Context ctx, size_t size);

/*!
* \brief Initialize the colors of graph nodes.
* \param topo_order the topological order in the graph.
*/
void InitColor(const std::vector<uint32_t> &topo_order);
/*! \brief reference to the computation graph */
StaticGraph *graph_;
/*! \brief all the resources available */
Expand All @@ -91,12 +100,39 @@ class GraphStorageAllocator {
* \brief free list of storage entries, maps size to free list
*/
std::multimap<size_t, StorageEntry*> free_;

/*!
* \brief color of nodes in the graph, used for auxiliary policy making.
*/
std::vector<uint32_t> node_color_;
/*! \brief whether use color based match algorithm */
uint32_t num_match_color_;
};

// put implementation in header files for now
GraphStorageAllocator::GraphStorageAllocator(StaticGraph *graph)
: graph_(graph) {
GraphStorageAllocator::GraphStorageAllocator(
    StaticGraph *graph,
    const std::vector<uint32_t>& topo_order) noexcept(false)
    : graph_(graph), num_match_color_(0) {
  // size window used when matching a free block to a request
  match_range_ = dmlc::GetEnv("MXNET_EXEC_MATCH_RANGE", 16);
  // Setting this to 1 disables color based matching.
  // Color based matching usually costs a bit more memory,
  // but enables more parallelization.
  num_match_color_ = dmlc::GetEnv("MXNET_EXEC_MATCH_NUM_COLOR", 4);
  this->InitColor(topo_order);
}

void GraphStorageAllocator::InitColor(const std::vector<uint32_t>& topo_order) {
std::vector<uint32_t> importance(graph_->nodes.size(), 0);
for (size_t i = 0; i < topo_order.size(); ++i) {
uint32_t nid = topo_order[i];
if (graph_->nodes[nid].is_variable()) continue;
importance[nid] = 1;
}
num_match_color_ = graph::ColorNodeGroup(
*graph_, topo_order,
importance, num_match_color_,
&node_color_);
}

GraphStorageAllocator::StorageID
Expand All @@ -114,6 +150,7 @@ GraphStorageAllocator::StorageID
GraphStorageAllocator::Request(Context ctx, TShape shape, uint32_t node_id) {
// search memory block in [size / match_range_, size * match_range_)
size_t size = shape.Size();
if (match_range_ == 0) return this->Alloc(ctx, size);
auto begin = free_.lower_bound(size / match_range_);
auto mid = free_.lower_bound(size);
auto end = free_.upper_bound(size * match_range_);
Expand All @@ -122,6 +159,7 @@ GraphStorageAllocator::Request(Context ctx, TShape shape, uint32_t node_id) {
for (auto it = mid; it != end; ++it) {
StorageEntry *e = it->second;
if (e->ctx != ctx) continue;
if (node_color_[e->released_by_node] != node_color_[node_id]) continue;
// Use exact matching strategy
e->max_size = std::max(size, e->max_size);
// find an exact match, erase from map and return
Expand All @@ -133,6 +171,7 @@ GraphStorageAllocator::Request(Context ctx, TShape shape, uint32_t node_id) {
--it;
StorageEntry *e = it->second;
if (e->ctx != ctx) continue;
if (node_color_[e->released_by_node] != node_color_[node_id]) continue;
// Use exact matching strategy
e->max_size = std::max(size, e->max_size);
// find an exact match, erase from map and return
Expand All @@ -146,6 +185,7 @@ GraphStorageAllocator::Request(Context ctx, TShape shape, uint32_t node_id) {
void GraphStorageAllocator::Release(StorageID id, uint32_t node_id) {
  CHECK_NE(id, kBadStorageID);
  StorageEntry *entry = data_[id].get();
  // record which node freed the entry so Request can match by node color
  entry->released_by_node = node_id;
  free_.insert({entry->max_size, entry});
}

Expand Down