Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
342 changes: 234 additions & 108 deletions src/relay/backend/graph_plan_memory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <tvm/tir/op.h>

#include "../../support/arena.h"
#include "../../runtime/texture.h"

namespace tvm {
namespace relay {
Expand Down Expand Up @@ -184,17 +185,6 @@ class StorageAllocaInit : protected StorageAllocaBaseVisitor {

class StorageAllocator : public StorageAllocaBaseVisitor {
public:
/*!
* \return totoal number of bytes allocated
*/
size_t TotalAllocBytes() const {
size_t total = 0;
for (const auto* p : data_) {
total += p->max_bytes;
}
return total;
}

// Run storage allocation for a function.
Map<Expr, Array<IntegerArray> > Plan(const Function& func) {
prototype_ = StorageAllocaInit(&arena_).GetInitTokenMap(func);
Expand Down Expand Up @@ -238,10 +228,10 @@ class StorageAllocator : public StorageAllocaBaseVisitor {
std::vector<StorageToken*> tokens;
for (StorageToken* tok : it->second) {
if (can_realloc) {
tokens.push_back(Request(tok));
tokens.push_back(allocator_.Request(tok));
} else {
// Allocate a new token,
StorageToken* allocated_tok = Alloc(tok, GetMemorySize(tok));
StorageToken* allocated_tok = allocator_.Alloc(tok);
allocated_tok->device_type = tok->device_type;
// ensure it never get de-allocated.
allocated_tok->ref_counter += 1;
Expand All @@ -263,116 +253,252 @@ class StorageAllocator : public StorageAllocaBaseVisitor {
CreateToken(op, true);
// check if there is orphaned output that can be released immediately.
for (StorageToken* tok : token_map_.at(op)) {
CheckForRelease(tok);
allocator_.CheckForRelease(tok);
}
for (StorageToken* tok : args) {
tok->ref_counter -= 1;
CheckForRelease(tok);
allocator_.CheckForRelease(tok);
}
}
/*!
* \brief ceil(size/word_size) to get number of words.
* \param size The original size.
* \param word_size The element size.
*/
static size_t DivRoundUp(size_t size, size_t word_size) {
return (size + word_size - 1) / word_size;
}
/*!
* \brief Get the memory requirement.
* \param prototype The prototype token.
* \return The required memory size.
*/
size_t GetMemorySize(StorageToken* prototype) {
const TensorTypeNode* ttype = prototype->ttype;
ICHECK(ttype != nullptr);
size_t size = 1;
for (IndexExpr dim : ttype->shape) {
const int64_t* pval = tir::as_const_int(dim);
ICHECK(pval != nullptr) << "Cannot allocate memory symbolic tensor shape " << ttype->shape;
ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative shape" << *pval;
size *= static_cast<size_t>(pval[0]);

private:
class TokenAllocator1D {
public:
/*!
* \brief Request a storage token for a given prototype.
* \param prototype. The prototype storage token.
* \return The result token.
*/
StorageToken* Request(StorageToken* prototype) {
// calculate the size;
size_t size = GetMemorySize(prototype);
// search memory block in [size / match_range_, size * match_range_)
if (match_range_ == 0) {
return nullptr;
}
auto begin = free_.lower_bound(size / match_range_);
auto mid = free_.lower_bound(size);
auto end = free_.upper_bound(size * match_range_);
// search for memory blocks larger than requested
for (auto it = mid; it != end; ++it) {
StorageToken* tok = it->second;
if (tok->device_type != prototype->device_type) continue;
ICHECK_EQ(tok->ref_counter, 0);
// Use exect matching strategy
tok->max_bytes = std::max(size, tok->max_bytes);
tok->ref_counter = prototype->ref_counter;
// find a exact match, erase from map and return
free_.erase(it);
return tok;
}
// then search for memory blocks smaller than requested space
for (auto it = mid; it != begin;) {
--it;
StorageToken* tok = it->second;
if (tok->device_type != prototype->device_type) continue;
ICHECK_EQ(tok->ref_counter, 0);
// Use exect matching strategy
tok->max_bytes = std::max(size, tok->max_bytes);
tok->ref_counter = prototype->ref_counter;
// erase from map and return
free_.erase(it);
return tok;
}
return nullptr;
}
size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8);
return size;
}
/*!
* \brief Request a storage token for a given prototype.
* \param prototype. The prototype storage token.
* \return The result token.
*/
StorageToken* Request(StorageToken* prototype) {
// calculate the size;
size_t size = GetMemorySize(prototype);
// search memory block in [size / match_range_, size * match_range_)
if (match_range_ == 0) {
return this->Alloc(prototype, size);
/*!
* \brief Alloacte a storage token by consuming prototype
* \param prototype The prototype token.
* \param size The size of memory being requested.
*/
StorageToken* Alloc(StorageToken* prototype, int64_t storage_id) {
size_t size = GetMemorySize(prototype);
prototype->max_bytes = size;
prototype->storage_id = storage_id;
data_.push_back(prototype);
return prototype;
}
auto begin = free_.lower_bound(size / match_range_);
auto mid = free_.lower_bound(size);
auto end = free_.upper_bound(size * match_range_);
// search for memory blocks larger than requested
for (auto it = mid; it != end; ++it) {
StorageToken* tok = it->second;
if (tok->device_type != prototype->device_type) continue;
ICHECK_EQ(tok->ref_counter, 0);
// Use exect matching strategy
tok->max_bytes = std::max(size, tok->max_bytes);
tok->ref_counter = prototype->ref_counter;
// find a exact match, erase from map and return
free_.erase(it);
return tok;
/*!
* \brief Check if we can release token.
* \param tok The token to be released.
*/
void CheckForRelease(StorageToken* tok) {
ICHECK_GE(tok->storage_id, 0);
ICHECK_GE(tok->ref_counter, 0);
if (tok->ref_counter == 0) {
free_.insert({tok->max_bytes, tok});
}
}
// then search for memory blocks smaller than requested space
for (auto it = mid; it != begin;) {
--it;
StorageToken* tok = it->second;
if (tok->device_type != prototype->device_type) continue;
ICHECK_EQ(tok->ref_counter, 0);
// Use exect matching strategy
tok->max_bytes = std::max(size, tok->max_bytes);
tok->ref_counter = prototype->ref_counter;
// erase from map and return
free_.erase(it);
return tok;
/*!
* \return totoal number of bytes allocated
*/
size_t TotalAllocBytes() const {
size_t total = 0;
for (const auto* p : data_) {
total += p->max_bytes;
}
return total;
}
// cannot find anything return a new one.
return this->Alloc(prototype, size);
}
/*!
* \brief Allocate a storage token by consuming prototype
* \param prototype The prototype token.
* \param size The size of memory being requested.
*/
StorageToken* Alloc(StorageToken* prototype, size_t size) {
prototype->max_bytes = size;
prototype->storage_id = static_cast<int64_t>(data_.size());
data_.push_back(prototype);
return prototype;
}
/*!
* \brief Check if we can release token.
* \param tok The token to be released.
*/
void CheckForRelease(StorageToken* tok) {
ICHECK_GE(tok->storage_id, 0);
ICHECK_GE(tok->ref_counter, 0);
if (tok->ref_counter == 0) {
free_.insert({tok->max_bytes, tok});
/*!
* \brief ceil(size/word_size) to get number of words.
* \param size The original size.
* \param word_size The element size.
*/
static size_t DivRoundUp(size_t size, size_t word_size) {
return (size + word_size - 1) / word_size;
}
}
/*!
* \brief Get the memory requirement.
* \param prototype The prototype token.
* \return The required memory size.
*/
size_t GetMemorySize(StorageToken* prototype) {
const TensorTypeNode* ttype = prototype->ttype;
ICHECK(ttype != nullptr);
size_t size = 1;
for (IndexExpr dim : ttype->shape) {
const int64_t* pval = tir::as_const_int(dim);
ICHECK(pval != nullptr) << "Cannot allocate memory symbolic tensor shape " << ttype->shape;
ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative shape" << *pval;
size *= static_cast<size_t>(pval[0]);
}
size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8);
return size;
}
private:
// scale used for rough match
const size_t match_range_{16};
// free list of storage entry
std::multimap<size_t, StorageToken*> free_;
// all the storage resources available
std::vector<StorageToken*> data_;
};

class TokenAllocator2D {
public:
/*!
 * \brief Request a 2D (texture-backed) storage token for a given prototype.
 *
 * Scans the free list for a released block of the same dtype that can hold
 * the requested height x width, choosing the candidate that needs the least
 * expansion; among candidates needing no expansion, the one wasting the
 * least area wins.
 * \param prototype. The prototype storage token.
 * \return The reused (possibly expanded) token, or nullptr when no free
 *         block is suitable — the caller is expected to fall back to Alloc.
 */
StorageToken* Request(StorageToken* prototype) {
auto shape = GetSize2D(prototype);
int64_t requested_size = shape.height * shape.width;
int64_t min_added_size = std::numeric_limits<int64_t>::max();
int64_t min_wasted_size = std::numeric_limits<int64_t>::max();
int64_t best_storage_id = -1;
MemBlock best_mem, new_mem;
for (int64_t free_id : free_list_) {
MemBlock& cached = blocks_[free_id];
// Can only reuse texture 2d blocks of the same type
if (cached.token_->ttype->dtype != prototype->ttype->dtype) {
continue;
}
// Expanding a cached block means growing it to the element-wise max of
// the two extents; added_size is the growth cost, wasted_size the slack.
int64_t cached_size = cached.x_ * cached.y_;
new_mem.x_ = std::max(cached.x_, shape.width);
new_mem.y_ = std::max(cached.y_, shape.height);
int64_t expanded_size = new_mem.x_ * new_mem.y_;
int64_t added_size = expanded_size - cached_size;
int64_t wasted_size = expanded_size - requested_size;
// Prioritize minimization of added size first, then minimize
// wasted size among blocks which would not require expansion
if ((min_added_size > 0 && added_size < min_added_size) ||
(min_added_size == 0 && wasted_size < min_wasted_size)) {
min_added_size = added_size;
min_wasted_size = wasted_size;
best_storage_id = free_id;
best_mem = new_mem;
}
}

// Heuristic: only reuse when the expansion cost does not exceed the
// requested size itself. When no candidate was recorded above,
// min_added_size is still INT64_MAX, so this test also guards against
// indexing blocks_ with best_storage_id == -1.
if (min_added_size <= requested_size) {
best_mem.token_ = blocks_[best_storage_id].token_;
// Reset the reference counter of the now live token
best_mem.token_->ref_counter = prototype->ref_counter;
blocks_[best_storage_id] = best_mem;
free_list_.erase(best_storage_id);
return best_mem.token_;
}
return nullptr;
}
/*!
 * \brief Allocate a storage token by consuming prototype
 * \param prototype The prototype token.
 * \param storage_id The id assigned to the newly allocated block.
 * \return The prototype, now bound to a fresh 2D memory block.
 */
StorageToken* Alloc(StorageToken* prototype, int64_t storage_id) {
auto shape = GetSize2D(prototype);
MemBlock block;
block.x_ = shape.width;
block.y_ = shape.height;
prototype->storage_id = storage_id;
block.token_ = prototype;
blocks_[prototype->storage_id] = block;
return prototype;
}
/*!
 * \brief Check if we can release token.
 *
 * A token whose reference count has dropped to zero is returned to the
 * free list so a later Request can reuse its block.
 * \param tok The token to be released.
 */
void CheckForRelease(StorageToken* tok) {
ICHECK_GE(tok->storage_id, 0);
ICHECK_GE(tok->ref_counter, 0);
if (tok->ref_counter == 0) {
free_list_.insert(tok->storage_id);
}
}
/*!
 * \brief Get the texture 2d size requirement
 * \param prototype The prototype token.
 * \return The required texture 2d memory size in (width, height, channel).
 */
Texture2DShape GetSize2D(StorageToken* prototype) {
const TensorTypeNode* ttype = prototype->ttype;
ICHECK(ttype != nullptr);
size_t axis = runtime::DefaultTextureLayoutSeparator(ttype->shape.size(), prototype->storage_scope);
// Adapter giving ApplyTexture2DFlattening indexed access to the shape.
// NOTE(review): operator[] dereferences tir::as_const_int without a null
// check, so this assumes a fully static tensor shape — confirm callers
// never reach here with symbolic dims.
struct Shape {
const Array<PrimExpr>& shape;
int64_t operator[](size_t i) const { return *tir::as_const_int(shape[i]); }
};
return runtime::ApplyTexture2DFlattening<int64_t>(Shape{ttype->shape}, ttype->shape.size(), axis);
}
private:
// One live or cached 2D allocation: the owning token plus its extents.
struct MemBlock {
StorageToken* token_;
int64_t x_;
int64_t y_;
};

// storage_id -> block, for every allocation ever made by this allocator
std::unordered_map<int64_t, MemBlock> blocks_;
// storage ids whose tokens currently have ref_counter == 0 (reusable)
std::unordered_set<int64_t> free_list_;
};

/*!
 * \brief Front-end allocator that routes each token to the 1D or 2D
 *  backing allocator based on its storage scope, handing out globally
 *  unique storage ids across both.
 */
class TokenAllocator {
public:
/*! \brief Unconditionally allocate a fresh storage block for \p proto. */
StorageToken* Alloc(StorageToken* proto) {
const int64_t id = storage_ids_++;
if (Is2DStorage(proto)) {
return token_2d_.Alloc(proto, id);
}
return token_1d_.Alloc(proto, id);
}
/*!
 * \brief Try to reuse a released block for \p proto; allocate a new one
 *  when neither backing allocator can satisfy the request.
 */
StorageToken* Request(StorageToken* proto) {
StorageToken* reused = nullptr;
if (Is2DStorage(proto)) {
reused = token_2d_.Request(proto);
} else {
reused = token_1d_.Request(proto);
}
if (reused != nullptr) {
return reused;
}
return this->Alloc(proto);
}
/*! \brief Return \p tok to its backing allocator's free list if unreferenced. */
void CheckForRelease(StorageToken* tok) {
if (Is2DStorage(tok)) {
token_2d_.CheckForRelease(tok);
} else {
token_1d_.CheckForRelease(tok);
}
}
/*! \brief Whether the token's storage scope maps to 2D (texture) memory. */
static bool Is2DStorage(StorageToken* tok) { return relay::Is2DStorage(tok->storage_scope); }

private:
// next unassigned storage id, shared by both backing allocators
int64_t storage_ids_{0};
// flat (byte-buffer) allocator
TokenAllocator1D token_1d_;
// texture (2D) allocator
TokenAllocator2D token_2d_;
};

private:
// allocator
support::Arena arena_;
// scale used for rough match
size_t match_range_{16};
// free list of storage entry
std::multimap<size_t, StorageToken*> free_;
// all the storage resources available
std::vector<StorageToken*> data_;
/*! \brief internal prototype token map */
std::unordered_map<const ExprNode*, std::vector<StorageToken*> > prototype_;
/*! \brief token allocator for optimizing 1d and 2d token alloc requests */
TokenAllocator allocator_;
};

Map<Expr, Array<IntegerArray> > GraphPlanMemory(const Function& func) {
Expand Down
Loading