diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4f989a3d904e..d4d599134587 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -343,6 +343,7 @@ list(APPEND COMPILER_SRCS "src/target/datatype/myfloat/myfloat.cc")
 tvm_file_glob(GLOB RUNTIME_SRCS
   src/runtime/*.cc
   src/runtime/vm/*.cc
+  src/runtime/memory/*.cc
   src/runtime/disco/*.cc
   src/runtime/minrpc/*.cc
 )
diff --git a/apps/android_camera/app/src/main/jni/tvm_runtime.h b/apps/android_camera/app/src/main/jni/tvm_runtime.h
index 0aac7f170ab4..e843b56360bb 100644
--- a/apps/android_camera/app/src/main/jni/tvm_runtime.h
+++ b/apps/android_camera/app/src/main/jni/tvm_runtime.h
@@ -40,6 +40,7 @@
 #include "../src/runtime/graph_executor/graph_executor.cc"
 #include "../src/runtime/library_module.cc"
 #include "../src/runtime/logging.cc"
+#include "../src/runtime/memory/memory_manager.cc"
 #include "../src/runtime/minrpc/minrpc_logger.cc"
 #include "../src/runtime/module.cc"
 #include "../src/runtime/ndarray.cc"
diff --git a/apps/android_deploy/app/src/main/jni/tvm_runtime.h b/apps/android_deploy/app/src/main/jni/tvm_runtime.h
index a2f10701d6df..9eda834eb433 100644
--- a/apps/android_deploy/app/src/main/jni/tvm_runtime.h
+++ b/apps/android_deploy/app/src/main/jni/tvm_runtime.h
@@ -35,6 +35,7 @@
 #include "../src/runtime/graph_executor/graph_executor.cc"
 #include "../src/runtime/library_module.cc"
 #include "../src/runtime/logging.cc"
+#include "../src/runtime/memory/memory_manager.cc"
 #include "../src/runtime/module.cc"
 #include "../src/runtime/ndarray.cc"
 #include "../src/runtime/object.cc"
diff --git a/apps/android_rpc/app/src/main/jni/tvm_runtime.h b/apps/android_rpc/app/src/main/jni/tvm_runtime.h
index 260c8d0cd813..fb14d84b794f 100644
--- a/apps/android_rpc/app/src/main/jni/tvm_runtime.h
+++ b/apps/android_rpc/app/src/main/jni/tvm_runtime.h
@@ -42,6 +42,7 @@
 #include "../src/runtime/graph_executor/graph_executor_factory.cc"
 #include "../src/runtime/library_module.cc"
 #include "../src/runtime/logging.cc"
+#include "../src/runtime/memory/memory_manager.cc"
 #include "../src/runtime/minrpc/minrpc_logger.cc"
 #include "../src/runtime/module.cc"
 #include "../src/runtime/ndarray.cc"
diff --git a/apps/bundle_deploy/runtime.cc b/apps/bundle_deploy/runtime.cc
index 393fc3489af8..e52a4796bb48 100644
--- a/apps/bundle_deploy/runtime.cc
+++ b/apps/bundle_deploy/runtime.cc
@@ -29,6 +29,7 @@
 #include "../../src/runtime/graph_executor/graph_executor.cc"
 #include "../../src/runtime/library_module.cc"
 #include "../../src/runtime/logging.cc"
+#include "../../src/runtime/memory/memory_manager.cc"
 #include "../../src/runtime/module.cc"
 #include "../../src/runtime/ndarray.cc"
 #include "../../src/runtime/object.cc"
diff --git a/apps/howto_deploy/tvm_runtime_pack.cc b/apps/howto_deploy/tvm_runtime_pack.cc
index 0ee61a7e50e4..25e768302c38 100644
--- a/apps/howto_deploy/tvm_runtime_pack.cc
+++ b/apps/howto_deploy/tvm_runtime_pack.cc
@@ -64,6 +64,7 @@
 // Graph executor
 #include "../../src/runtime/graph_executor/graph_executor.cc"
 #include "../../src/runtime/graph_executor/graph_executor_factory.cc"
+#include "../../src/runtime/memory/memory_manager.cc"
 
 // Uncomment the following lines to enable RPC
 // #include "../../src/runtime/rpc/rpc_session.cc"
diff --git a/golang/src/tvm_runtime_pack.cc b/golang/src/tvm_runtime_pack.cc
index c2add6a36734..e4056742eef4 100644
--- a/golang/src/tvm_runtime_pack.cc
+++ b/golang/src/tvm_runtime_pack.cc
@@ -46,6 +46,7 @@
 // Graph executor
 #include "src/runtime/graph_executor/graph_executor.cc"
+#include "src/runtime/memory/memory_manager.cc"
 
 // Uncomment the following lines to enable RPC
 // #include "../../src/runtime/rpc/rpc_session.cc"
diff --git a/include/tvm/runtime/device_api.h b/include/tvm/runtime/device_api.h
index cb0eb7c21f11..e33539daddb7 100644
--- a/include/tvm/runtime/device_api.h
+++ b/include/tvm/runtime/device_api.h
@@ -95,6 +95,14 @@ class TVM_DLL DeviceAPI {
    */
   virtual void GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv) = 0;
+  /*!
+   * \brief Get the physical memory size required.
+   * \param arr the tensor object.
+   * \param mem_scope the memory scope if any
+   * \return the memory size.
+   */
+  virtual size_t GetDataSize(const DLTensor& arr, Optional<String> mem_scope = NullOpt);
+
   /*!
    * \brief Query the device for specified properties.
    *
diff --git a/include/tvm/runtime/vm/memory_manager.h b/include/tvm/runtime/memory/memory_manager.h
similarity index 82%
rename from include/tvm/runtime/vm/memory_manager.h
rename to include/tvm/runtime/memory/memory_manager.h
index feafc01f63d9..8b38fbf6f0ff 100644
--- a/include/tvm/runtime/vm/memory_manager.h
+++ b/include/tvm/runtime/memory/memory_manager.h
@@ -18,11 +18,11 @@
  */
 
 /*!
- * \file tvm/runtime/vm/memory_manager.h
+ * \file tvm/runtime/memory/memory_manager.h
  * \brief Abstract device memory management API
  */
-#ifndef TVM_RUNTIME_VM_MEMORY_MANAGER_H_
-#define TVM_RUNTIME_VM_MEMORY_MANAGER_H_
+#ifndef TVM_RUNTIME_MEMORY_MEMORY_MANAGER_H_
+#define TVM_RUNTIME_MEMORY_MEMORY_MANAGER_H_
 
 #include
 #include
@@ -37,22 +37,22 @@
 namespace tvm {
 namespace runtime {
-namespace vm {
+namespace memory {
+
+enum AllocatorType {
+  kNaive = 1,
+  kPooled,
+};
 
 struct Buffer {
   /*! \brief The pointer to the allocated block of memory. */
   void* data{nullptr};
   /*! \brief The size of the block. */
   size_t size{0};
-  /*! \brief The shape of the tensor. */
-  std::vector<int64_t> shape;
   /*! \brief The context of the allocated buffers. */
   Device device;
-};
-
-enum AllocatorType {
-  kNaive = 1,
-  kPooled,
+  /*! \brief The allocator that created this buffer. */
+  AllocatorType alloc_type;
 };
 
 class Allocator {
@@ -63,9 +63,11 @@ class Allocator {
    * \param shape The shape of the NDArray.
    * \param dtype The datatype of the NDArray.
    * \param dev The device where the array is allocated.
+   * \param mem_scope The device memory scope hint.
    * \return The empty NDArray.
    */
-  NDArray Empty(std::vector<int64_t> shape, DLDataType dtype, Device dev);
+  NDArray Empty(ShapeTuple shape, DLDataType dtype, Device dev,
+                Optional<String> mem_scope = NullOpt);
   /*! \brief Return the allocator type. */
   inline AllocatorType type() const { return type_; }
   /*! \brief Allocate a buffer given a size, alignment and type.
@@ -76,13 +78,12 @@ class Allocator {
    */
   virtual Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) = 0;
   /*! \brief Allocate a buffer given a shape and type.
-   * \param ndims The rank of the tensor.
    * \param shape The shape of the tensor.
    * \param type_hint A type hint to the allocator.
    * \param mem_scope A memory scope of the buffer.
    * \return A sized allocation in the form of a buffer.
    */
-  virtual Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint,
+  virtual Buffer Alloc(ShapeTuple shape, DLDataType type_hint,
                        const std::string& mem_scope = "") = 0;
   /*! \brief Free a buffer allocated by the allocator.
    * \param buffer The buffer to free.
@@ -94,7 +95,7 @@ class Allocator {
   virtual size_t UsedMemory() const = 0;
 
  protected:
-  virtual Buffer Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint,
+  virtual Buffer Alloc(Device dev, ShapeTuple shape, DLDataType type_hint,
                        const std::string& mem_scope);
 
  private:
@@ -114,16 +115,18 @@ class MemoryManager {
   /*!
    * \brief Get an allocator given the context.
    * \param dev The TVM device
+   * \param type The allocator type
    * \return The memory allocator.
    */
-  static Allocator* GetAllocator(Device dev);
+  static Allocator* GetAllocator(Device dev, AllocatorType type);
 
  private:
  MemoryManager() {}
 
 protected:
  std::mutex mu_;
-  std::unordered_map<Device, std::unique_ptr<Allocator>> allocators_;
+  std::unordered_map<Device, std::unordered_map<AllocatorType, std::unique_ptr<Allocator>>>
+      allocators_;
 };
 
 /*! \brief An object representing a storage allocation. */
@@ -133,13 +136,13 @@ class StorageObj : public Object {
   Buffer buffer;
 
   /*! \brief Allocate an NDArray from a given piece of storage. */
-  NDArray AllocNDArray(size_t offset, std::vector<int64_t> shape, DLDataType dtype);
+  NDArray AllocNDArray(size_t offset, ShapeTuple shape, DLDataType dtype);
 
   /*! \brief The deleter for an NDArray when allocated from underlying storage. */
   static void Deleter(Object* ptr);
 
   ~StorageObj() {
-    auto alloc = MemoryManager::Global()->GetAllocator(buffer.device);
+    auto alloc = MemoryManager::Global()->GetAllocator(buffer.device, buffer.alloc_type);
     alloc->Free(buffer);
   }
 
@@ -156,8 +159,8 @@ class Storage : public ObjectRef {
   TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Storage, ObjectRef, StorageObj);
 };
 
-}  // namespace vm
+}  // namespace memory
 }  // namespace runtime
 }  // namespace tvm
 
-#endif  // TVM_RUNTIME_VM_MEMORY_MANAGER_H_
+#endif  // TVM_RUNTIME_MEMORY_MEMORY_MANAGER_H_
diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h
index c2adc3b2a0af..a5fe91186d99 100644
--- a/include/tvm/runtime/vm/vm.h
+++ b/include/tvm/runtime/vm/vm.h
@@ -25,13 +25,13 @@
 #define TVM_RUNTIME_VM_VM_H_
 
 #include
+#include <tvm/runtime/memory/memory_manager.h>
 #include
 #include
 #include
 #include
 #include
 #include
-#include <tvm/runtime/vm/memory_manager.h>
 
 #include
 #include
@@ -41,6 +41,13 @@
 
 namespace tvm {
 namespace runtime {
+
+using memory::Allocator;
+using memory::AllocatorType;
+using memory::MemoryManager;
+using memory::Storage;
+using memory::StorageObj;
+
 namespace vm {
 
 /*!
diff --git a/src/relay/backend/vm/compiler.h b/src/relay/backend/vm/compiler.h
index 5009d9084958..acb4d2d1d258 100644
--- a/src/relay/backend/vm/compiler.h
+++ b/src/relay/backend/vm/compiler.h
@@ -41,7 +41,7 @@
 #include
 #include
 
-#include "../../../runtime/vm/naive_allocator.h"
+#include "../../../runtime/memory/naive_allocator.h"
 #include "../../../runtime/vm/profiler/vm.h"
 #include "../../transforms/pass_utils.h"
 #include "../te_compiler.h"
diff --git a/src/runtime/c_runtime_api.cc b/src/runtime/c_runtime_api.cc
index 980447214a67..3471adefeb7f 100644
--- a/src/runtime/c_runtime_api.cc
+++ b/src/runtime/c_runtime_api.cc
@@ -152,6 +152,20 @@ static size_t GetDataAlignment(const DLDataType dtype) {
   return align;
 }
 
+size_t DeviceAPI::GetDataSize(const DLTensor& arr, Optional<String> mem_scope) {
+  if (!mem_scope.defined() || mem_scope.value().empty() || mem_scope.value() == "global") {
+    size_t size = 1;
+    for (tvm_index_t i = 0; i < arr.ndim; ++i) {
+      size *= static_cast<size_t>(arr.shape[i]);
+    }
+    size *= (arr.dtype.bits * arr.dtype.lanes + 7) / 8;
+    return size;
+  }
+  LOG(FATAL) << "Device does not support physical mem computation with "
+             << "specified memory scope: " << mem_scope.value();
+  return 0;
+}
+
 void* DeviceAPI::AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype,
                                 Optional<String> mem_scope) {
   if (!mem_scope.defined() || mem_scope.value() == "" || mem_scope.value() == "global") {
diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc
index 777a5a442a98..5bd7967cab37 100644
--- a/src/runtime/graph_executor/graph_executor.cc
+++ b/src/runtime/graph_executor/graph_executor.cc
@@ -466,7 +466,8 @@ void GraphExecutor::SetupStorage() {
     if (!pit.scope.empty()) {
       mem_scope = String(pit.scope);
     }
-    storage_pool_.push_back(NDArray::Empty(shape, pit.dtype, dev, mem_scope));
+    storage_pool_.push_back(MemoryManager::GetOrCreateAllocator(dev, AllocatorType::kNaive)
+                                ->Empty(shape, pit.dtype, dev, mem_scope));
   }
 }
 
diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h
index 2f6b8b8147e5..08e06f4e6bf3 100644
--- a/src/runtime/graph_executor/graph_executor.h
+++ b/src/runtime/graph_executor/graph_executor.h
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include <tvm/runtime/memory/memory_manager.h>
 #include
 #include
@@ -42,6 +43,9 @@
 namespace tvm {
 namespace runtime {
 
+using memory::AllocatorType;
+using memory::MemoryManager;
+
 /*! \brief macro to do C API call */
 #define TVM_CCALL(func)                     \
   {                                         \
diff --git a/src/runtime/vm/memory_manager.cc b/src/runtime/memory/memory_manager.cc
similarity index 76%
rename from src/runtime/vm/memory_manager.cc
rename to src/runtime/memory/memory_manager.cc
index cb52a4a4436c..e72934ed2eeb 100644
--- a/src/runtime/vm/memory_manager.cc
+++ b/src/runtime/memory/memory_manager.cc
@@ -18,10 +18,10 @@
  */
 
 /*!
- * \file tvm/runtime/vm/memory_manager.cc
+ * \file tvm/runtime/memory/memory_manager.cc
  * \brief Allocate and manage memory for the runtime.
  */
-#include <tvm/runtime/vm/memory_manager.h>
+#include <tvm/runtime/memory/memory_manager.h>
 
 #include
 #include
@@ -31,17 +31,23 @@
 namespace tvm {
 namespace runtime {
-namespace vm {
+namespace memory {
 
 static void BufferDeleter(Object* obj) {
   auto* ptr = static_cast<NDArray::Container*>(obj);
   ICHECK(ptr->manager_ctx != nullptr);
   Buffer* buffer = reinterpret_cast<Buffer*>(ptr->manager_ctx);
-  MemoryManager::GetAllocator(buffer->device)->Free(*(buffer));
+  MemoryManager::GetAllocator(buffer->device, buffer->alloc_type)->Free(*(buffer));
   delete buffer;
   delete ptr;
 }
 
+Storage::Storage(Buffer buffer) {
+  auto n = make_object<StorageObj>();
+  n->buffer = std::move(buffer);
+  data_ = std::move(n);
+}
+
 void StorageObj::Deleter(Object* obj) {
   auto* ptr = static_cast<NDArray::Container*>(obj);
   // When invoking AllocNDArray we don't own the underlying allocation
@@ -76,7 +82,7 @@ inline size_t GetDataAlignment(const DLTensor& arr) {
   return align;
 }
 
-NDArray StorageObj::AllocNDArray(size_t offset, std::vector<int64_t> shape, DLDataType dtype) {
+NDArray StorageObj::AllocNDArray(size_t offset, ShapeTuple shape, DLDataType dtype) {
   VerifyDataType(dtype);
 
   // crtical zone: allocate header, cannot throw
@@ -85,7 +91,7 @@ NDArray StorageObj::AllocNDArray(size_t offset, std::vector<int64_t> shape, DLDa
   container->dl_tensor.byte_offset = offset;
   container->SetDeleter(StorageObj::Deleter);
 
-  size_t needed_size = GetDataSize(container->dl_tensor);
+  size_t needed_size = DeviceAPI::Get(this->buffer.device)->GetDataSize(container->dl_tensor);
   this->IncRef();
   // The manager context pointer must continue to point to the storage object
   // which owns the backing memory, and keeps track of the reference count.
@@ -116,6 +122,9 @@ Allocator* MemoryManager::GetOrCreateAllocator(Device dev, AllocatorType type) {
   MemoryManager* m = MemoryManager::Global();
   std::lock_guard<std::mutex> lock(m->mu_);
   if (m->allocators_.find(dev) == m->allocators_.end()) {
+    m->allocators_.emplace(dev, std::unordered_map<AllocatorType, std::unique_ptr<Allocator>>());
+  }
+  if (m->allocators_.at(dev).find(type) == m->allocators_.at(dev).end()) {
     std::unique_ptr<Allocator> alloc;
     switch (type) {
       case kNaive: {
@@ -132,51 +141,55 @@ Allocator* MemoryManager::GetOrCreateAllocator(Device dev, AllocatorType type) {
         LOG(FATAL) << "Unknown allocator type: " << type;
     }
     auto ret = alloc.get();
-    m->allocators_.emplace(dev, std::move(alloc));
+    m->allocators_.at(dev).emplace(type, std::move(alloc));
     return ret;
   }
-  auto alloc = m->allocators_.at(dev).get();
-  if (alloc->type() != type) {
+  auto alloc = m->allocators_.at(dev).at(type).get();
+  /*if (alloc->type() != type) {
     LOG(WARNING) << "The type of existing allocator for " << dev
                  << " is different from the request type (" << alloc->type() << " vs " << type << ")";
-  }
+  }*/
  return alloc;
 }
 
-Allocator* MemoryManager::GetAllocator(Device dev) {
+Allocator* MemoryManager::GetAllocator(Device dev, AllocatorType type) {
   MemoryManager* m = MemoryManager::Global();
   std::lock_guard<std::mutex> lock(m->mu_);
   auto it = m->allocators_.find(dev);
   if (it == m->allocators_.end()) {
     LOG(FATAL) << "Allocator for " << dev << " has not been created yet.";
   }
-  return it->second.get();
+  if (it->second.find(type) == it->second.end()) {
+    LOG(FATAL) << "Allocator for " << dev << " of type " << type << " has not been created yet.";
+  }
+  return it->second.at(type).get();
 }
 
-NDArray Allocator::Empty(std::vector<int64_t> shape, DLDataType dtype, DLDevice dev) {
+NDArray Allocator::Empty(ShapeTuple shape, DLDataType dtype, DLDevice dev,
+                         Optional<String> mem_scope) {
   VerifyDataType(dtype);
   NDArray::Container* container = new NDArray::Container(nullptr, shape, dtype, dev);
   container->SetDeleter(BufferDeleter);
-  size_t size = GetDataSize(container->dl_tensor);
+  size_t size = DeviceAPI::Get(dev)->GetDataSize(container->dl_tensor);
   size_t alignment = GetDataAlignment(container->dl_tensor);
   Buffer* buffer = new Buffer;
-  *buffer = this->Alloc(size, alignment, dtype);
+  if (!mem_scope.defined() || mem_scope.value().empty() || mem_scope.value() == "global") {
+    *buffer = this->Alloc(size, alignment, dtype);
+  } else {
+    *buffer = this->Alloc(shape, dtype, mem_scope.value());
+  }
   container->manager_ctx = reinterpret_cast<void*>(buffer);
   container->dl_tensor.data = buffer->data;
   return NDArray(GetObjectPtr<Object>(container));
 }
 
-Buffer Allocator::Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint,
+Buffer Allocator::Alloc(Device dev, ShapeTuple shape, DLDataType type_hint,
                         const std::string& mem_scope) {
   if (mem_scope.empty() || mem_scope == "global") {
     // by default, we can always redirect to the flat memory allocations
-    std::vector<int64_t> s;
-    for (int i = 0; i < ndims; ++i) {
-      s.push_back(shape[i]);
-    }
-    NDArray::Container container(nullptr, s, type_hint, dev);
-    size_t size = GetDataSize(container.dl_tensor);
+    NDArray::Container container(nullptr, shape, type_hint, dev);
+    size_t size = DeviceAPI::Get(dev)->GetDataSize(container.dl_tensor);
     size_t alignment = GetDataAlignment(container.dl_tensor);
     return Alloc(size, alignment, type_hint);
   }
@@ -185,6 +198,6 @@ Buffer Allocator::Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_h
   return {};
 }
 
-}  // namespace vm
+}  // namespace memory
 }  // namespace runtime
 }  // namespace tvm
diff --git a/src/runtime/vm/naive_allocator.h b/src/runtime/memory/naive_allocator.h
similarity index 78%
rename from src/runtime/vm/naive_allocator.h
rename to src/runtime/memory/naive_allocator.h
index 799f16ad60bc..4ab96bdfd56d 100644
--- a/src/runtime/vm/naive_allocator.h
+++ b/src/runtime/memory/naive_allocator.h
@@ -18,20 +18,20 @@
  */
 
 /*!
- * \file src/runtime/naive_allocator.h
+ * \file src/runtime/memory/naive_allocator.h
  */
-#ifndef TVM_RUNTIME_VM_NAIVE_ALLOCATOR_H_
-#define TVM_RUNTIME_VM_NAIVE_ALLOCATOR_H_
+#ifndef TVM_RUNTIME_MEMORY_NAIVE_ALLOCATOR_H_
+#define TVM_RUNTIME_MEMORY_NAIVE_ALLOCATOR_H_
 
 #include
-#include <tvm/runtime/vm/memory_manager.h>
+#include <tvm/runtime/memory/memory_manager.h>
 
 #include
 #include
 
 namespace tvm {
 namespace runtime {
-namespace vm {
+namespace memory {
 
 class NaiveAllocator final : public Allocator {
  public:
@@ -41,34 +41,35 @@ class NaiveAllocator final : public Allocator {
     Buffer buf;
     buf.device = device_;
     buf.size = nbytes;
+    buf.alloc_type = kNaive;
     buf.data = DeviceAPI::Get(device_)->AllocDataSpace(device_, nbytes, alignment, type_hint);
     used_memory_.fetch_add(nbytes, std::memory_order_relaxed);
     DLOG(INFO) << "allocate " << nbytes << " B, used memory " << used_memory_ << " B";
     return buf;
   }
 
-  Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint,
-               const std::string& mem_scope) override {
+  Buffer Alloc(ShapeTuple shape, DLDataType type_hint, const std::string& mem_scope) override {
     Buffer buf;
     size_t nbytes = 1;
-    for (int i = 0; i < ndims; ++i) {
-      buf.shape.push_back(shape[i]);
+    for (int i = 0; i < static_cast<int>(shape.size()); ++i) {
       nbytes *= static_cast<size_t>(shape[i]);
     }
     nbytes *= (type_hint.bits * type_hint.lanes + 7) / 8;
     buf.device = device_;
     if (mem_scope.empty() || mem_scope == "global") {
-      auto tmp_buf = Allocator::Alloc(device_, ndims, shape, type_hint, mem_scope);
+      auto tmp_buf = Allocator::Alloc(device_, shape, type_hint, mem_scope);
       buf.size = tmp_buf.size;
       buf.data = tmp_buf.data;
+      buf.alloc_type = kNaive;
       return buf;
     }
     buf.size = nbytes;
-    buf.data = DeviceAPI::Get(device_)->AllocDataSpace(device_, ndims, shape, type_hint,
-                                                       String(mem_scope));
+    buf.data = DeviceAPI::Get(device_)->AllocDataSpace(device_, shape.size(), shape.data(),
+                                                       type_hint, String(mem_scope));
     used_memory_.fetch_add(nbytes, std::memory_order_relaxed);
     DLOG(INFO) << "allocate " << nbytes << " B, used memory " << used_memory_ << " B";
+    buf.alloc_type = kNaive;
     return buf;
   }
 
@@ -85,8 +86,8 @@ class NaiveAllocator final : public Allocator {
   Device device_;
 };
 
-}  // namespace vm
+}  // namespace memory
 }  // namespace runtime
 }  // namespace tvm
 
-#endif  // TVM_RUNTIME_VM_NAIVE_ALLOCATOR_H_
+#endif  // TVM_RUNTIME_MEMORY_NAIVE_ALLOCATOR_H_
diff --git a/src/runtime/vm/pooled_allocator.h b/src/runtime/memory/pooled_allocator.h
similarity index 88%
rename from src/runtime/vm/pooled_allocator.h
rename to src/runtime/memory/pooled_allocator.h
index ea6059e0c64c..01dded966b5c 100644
--- a/src/runtime/vm/pooled_allocator.h
+++ b/src/runtime/memory/pooled_allocator.h
@@ -18,13 +18,13 @@
  */
 
 /*!
- * \file runtime/pooled_allocator.h
+ * \file src/runtime/memory/pooled_allocator.h
  */
-#ifndef TVM_RUNTIME_VM_POOLED_ALLOCATOR_H_
-#define TVM_RUNTIME_VM_POOLED_ALLOCATOR_H_
+#ifndef TVM_RUNTIME_MEMORY_POOLED_ALLOCATOR_H_
+#define TVM_RUNTIME_MEMORY_POOLED_ALLOCATOR_H_
 
 #include
-#include <tvm/runtime/vm/memory_manager.h>
+#include <tvm/runtime/memory/memory_manager.h>
 
 #include
 #include
@@ -34,7 +34,7 @@
 
 namespace tvm {
 namespace runtime {
-namespace vm {
+namespace memory {
 
 class PooledAllocator final : public Allocator {
  public:
@@ -58,6 +58,7 @@ class PooledAllocator final : public Allocator {
     Buffer buf;
     buf.device = device_;
     buf.size = size;
+    buf.alloc_type = kPooled;
     try {
       buf.data = DeviceAPI::Get(device_)->AllocDataSpace(device_, size, alignment, type_hint);
     } catch (InternalError& err) {
@@ -72,10 +73,9 @@ class PooledAllocator final : public Allocator {
     return buf;
   }
 
-  Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint,
-               const std::string& mem_scope) override {
+  Buffer Alloc(ShapeTuple shape, DLDataType type_hint, const std::string& mem_scope) override {
     if (mem_scope.empty() || mem_scope == "global") {
-      return Allocator::Alloc(device_, ndims, shape, type_hint, mem_scope);
+      return Allocator::Alloc(device_, shape, type_hint, mem_scope);
     }
     LOG(FATAL) << "This alloc should be implemented";
     return {};
@@ -114,8 +114,8 @@ class PooledAllocator final : public Allocator {
   Device device_;
 };
 
-}  // namespace vm
+}  // namespace memory
 }  // namespace runtime
 }  // namespace tvm
 
-#endif  // TVM_RUNTIME_VM_POOLED_ALLOCATOR_H_
+#endif  // TVM_RUNTIME_MEMORY_POOLED_ALLOCATOR_H_
diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc
index 188a4153e1c0..66857ca73434 100644
--- a/src/runtime/vm/vm.cc
+++ b/src/runtime/vm/vm.cc
@@ -94,8 +94,8 @@ inline ObjectRef CopyTo(ObjectRef src, const DLDevice& dev, Optional<String> mem
   }
 }
 
-std::vector<int64_t> ToShape(NDArray shape_tensor) {
-  std::vector<int64_t> shape;
+ShapeTuple ToShape(NDArray shape_tensor) {
+  std::vector<ShapeTuple::index_type> shape;
   auto rank = shape_tensor.Shape().size();
   auto dtype = shape_tensor.DataType();
 
@@ -121,7 +121,7 @@ std::vector<int64_t> ToShape(NDArray shape_tensor) {
     LOG(FATAL) << "invalid shape tensor datatype: " << dtype;
   }
 
-  return shape;
+  return ShapeTuple(shape);
 }
 
 void VirtualMachine::OpStartHook(Instruction instr) {}
@@ -839,9 +839,13 @@ void VirtualMachine::RunLoop(const std::vector<Index>& output_tensor_reg_indices
                    << ", dtype_hint=" << DLDataType2String(instr.alloc_storage.dtype_hint)
                    << ", device_index=" << instr.alloc_storage.device_index
                    << ", memory_scope=" << mem_scope;
+
+        std::vector<int64_t> shape_;
+        shape_.resize(instr.alloc_storage.ndim);
+        shape_.assign(instr.alloc_storage.shape,
+                      instr.alloc_storage.shape + instr.alloc_storage.ndim);
         storage_obj->buffer =
-            allocator->Alloc(instr.alloc_storage.ndim, instr.alloc_storage.shape,
-                             instr.alloc_storage.dtype_hint, mem_scope);
+            allocator->Alloc(ShapeTuple(shape_), instr.alloc_storage.dtype_hint, mem_scope);
       } else {
         auto size = LoadScalarInt(instr.alloc_storage.allocation_size);
         auto alignment = instr.alloc_storage.alignment;
diff --git a/tests/cpp/runtime/vm/memory_manager_tests.cc b/tests/cpp/runtime/memory/memory_manager_tests.cc
similarity index 86%
rename from tests/cpp/runtime/vm/memory_manager_tests.cc
rename to tests/cpp/runtime/memory/memory_manager_tests.cc
index ac1ff201cf34..b51be91d7424 100644
--- a/tests/cpp/runtime/vm/memory_manager_tests.cc
+++ b/tests/cpp/runtime/memory/memory_manager_tests.cc
@@ -19,15 +19,15 @@
 #include
 #include
-#include <tvm/runtime/vm/memory_manager.h>
+#include <tvm/runtime/memory/memory_manager.h>
 
 #include
 
-#include "../../../../src/runtime/vm/pooled_allocator.h"
"../../../../src/runtime/memory/pooled_allocator.h" namespace tvm { namespace runtime { -namespace vm { +namespace memory { // MemoryManangerWrapper is necessary because in class MemoryManager we don't have access to its // protected members. In this class we add a new method which allow us to clear internal state of @@ -77,7 +77,7 @@ TEST_F(TvmVMMemoryManagerTest, NaiveEmptyBasic) { EXPECT_EQ(allocator->UsedMemory(), 0); auto dt = DataType::Float(32); size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); - std::vector shape = {1, 3, 6, 6}; + ShapeTuple shape = {1, 3, 6, 6}; { auto ndarray = allocator->Empty(shape, dt, dev); EXPECT_EQ(allocator->UsedMemory(), nbytes); @@ -93,7 +93,7 @@ TEST_F(TvmVMMemoryManagerTest, PooledEmptyBasic) { size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); size_t page_size = PooledAllocator::kDefaultPageSize; size_t size = ((nbytes + page_size - 1) / page_size) * page_size; - std::vector shape = {1, 3, 6, 6}; + ShapeTuple shape = {1, 3, 6, 6}; { auto ndarray = allocator->Empty(shape, dt, dev); EXPECT_EQ(allocator->UsedMemory(), size); @@ -107,14 +107,15 @@ TEST_F(TvmVMMemoryManagerTest, NaiveAllocWithShape) { EXPECT_EQ(allocator->UsedMemory(), 0); auto dt = DataType::Float(32); size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); - std::vector shape = {1, 3, 6, 6}; - auto buff = allocator->Alloc(shape.size(), shape.data(), dt); + ShapeTuple shape = {1, 3, 6, 6}; + auto buff = allocator->Alloc(shape, dt); EXPECT_EQ(allocator->UsedMemory(), nbytes); allocator->Free(buff); EXPECT_EQ(allocator->UsedMemory(), 0); try { - auto texture = allocator->Alloc(shape.size(), shape.data(), dt, "global.texture"); + auto texture = allocator->Alloc(shape, dt, "global.texture"); + (void)texture; FAIL(); } catch (std::exception& e) { std::string pattern = @@ -132,14 +133,15 @@ TEST_F(TvmVMMemoryManagerTest, PooledAllocWithShape) { size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); size_t page_size = PooledAllocator::kDefaultPageSize; size_t size = ((nbytes + page_size - 1) / page_size) * page_size; - std::vector shape = {1, 3, 6, 6}; - auto buff = allocator->Alloc(shape.size(), shape.data(), dt); + ShapeTuple shape = {1, 3, 6, 6}; + auto buff = allocator->Alloc(shape, dt); EXPECT_EQ(allocator->UsedMemory(), size); allocator->Free(buff); EXPECT_EQ(allocator->UsedMemory(), size); try { - auto texture = allocator->Alloc(shape.size(), shape.data(), dt, "global.texture"); + auto texture = allocator->Alloc(shape, dt, "global.texture"); + (void)texture; FAIL(); } catch (std::exception& e) { std::string pattern = "This alloc should be implemented"; @@ -159,13 +161,13 @@ TEST_F(TvmVMMemoryManagerTest, NaiveAllocOpenCLTexture) { EXPECT_EQ(allocator->UsedMemory(), 0); auto dt = DataType::Float(32); size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); - std::vector shape = {1, 3, 6, 6}; - auto buff = allocator->Alloc(shape.size(), shape.data(), dt); + ShapeTuple shape = {1, 3, 6, 6}; + auto buff = allocator->Alloc(shape, dt); EXPECT_EQ(allocator->UsedMemory(), nbytes); allocator->Free(buff); EXPECT_EQ(allocator->UsedMemory(), 0); - auto texture = allocator->Alloc(shape.size(), shape.data(), dt, "global.texture"); + auto texture = allocator->Alloc(shape, dt, "global.texture"); EXPECT_EQ(allocator->UsedMemory(), nbytes); allocator->Free(texture); EXPECT_EQ(allocator->UsedMemory(), 0); @@ -184,14 +186,15 @@ TEST_F(TvmVMMemoryManagerTest, PooledAllocOpenCLTexture) { size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); size_t page_size = PooledAllocator::kDefaultPageSize; size_t size = ((nbytes + page_size - 1) / page_size) * page_size; - 
-  std::vector<int64_t> shape = {1, 3, 6, 6};
-  auto buff = allocator->Alloc(shape.size(), shape.data(), dt);
+  ShapeTuple shape = {1, 3, 6, 6};
+  auto buff = allocator->Alloc(shape, dt);
   EXPECT_EQ(allocator->UsedMemory(), size);
   allocator->Free(buff);
   EXPECT_EQ(allocator->UsedMemory(), size);
 
   try {
-    auto texture = allocator->Alloc(shape.size(), shape.data(), dt, "global.texture");
+    auto texture = allocator->Alloc(shape, dt, "global.texture");
+    (void)texture;
     FAIL();
   } catch (std::exception& e) {
     std::string pattern = "This alloc should be implemented";
@@ -199,6 +202,6 @@ TEST_F(TvmVMMemoryManagerTest, PooledAllocOpenCLTexture) {
     EXPECT_NE(what.find(pattern), std::string::npos) << what;
   }
 }
-}  // namespace vm
+}  // namespace memory
 }  // namespace runtime
 }  // namespace tvm
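
Usage note (not part of the patch): a minimal sketch of how the relocated allocator API introduced above can be exercised from C++ once this change is applied. It assumes a built TVM runtime with a CPU device; the device, shape, and dtype values are illustrative only and mirror the unit tests in the diff.

#include <tvm/runtime/memory/memory_manager.h>
#include <tvm/runtime/ndarray.h>

int main() {
  using tvm::runtime::DataType;
  using tvm::runtime::NDArray;
  using tvm::runtime::ShapeTuple;
  using namespace tvm::runtime::memory;

  DLDevice dev{kDLCPU, 0};
  // Allocators are now keyed by (device, allocator type) instead of device only.
  Allocator* alloc = MemoryManager::GetOrCreateAllocator(dev, AllocatorType::kNaive);

  // Allocator::Empty() now takes a ShapeTuple plus an optional memory scope.
  ShapeTuple shape = {1, 3, 6, 6};
  NDArray arr = alloc->Empty(shape, DataType::Float(32), dev);

  // The shape-based Alloc() overload redirects the default "global" scope
  // to a flat allocation; the returned Buffer records its alloc_type.
  Buffer buf = alloc->Alloc(shape, DataType::Float(32), "global");
  alloc->Free(buf);
  return 0;
}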