From 5eff332a960d7f748df36d41a6e8187035e9661a Mon Sep 17 00:00:00 2001 From: Siva Date: Wed, 27 Sep 2023 13:54:03 +0530 Subject: [PATCH 01/11] [VM] memory Manager moved up to runtime Now graph runtime also uses the same memory manager This accommodates a common memory manager with pooled and naive support. As a follow-up we can move the WorkspacePool to use this common memory manager. --- include/tvm/runtime/{vm => }/memory_manager.h | 32 +++++++++---------- include/tvm/runtime/vm/vm.h | 2 +- src/relay/backend/vm/compiler.h | 2 +- src/runtime/graph_executor/graph_executor.cc | 4 ++- src/runtime/{vm => }/memory_manager.cc | 23 +++++++------ src/runtime/{vm => }/naive_allocator.h | 22 ++++++------- src/runtime/{vm => }/pooled_allocator.h | 24 +++++++------- tests/cpp/runtime/vm/memory_manager_tests.cc | 6 ++-- 8 files changed, 57 insertions(+), 58 deletions(-) rename include/tvm/runtime/{vm => }/memory_manager.h (85%) rename src/runtime/{vm => }/memory_manager.cc (91%) rename src/runtime/{vm => }/naive_allocator.h (84%) rename src/runtime/{vm => }/pooled_allocator.h (84%) diff --git a/include/tvm/runtime/vm/memory_manager.h b/include/tvm/runtime/memory_manager.h similarity index 85% rename from include/tvm/runtime/vm/memory_manager.h rename to include/tvm/runtime/memory_manager.h index feafc01f63d9..130acb60fcad 100644 --- a/include/tvm/runtime/vm/memory_manager.h +++ b/include/tvm/runtime/memory_manager.h @@ -18,11 +18,11 @@ */ /*! - * \file tvm/runtime/vm/memory_manager.h + * \file tvm/runtime/memory_manager.h * \brief Abstract device memory management API */ -#ifndef TVM_RUNTIME_VM_MEMORY_MANAGER_H_ -#define TVM_RUNTIME_VM_MEMORY_MANAGER_H_ +#ifndef TVM_RUNTIME_MEMORY_MANAGER_H_ +#define TVM_RUNTIME_MEMORY_MANAGER_H_ #include #include @@ -37,9 +37,8 @@ namespace tvm { namespace runtime { -namespace vm { -struct Buffer { +struct MBuffer { /*! \brief The pointer to the allocated block of memory. */ void* data{nullptr}; /*! \brief The size of the block. 
*/ @@ -63,9 +62,11 @@ class Allocator { * \param shape The shape of the NDArray. * \param dtype The datatype of the NDArray. * \param dev The device where the array is allocated. + * \param mem_scope is the device memory scope hint. * \return The empty NDArray. */ - NDArray Empty(std::vector shape, DLDataType dtype, Device dev); + NDArray Empty(std::vector shape, DLDataType dtype, Device dev, + Optional mem_scope); /*! \brief Return the allocator type. */ inline AllocatorType type() const { return type_; } /*! \brief Allocate a buffer given a size, alignment and type. @@ -74,7 +75,7 @@ class Allocator { * \param type_hint A type hint to the allocator. * \return A sized allocation in the form of a buffer. */ - virtual Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) = 0; + virtual MBuffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) = 0; /*! \brief Allocate a buffer given a shape and type. * \param ndims The rank of the tensor. * \param shape The shape of the tensor. @@ -82,20 +83,20 @@ class Allocator { * \param mem_scope A memory scope of the buffer. * \return A sized allocation in the form of a buffer. */ - virtual Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, - const std::string& mem_scope = "") = 0; + virtual MBuffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope = "") = 0; /*! \brief Free a buffer allocated by the allocator. * \param buffer The buffer to free. */ - virtual void Free(const Buffer& buffer) = 0; + virtual void Free(const MBuffer& buffer) = 0; /*! \brief The amount of memory currently allocated. * \return The amount of memory currently allocated. 
*/ virtual size_t UsedMemory() const = 0; protected: - virtual Buffer Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint, - const std::string& mem_scope); + virtual MBuffer Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope); private: AllocatorType type_; @@ -130,7 +131,7 @@ class MemoryManager { class StorageObj : public Object { public: /*! \brief The index into the VM function table. */ - Buffer buffer; + MBuffer buffer; /*! \brief Allocate an NDArray from a given piece of storage. */ NDArray AllocNDArray(size_t offset, std::vector shape, DLDataType dtype); @@ -151,13 +152,12 @@ class StorageObj : public Object { /*! \brief reference to storage. */ class Storage : public ObjectRef { public: - explicit Storage(Buffer buffer); + explicit Storage(MBuffer buffer); TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Storage, ObjectRef, StorageObj); }; -} // namespace vm } // namespace runtime } // namespace tvm -#endif // TVM_RUNTIME_VM_MEMORY_MANAGER_H_ +#endif // TVM_RUNTIME_MEMORY_MANAGER_H_ diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h index c2adc3b2a0af..300fb31034f4 100644 --- a/include/tvm/runtime/vm/vm.h +++ b/include/tvm/runtime/vm/vm.h @@ -25,13 +25,13 @@ #define TVM_RUNTIME_VM_VM_H_ #include +#include #include #include #include #include #include #include -#include #include #include diff --git a/src/relay/backend/vm/compiler.h b/src/relay/backend/vm/compiler.h index 5009d9084958..b98e16be45e0 100644 --- a/src/relay/backend/vm/compiler.h +++ b/src/relay/backend/vm/compiler.h @@ -41,7 +41,7 @@ #include #include -#include "../../../runtime/vm/naive_allocator.h" +#include "../../../runtime/naive_allocator.h" #include "../../../runtime/vm/profiler/vm.h" #include "../../transforms/pass_utils.h" #include "../te_compiler.h" diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc index 777a5a442a98..a20cdf24907e 100644 --- 
a/src/runtime/graph_executor/graph_executor.cc +++ b/src/runtime/graph_executor/graph_executor.cc @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -466,7 +467,8 @@ void GraphExecutor::SetupStorage() { if (!pit.scope.empty()) { mem_scope = String(pit.scope); } - storage_pool_.push_back(NDArray::Empty(shape, pit.dtype, dev, mem_scope)); + storage_pool_.push_back(MemoryManager::GetOrCreateAllocator(dev, AllocatorType::kNaive) + ->Empty(shape, pit.dtype, dev, mem_scope)); } } diff --git a/src/runtime/vm/memory_manager.cc b/src/runtime/memory_manager.cc similarity index 91% rename from src/runtime/vm/memory_manager.cc rename to src/runtime/memory_manager.cc index cb52a4a4436c..98782da7b702 100644 --- a/src/runtime/vm/memory_manager.cc +++ b/src/runtime/memory_manager.cc @@ -18,10 +18,10 @@ */ /*! - * \file tvm/runtime/vm/memory_manager.cc + * \file tvm/runtime/memory_manager.cc * \brief Allocate and manage memory for the runtime. */ -#include +#include #include #include @@ -31,12 +31,11 @@ namespace tvm { namespace runtime { -namespace vm { static void BufferDeleter(Object* obj) { auto* ptr = static_cast(obj); ICHECK(ptr->manager_ctx != nullptr); - Buffer* buffer = reinterpret_cast(ptr->manager_ctx); + MBuffer* buffer = reinterpret_cast(ptr->manager_ctx); MemoryManager::GetAllocator(buffer->device)->Free(*(buffer)); delete buffer; delete ptr; @@ -154,21 +153,26 @@ Allocator* MemoryManager::GetAllocator(Device dev) { return it->second.get(); } -NDArray Allocator::Empty(std::vector shape, DLDataType dtype, DLDevice dev) { +NDArray Allocator::Empty(std::vector shape, DLDataType dtype, DLDevice dev, + Optional mem_scope) { VerifyDataType(dtype); NDArray::Container* container = new NDArray::Container(nullptr, shape, dtype, dev); container->SetDeleter(BufferDeleter); size_t size = GetDataSize(container->dl_tensor); size_t alignment = GetDataAlignment(container->dl_tensor); - Buffer* buffer = new Buffer; - *buffer = this->Alloc(size, 
alignment, dtype); + MBuffer* buffer = new MBuffer; + if (!mem_scope.defined() || mem_scope == "global") { + *buffer = this->Alloc(size, alignment, dtype); + } else { + *buffer = this->Alloc(shape.size(), shape.data(), dtype, mem_scope.value()); + } container->manager_ctx = reinterpret_cast(buffer); container->dl_tensor.data = buffer->data; return NDArray(GetObjectPtr(container)); } -Buffer Allocator::Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint, - const std::string& mem_scope) { +MBuffer Allocator::Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope) { if (mem_scope.empty() || mem_scope == "global") { // by default, we can always redirect to the flat memory allocations std::vector s; @@ -185,6 +189,5 @@ Buffer Allocator::Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_h return {}; } -} // namespace vm } // namespace runtime } // namespace tvm diff --git a/src/runtime/vm/naive_allocator.h b/src/runtime/naive_allocator.h similarity index 84% rename from src/runtime/vm/naive_allocator.h rename to src/runtime/naive_allocator.h index 799f16ad60bc..612a6e3de8fa 100644 --- a/src/runtime/vm/naive_allocator.h +++ b/src/runtime/naive_allocator.h @@ -20,25 +20,24 @@ /*! 
* \file src/runtime/naive_allocator.h */ -#ifndef TVM_RUNTIME_VM_NAIVE_ALLOCATOR_H_ -#define TVM_RUNTIME_VM_NAIVE_ALLOCATOR_H_ +#ifndef TVM_RUNTIME_NAIVE_ALLOCATOR_H_ +#define TVM_RUNTIME_NAIVE_ALLOCATOR_H_ #include -#include +#include #include #include namespace tvm { namespace runtime { -namespace vm { class NaiveAllocator final : public Allocator { public: explicit NaiveAllocator(Device dev) : Allocator(kNaive), used_memory_(0), device_(dev) {} - Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) override { - Buffer buf; + MBuffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) override { + MBuffer buf; buf.device = device_; buf.size = nbytes; buf.data = DeviceAPI::Get(device_)->AllocDataSpace(device_, nbytes, alignment, type_hint); @@ -47,9 +46,9 @@ class NaiveAllocator final : public Allocator { return buf; } - Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, - const std::string& mem_scope) override { - Buffer buf; + MBuffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope) override { + MBuffer buf; size_t nbytes = 1; for (int i = 0; i < ndims; ++i) { buf.shape.push_back(shape[i]); @@ -72,7 +71,7 @@ class NaiveAllocator final : public Allocator { return buf; } - void Free(const Buffer& buffer) override { + void Free(const MBuffer& buffer) override { DeviceAPI::Get(device_)->FreeDataSpace(buffer.device, buffer.data); used_memory_.fetch_sub(buffer.size, std::memory_order_relaxed); DLOG(INFO) << "free " << buffer.size << " B, used memory " << used_memory_ << " B"; @@ -85,8 +84,7 @@ class NaiveAllocator final : public Allocator { Device device_; }; -} // namespace vm } // namespace runtime } // namespace tvm -#endif // TVM_RUNTIME_VM_NAIVE_ALLOCATOR_H_ +#endif // TVM_RUNTIME_NAIVE_ALLOCATOR_H_ diff --git a/src/runtime/vm/pooled_allocator.h b/src/runtime/pooled_allocator.h similarity index 84% rename from src/runtime/vm/pooled_allocator.h rename to src/runtime/pooled_allocator.h index 
ea6059e0c64c..fb4a1e6c1488 100644 --- a/src/runtime/vm/pooled_allocator.h +++ b/src/runtime/pooled_allocator.h @@ -20,11 +20,11 @@ /*! * \file runtime/pooled_allocator.h */ -#ifndef TVM_RUNTIME_VM_POOLED_ALLOCATOR_H_ -#define TVM_RUNTIME_VM_POOLED_ALLOCATOR_H_ +#ifndef TVM_RUNTIME_POOLED_ALLOCATOR_H_ +#define TVM_RUNTIME_POOLED_ALLOCATOR_H_ #include -#include +#include #include #include @@ -34,7 +34,6 @@ namespace tvm { namespace runtime { -namespace vm { class PooledAllocator final : public Allocator { public: @@ -45,7 +44,7 @@ class PooledAllocator final : public Allocator { ~PooledAllocator() { ReleaseAll(); } - Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) override { + MBuffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) override { std::lock_guard lock(mu_); size_t size = ((nbytes + page_size_ - 1) / page_size_) * page_size_; auto&& it = memory_pool_.find(size); @@ -55,7 +54,7 @@ class PooledAllocator final : public Allocator { pool.pop_back(); return ret; } - Buffer buf; + MBuffer buf; buf.device = device_; buf.size = size; try { @@ -72,8 +71,8 @@ class PooledAllocator final : public Allocator { return buf; } - Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, - const std::string& mem_scope) override { + MBuffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope) override { if (mem_scope.empty() || mem_scope == "global") { return Allocator::Alloc(device_, ndims, shape, type_hint, mem_scope); } @@ -81,10 +80,10 @@ class PooledAllocator final : public Allocator { return {}; } - void Free(const Buffer& buffer) override { + void Free(const MBuffer& buffer) override { std::lock_guard lock(mu_); if (memory_pool_.find(buffer.size) == memory_pool_.end()) { - memory_pool_.emplace(buffer.size, std::vector{}); + memory_pool_.emplace(buffer.size, std::vector{}); } memory_pool_.at(buffer.size).push_back(buffer); VLOG(1) << "reclaim buffer " << buffer.size; @@ -109,13 +108,12 @@ class 
PooledAllocator final : public Allocator { private: size_t page_size_; std::atomic used_memory_; - std::unordered_map> memory_pool_; + std::unordered_map> memory_pool_; std::recursive_mutex mu_; Device device_; }; -} // namespace vm } // namespace runtime } // namespace tvm -#endif // TVM_RUNTIME_VM_POOLED_ALLOCATOR_H_ +#endif // TVM_RUNTIME_POOLED_ALLOCATOR_H_ diff --git a/tests/cpp/runtime/vm/memory_manager_tests.cc b/tests/cpp/runtime/vm/memory_manager_tests.cc index ac1ff201cf34..ad6209f83eab 100644 --- a/tests/cpp/runtime/vm/memory_manager_tests.cc +++ b/tests/cpp/runtime/vm/memory_manager_tests.cc @@ -19,15 +19,14 @@ #include #include -#include +#include #include -#include "../../../../src/runtime/vm/pooled_allocator.h" +#include "../../../../src/runtime/pooled_allocator.h" namespace tvm { namespace runtime { -namespace vm { // MemoryManangerWrapper is necessary because in class MemoryManager we don't have access to its // protected members. In this class we add a new method which allow us to clear internal state of @@ -199,6 +198,5 @@ TEST_F(TvmVMMemoryManagerTest, PooledAllocOpenCLTexture) { EXPECT_NE(what.find(pattern), std::string::npos) << what; } } -} // namespace vm } // namespace runtime } // namespace tvm From 76436b53eb16a27abb3348533818b5d3d6ca8da7 Mon Sep 17 00:00:00 2001 From: Siva Date: Wed, 27 Sep 2023 22:00:14 +0530 Subject: [PATCH 02/11] * update dependents with new file addition. 
--- apps/android_camera/app/src/main/jni/tvm_runtime.h | 1 + apps/android_deploy/app/src/main/jni/tvm_runtime.h | 1 + apps/android_rpc/app/src/main/jni/tvm_runtime.h | 1 + apps/bundle_deploy/runtime.cc | 1 + apps/howto_deploy/tvm_runtime_pack.cc | 1 + golang/src/tvm_runtime_pack.cc | 1 + 6 files changed, 6 insertions(+) diff --git a/apps/android_camera/app/src/main/jni/tvm_runtime.h b/apps/android_camera/app/src/main/jni/tvm_runtime.h index 0aac7f170ab4..c7f556473fff 100644 --- a/apps/android_camera/app/src/main/jni/tvm_runtime.h +++ b/apps/android_camera/app/src/main/jni/tvm_runtime.h @@ -40,6 +40,7 @@ #include "../src/runtime/graph_executor/graph_executor.cc" #include "../src/runtime/library_module.cc" #include "../src/runtime/logging.cc" +#include "../src/runtime/memory_manager.cc" #include "../src/runtime/minrpc/minrpc_logger.cc" #include "../src/runtime/module.cc" #include "../src/runtime/ndarray.cc" diff --git a/apps/android_deploy/app/src/main/jni/tvm_runtime.h b/apps/android_deploy/app/src/main/jni/tvm_runtime.h index a2f10701d6df..18dedb4474cc 100644 --- a/apps/android_deploy/app/src/main/jni/tvm_runtime.h +++ b/apps/android_deploy/app/src/main/jni/tvm_runtime.h @@ -35,6 +35,7 @@ #include "../src/runtime/graph_executor/graph_executor.cc" #include "../src/runtime/library_module.cc" #include "../src/runtime/logging.cc" +#include "../src/runtime/memory_manager.cc" #include "../src/runtime/module.cc" #include "../src/runtime/ndarray.cc" #include "../src/runtime/object.cc" diff --git a/apps/android_rpc/app/src/main/jni/tvm_runtime.h b/apps/android_rpc/app/src/main/jni/tvm_runtime.h index 260c8d0cd813..1c1bbbb59456 100644 --- a/apps/android_rpc/app/src/main/jni/tvm_runtime.h +++ b/apps/android_rpc/app/src/main/jni/tvm_runtime.h @@ -42,6 +42,7 @@ #include "../src/runtime/graph_executor/graph_executor_factory.cc" #include "../src/runtime/library_module.cc" #include "../src/runtime/logging.cc" +#include "../src/runtime/memory_manager.cc" #include 
"../src/runtime/minrpc/minrpc_logger.cc" #include "../src/runtime/module.cc" #include "../src/runtime/ndarray.cc" diff --git a/apps/bundle_deploy/runtime.cc b/apps/bundle_deploy/runtime.cc index 393fc3489af8..287d9fae68c7 100644 --- a/apps/bundle_deploy/runtime.cc +++ b/apps/bundle_deploy/runtime.cc @@ -29,6 +29,7 @@ #include "../../src/runtime/graph_executor/graph_executor.cc" #include "../../src/runtime/library_module.cc" #include "../../src/runtime/logging.cc" +#include "../../src/runtime/memory_manager.cc" #include "../../src/runtime/module.cc" #include "../../src/runtime/ndarray.cc" #include "../../src/runtime/object.cc" diff --git a/apps/howto_deploy/tvm_runtime_pack.cc b/apps/howto_deploy/tvm_runtime_pack.cc index 0ee61a7e50e4..bb8a0de77ca9 100644 --- a/apps/howto_deploy/tvm_runtime_pack.cc +++ b/apps/howto_deploy/tvm_runtime_pack.cc @@ -64,6 +64,7 @@ // Graph executor #include "../../src/runtime/graph_executor/graph_executor.cc" #include "../../src/runtime/graph_executor/graph_executor_factory.cc" +#include "../../src/runtime/memory_manager.cc" // Uncomment the following lines to enable RPC // #include "../../src/runtime/rpc/rpc_session.cc" diff --git a/golang/src/tvm_runtime_pack.cc b/golang/src/tvm_runtime_pack.cc index c2add6a36734..2ad3a1d1c497 100644 --- a/golang/src/tvm_runtime_pack.cc +++ b/golang/src/tvm_runtime_pack.cc @@ -46,6 +46,7 @@ // Graph executor #include "src/runtime/graph_executor/graph_executor.cc" +#include "src/runtime/memory_manager.cc" // Uncomment the following lines to enable RPC // #include "../../src/runtime/rpc/rpc_session.cc" From 20b23021f7ab5b5cb99a31803207f48897bf5830 Mon Sep 17 00:00:00 2001 From: Siva Date: Thu, 28 Sep 2023 12:26:29 +0530 Subject: [PATCH 03/11] * define memory_manager under new namespace --- CMakeLists.txt | 1 + .../app/src/main/jni/tvm_runtime.h | 2 +- .../app/src/main/jni/tvm_runtime.h | 2 +- .../app/src/main/jni/tvm_runtime.h | 2 +- apps/bundle_deploy/runtime.cc | 2 +- 
apps/howto_deploy/tvm_runtime_pack.cc | 2 +- golang/src/tvm_runtime_pack.cc | 2 +- .../tvm/runtime/{ => memory}/memory_manager.h | 30 ++++++++++--------- include/tvm/runtime/vm/vm.h | 9 +++++- src/relay/backend/vm/compiler.h | 2 +- src/runtime/graph_executor/graph_executor.cc | 1 - src/runtime/graph_executor/graph_executor.h | 4 +++ src/runtime/{ => memory}/memory_manager.cc | 14 +++++---- src/runtime/{ => memory}/naive_allocator.h | 27 ++++++++++------- src/runtime/{ => memory}/pooled_allocator.h | 24 ++++++++------- tests/cpp/runtime/vm/memory_manager_tests.cc | 6 ++-- 16 files changed, 77 insertions(+), 53 deletions(-) rename include/tvm/runtime/{ => memory}/memory_manager.h (86%) rename src/runtime/{ => memory}/memory_manager.cc (94%) rename src/runtime/{ => memory}/naive_allocator.h (80%) rename src/runtime/{ => memory}/pooled_allocator.h (84%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f989a3d904e..d4d599134587 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -343,6 +343,7 @@ list(APPEND COMPILER_SRCS "src/target/datatype/myfloat/myfloat.cc") tvm_file_glob(GLOB RUNTIME_SRCS src/runtime/*.cc src/runtime/vm/*.cc + src/runtime/memory/*.cc src/runtime/disco/*.cc src/runtime/minrpc/*.cc ) diff --git a/apps/android_camera/app/src/main/jni/tvm_runtime.h b/apps/android_camera/app/src/main/jni/tvm_runtime.h index c7f556473fff..e843b56360bb 100644 --- a/apps/android_camera/app/src/main/jni/tvm_runtime.h +++ b/apps/android_camera/app/src/main/jni/tvm_runtime.h @@ -40,7 +40,7 @@ #include "../src/runtime/graph_executor/graph_executor.cc" #include "../src/runtime/library_module.cc" #include "../src/runtime/logging.cc" -#include "../src/runtime/memory_manager.cc" +#include "../src/runtime/memory/memory_manager.cc" #include "../src/runtime/minrpc/minrpc_logger.cc" #include "../src/runtime/module.cc" #include "../src/runtime/ndarray.cc" diff --git a/apps/android_deploy/app/src/main/jni/tvm_runtime.h b/apps/android_deploy/app/src/main/jni/tvm_runtime.h index 
18dedb4474cc..9eda834eb433 100644 --- a/apps/android_deploy/app/src/main/jni/tvm_runtime.h +++ b/apps/android_deploy/app/src/main/jni/tvm_runtime.h @@ -35,7 +35,7 @@ #include "../src/runtime/graph_executor/graph_executor.cc" #include "../src/runtime/library_module.cc" #include "../src/runtime/logging.cc" -#include "../src/runtime/memory_manager.cc" +#include "../src/runtime/memory/memory_manager.cc" #include "../src/runtime/module.cc" #include "../src/runtime/ndarray.cc" #include "../src/runtime/object.cc" diff --git a/apps/android_rpc/app/src/main/jni/tvm_runtime.h b/apps/android_rpc/app/src/main/jni/tvm_runtime.h index 1c1bbbb59456..fb14d84b794f 100644 --- a/apps/android_rpc/app/src/main/jni/tvm_runtime.h +++ b/apps/android_rpc/app/src/main/jni/tvm_runtime.h @@ -42,7 +42,7 @@ #include "../src/runtime/graph_executor/graph_executor_factory.cc" #include "../src/runtime/library_module.cc" #include "../src/runtime/logging.cc" -#include "../src/runtime/memory_manager.cc" +#include "../src/runtime/memory/memory_manager.cc" #include "../src/runtime/minrpc/minrpc_logger.cc" #include "../src/runtime/module.cc" #include "../src/runtime/ndarray.cc" diff --git a/apps/bundle_deploy/runtime.cc b/apps/bundle_deploy/runtime.cc index 287d9fae68c7..e52a4796bb48 100644 --- a/apps/bundle_deploy/runtime.cc +++ b/apps/bundle_deploy/runtime.cc @@ -29,7 +29,7 @@ #include "../../src/runtime/graph_executor/graph_executor.cc" #include "../../src/runtime/library_module.cc" #include "../../src/runtime/logging.cc" -#include "../../src/runtime/memory_manager.cc" +#include "../../src/runtime/memory/memory_manager.cc" #include "../../src/runtime/module.cc" #include "../../src/runtime/ndarray.cc" #include "../../src/runtime/object.cc" diff --git a/apps/howto_deploy/tvm_runtime_pack.cc b/apps/howto_deploy/tvm_runtime_pack.cc index bb8a0de77ca9..25e768302c38 100644 --- a/apps/howto_deploy/tvm_runtime_pack.cc +++ b/apps/howto_deploy/tvm_runtime_pack.cc @@ -64,7 +64,7 @@ // Graph executor #include 
"../../src/runtime/graph_executor/graph_executor.cc" #include "../../src/runtime/graph_executor/graph_executor_factory.cc" -#include "../../src/runtime/memory_manager.cc" +#include "../../src/runtime/memory/memory_manager.cc" // Uncomment the following lines to enable RPC // #include "../../src/runtime/rpc/rpc_session.cc" diff --git a/golang/src/tvm_runtime_pack.cc b/golang/src/tvm_runtime_pack.cc index 2ad3a1d1c497..e4056742eef4 100644 --- a/golang/src/tvm_runtime_pack.cc +++ b/golang/src/tvm_runtime_pack.cc @@ -46,7 +46,7 @@ // Graph executor #include "src/runtime/graph_executor/graph_executor.cc" -#include "src/runtime/memory_manager.cc" +#include "src/runtime/memory/memory_manager.cc" // Uncomment the following lines to enable RPC // #include "../../src/runtime/rpc/rpc_session.cc" diff --git a/include/tvm/runtime/memory_manager.h b/include/tvm/runtime/memory/memory_manager.h similarity index 86% rename from include/tvm/runtime/memory_manager.h rename to include/tvm/runtime/memory/memory_manager.h index 130acb60fcad..093253c8fedd 100644 --- a/include/tvm/runtime/memory_manager.h +++ b/include/tvm/runtime/memory/memory_manager.h @@ -18,11 +18,11 @@ */ /*! - * \file tvm/runtime/memory_manager.h + * \file tvm/runtime/memory/memory_manager.h * \brief Abstract device memory management API */ -#ifndef TVM_RUNTIME_MEMORY_MANAGER_H_ -#define TVM_RUNTIME_MEMORY_MANAGER_H_ +#ifndef TVM_RUNTIME_MEMORY_MEMORY_MANAGER_H_ +#define TVM_RUNTIME_MEMORY_MEMORY_MANAGER_H_ #include #include @@ -37,14 +37,15 @@ namespace tvm { namespace runtime { +namespace memory { -struct MBuffer { +struct Buffer { /*! \brief The pointer to the allocated block of memory. */ void* data{nullptr}; /*! \brief The size of the block. */ size_t size{0}; /*! \brief The shape of the tensor. */ - std::vector shape; + ShapeTuple shape; /*! \brief The context of the allocated buffers. */ Device device; }; @@ -62,7 +63,7 @@ class Allocator { * \param shape The shape of the NDArray. 
* \param dtype The datatype of the NDArray. * \param dev The device where the array is allocated. - * \param mem_scope is the device memory scope hint. + * \param mem_scope The device memory scope hint. * \return The empty NDArray. */ NDArray Empty(std::vector shape, DLDataType dtype, Device dev, @@ -75,7 +76,7 @@ class Allocator { * \param type_hint A type hint to the allocator. * \return A sized allocation in the form of a buffer. */ - virtual MBuffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) = 0; + virtual Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) = 0; /*! \brief Allocate a buffer given a shape and type. * \param ndims The rank of the tensor. * \param shape The shape of the tensor. @@ -83,20 +84,20 @@ class Allocator { * \param mem_scope A memory scope of the buffer. * \return A sized allocation in the form of a buffer. */ - virtual MBuffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, - const std::string& mem_scope = "") = 0; + virtual Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope = "") = 0; /*! \brief Free a buffer allocated by the allocator. * \param buffer The buffer to free. */ - virtual void Free(const MBuffer& buffer) = 0; + virtual void Free(const Buffer& buffer) = 0; /*! \brief The amount of memory currently allocated. * \return The amount of memory currently allocated. */ virtual size_t UsedMemory() const = 0; protected: - virtual MBuffer Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint, - const std::string& mem_scope); + virtual Buffer Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope); private: AllocatorType type_; @@ -131,7 +132,7 @@ class MemoryManager { class StorageObj : public Object { public: /*! \brief The index into the VM function table. */ - MBuffer buffer; + Buffer buffer; /*! \brief Allocate an NDArray from a given piece of storage. 
*/ NDArray AllocNDArray(size_t offset, std::vector shape, DLDataType dtype); @@ -152,11 +153,12 @@ class StorageObj : public Object { /*! \brief reference to storage. */ class Storage : public ObjectRef { public: - explicit Storage(MBuffer buffer); + explicit Storage(Buffer buffer); TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Storage, ObjectRef, StorageObj); }; +} // namespace memory } // namespace runtime } // namespace tvm diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h index 300fb31034f4..a5fe91186d99 100644 --- a/include/tvm/runtime/vm/vm.h +++ b/include/tvm/runtime/vm/vm.h @@ -25,7 +25,7 @@ #define TVM_RUNTIME_VM_VM_H_ #include -#include +#include #include #include #include @@ -41,6 +41,13 @@ namespace tvm { namespace runtime { + +using memory::Allocator; +using memory::AllocatorType; +using memory::MemoryManager; +using memory::Storage; +using memory::StorageObj; + namespace vm { /*! diff --git a/src/relay/backend/vm/compiler.h b/src/relay/backend/vm/compiler.h index b98e16be45e0..acb4d2d1d258 100644 --- a/src/relay/backend/vm/compiler.h +++ b/src/relay/backend/vm/compiler.h @@ -41,7 +41,7 @@ #include #include -#include "../../../runtime/naive_allocator.h" +#include "../../../runtime/memory/naive_allocator.h" #include "../../../runtime/vm/profiler/vm.h" #include "../../transforms/pass_utils.h" #include "../te_compiler.h" diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc index a20cdf24907e..5bd7967cab37 100644 --- a/src/runtime/graph_executor/graph_executor.cc +++ b/src/runtime/graph_executor/graph_executor.cc @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h index 2f6b8b8147e5..08e06f4e6bf3 100644 --- a/src/runtime/graph_executor/graph_executor.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -28,6 +28,7 @@ #include #include #include +#include 
#include #include @@ -42,6 +43,9 @@ namespace tvm { namespace runtime { +using memory::AllocatorType; +using memory::MemoryManager; + /*! \brief macro to do C API call */ #define TVM_CCALL(func) \ { \ diff --git a/src/runtime/memory_manager.cc b/src/runtime/memory/memory_manager.cc similarity index 94% rename from src/runtime/memory_manager.cc rename to src/runtime/memory/memory_manager.cc index 98782da7b702..f6ea483ce766 100644 --- a/src/runtime/memory_manager.cc +++ b/src/runtime/memory/memory_manager.cc @@ -18,10 +18,10 @@ */ /*! - * \file tvm/runtime/memory_manager.cc + * \file tvm/runtime/memory/memory_manager.cc * \brief Allocate and manage memory for the runtime. */ -#include +#include #include #include @@ -31,11 +31,12 @@ namespace tvm { namespace runtime { +namespace memory { static void BufferDeleter(Object* obj) { auto* ptr = static_cast(obj); ICHECK(ptr->manager_ctx != nullptr); - MBuffer* buffer = reinterpret_cast(ptr->manager_ctx); + Buffer* buffer = reinterpret_cast(ptr->manager_ctx); MemoryManager::GetAllocator(buffer->device)->Free(*(buffer)); delete buffer; delete ptr; @@ -160,7 +161,7 @@ NDArray Allocator::Empty(std::vector shape, DLDataType dtype, DLDevice container->SetDeleter(BufferDeleter); size_t size = GetDataSize(container->dl_tensor); size_t alignment = GetDataAlignment(container->dl_tensor); - MBuffer* buffer = new MBuffer; + Buffer* buffer = new Buffer; if (!mem_scope.defined() || mem_scope == "global") { *buffer = this->Alloc(size, alignment, dtype); } else { @@ -171,8 +172,8 @@ NDArray Allocator::Empty(std::vector shape, DLDataType dtype, DLDevice return NDArray(GetObjectPtr(container)); } -MBuffer Allocator::Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint, - const std::string& mem_scope) { +Buffer Allocator::Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope) { if (mem_scope.empty() || mem_scope == "global") { // by default, we can always redirect to the flat memory 
allocations std::vector s; @@ -189,5 +190,6 @@ MBuffer Allocator::Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_ return {}; } +} // namespace memory } // namespace runtime } // namespace tvm diff --git a/src/runtime/naive_allocator.h b/src/runtime/memory/naive_allocator.h similarity index 80% rename from src/runtime/naive_allocator.h rename to src/runtime/memory/naive_allocator.h index 612a6e3de8fa..68b766935ab4 100644 --- a/src/runtime/naive_allocator.h +++ b/src/runtime/memory/naive_allocator.h @@ -18,26 +18,27 @@ */ /*! - * \file src/runtime/naive_allocator.h + * \file src/runtime/memory/naive_allocator.h */ -#ifndef TVM_RUNTIME_NAIVE_ALLOCATOR_H_ -#define TVM_RUNTIME_NAIVE_ALLOCATOR_H_ +#ifndef TVM_RUNTIME_MEMORY_NAIVE_ALLOCATOR_H_ +#define TVM_RUNTIME_MEMORY_NAIVE_ALLOCATOR_H_ #include -#include +#include #include #include namespace tvm { namespace runtime { +namespace memory { class NaiveAllocator final : public Allocator { public: explicit NaiveAllocator(Device dev) : Allocator(kNaive), used_memory_(0), device_(dev) {} - MBuffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) override { - MBuffer buf; + Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) override { + Buffer buf; buf.device = device_; buf.size = nbytes; buf.data = DeviceAPI::Get(device_)->AllocDataSpace(device_, nbytes, alignment, type_hint); @@ -46,12 +47,15 @@ class NaiveAllocator final : public Allocator { return buf; } - MBuffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, - const std::string& mem_scope) override { - MBuffer buf; + Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope) override { + Buffer buf; size_t nbytes = 1; + std::vector shape_; + shape_.resize(ndims); + shape_.assign(shape, shape + ndims); + buf.shape = ShapeTuple(shape_); for (int i = 0; i < ndims; ++i) { - buf.shape.push_back(shape[i]); nbytes *= static_cast(shape[i]); } nbytes *= (type_hint.bits * type_hint.lanes + 7) / 
8; @@ -71,7 +75,7 @@ class NaiveAllocator final : public Allocator { return buf; } - void Free(const MBuffer& buffer) override { + void Free(const Buffer& buffer) override { DeviceAPI::Get(device_)->FreeDataSpace(buffer.device, buffer.data); used_memory_.fetch_sub(buffer.size, std::memory_order_relaxed); DLOG(INFO) << "free " << buffer.size << " B, used memory " << used_memory_ << " B"; @@ -84,6 +88,7 @@ class NaiveAllocator final : public Allocator { Device device_; }; +} // namespace memory } // namespace runtime } // namespace tvm diff --git a/src/runtime/pooled_allocator.h b/src/runtime/memory/pooled_allocator.h similarity index 84% rename from src/runtime/pooled_allocator.h rename to src/runtime/memory/pooled_allocator.h index fb4a1e6c1488..ce9e559ed287 100644 --- a/src/runtime/pooled_allocator.h +++ b/src/runtime/memory/pooled_allocator.h @@ -18,13 +18,13 @@ */ /*! - * \file runtime/pooled_allocator.h + * \file src/runtime/memory/pooled_allocator.h */ -#ifndef TVM_RUNTIME_POOLED_ALLOCATOR_H_ -#define TVM_RUNTIME_POOLED_ALLOCATOR_H_ +#ifndef TVM_RUNTIME_MEMORY_POOLED_ALLOCATOR_H_ +#define TVM_RUNTIME_MEMORY_POOLED_ALLOCATOR_H_ #include -#include +#include #include #include @@ -34,6 +34,7 @@ namespace tvm { namespace runtime { +namespace memory { class PooledAllocator final : public Allocator { public: @@ -44,7 +45,7 @@ class PooledAllocator final : public Allocator { ~PooledAllocator() { ReleaseAll(); } - MBuffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) override { + Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) override { std::lock_guard lock(mu_); size_t size = ((nbytes + page_size_ - 1) / page_size_) * page_size_; auto&& it = memory_pool_.find(size); @@ -54,7 +55,7 @@ class PooledAllocator final : public Allocator { pool.pop_back(); return ret; } - MBuffer buf; + Buffer buf; buf.device = device_; buf.size = size; try { @@ -71,8 +72,8 @@ class PooledAllocator final : public Allocator { return buf; } - MBuffer 
Alloc(int ndims, int64_t* shape, DLDataType type_hint, - const std::string& mem_scope) override { + Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope) override { if (mem_scope.empty() || mem_scope == "global") { return Allocator::Alloc(device_, ndims, shape, type_hint, mem_scope); } @@ -80,10 +81,10 @@ class PooledAllocator final : public Allocator { return {}; } - void Free(const MBuffer& buffer) override { + void Free(const Buffer& buffer) override { std::lock_guard lock(mu_); if (memory_pool_.find(buffer.size) == memory_pool_.end()) { - memory_pool_.emplace(buffer.size, std::vector{}); + memory_pool_.emplace(buffer.size, std::vector{}); } memory_pool_.at(buffer.size).push_back(buffer); VLOG(1) << "reclaim buffer " << buffer.size; @@ -108,11 +109,12 @@ class PooledAllocator final : public Allocator { private: size_t page_size_; std::atomic used_memory_; - std::unordered_map> memory_pool_; + std::unordered_map> memory_pool_; std::recursive_mutex mu_; Device device_; }; +} // namespace memory } // namespace runtime } // namespace tvm diff --git a/tests/cpp/runtime/vm/memory_manager_tests.cc b/tests/cpp/runtime/vm/memory_manager_tests.cc index ad6209f83eab..dc721a02a803 100644 --- a/tests/cpp/runtime/vm/memory_manager_tests.cc +++ b/tests/cpp/runtime/vm/memory_manager_tests.cc @@ -19,14 +19,15 @@ #include #include -#include +#include #include -#include "../../../../src/runtime/pooled_allocator.h" +#include "../../../../src/runtime/memory/pooled_allocator.h" namespace tvm { namespace runtime { +namespace memory { // MemoryManangerWrapper is necessary because in class MemoryManager we don't have access to its // protected members. 
In this class we add a new method which allow us to clear internal state of @@ -198,5 +199,6 @@ TEST_F(TvmVMMemoryManagerTest, PooledAllocOpenCLTexture) { EXPECT_NE(what.find(pattern), std::string::npos) << what; } } +} // namespace memory } // namespace runtime } // namespace tvm From 841bf06e76ab28ce9386a4830c91cb6da429e387 Mon Sep 17 00:00:00 2001 From: Siva Date: Thu, 28 Sep 2023 14:38:37 +0530 Subject: [PATCH 04/11] * use ShapeTuple across vm executor and memory_manager --- include/tvm/runtime/memory/memory_manager.h | 5 ++--- src/runtime/memory/memory_manager.cc | 6 +++--- src/runtime/vm/vm.cc | 6 +++--- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/include/tvm/runtime/memory/memory_manager.h b/include/tvm/runtime/memory/memory_manager.h index 093253c8fedd..7e2a15375c1f 100644 --- a/include/tvm/runtime/memory/memory_manager.h +++ b/include/tvm/runtime/memory/memory_manager.h @@ -66,8 +66,7 @@ class Allocator { * \param mem_scope The device memory scope hint. * \return The empty NDArray. */ - NDArray Empty(std::vector shape, DLDataType dtype, Device dev, - Optional mem_scope); + NDArray Empty(ShapeTuple shape, DLDataType dtype, Device dev, Optional mem_scope); /*! \brief Return the allocator type. */ inline AllocatorType type() const { return type_; } /*! \brief Allocate a buffer given a size, alignment and type. @@ -135,7 +134,7 @@ class StorageObj : public Object { Buffer buffer; /*! \brief Allocate an NDArray from a given piece of storage. */ - NDArray AllocNDArray(size_t offset, std::vector shape, DLDataType dtype); + NDArray AllocNDArray(size_t offset, ShapeTuple shape, DLDataType dtype); /*! \brief The deleter for an NDArray when allocated from underlying storage. 
*/ static void Deleter(Object* ptr); diff --git a/src/runtime/memory/memory_manager.cc b/src/runtime/memory/memory_manager.cc index f6ea483ce766..f70f7e55344b 100644 --- a/src/runtime/memory/memory_manager.cc +++ b/src/runtime/memory/memory_manager.cc @@ -76,7 +76,7 @@ inline size_t GetDataAlignment(const DLTensor& arr) { return align; } -NDArray StorageObj::AllocNDArray(size_t offset, std::vector shape, DLDataType dtype) { +NDArray StorageObj::AllocNDArray(size_t offset, ShapeTuple shape, DLDataType dtype) { VerifyDataType(dtype); // crtical zone: allocate header, cannot throw @@ -154,7 +154,7 @@ Allocator* MemoryManager::GetAllocator(Device dev) { return it->second.get(); } -NDArray Allocator::Empty(std::vector shape, DLDataType dtype, DLDevice dev, +NDArray Allocator::Empty(ShapeTuple shape, DLDataType dtype, DLDevice dev, Optional mem_scope) { VerifyDataType(dtype); NDArray::Container* container = new NDArray::Container(nullptr, shape, dtype, dev); @@ -165,7 +165,7 @@ NDArray Allocator::Empty(std::vector shape, DLDataType dtype, DLDevice if (!mem_scope.defined() || mem_scope == "global") { *buffer = this->Alloc(size, alignment, dtype); } else { - *buffer = this->Alloc(shape.size(), shape.data(), dtype, mem_scope.value()); + *buffer = this->Alloc(shape.size(), const_cast(shape.data()), dtype, mem_scope.value()); } container->manager_ctx = reinterpret_cast(buffer); container->dl_tensor.data = buffer->data; diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 188a4153e1c0..836d59210489 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -94,8 +94,8 @@ inline ObjectRef CopyTo(ObjectRef src, const DLDevice& dev, Optional mem } } -std::vector ToShape(NDArray shape_tensor) { - std::vector shape; +ShapeTuple ToShape(NDArray shape_tensor) { + std::vector shape; auto rank = shape_tensor.Shape().size(); auto dtype = shape_tensor.DataType(); @@ -121,7 +121,7 @@ std::vector ToShape(NDArray shape_tensor) { LOG(FATAL) << "invalid shape tensor datatype: " 
<< dtype; } - return shape; + return ShapeTuple(shape); } void VirtualMachine::OpStartHook(Instruction instr) {} From 13b319ecde01965a50f722b5f6bdbb2dc9a6d2e0 Mon Sep 17 00:00:00 2001 From: Siva Date: Thu, 28 Sep 2023 15:04:01 +0530 Subject: [PATCH 05/11] * ShapeTuple across the Allocators --- include/tvm/runtime/memory/memory_manager.h | 7 +++---- src/runtime/memory/memory_manager.cc | 10 +++------- src/runtime/memory/naive_allocator.h | 18 +++++++----------- src/runtime/memory/pooled_allocator.h | 7 +++---- src/runtime/vm/vm.cc | 8 ++++++-- 5 files changed, 22 insertions(+), 28 deletions(-) diff --git a/include/tvm/runtime/memory/memory_manager.h b/include/tvm/runtime/memory/memory_manager.h index 7e2a15375c1f..4635d074780f 100644 --- a/include/tvm/runtime/memory/memory_manager.h +++ b/include/tvm/runtime/memory/memory_manager.h @@ -77,13 +77,12 @@ class Allocator { */ virtual Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) = 0; /*! \brief Allocate a buffer given a shape and type. - * \param ndims The rank of the tensor. * \param shape The shape of the tensor. * \param type_hint A type hint to the allocator. * \param mem_scope A memory scope of the buffer. * \return A sized allocation in the form of a buffer. */ - virtual Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, + virtual Buffer Alloc(ShapeTuple shape, DLDataType type_hint, const std::string& mem_scope = "") = 0; /*! \brief Free a buffer allocated by the allocator. * \param buffer The buffer to free. 
@@ -95,7 +94,7 @@ class Allocator { virtual size_t UsedMemory() const = 0; protected: - virtual Buffer Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint, + virtual Buffer Alloc(Device dev, ShapeTuple shape, DLDataType type_hint, const std::string& mem_scope); private: @@ -161,4 +160,4 @@ class Storage : public ObjectRef { } // namespace runtime } // namespace tvm -#endif // TVM_RUNTIME_MEMORY_MANAGER_H_ +#endif // TVM_RUNTIME_MEMORY_MEMORY_MANAGER_H_ diff --git a/src/runtime/memory/memory_manager.cc b/src/runtime/memory/memory_manager.cc index f70f7e55344b..e426ba597832 100644 --- a/src/runtime/memory/memory_manager.cc +++ b/src/runtime/memory/memory_manager.cc @@ -165,22 +165,18 @@ NDArray Allocator::Empty(ShapeTuple shape, DLDataType dtype, DLDevice dev, if (!mem_scope.defined() || mem_scope == "global") { *buffer = this->Alloc(size, alignment, dtype); } else { - *buffer = this->Alloc(shape.size(), const_cast(shape.data()), dtype, mem_scope.value()); + *buffer = this->Alloc(shape, dtype, mem_scope.value()); } container->manager_ctx = reinterpret_cast(buffer); container->dl_tensor.data = buffer->data; return NDArray(GetObjectPtr(container)); } -Buffer Allocator::Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint, +Buffer Allocator::Alloc(Device dev, ShapeTuple shape, DLDataType type_hint, const std::string& mem_scope) { if (mem_scope.empty() || mem_scope == "global") { // by default, we can always redirect to the flat memory allocations - std::vector s; - for (int i = 0; i < ndims; ++i) { - s.push_back(shape[i]); - } - NDArray::Container container(nullptr, s, type_hint, dev); + NDArray::Container container(nullptr, shape, type_hint, dev); size_t size = GetDataSize(container.dl_tensor); size_t alignment = GetDataAlignment(container.dl_tensor); return Alloc(size, alignment, type_hint); diff --git a/src/runtime/memory/naive_allocator.h b/src/runtime/memory/naive_allocator.h index 68b766935ab4..979211b94cec 100644 --- 
a/src/runtime/memory/naive_allocator.h +++ b/src/runtime/memory/naive_allocator.h @@ -47,29 +47,25 @@ class NaiveAllocator final : public Allocator { return buf; } - Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, - const std::string& mem_scope) override { + Buffer Alloc(ShapeTuple shape, DLDataType type_hint, const std::string& mem_scope) override { Buffer buf; size_t nbytes = 1; - std::vector shape_; - shape_.resize(ndims); - shape_.assign(shape, shape + ndims); - buf.shape = ShapeTuple(shape_); - for (int i = 0; i < ndims; ++i) { + buf.shape = shape; + for (int i = 0; i < shape.size(); ++i) { nbytes *= static_cast(shape[i]); } nbytes *= (type_hint.bits * type_hint.lanes + 7) / 8; buf.device = device_; if (mem_scope.empty() || mem_scope == "global") { - auto tmp_buf = Allocator::Alloc(device_, ndims, shape, type_hint, mem_scope); + auto tmp_buf = Allocator::Alloc(device_, shape, type_hint, mem_scope); buf.size = tmp_buf.size; buf.data = tmp_buf.data; return buf; } buf.size = nbytes; - buf.data = DeviceAPI::Get(device_)->AllocDataSpace(device_, ndims, shape, type_hint, - String(mem_scope)); + buf.data = DeviceAPI::Get(device_)->AllocDataSpace(device_, shape.size(), shape.data(), + type_hint, String(mem_scope)); used_memory_.fetch_add(nbytes, std::memory_order_relaxed); DLOG(INFO) << "allocate " << nbytes << " B, used memory " << used_memory_ << " B"; return buf; @@ -92,4 +88,4 @@ class NaiveAllocator final : public Allocator { } // namespace runtime } // namespace tvm -#endif // TVM_RUNTIME_NAIVE_ALLOCATOR_H_ +#endif // TVM_RUNTIME_MEMORY_NAIVE_ALLOCATOR_H_ diff --git a/src/runtime/memory/pooled_allocator.h b/src/runtime/memory/pooled_allocator.h index ce9e559ed287..ba03f5651b7a 100644 --- a/src/runtime/memory/pooled_allocator.h +++ b/src/runtime/memory/pooled_allocator.h @@ -72,10 +72,9 @@ class PooledAllocator final : public Allocator { return buf; } - Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, - const std::string& mem_scope) 
override { + Buffer Alloc(ShapeTuple shape, DLDataType type_hint, const std::string& mem_scope) override { if (mem_scope.empty() || mem_scope == "global") { - return Allocator::Alloc(device_, ndims, shape, type_hint, mem_scope); + return Allocator::Alloc(device_, shape, type_hint, mem_scope); } LOG(FATAL) << "This alloc should be implemented"; return {}; @@ -118,4 +117,4 @@ class PooledAllocator final : public Allocator { } // namespace runtime } // namespace tvm -#endif // TVM_RUNTIME_POOLED_ALLOCATOR_H_ +#endif // TVM_RUNTIME_MEMORY_POOLED_ALLOCATOR_H_ diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 836d59210489..66857ca73434 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -839,9 +839,13 @@ void VirtualMachine::RunLoop(const std::vector& output_tensor_reg_indices << ", dtype_hint=" << DLDataType2String(instr.alloc_storage.dtype_hint) << ", device_index=" << instr.alloc_storage.device_index << ", memory_scope=" << mem_scope; + + std::vector shape_; + shape_.resize(instr.alloc_storage.ndim); + shape_.assign(instr.alloc_storage.shape, + instr.alloc_storage.shape + instr.alloc_storage.ndim); storage_obj->buffer = - allocator->Alloc(instr.alloc_storage.ndim, instr.alloc_storage.shape, - instr.alloc_storage.dtype_hint, mem_scope); + allocator->Alloc(ShapeTuple(shape_), instr.alloc_storage.dtype_hint, mem_scope); } else { auto size = LoadScalarInt(instr.alloc_storage.allocation_size); auto alignment = instr.alloc_storage.alignment; From 2ac106fc37f9d3c3c690cd19bb8205acd5c76c97 Mon Sep 17 00:00:00 2001 From: Siva Date: Thu, 28 Sep 2023 15:40:03 +0530 Subject: [PATCH 06/11] * GetDataSize is moved to DeviceAPI and memory_manager uses this interface. 
--- include/tvm/runtime/device_api.h | 8 ++++++ include/tvm/runtime/memory/memory_manager.h | 3 +- src/runtime/c_runtime_api.cc | 14 ++++++++++ src/runtime/memory/memory_manager.cc | 6 ++-- src/runtime/memory/naive_allocator.h | 2 +- .../{vm => memory}/memory_manager_tests.cc | 28 +++++++++---------- 6 files changed, 42 insertions(+), 19 deletions(-) rename tests/cpp/runtime/{vm => memory}/memory_manager_tests.cc (88%) diff --git a/include/tvm/runtime/device_api.h b/include/tvm/runtime/device_api.h index cb0eb7c21f11..e33539daddb7 100644 --- a/include/tvm/runtime/device_api.h +++ b/include/tvm/runtime/device_api.h @@ -95,6 +95,14 @@ class TVM_DLL DeviceAPI { */ virtual void GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv) = 0; + /*! + * \brief Get the physical memory size required. + * \param arr the tensor object. + * \param mem_scope the memory scope if any + * \return the memory size. + */ + virtual size_t GetDataSize(const DLTensor& arr, Optional mem_scope = NullOpt); + /*! * \brief Query the device for specified properties. * diff --git a/include/tvm/runtime/memory/memory_manager.h b/include/tvm/runtime/memory/memory_manager.h index 4635d074780f..00c27fb73373 100644 --- a/include/tvm/runtime/memory/memory_manager.h +++ b/include/tvm/runtime/memory/memory_manager.h @@ -66,7 +66,8 @@ class Allocator { * \param mem_scope The device memory scope hint. * \return The empty NDArray. */ - NDArray Empty(ShapeTuple shape, DLDataType dtype, Device dev, Optional mem_scope); + NDArray Empty(ShapeTuple shape, DLDataType dtype, Device dev, + Optional mem_scope = String()); /*! \brief Return the allocator type. */ inline AllocatorType type() const { return type_; } /*! \brief Allocate a buffer given a size, alignment and type. 
diff --git a/src/runtime/c_runtime_api.cc b/src/runtime/c_runtime_api.cc index 980447214a67..3471adefeb7f 100644 --- a/src/runtime/c_runtime_api.cc +++ b/src/runtime/c_runtime_api.cc @@ -152,6 +152,20 @@ static size_t GetDataAlignment(const DLDataType dtype) { return align; } +size_t DeviceAPI::GetDataSize(const DLTensor& arr, Optional mem_scope) { + if (!mem_scope.defined() || mem_scope.value().empty() || mem_scope.value() == "global") { + size_t size = 1; + for (tvm_index_t i = 0; i < arr.ndim; ++i) { + size *= static_cast(arr.shape[i]); + } + size *= (arr.dtype.bits * arr.dtype.lanes + 7) / 8; + return size; + } + LOG(FATAL) << "Device does not support physical mem computation with " + << "specified memory scope: " << mem_scope.value(); + return 0; +} + void* DeviceAPI::AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype, Optional mem_scope) { if (!mem_scope.defined() || mem_scope.value() == "" || mem_scope.value() == "global") { diff --git a/src/runtime/memory/memory_manager.cc b/src/runtime/memory/memory_manager.cc index e426ba597832..5c18d7977df2 100644 --- a/src/runtime/memory/memory_manager.cc +++ b/src/runtime/memory/memory_manager.cc @@ -85,7 +85,7 @@ NDArray StorageObj::AllocNDArray(size_t offset, ShapeTuple shape, DLDataType dty container->dl_tensor.byte_offset = offset; container->SetDeleter(StorageObj::Deleter); - size_t needed_size = GetDataSize(container->dl_tensor); + size_t needed_size = DeviceAPI::Get(this->buffer.device)->GetDataSize(container->dl_tensor); this->IncRef(); // The manager context pointer must continue to point to the storage object // which owns the backing memory, and keeps track of the reference count. 
@@ -159,7 +159,7 @@ NDArray Allocator::Empty(ShapeTuple shape, DLDataType dtype, DLDevice dev, VerifyDataType(dtype); NDArray::Container* container = new NDArray::Container(nullptr, shape, dtype, dev); container->SetDeleter(BufferDeleter); - size_t size = GetDataSize(container->dl_tensor); + size_t size = DeviceAPI::Get(dev)->GetDataSize(container->dl_tensor); size_t alignment = GetDataAlignment(container->dl_tensor); Buffer* buffer = new Buffer; if (!mem_scope.defined() || mem_scope == "global") { @@ -177,7 +177,7 @@ Buffer Allocator::Alloc(Device dev, ShapeTuple shape, DLDataType type_hint, if (mem_scope.empty() || mem_scope == "global") { // by default, we can always redirect to the flat memory allocations NDArray::Container container(nullptr, shape, type_hint, dev); - size_t size = GetDataSize(container.dl_tensor); + size_t size = DeviceAPI::Get(dev)->GetDataSize(container.dl_tensor); size_t alignment = GetDataAlignment(container.dl_tensor); return Alloc(size, alignment, type_hint); } diff --git a/src/runtime/memory/naive_allocator.h b/src/runtime/memory/naive_allocator.h index 979211b94cec..927738974059 100644 --- a/src/runtime/memory/naive_allocator.h +++ b/src/runtime/memory/naive_allocator.h @@ -51,7 +51,7 @@ class NaiveAllocator final : public Allocator { Buffer buf; size_t nbytes = 1; buf.shape = shape; - for (int i = 0; i < shape.size(); ++i) { + for (int i = 0; i < static_cast(shape.size()); ++i) { nbytes *= static_cast(shape[i]); } nbytes *= (type_hint.bits * type_hint.lanes + 7) / 8; diff --git a/tests/cpp/runtime/vm/memory_manager_tests.cc b/tests/cpp/runtime/memory/memory_manager_tests.cc similarity index 88% rename from tests/cpp/runtime/vm/memory_manager_tests.cc rename to tests/cpp/runtime/memory/memory_manager_tests.cc index dc721a02a803..7010e3e3c610 100644 --- a/tests/cpp/runtime/vm/memory_manager_tests.cc +++ b/tests/cpp/runtime/memory/memory_manager_tests.cc @@ -77,7 +77,7 @@ TEST_F(TvmVMMemoryManagerTest, NaiveEmptyBasic) { 
EXPECT_EQ(allocator->UsedMemory(), 0); auto dt = DataType::Float(32); size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); - std::vector shape = {1, 3, 6, 6}; + ShapeTuple shape = {1, 3, 6, 6}; { auto ndarray = allocator->Empty(shape, dt, dev); EXPECT_EQ(allocator->UsedMemory(), nbytes); @@ -93,7 +93,7 @@ TEST_F(TvmVMMemoryManagerTest, PooledEmptyBasic) { size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); size_t page_size = PooledAllocator::kDefaultPageSize; size_t size = ((nbytes + page_size - 1) / page_size) * page_size; - std::vector shape = {1, 3, 6, 6}; + ShapeTuple shape = {1, 3, 6, 6}; { auto ndarray = allocator->Empty(shape, dt, dev); EXPECT_EQ(allocator->UsedMemory(), size); @@ -107,14 +107,14 @@ TEST_F(TvmVMMemoryManagerTest, NaiveAllocWithShape) { EXPECT_EQ(allocator->UsedMemory(), 0); auto dt = DataType::Float(32); size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); - std::vector shape = {1, 3, 6, 6}; - auto buff = allocator->Alloc(shape.size(), shape.data(), dt); + ShapeTuple shape = {1, 3, 6, 6}; + auto buff = allocator->Alloc(shape, dt); EXPECT_EQ(allocator->UsedMemory(), nbytes); allocator->Free(buff); EXPECT_EQ(allocator->UsedMemory(), 0); try { - auto texture = allocator->Alloc(shape.size(), shape.data(), dt, "global.texture"); + auto texture = allocator->Alloc(shape, dt, "global.texture"); FAIL(); } catch (std::exception& e) { std::string pattern = @@ -132,14 +132,14 @@ TEST_F(TvmVMMemoryManagerTest, PooledAllocWithShape) { size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); size_t page_size = PooledAllocator::kDefaultPageSize; size_t size = ((nbytes + page_size - 1) / page_size) * page_size; - std::vector shape = {1, 3, 6, 6}; - auto buff = allocator->Alloc(shape.size(), shape.data(), dt); + ShapeTuple shape = {1, 3, 6, 6}; + auto buff = allocator->Alloc(shape, dt); EXPECT_EQ(allocator->UsedMemory(), size); allocator->Free(buff); EXPECT_EQ(allocator->UsedMemory(), size); try { - auto texture = allocator->Alloc(shape.size(), shape.data(), dt, "global.texture"); + auto texture = 
allocator->Alloc(shape, dt, "global.texture"); FAIL(); } catch (std::exception& e) { std::string pattern = "This alloc should be implemented"; @@ -159,13 +159,13 @@ TEST_F(TvmVMMemoryManagerTest, NaiveAllocOpenCLTexture) { EXPECT_EQ(allocator->UsedMemory(), 0); auto dt = DataType::Float(32); size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); - std::vector shape = {1, 3, 6, 6}; - auto buff = allocator->Alloc(shape.size(), shape.data(), dt); + ShapeTuple shape = {1, 3, 6, 6}; + auto buff = allocator->Alloc(shape, dt); EXPECT_EQ(allocator->UsedMemory(), nbytes); allocator->Free(buff); EXPECT_EQ(allocator->UsedMemory(), 0); - auto texture = allocator->Alloc(shape.size(), shape.data(), dt, "global.texture"); + auto texture = allocator->Alloc(shape, dt, "global.texture"); EXPECT_EQ(allocator->UsedMemory(), nbytes); allocator->Free(texture); EXPECT_EQ(allocator->UsedMemory(), 0); @@ -184,14 +184,14 @@ TEST_F(TvmVMMemoryManagerTest, PooledAllocOpenCLTexture) { size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); size_t page_size = PooledAllocator::kDefaultPageSize; size_t size = ((nbytes + page_size - 1) / page_size) * page_size; - std::vector shape = {1, 3, 6, 6}; - auto buff = allocator->Alloc(shape.size(), shape.data(), dt); + ShapeTuple shape = {1, 3, 6, 6}; + auto buff = allocator->Alloc(shape, dt); EXPECT_EQ(allocator->UsedMemory(), size); allocator->Free(buff); EXPECT_EQ(allocator->UsedMemory(), size); try { - auto texture = allocator->Alloc(shape.size(), shape.data(), dt, "global.texture"); + auto texture = allocator->Alloc(shape, dt, "global.texture"); FAIL(); } catch (std::exception& e) { std::string pattern = "This alloc should be implemented"; From 089e668704493ef702ff18da5c3d462456547f28 Mon Sep 17 00:00:00 2001 From: Siva Date: Thu, 28 Sep 2023 22:40:08 +0530 Subject: [PATCH 07/11] * review comments --- include/tvm/runtime/memory/memory_manager.h | 4 +--- src/runtime/memory/memory_manager.cc | 6 ++++++ src/runtime/memory/naive_allocator.h | 1 - 3 files changed, 7 
insertions(+), 4 deletions(-) diff --git a/include/tvm/runtime/memory/memory_manager.h b/include/tvm/runtime/memory/memory_manager.h index 00c27fb73373..38241e3eec82 100644 --- a/include/tvm/runtime/memory/memory_manager.h +++ b/include/tvm/runtime/memory/memory_manager.h @@ -44,8 +44,6 @@ struct Buffer { void* data{nullptr}; /*! \brief The size of the block. */ size_t size{0}; - /*! \brief The shape of the tensor. */ - ShapeTuple shape; /*! \brief The context of the allocated buffers. */ Device device; }; @@ -67,7 +65,7 @@ class Allocator { * \return The empty NDArray. */ NDArray Empty(ShapeTuple shape, DLDataType dtype, Device dev, - Optional mem_scope = String()); + Optional mem_scope = NullOpt); /*! \brief Return the allocator type. */ inline AllocatorType type() const { return type_; } /*! \brief Allocate a buffer given a size, alignment and type. diff --git a/src/runtime/memory/memory_manager.cc b/src/runtime/memory/memory_manager.cc index 5c18d7977df2..6dc8b599f182 100644 --- a/src/runtime/memory/memory_manager.cc +++ b/src/runtime/memory/memory_manager.cc @@ -42,6 +42,12 @@ static void BufferDeleter(Object* obj) { delete ptr; } +Storage::Storage(Buffer buffer) { + auto n = make_object(); + n->buffer = std::move(buffer); + data_ = std::move(n); +} + void StorageObj::Deleter(Object* obj) { auto* ptr = static_cast(obj); // When invoking AllocNDArray we don't own the underlying allocation diff --git a/src/runtime/memory/naive_allocator.h b/src/runtime/memory/naive_allocator.h index 927738974059..a01b6cb7a8f6 100644 --- a/src/runtime/memory/naive_allocator.h +++ b/src/runtime/memory/naive_allocator.h @@ -50,7 +50,6 @@ class NaiveAllocator final : public Allocator { Buffer Alloc(ShapeTuple shape, DLDataType type_hint, const std::string& mem_scope) override { Buffer buf; size_t nbytes = 1; - buf.shape = shape; for (int i = 0; i < static_cast(shape.size()); ++i) { nbytes *= static_cast(shape[i]); } From c502cad7b73847e92da54c4f79d452314469ad93 Mon Sep 17 00:00:00 
2001 From: Siva Date: Fri, 29 Sep 2023 09:09:22 +0530 Subject: [PATCH 08/11] * Make compiler happy with unused variables --- tests/cpp/runtime/memory/memory_manager_tests.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/cpp/runtime/memory/memory_manager_tests.cc b/tests/cpp/runtime/memory/memory_manager_tests.cc index 7010e3e3c610..6f4ed74408a7 100644 --- a/tests/cpp/runtime/memory/memory_manager_tests.cc +++ b/tests/cpp/runtime/memory/memory_manager_tests.cc @@ -115,6 +115,7 @@ TEST_F(TvmVMMemoryManagerTest, NaiveAllocWithShape) { try { auto texture = allocator->Alloc(shape, dt, "global.texture"); + (void) texture; FAIL(); } catch (std::exception& e) { std::string pattern = @@ -140,6 +141,7 @@ TEST_F(TvmVMMemoryManagerTest, PooledAllocWithShape) { try { auto texture = allocator->Alloc(shape, dt, "global.texture"); + (void) texture; FAIL(); } catch (std::exception& e) { std::string pattern = "This alloc should be implemented"; @@ -192,6 +194,7 @@ TEST_F(TvmVMMemoryManagerTest, PooledAllocOpenCLTexture) { try { auto texture = allocator->Alloc(shape, dt, "global.texture"); + (void) texture; FAIL(); } catch (std::exception& e) { std::string pattern = "This alloc should be implemented"; From f4dd188d177d541f4ba5f3d6119409b8f9f6468a Mon Sep 17 00:00:00 2001 From: Siva Date: Fri, 29 Sep 2023 11:01:34 +0530 Subject: [PATCH 09/11] * lint --- tests/cpp/runtime/memory/memory_manager_tests.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/cpp/runtime/memory/memory_manager_tests.cc b/tests/cpp/runtime/memory/memory_manager_tests.cc index 6f4ed74408a7..b51be91d7424 100644 --- a/tests/cpp/runtime/memory/memory_manager_tests.cc +++ b/tests/cpp/runtime/memory/memory_manager_tests.cc @@ -115,7 +115,7 @@ TEST_F(TvmVMMemoryManagerTest, NaiveAllocWithShape) { try { auto texture = allocator->Alloc(shape, dt, "global.texture"); - (void) texture; + (void)texture; FAIL(); } catch (std::exception& e) { std::string pattern = @@ -141,7 +141,7 @@ 
TEST_F(TvmVMMemoryManagerTest, PooledAllocWithShape) { try { auto texture = allocator->Alloc(shape, dt, "global.texture"); - (void) texture; + (void)texture; FAIL(); } catch (std::exception& e) { std::string pattern = "This alloc should be implemented"; @@ -194,7 +194,7 @@ TEST_F(TvmVMMemoryManagerTest, PooledAllocOpenCLTexture) { try { auto texture = allocator->Alloc(shape, dt, "global.texture"); - (void) texture; + (void)texture; FAIL(); } catch (std::exception& e) { std::string pattern = "This alloc should be implemented"; From 82e298cc600087fda578e20dd0974eed812c273d Mon Sep 17 00:00:00 2001 From: Siva Date: Fri, 29 Sep 2023 18:23:53 +0530 Subject: [PATCH 10/11] Update src/runtime/memory/memory_manager.cc Co-authored-by: Egor Churaev --- src/runtime/memory/memory_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/memory/memory_manager.cc b/src/runtime/memory/memory_manager.cc index 6dc8b599f182..f665daad41d6 100644 --- a/src/runtime/memory/memory_manager.cc +++ b/src/runtime/memory/memory_manager.cc @@ -168,7 +168,7 @@ NDArray Allocator::Empty(ShapeTuple shape, DLDataType dtype, DLDevice dev, size_t size = DeviceAPI::Get(dev)->GetDataSize(container->dl_tensor); size_t alignment = GetDataAlignment(container->dl_tensor); Buffer* buffer = new Buffer; - if (!mem_scope.defined() || mem_scope == "global") { + if (!mem_scope.defined() || mem_scope.value().empty() || mem_scope.value() == "global") { *buffer = this->Alloc(size, alignment, dtype); } else { *buffer = this->Alloc(shape, dtype, mem_scope.value()); From b1ee28fc945493fae09849df2845ff68f571d9be Mon Sep 17 00:00:00 2001 From: Siva Date: Tue, 3 Oct 2023 15:28:17 +0530 Subject: [PATCH 11/11] * allow multiple allocators to coexist for the same device. 
Using available allocator instead of requested is leading to an unexpected crash --- include/tvm/runtime/memory/memory_manager.h | 20 ++++++++++++-------- src/runtime/memory/memory_manager.cc | 20 +++++++++++++------- src/runtime/memory/naive_allocator.h | 3 +++ src/runtime/memory/pooled_allocator.h | 1 + 4 files changed, 29 insertions(+), 15 deletions(-) diff --git a/include/tvm/runtime/memory/memory_manager.h b/include/tvm/runtime/memory/memory_manager.h index 38241e3eec82..8b38fbf6f0ff 100644 --- a/include/tvm/runtime/memory/memory_manager.h +++ b/include/tvm/runtime/memory/memory_manager.h @@ -39,6 +39,11 @@ namespace tvm { namespace runtime { namespace memory { +enum AllocatorType { + kNaive = 1, + kPooled, +}; + struct Buffer { /*! \brief The pointer to the allocated block of memory. */ void* data{nullptr}; @@ -46,11 +51,8 @@ struct Buffer { size_t size{0}; /*! \brief The context of the allocated buffers. */ Device device; -}; - -enum AllocatorType { - kNaive = 1, - kPooled, + /*! \brief The allocator that created this buffer. */ + AllocatorType alloc_type; }; class Allocator { @@ -113,16 +115,18 @@ class MemoryManager { /*! * \brief Get an allocator given the context. * \param dev The TVM device + * \param type The allocator type * \return The memory allocator. */ - static Allocator* GetAllocator(Device dev); + static Allocator* GetAllocator(Device dev, AllocatorType type); private: MemoryManager() {} protected: std::mutex mu_; - std::unordered_map> allocators_; + std::unordered_map>> + allocators_; }; /*! \brief An object representing a storage allocation. 
*/ @@ -138,7 +142,7 @@ class StorageObj : public Object { static void Deleter(Object* ptr); ~StorageObj() { - auto alloc = MemoryManager::Global()->GetAllocator(buffer.device); + auto alloc = MemoryManager::Global()->GetAllocator(buffer.device, buffer.alloc_type); alloc->Free(buffer); } diff --git a/src/runtime/memory/memory_manager.cc b/src/runtime/memory/memory_manager.cc index f665daad41d6..e72934ed2eeb 100644 --- a/src/runtime/memory/memory_manager.cc +++ b/src/runtime/memory/memory_manager.cc @@ -37,7 +37,7 @@ static void BufferDeleter(Object* obj) { auto* ptr = static_cast(obj); ICHECK(ptr->manager_ctx != nullptr); Buffer* buffer = reinterpret_cast(ptr->manager_ctx); - MemoryManager::GetAllocator(buffer->device)->Free(*(buffer)); + MemoryManager::GetAllocator(buffer->device, buffer->alloc_type)->Free(*(buffer)); delete buffer; delete ptr; } @@ -122,6 +122,9 @@ Allocator* MemoryManager::GetOrCreateAllocator(Device dev, AllocatorType type) { MemoryManager* m = MemoryManager::Global(); std::lock_guard lock(m->mu_); if (m->allocators_.find(dev) == m->allocators_.end()) { + m->allocators_.emplace(dev, std::unordered_map>()); + } + if (m->allocators_.at(dev).find(type) == m->allocators_.at(dev).end()) { std::unique_ptr alloc; switch (type) { case kNaive: { @@ -138,26 +141,29 @@ Allocator* MemoryManager::GetOrCreateAllocator(Device dev, AllocatorType type) { LOG(FATAL) << "Unknown allocator type: " << type; } auto ret = alloc.get(); - m->allocators_.emplace(dev, std::move(alloc)); + m->allocators_.at(dev).emplace(type, std::move(alloc)); return ret; } - auto alloc = m->allocators_.at(dev).get(); - if (alloc->type() != type) { + auto alloc = m->allocators_.at(dev).at(type).get(); + /*if (alloc->type() != type) { LOG(WARNING) << "The type of existing allocator for " << dev << " is different from the request type (" << alloc->type() << " vs " << type << ")"; - } + }*/ return alloc; } -Allocator* MemoryManager::GetAllocator(Device dev) { +Allocator* 
MemoryManager::GetAllocator(Device dev, AllocatorType type) { MemoryManager* m = MemoryManager::Global(); std::lock_guard lock(m->mu_); auto it = m->allocators_.find(dev); if (it == m->allocators_.end()) { LOG(FATAL) << "Allocator for " << dev << " has not been created yet."; } - return it->second.get(); + if (it->second.find(type) == it->second.end()) { + LOG(FATAL) << "Allocator for " << dev << " of type " << type << " has not been created yet."; + } + return it->second.at(type).get(); } NDArray Allocator::Empty(ShapeTuple shape, DLDataType dtype, DLDevice dev, diff --git a/src/runtime/memory/naive_allocator.h b/src/runtime/memory/naive_allocator.h index a01b6cb7a8f6..4ab96bdfd56d 100644 --- a/src/runtime/memory/naive_allocator.h +++ b/src/runtime/memory/naive_allocator.h @@ -41,6 +41,7 @@ class NaiveAllocator final : public Allocator { Buffer buf; buf.device = device_; buf.size = nbytes; + buf.alloc_type = kNaive; buf.data = DeviceAPI::Get(device_)->AllocDataSpace(device_, nbytes, alignment, type_hint); used_memory_.fetch_add(nbytes, std::memory_order_relaxed); DLOG(INFO) << "allocate " << nbytes << " B, used memory " << used_memory_ << " B"; @@ -59,6 +60,7 @@ class NaiveAllocator final : public Allocator { auto tmp_buf = Allocator::Alloc(device_, shape, type_hint, mem_scope); buf.size = tmp_buf.size; buf.data = tmp_buf.data; + buf.alloc_type = kNaive; return buf; } @@ -67,6 +69,7 @@ class NaiveAllocator final : public Allocator { type_hint, String(mem_scope)); used_memory_.fetch_add(nbytes, std::memory_order_relaxed); DLOG(INFO) << "allocate " << nbytes << " B, used memory " << used_memory_ << " B"; + buf.alloc_type = kNaive; return buf; } diff --git a/src/runtime/memory/pooled_allocator.h b/src/runtime/memory/pooled_allocator.h index ba03f5651b7a..01dded966b5c 100644 --- a/src/runtime/memory/pooled_allocator.h +++ b/src/runtime/memory/pooled_allocator.h @@ -58,6 +58,7 @@ class PooledAllocator final : public Allocator { Buffer buf; buf.device = device_; 
buf.size = size; + buf.alloc_type = kPooled; try { buf.data = DeviceAPI::Get(device_)->AllocDataSpace(device_, size, alignment, type_hint); } catch (InternalError& err) {