From f377d688dc03013cfba97b987bb4c9a74265d014 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Tue, 17 Mar 2020 18:16:23 +0800 Subject: [PATCH 01/16] initial crt_memory and memory leak fix in graph_runtime Change-Id: I0f79f909a04d1c677aabb80f202f0612c5ce7f2a --- apps/bundle_deploy/Makefile | 8 - apps/bundle_deploy/runtime.c | 39 +++- include/tvm/runtime/crt/logging.h | 52 ++++++ include/tvm/runtime/crt/memory.h | 44 +++++ src/runtime/crt/crt_backend_api.c | 11 +- src/runtime/crt/graph_runtime.c | 43 +++-- src/runtime/crt/load_json.c | 14 +- src/runtime/crt/memory.c | 294 ++++++++++++++++++++++++++++++ src/runtime/crt/module.h | 2 +- src/runtime/crt/ndarray.c | 10 +- 10 files changed, 463 insertions(+), 54 deletions(-) create mode 100644 include/tvm/runtime/crt/logging.h create mode 100644 include/tvm/runtime/crt/memory.h create mode 100644 src/runtime/crt/memory.c diff --git a/apps/bundle_deploy/Makefile b/apps/bundle_deploy/Makefile index c80765f28e09..2a0f632155a2 100644 --- a/apps/bundle_deploy/Makefile +++ b/apps/bundle_deploy/Makefile @@ -71,14 +71,6 @@ $(build_dir)/graph.json.c: $(build_dir)/graph.json $(build_dir)/params.bin.c: $(build_dir)/params.bin xxd -i $^ > $@ -# # Serialize our test_graph.json file. -# $(build_dir)/test_graph.json.c: $(build_dir)/test_graph.json -# xxd -i $^ > $@ -# -# # Serialize our test_params.bin file. -# $(build_dir)/test_params.bin.c: $(build_dir)/test_params.bin -# xxd -i $^ > $@ - $(build_dir)/model.o $(build_dir)/graph.json $(build_dir)/params.bin $(build_dir)/cat.bin: build_model.py python3 $< -o $(build_dir) diff --git a/apps/bundle_deploy/runtime.c b/apps/bundle_deploy/runtime.c index 6a53aa15f573..521e701213fc 100644 --- a/apps/bundle_deploy/runtime.c +++ b/apps/bundle_deploy/runtime.c @@ -30,23 +30,46 @@ #define TVM_CRT_MAX_NDIM 6 /*! Maximum supported arguments in generated functions */ #define TVM_CRT_MAX_ARGS 10 - -/*! Maximum inputs in a GraphRuntimeNode */ -#define GRAPH_RUNTIME_NODE_MAX_INPUTS 300 /*! 
Maximum supported contexts in a GraphRuntime */ #define GRAPH_RUNTIME_MAX_CONTEXTS 1 + +/*! Maximum inputs in a GraphRuntimeNode */ +#define GRAPH_RUNTIME_NODE_MAX_INPUTS 3 /*! Maximum supported nodes in a GraphRuntime */ -#define GRAPH_RUNTIME_MAX_NODES 400 +#define GRAPH_RUNTIME_MAX_NODES 4 /*! Maximum input nodes in a GraphRuntime */ -#define GRAPH_RUNTIME_MAX_INPUT_NODES 300 +#define GRAPH_RUNTIME_MAX_INPUT_NODES 3 /*! Maximum nodes in a GraphRuntime for quick entry indexing */ -#define GRAPH_RUNTIME_MAX_NODE_ROW_PTR 300 +#define GRAPH_RUNTIME_MAX_NODE_ROW_PTR 3 /*! Maximum output entries in a GraphRuntime */ -#define GRAPH_RUNTIME_MAX_OUTPUTS 300 +#define GRAPH_RUNTIME_MAX_OUTPUTS 3 + +/*! + * \brief Log memory pool size for virtual memory allocation + * + * Here is a list of possible choices: + * * use 16 for 64 KiB memory space + * * use 17 for 128 KiB memory space + * * use 18 for 256 KiB memory space + * * use 19 for 512 KiB memory space + * * use 20 for 1 MiB memory space + * * use 21 for 2 MiB memory space + * * use 22 for 4 MiB memory space + * * use 23 for 8 MiB memory space + * * use 24 for 16 MiB memory space + * * use 25 for 32 MiB memory space + * * use 26 for 64 MiB memory space + * * use 27 for 128 MiB memory space + * * use 28 for 256 MiB memory space + */ +#define TVM_CRT_LOG_VIRT_MEM_SIZE 16 + +/*! 
\brief Page size for virtual memory allocation */ +#define TVM_CRT_PAGE_BYTES 4096 #include "../../src/runtime/crt/crt_runtime_api.c" #include "../../src/runtime/crt/crt_backend_api.c" #include "../../src/runtime/crt/graph_runtime.c" #include "../../src/runtime/crt/load_json.c" #include "../../src/runtime/crt/ndarray.c" - +#include "../../src/runtime/crt/memory.c" diff --git a/include/tvm/runtime/crt/logging.h b/include/tvm/runtime/crt/logging.h new file mode 100644 index 000000000000..9e87637ccc6d --- /dev/null +++ b/include/tvm/runtime/crt/logging.h @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file logging.h + * \brief A replacement of the dmlc logging system that avoids + * the usage of GLOG and C++ headers + */ + +#ifndef TVM_RUNTIME_CRT_LOGGING_H_ +#define TVM_RUNTIME_CRT_LOGGING_H_ + +#define CHECK(x) \ + do { \ + if (!(x)) { \ + fprintf(stderr, "Check failed: %s\n", #x); \ + exit(-1); \ + } \ + }while(0) + +#define CHECK_BINARY_OP(op, x, y, fmt, ...) \ + do { \ + if (!(x op y)) { \ + fprintf(stderr, "Check failed: %s %s %s: " fmt "\n", #x, #op, #y, ##__VA_ARGS__); \ + exit(-1); \ + } \ + }while(0) + +#define CHECK_LT(x, y, fmt, ...) 
CHECK_BINARY_OP(<, x, y, fmt, ##__VA_ARGS__) +#define CHECK_GT(x, y, fmt, ...) CHECK_BINARY_OP(>, x, y, fmt, ##__VA_ARGS__) +#define CHECK_LE(x, y, fmt, ...) CHECK_BINARY_OP(<=, x, y, fmt, ##__VA_ARGS__) +#define CHECK_GE(x, y, fmt, ...) CHECK_BINARY_OP(>=, x, y, fmt, ##__VA_ARGS__) +#define CHECK_EQ(x, y, fmt, ...) CHECK_BINARY_OP(==, x, y, fmt, ##__VA_ARGS__) +#define CHECK_NE(x, y, fmt, ...) CHECK_BINARY_OP(!=, x, y, fmt, ##__VA_ARGS__) + +#endif // TVM_RUNTIME_CRT_LOGGING_H_ diff --git a/include/tvm/runtime/crt/memory.h b/include/tvm/runtime/crt/memory.h new file mode 100644 index 000000000000..867f1379318d --- /dev/null +++ b/include/tvm/runtime/crt/memory.h @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file memory.h + * \brief The virtual memory manager for micro-controllers + */ + +#ifndef TVM_RUNTIME_CRT_MEMORY_H_ +#define TVM_RUNTIME_CRT_MEMORY_H_ + +/** \brief Allocate memory from manager */ +void * vmalloc(size_t size); + +/** \brief Release memory from manager */ +void vfree(void * ptr); + +static int vleak_size = 0; + +// #define vmalloc(size) \ +// vmalloc_(size); \ +// printf("%s: %d: info: size=%d, vleak=%d\n", __FILE__, __LINE__, size, ++vleak_size) + +// #define vfree(ptr) \ +// vfree_(ptr); \ +// printf("%s: %d: error: addr=%p, vleak=%d\n", __FILE__, __LINE__, ptr, --vleak_size) + +#endif // TVM_RUNTIME_CRT_MEMORY_H_ diff --git a/src/runtime/crt/crt_backend_api.c b/src/runtime/crt/crt_backend_api.c index e011e47b2576..45dd913b5199 100644 --- a/src/runtime/crt/crt_backend_api.c +++ b/src/runtime/crt/crt_backend_api.c @@ -18,6 +18,7 @@ */ #include +#include #include #include @@ -29,18 +30,12 @@ void* TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t nbytes, void* ptr = 0; assert(nbytes > 0); unsigned int dtype_bytes = dtype_bits_hint / 8; -#ifdef __ANDROID__ - ptr = memalign(64, nbytes * dtype_bytes); -#else - const int ret = posix_memalign(&ptr, 64, nbytes * dtype_bytes); - (void)ret; - assert(ret == 0); -#endif + ptr = vmalloc(nbytes * dtype_bytes); return ptr; } int TVMBackendFreeWorkspace(int device_type, int device_id, void* ptr) { - free(ptr); + vfree(ptr); return 0; } diff --git a/src/runtime/crt/graph_runtime.c b/src/runtime/crt/graph_runtime.c index 89c325acb216..8e0b1fb4372c 100644 --- a/src/runtime/crt/graph_runtime.c +++ b/src/runtime/crt/graph_runtime.c @@ -21,6 +21,10 @@ * \file graph_runtime.c * \brief implement graph runtime in pure C */ + +#include +#include + #include "graph_runtime.h" #ifndef MAX @@ -380,7 +384,7 @@ int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) { break; #if TVM_CRT_DEBUG } else { - printf("layer %u: `%s` loaded.\n", runtime->nodes_count, node->name); + printf("loading: 
node (%u) %s loaded.\n", runtime->nodes_count, node->name); #endif // TVM_CRT_DEBUG } runtime->nodes_count++; @@ -458,9 +462,7 @@ int TVMGraphRuntime_GetInputIndex(TVMGraphRuntime * runtime, const char * name) break; } } - if (rv < 0) { - fprintf(stderr, "cannot find \"%s\" among input\n", name); - } + CHECK_GE(rv, 0, "cannot find '%s' among input.", name); return rv; } @@ -476,7 +478,7 @@ void TVMGraphRuntime_SetInput(TVMGraphRuntime * runtime, const char * name, DLTe fprintf(stderr, "given index is greater than num of input nodes.\n"); } uint32_t eid = runtime->GetEntryId(runtime, runtime->input_nodes[index], 0); - runtime->data_entry[eid].dl_tensor = *data_in; + runtime->data_entry[eid].dl_tensor.data = data_in->data; } /*! @@ -545,7 +547,7 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime * runtime, const char * param_blo status |= TVMNDArray_Load(&(runtime->data_entry[eid]), &bptr); #if TVM_CRT_DEBUG TVMNDArray * entry = &(runtime->data_entry[eid]); - printf("param %s loaded, in_idx=%d, eid=%d, ndim=%d, data[0]=%f\n", + printf("loading: param %s loaded, in_idx=%d, eid=%d, ndim=%d, data[0]=%f\n", names[idx], in_idx, eid, entry->dl_tensor.ndim, ((float*)entry->dl_tensor.data)[0]); // NOLINT(*) #endif // TVM_CRT_DEBUG @@ -564,7 +566,7 @@ void TVMGraphRuntime_Run(TVMGraphRuntime * runtime) { for (idx = 0; idx < runtime->op_execs_count; ++idx) { if (runtime->op_execs[idx].fexec) { #if TVM_CRT_DEBUG - printf("calling %s (%d)\n", runtime->op_execs[idx].name, idx); + printf("calling: %s (%d)\n", runtime->op_execs[idx].name, idx); #endif // TVM_CRT_DEBUG runtime->op_execs[idx].Call(&(runtime->op_execs[idx])); } @@ -581,9 +583,9 @@ int TVMGraphRuntime_GetOutput(TVMGraphRuntime * runtime, const int32_t idx, DLTe int32_t elem_bytes = out->dtype.bits / 8; int64_t size = Shape_Accumulate(out->shape, out->ndim); DLTensor * tensor = &(runtime->data_entry[eid].dl_tensor); - assert(out->ndim == tensor->ndim); - assert(out->dtype.bits == tensor->dtype.bits); - 
assert(Shape_Accumulate(out->shape, out->ndim) == Shape_Accumulate(tensor->shape, tensor->ndim)); + CHECK(out->ndim == tensor->ndim); + CHECK(out->dtype.bits == tensor->dtype.bits); + CHECK(Shape_Accumulate(out->shape, out->ndim) == Shape_Accumulate(tensor->shape, tensor->ndim)); memcpy(out->data, tensor->data, size * elem_bytes); return status; } @@ -628,9 +630,8 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime * runtime) { DLDataType dtype = {kDLFloat, 32, 1}; shape[0] = (pit.size + 3) / 4; runtime->storage_pool[runtime->storage_pool_count] = TVMNDArray_Empty(1, shape, dtype, ctx); - if (runtime->storage_pool[runtime->storage_pool_count].dl_tensor.data == 0) { - fprintf(stderr, "fail to create storage_pool with idx=%d\n", idx); - } + CHECK_NE(runtime->storage_pool[runtime->storage_pool_count].dl_tensor.data, 0, + "fail to create storage_pool with idx=%d\n", idx); runtime->storage_pool_count++; } @@ -640,13 +641,12 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime * runtime) { runtime->data_entry_count = runtime->node_row_ptr[runtime->node_row_ptr_count - 1]; for (idx = 0; idx < runtime->data_entry_count; ++idx) { size_t storage_id = attrs->storage_id[idx]; - assert(storage_id < runtime->storage_pool_count); + CHECK(storage_id < runtime->storage_pool_count); runtime->data_entry[idx] = TVMNDArray_CreateView(&(runtime->storage_pool[storage_id]), attrs->shape[idx], attrs->ndim[idx], vtype[idx]); - if (runtime->data_entry[idx].dl_tensor.data == 0) { - fprintf(stderr, "fail to create for node with idx=%d, storage_id=%d\n", idx, storage_id); - } + CHECK_NE(runtime->data_entry[idx].dl_tensor.data, 0, + "fail to create for node with idx=%d, storage_id=%d\n", idx, storage_id); } } @@ -682,7 +682,7 @@ int TVMGraphRuntime_SetupOpExecs(TVMGraphRuntime * runtime) { break; } #if TVM_CRT_DEBUG - printf("creating tvm_op: %s with node_id=%d\n", inode->param.func_name, nid); + printf("tvm_op: creating %s with node_id=%d\n", inode->param.func_name, nid); #endif // 
TVM_CRT_DEBUG TVMPackedFunc pf; runtime->CreateTVMOp(runtime, &(inode->param), args, args_count, inode->inputs_count, &pf); @@ -762,7 +762,7 @@ void TVMGraphRuntime_Init(TVMGraphRuntime * runtime, const char * graph_json, TVMGraphRuntime * TVMGraphRuntimeCreate(const char * sym_json, const TVMModule * m, const TVMContext * ctxs) { - TVMGraphRuntime * runtime = (TVMGraphRuntime*)malloc(sizeof(TVMGraphRuntime)); // NOLINT(*) + TVMGraphRuntime * runtime = (TVMGraphRuntime*)vmalloc(sizeof(TVMGraphRuntime)); // NOLINT(*) memset(runtime, 0, sizeof(TVMGraphRuntime)); runtime->GetEntryId = TVMGraphRuntime_GetEntryId; runtime->GetInputIndex = TVMGraphRuntime_GetInputIndex; @@ -787,5 +787,8 @@ void TVMGraphRuntimeRelease(TVMGraphRuntime ** pptr) { for (idx = 0; idx < runtime->storage_pool_count; ++idx) { TVMNDArray_Release(&(runtime->storage_pool[idx])); } - free(*pptr); + for (idx = 0; idx < runtime->data_entry_count; ++idx) { + vfree(runtime->data_entry[idx].dl_tensor.shape); + } + vfree(*pptr); } diff --git a/src/runtime/crt/load_json.c b/src/runtime/crt/load_json.c index 894ab8938a10..43e3e06c2e1d 100644 --- a/src/runtime/crt/load_json.c +++ b/src/runtime/crt/load_json.c @@ -21,6 +21,8 @@ * \file load_json.c * \brief Load graph from JSON file. 
*/ +#include + #include "load_json.h" // the node entry structure in serialized format @@ -74,10 +76,10 @@ void SeqPop(Seq * seq) { } Seq * SeqCreate(uint64_t len) { - Seq * seq = (Seq*)malloc(sizeof(Seq)); // NOLINT(*) + Seq * seq = (Seq*)vmalloc(sizeof(Seq)); // NOLINT(*) memset(seq, 0, sizeof(Seq)); seq->allocated = len; - seq->data = (uint32_t*)malloc(sizeof(uint32_t)*len); // NOLINT(*) + seq->data = (uint32_t*)vmalloc(sizeof(uint32_t)*len); // NOLINT(*) seq->push_back = SeqPush; seq->back = SeqBack; seq->pop_back = SeqPop; @@ -85,8 +87,8 @@ Seq * SeqCreate(uint64_t len) { } void SeqRelease(Seq ** seq) { - free((*seq)->data); - free(*seq); + vfree((*seq)->data); + vfree(*seq); } @@ -346,7 +348,7 @@ JSONReader JSONReader_Create(const char * is) { reader.BeginObject = JSONReader_BeginObject; reader.NextArrayItem = JSONReader_NextArrayItem; reader.NextObjectItem = JSONReader_NextObjectItem; - reader.is_ = (char*)malloc(strlen(is)+1); // NOLINT(*) + reader.is_ = (char*)vmalloc(strlen(is)+1); // NOLINT(*) memset(reader.is_, 0, strlen(is)+1); snprintf(reader.is_, strlen(is)+1, "%s", is); reader.isptr = reader.is_; @@ -355,5 +357,5 @@ JSONReader JSONReader_Create(const char * is) { void JSONReader_Release(JSONReader * reader) { SeqRelease(&(reader->scope_counter_)); - free(reader->is_); + vfree(reader->is_); } diff --git a/src/runtime/crt/memory.c b/src/runtime/crt/memory.c new file mode 100644 index 000000000000..1d153f1b75b3 --- /dev/null +++ b/src/runtime/crt/memory.c @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file memory.c + * \brief Virtual memory manager + * + * To maximize portability, thread safety has been dropped for now. + */ + +#include +#include +#include + +/*! Number of bits in a page */ +#define TVM_CRT_PAGE_BITS (TVM_CRT_PAGE_BYTES << 3) + +/*! \brief Translate log memory size into bytes */ +#define TVM_CRT_VIRT_MEM_SIZE (1 << TVM_CRT_LOG_VIRT_MEM_SIZE) + +/*! \brief Number of possible page entries in total */ +#define TVM_CRT_MAX_PAGES (TVM_CRT_VIRT_MEM_SIZE / TVM_CRT_PAGE_BYTES) + +/*! \brief Physical address type */ +typedef uint32_t tvm_phy_addr_t; + +/*! \brief The bits in page table */ +static const tvm_phy_addr_t kPageBits = TVM_CRT_PAGE_BITS; + +/*! \brief Page size, also the maximum allocable size */ +static const tvm_phy_addr_t kPageSize = TVM_CRT_PAGE_BYTES; + +/** + * \brief Memory pool for virtual dynamic memory allocation + */ +static char g_memory_pool[TVM_CRT_VIRT_MEM_SIZE]; + +/*! \brief A page in the DRAM */ +typedef struct Page { + /*! \brief Start location in page table */ + tvm_index_t ptable_begin; + /*! \brief The total number of pages */ + tvm_index_t num_pages; + /*! 
\brief Data */ + char * data; +} Page; + +// construct a new page +Page PageCreate(tvm_index_t ptable_begin, tvm_index_t num_pages) { + Page page; + page.ptable_begin = ptable_begin; + page.num_pages = num_pages; + page.data = g_memory_pool + ptable_begin * kPageSize; + return page; +} + +typedef struct PageTable { + Page page[TVM_CRT_MAX_PAGES]; + uint32_t count; + void (*resize)(struct PageTable * ptable, uint32_t size, Page * page); +} PageTable; + +void PageTable_Resize(struct PageTable * ptable, uint32_t new_size, Page * page) { + CHECK_LE(ptable->count, new_size, + "size value (%d) is smaller than expected (%d).", new_size, ptable->count); + for (uint32_t idx = ptable->count; idx < new_size; idx++) { + ptable->page[idx] = *page; + } + ptable->count = new_size; +} + +typedef struct PageEntry { + char * addr; + Page page; +} PageEntry; + +typedef struct TLB { + PageEntry entries[TVM_CRT_MAX_PAGES]; + uint32_t count; + void (*set)(struct TLB * tlb, char * data, Page * page); + PageEntry * (*find)(struct TLB * tlb, char * data); +} TLB; + +void TLB_Set(TLB * tlb, char * data, Page * page) { + PageEntry * entry = tlb->find(tlb, data); + if (entry == 0) { + tlb->entries[tlb->count].addr = data; + tlb->entries[tlb->count].page = *page; + tlb->count ++; + } else { + entry->addr = data; + entry->page = *page; + } +} + +PageEntry * TLB_Find(TLB * tlb, char * data) { + PageEntry * entry = 0; + for (uint32_t idx = 0; idx < tlb->count; idx++) { + if (tlb->entries[idx].addr == data) { + entry = tlb->entries + idx; + break; + } + } + return entry; +} + +typedef struct IndexedEntry { + tvm_index_t index; + Page page; +} IndexedEntry; + +typedef struct MultiMap { + IndexedEntry entries[TVM_CRT_MAX_PAGES]; + uint32_t count; + IndexedEntry * (*lower_bound)(struct MultiMap * map, uint32_t npage); + IndexedEntry * (*end)(struct MultiMap * map); + IndexedEntry * (*erase)(struct MultiMap * map, IndexedEntry * entry); + void (*insert)(struct MultiMap * map, uint32_t npage, Page * 
p); +} MultiMap; + +IndexedEntry * MultiMap_LowerBound(struct MultiMap * map, uint32_t npage) { + IndexedEntry * entry = 0; + for (uint32_t idx = 0; idx < map->count; idx++) { + if (map->entries[idx].index == npage) { + entry = map->entries + idx; + break; + } + } + return entry; +} + +IndexedEntry * MultiMap_End(struct MultiMap * map) { + IndexedEntry * entry = 0; + return entry; +} + +IndexedEntry * MultiMap_Erase(struct MultiMap * map, IndexedEntry * entry) { + for (uint32_t idx = 0; idx < map->count; idx++) { + if ((map->entries + idx) == entry) { + memcpy(map->entries + idx, map->entries + (idx + 1), sizeof(IndexedEntry) * (map->count - idx)); + map->count--; + break; + } + } +} + +void MultiMap_Insert(struct MultiMap * map, uint32_t npage, Page * p) { + CHECK_LE(map->count + 1, TVM_CRT_MAX_PAGES, "invalid number of free pages."); + for (uint32_t idx = map->count; idx < (map->count + npage); idx++) { + map->entries[map->count].index = npage; + map->entries[map->count].page = *p; + } + map->count++; +} + +/*! + * \brief DRAM memory manager + * Implements simple paging to allow physical address translation. + */ +typedef struct MemoryManager { + /*! + * \brief Allocate memory from manager + * \param size The size of memory + * \return The virtual address + */ + void* (*Alloc)(struct MemoryManager * mgr, tvm_index_t size); + /*! + * \brief Free the memory. + * \param mgr The memory manager + * \param data The virtual address to release + */ + void (*Free)(struct MemoryManager * mgr, void* data); + + // Physical address -> page + PageTable ptable; + // Virtual address -> page + TLB pmap; + // Free map + MultiMap free_map; +} MemoryManager; + +/*! 
+ * \brief Allocate memory from manager + * \param size The size of memory + * \return The virtual address + */ +void* MemoryManager_Alloc(MemoryManager * mgr, tvm_index_t size) { + tvm_index_t npage = (size + kPageSize - 1) / kPageSize; + MultiMap * free_map = &(mgr->free_map); + IndexedEntry * it = free_map->lower_bound(free_map, npage); + void * data = 0; + tvm_index_t start = 0; + if (it != free_map->end(free_map)) { + Page p = it->page; + free_map->erase(free_map, it); + data = (void*)p.data; + start = p.ptable_begin; + npage = p.num_pages; + } else { + PageTable * ptable = &(mgr->ptable); + start = ptable->count; + CHECK_LE((start + npage), (sizeof(g_memory_pool) / kPageSize), "insufficient memory."); + /* insert page entry */ + Page p = PageCreate(start, npage); + ptable->resize(ptable, start + npage, &p); + data = (void*)p.data; + TLB * pmap = &(mgr->pmap); + pmap->set(pmap, data, &p); + } + vleak_size++; +#if TVM_CRT_DEBUG > 1 + printf("allocate: addr=%p, start=%d, npage=%d, vleak=%d\n", data, start, npage, vleak_size); +#endif // TVM_CRT_DEBUG + return data; +} + +/*! + * \brief Free the memory. 
+ * \param mgr The memory manager + * \param data The virtual address to release + */ +void MemoryManager_Free(MemoryManager * mgr, void* data) { + TLB * pmap = &(mgr->pmap); + if (pmap->count == 0) { return; } + PageEntry * entry = pmap->find(pmap, data); + CHECK_NE(entry, 0, "no valid page entry found."); + Page * p = &(entry->page); + MultiMap * free_map = &(mgr->free_map); + free_map->insert(free_map, p->num_pages, p); + vleak_size--; +#if TVM_CRT_DEBUG > 1 + printf("release: addr=%p, start=%d, npage=%d, vleak=%d\n", + data, entry->page.ptable_begin, entry->page.num_pages, vleak_size); +#endif // TVM_CRT_DEBUG +} + +MemoryManager * MemoryManagerCreate() { + static MemoryManager mgr; + memset(&mgr, 0, sizeof(MemoryManager)); + /* handle MemoryManager member functions */ + mgr.Alloc = MemoryManager_Alloc; + mgr.Free = MemoryManager_Free; + /* handle PageTable member functions */ + mgr.ptable.resize = PageTable_Resize; + /* handle TLB member functions */ + mgr.pmap.set = TLB_Set; + mgr.pmap.find = TLB_Find; + /* handle free_map member functions */ + mgr.free_map.lower_bound = MultiMap_LowerBound; + mgr.free_map.end = MultiMap_End; + mgr.free_map.erase = MultiMap_Erase; + mgr.free_map.insert = MultiMap_Insert; + return &mgr; +} + +MemoryManager * TVMGetGlobalMemoryManager() { + /* initialize once */ + static uint32_t initialized = 0; + static MemoryManager * mgr; + if (!initialized) { + mgr = MemoryManagerCreate(); + memset(g_memory_pool, 0, sizeof(g_memory_pool)); + initialized = 1; + } + return mgr; +} + +/** \brief Allocate memory from manager */ +void * vmalloc(size_t size) { + MemoryManager * mgr = TVMGetGlobalMemoryManager(); + return mgr->Alloc(mgr, size); +} + +/** \brief Release memory from manager */ +void vfree(void * ptr) { + MemoryManager * mgr = TVMGetGlobalMemoryManager(); + mgr->Free(mgr, ptr); +} diff --git a/src/runtime/crt/module.h b/src/runtime/crt/module.h index 8ff979b872e6..ed63c1428f47 100644 --- a/src/runtime/crt/module.h +++ b/src/runtime/crt/module.h 
@@ -24,8 +24,8 @@ #ifndef TVM_RUNTIME_CRT_MODULE_H_ #define TVM_RUNTIME_CRT_MODULE_H_ -#include #include +#include struct TVMPackedFunc; typedef struct TVMPackedFunc TVMPackedFunc; diff --git a/src/runtime/crt/ndarray.c b/src/runtime/crt/ndarray.c index 016fdd5add95..4b4ab68b3325 100644 --- a/src/runtime/crt/ndarray.c +++ b/src/runtime/crt/ndarray.c @@ -22,6 +22,8 @@ * \brief NDArray container infratructure. */ +#include + #include "ndarray.h" TVMNDArray TVMNDArray_Create(uint32_t ndim, const tvm_index_t * shape, @@ -29,7 +31,7 @@ TVMNDArray TVMNDArray_Create(uint32_t ndim, const tvm_index_t * shape, TVMNDArray ret; memset(&ret, 0, sizeof(TVMNDArray)); ret.dl_tensor.ndim = ndim; - ret.dl_tensor.shape = (int64_t*)malloc(sizeof(int64_t)*ndim); // NOLINT(*) + ret.dl_tensor.shape = (int64_t*)vmalloc(sizeof(int64_t)*ndim); // NOLINT(*) memcpy(ret.dl_tensor.shape, shape, sizeof(int64_t)*ndim); ret.dl_tensor.dtype = dtype; ret.dl_tensor.ctx = ctx; @@ -109,7 +111,9 @@ TVMNDArray TVMNDArray_CreateView(TVMNDArray * arr, const tvm_index_t * shape, } int TVMNDArray_Release(TVMNDArray * arr) { - free(arr->dl_tensor.data); - free(arr->dl_tensor.shape); + vfree(arr->dl_tensor.data); + arr->dl_tensor.data = 0; + vfree(arr->dl_tensor.shape); + arr->dl_tensor.shape = 0; return 0; } From 4f12f5415664c87d2f8fa1a696fce5d1d13c758a Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Tue, 17 Mar 2020 18:27:23 +0800 Subject: [PATCH 02/16] fix memory leak Change-Id: I37104c09e28112b1974fa2b064c809d0a8d686c3 --- src/runtime/crt/graph_runtime.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/runtime/crt/graph_runtime.c b/src/runtime/crt/graph_runtime.c index 8e0b1fb4372c..412c87ba3b71 100644 --- a/src/runtime/crt/graph_runtime.c +++ b/src/runtime/crt/graph_runtime.c @@ -544,6 +544,14 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime * runtime, const char * param_blo status = -1; } + if (runtime->data_entry[eid].dl_tensor.shape) { + vfree(runtime->data_entry[eid].dl_tensor.shape); 
+ runtime->data_entry[eid].dl_tensor.shape = 0; + } + if (runtime->data_entry[eid].dl_tensor.data) { + vfree(runtime->data_entry[eid].dl_tensor.data); + runtime->data_entry[eid].dl_tensor.data = 0; + } status |= TVMNDArray_Load(&(runtime->data_entry[eid]), &bptr); #if TVM_CRT_DEBUG TVMNDArray * entry = &(runtime->data_entry[eid]); @@ -791,4 +799,5 @@ void TVMGraphRuntimeRelease(TVMGraphRuntime ** pptr) { vfree(runtime->data_entry[idx].dl_tensor.shape); } vfree(*pptr); + CHECK_EQ(vleak_size, 0, "found memory leak."); } From e2ff2e424e85e6fa0b99752e0b0f3ab147561798 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Wed, 18 Mar 2020 13:45:34 +0800 Subject: [PATCH 03/16] clean up Change-Id: I039b12015a1d56c8f4120867cd5a5292da34f3e3 --- src/runtime/crt/graph_runtime.h | 2 +- src/runtime/crt/memory.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/crt/graph_runtime.h b/src/runtime/crt/graph_runtime.h index 5b6e9058840d..ba54d059cd00 100644 --- a/src/runtime/crt/graph_runtime.h +++ b/src/runtime/crt/graph_runtime.h @@ -169,7 +169,7 @@ typedef struct TVMGraphRuntime { // Get node entry index. uint32_t (*GetEntryId)(struct TVMGraphRuntime * runtime, uint32_t nid, uint32_t index); - // /*! \brief The graph nodes. */ + /*! \brief The graph nodes. */ TVMGraphRuntimeNode nodes[GRAPH_RUNTIME_MAX_NODES]; uint32_t nodes_count; /*! \brief The argument nodes. 
*/ diff --git a/src/runtime/crt/memory.c b/src/runtime/crt/memory.c index 1d153f1b75b3..ba8298487c17 100644 --- a/src/runtime/crt/memory.c +++ b/src/runtime/crt/memory.c @@ -137,7 +137,7 @@ typedef struct MultiMap { IndexedEntry * MultiMap_LowerBound(struct MultiMap * map, uint32_t npage) { IndexedEntry * entry = 0; for (uint32_t idx = 0; idx < map->count; idx++) { - if (map->entries[idx].index == npage) { + if (map->entries[idx].index >= npage) { entry = map->entries + idx; break; } From 9cd4e1348c53882d9f13b1431002c7bbe4f4f177 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Fri, 20 Mar 2020 19:37:43 +0800 Subject: [PATCH 04/16] implement vrealloc Change-Id: I35800470bcbfcf96652494f359711cb4c2d34398 --- apps/bundle_deploy/runtime.c | 12 ++-- include/tvm/runtime/crt/memory.h | 32 +++++---- src/runtime/crt/graph_runtime.c | 33 +++++++++- src/runtime/crt/graph_runtime.h | 25 +++++--- src/runtime/crt/memory.c | 107 ++++++++++++++++++++++++++++--- 5 files changed, 171 insertions(+), 38 deletions(-) diff --git a/apps/bundle_deploy/runtime.c b/apps/bundle_deploy/runtime.c index 521e701213fc..fc905b60a48f 100644 --- a/apps/bundle_deploy/runtime.c +++ b/apps/bundle_deploy/runtime.c @@ -34,15 +34,15 @@ #define GRAPH_RUNTIME_MAX_CONTEXTS 1 /*! Maximum inputs in a GraphRuntimeNode */ -#define GRAPH_RUNTIME_NODE_MAX_INPUTS 3 +#define GRAPH_RUNTIME_NODE_MAX_INPUTS 100 /*! Maximum supported nodes in a GraphRuntime */ -#define GRAPH_RUNTIME_MAX_NODES 4 +#define GRAPH_RUNTIME_MAX_NODES 100 /*! Maximum input nodes in a GraphRuntime */ -#define GRAPH_RUNTIME_MAX_INPUT_NODES 3 +#define GRAPH_RUNTIME_MAX_INPUT_NODES 100 /*! Maximum nodes in a GraphRuntime for quick entry indexing */ -#define GRAPH_RUNTIME_MAX_NODE_ROW_PTR 3 +#define GRAPH_RUNTIME_MAX_NODE_ROW_PTR 100 /*! Maximum output entries in a GraphRuntime */ -#define GRAPH_RUNTIME_MAX_OUTPUTS 3 +#define GRAPH_RUNTIME_MAX_OUTPUTS 100 /*! 
* \brief Log memory pool size for virtual memory allocation @@ -62,7 +62,7 @@ * * use 27 for 128 MiB memory space * * use 28 for 256 MiB memory space */ -#define TVM_CRT_LOG_VIRT_MEM_SIZE 16 +#define TVM_CRT_LOG_VIRT_MEM_SIZE 24 /*! \brief Page size for virtual memory allocation */ #define TVM_CRT_PAGE_BYTES 4096 diff --git a/include/tvm/runtime/crt/memory.h b/include/tvm/runtime/crt/memory.h index 867f1379318d..6fb88c5fb3b1 100644 --- a/include/tvm/runtime/crt/memory.h +++ b/include/tvm/runtime/crt/memory.h @@ -25,20 +25,28 @@ #ifndef TVM_RUNTIME_CRT_MEMORY_H_ #define TVM_RUNTIME_CRT_MEMORY_H_ -/** \brief Allocate memory from manager */ -void * vmalloc(size_t size); - -/** \brief Release memory from manager */ -void vfree(void * ptr); - static int vleak_size = 0; -// #define vmalloc(size) \ -// vmalloc_(size); \ -// printf("%s: %d: info: size=%d, vleak=%d\n", __FILE__, __LINE__, size, ++vleak_size) +/*! + * \brief Allocate memory from manager + * \param size The size of memory + * \return The virtual address + */ +void * vmalloc(size_t size); -// #define vfree(ptr) \ -// vfree_(ptr); \ -// printf("%s: %d: error: addr=%p, vleak=%d\n", __FILE__, __LINE__, ptr, --vleak_size) +/*! + * \brief Reallocate memory from manager + * \param ptr The pointer to the memory area to be reallocated + * \param size The size of memory + * \return The virtual address + */ +void * vrealloc(void * ptr, size_t size); + +/*! + * \brief Free the memory. 
+ * \param ptr The pointer to the memory to deallocate + * \return The virtual address + */ +void vfree(void * ptr); #endif // TVM_RUNTIME_CRT_MEMORY_H_ diff --git a/src/runtime/crt/graph_runtime.c b/src/runtime/crt/graph_runtime.c index 412c87ba3b71..0b0c95720b54 100644 --- a/src/runtime/crt/graph_runtime.c +++ b/src/runtime/crt/graph_runtime.c @@ -116,6 +116,7 @@ int TVMGraphRuntimeNode_Load(TVMGraphRuntimeNode * node, JSONReader *reader) { } reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { + node->inputs = vrealloc(node->inputs, sizeof(TVMGraphRuntimeNodeEntry)*(count+1)); TVMGraphRuntimeNodeEntry * inputs = node->inputs + count; reader->BeginArray(reader); if (!reader->NextArrayItem(reader)) { @@ -173,6 +174,14 @@ TVMGraphRuntimeNode TVMGraphRuntimeNodeCreate() { return node; } +void TVMGraphRuntimeNodeRelease(TVMGraphRuntimeNode * node) { + if (!node) { return; } + if (node->inputs) { + vfree(node->inputs); + node->inputs = 0; + } +} + int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *reader) { int status = 0; int bitmask = 0; @@ -233,6 +242,7 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r } reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { + attr->storage_id = vrealloc(attr->storage_id, sizeof(uint32_t)*(storage_id_count+1)); reader->ReadUnsignedInteger(reader, &(attr->storage_id[storage_id_count])); storage_id_count++; } @@ -312,6 +322,7 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r break; } while (reader->NextArrayItem(reader)) { + attr->device_index = vrealloc(attr->device_index, sizeof(uint32_t)*(device_index_count+1)); reader->ReadUnsignedInteger(reader, &(attr->device_index[device_index_count])); device_index_count++; } @@ -368,6 +379,18 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r return status; } +void TVMGraphRuntimeGraphAttr_Release(TVMGraphRuntimeGraphAttr * attr) { + if 
(!attr) { return; } + if (attr->storage_id) { + vfree(attr->storage_id); + attr->storage_id = 0; + } + if (attr->device_index) { + vfree(attr->device_index); + attr->device_index = 0; + } +} + int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) { int status = 0; reader->BeginObject(reader); @@ -377,6 +400,7 @@ int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) { if (!strcmp(key, "nodes")) { reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { + runtime->nodes = vrealloc(runtime->nodes, sizeof(TVMGraphRuntimeNode)*(runtime->nodes_count+1)); TVMGraphRuntimeNode * node = runtime->nodes + runtime->nodes_count; status = TVMGraphRuntimeNode_Load(node, reader); if (status != 0) { @@ -679,7 +703,7 @@ int TVMGraphRuntime_SetupOpExecs(TVMGraphRuntime * runtime) { args_count++; } if (strcmp(inode->op_type, "tvm_op")) { - fprintf(stderr, "Can only take tvm_op as op\n"); + fprintf(stderr, "Can only take tvm_op as op, but \"%s\" is found.\n", inode->op_type); status = -1; break; } @@ -792,6 +816,11 @@ TVMGraphRuntime * TVMGraphRuntimeCreate(const char * sym_json, void TVMGraphRuntimeRelease(TVMGraphRuntime ** pptr) { int32_t idx; TVMGraphRuntime * runtime = *pptr; + for (idx = 0; idx < runtime->nodes_count; ++idx) { + TVMGraphRuntimeNodeRelease(&(runtime->nodes[idx])); + } + vfree(runtime->nodes); + TVMGraphRuntimeGraphAttr_Release(&(runtime->attrs)); for (idx = 0; idx < runtime->storage_pool_count; ++idx) { TVMNDArray_Release(&(runtime->storage_pool[idx])); } @@ -799,5 +828,5 @@ void TVMGraphRuntimeRelease(TVMGraphRuntime ** pptr) { vfree(runtime->data_entry[idx].dl_tensor.shape); } vfree(*pptr); - CHECK_EQ(vleak_size, 0, "found memory leak."); + CHECK_EQ(vleak_size, 0, "found memory leak, leak size=%d", vleak_size); } diff --git a/src/runtime/crt/graph_runtime.h b/src/runtime/crt/graph_runtime.h index ba54d059cd00..b2e55c895e78 100644 --- a/src/runtime/crt/graph_runtime.h +++ b/src/runtime/crt/graph_runtime.h @@ -63,10 
+63,12 @@ typedef struct TVMGraphRuntimeNode { // parameters TVMOpParam param; // inputs - TVMGraphRuntimeNodeEntry inputs[GRAPH_RUNTIME_NODE_MAX_INPUTS]; - size_t inputs_count; + // TVMGraphRuntimeNodeEntry inputs[GRAPH_RUNTIME_NODE_MAX_INPUTS]; // TODO: remove + TVMGraphRuntimeNodeEntry * inputs; + // number of inputs + size_t inputs_count; // control deps - uint32_t control_deps[200]; + uint32_t control_deps[20]; // JSON Loader void (*LoadAttrs)(struct TVMGraphRuntimeNode * node, JSONReader *reader, TVMOpParam* param); // JSON Loader @@ -76,8 +78,10 @@ typedef struct TVMGraphRuntimeNode { // Graph attribute typedef struct TVMGraphRuntimeGraphAttr { uint32_t storage_num_not_alloctaed; - uint32_t storage_id[GRAPH_RUNTIME_MAX_NODES]; - uint32_t device_index[GRAPH_RUNTIME_MAX_NODES]; + // uint32_t storage_id[GRAPH_RUNTIME_MAX_NODES]; // TODO: remove + uint32_t * storage_id; + // uint32_t device_index[GRAPH_RUNTIME_MAX_NODES]; // TODO: remove + uint32_t * device_index; char dltype[GRAPH_RUNTIME_MAX_NODES][10]; // "int8", "int16", "float32" uint32_t dltype_count; int64_t shape[GRAPH_RUNTIME_MAX_NODES][TVM_CRT_MAX_NDIM]; @@ -170,17 +174,20 @@ typedef struct TVMGraphRuntime { uint32_t (*GetEntryId)(struct TVMGraphRuntime * runtime, uint32_t nid, uint32_t index); /*! \brief The graph nodes. */ - TVMGraphRuntimeNode nodes[GRAPH_RUNTIME_MAX_NODES]; - uint32_t nodes_count; + // TVMGraphRuntimeNode nodes[GRAPH_RUNTIME_MAX_NODES]; + TVMGraphRuntimeNode * nodes; + /*! \brief The graph nodes counter. */ + uint32_t nodes_count; /*! \brief The argument nodes. */ uint32_t input_nodes[GRAPH_RUNTIME_MAX_INPUT_NODES]; - uint32_t input_nodes_count; + uint32_t input_nodes_count; /*! \brief Used for quick entry indexing. */ uint32_t node_row_ptr[GRAPH_RUNTIME_MAX_NODE_ROW_PTR]; uint32_t node_row_ptr_count; /*! \brief Output entries. */ TVMGraphRuntimeNodeEntry outputs[GRAPH_RUNTIME_MAX_OUTPUTS]; - uint32_t outputs_count; + /*! \brief Output entries counter. 
*/ + uint32_t outputs_count; /*! \brief Additional graph attributes. */ TVMGraphRuntimeGraphAttr attrs; /*! \brief The code module that contains both host and device code. */ diff --git a/src/runtime/crt/memory.c b/src/runtime/crt/memory.c index ba8298487c17..ff456db5346f 100644 --- a/src/runtime/crt/memory.c +++ b/src/runtime/crt/memory.c @@ -181,10 +181,17 @@ typedef struct MemoryManager { */ void* (*Alloc)(struct MemoryManager * mgr, tvm_index_t size); /*! - * \brief Free the memory. + * \brief Allocate memory from manager + * \param ptr The pointer to the memory area to be reallocated * \param size The size of memory * \return The virtual address */ + void* (*Realloc)(struct MemoryManager * mgr, void * ptr, tvm_index_t size); + /*! + * \brief Free the memory. + * \param ptr The pointer to the memory to deallocate + * \return The virtual address + */ void (*Free)(struct MemoryManager * mgr, void* data); // Physical address -> page @@ -201,10 +208,10 @@ typedef struct MemoryManager { * \return The virtual address */ void* MemoryManager_Alloc(MemoryManager * mgr, tvm_index_t size) { + void * data = 0; tvm_index_t npage = (size + kPageSize - 1) / kPageSize; MultiMap * free_map = &(mgr->free_map); IndexedEntry * it = free_map->lower_bound(free_map, npage); - void * data = 0; tvm_index_t start = 0; if (it != free_map->end(free_map)) { Page p = it->page; @@ -225,28 +232,103 @@ void* MemoryManager_Alloc(MemoryManager * mgr, tvm_index_t size) { } vleak_size++; #if TVM_CRT_DEBUG > 1 - printf("allocate: addr=%p, start=%d, npage=%d, vleak=%d\n", data, start, npage, vleak_size); + printf("allocate: addr=%p, start=%d/%d, npage=%d, vleak=%d\n", + data, start, TVM_CRT_MAX_PAGES, npage, vleak_size); #endif // TVM_CRT_DEBUG return data; } /*! - * \brief Free the memory. 
+ * \brief Reallocate memory from manager + * \param ptr The pointer to the memory area to be reallocated * \param size The size of memory * \return The virtual address */ -void MemoryManager_Free(MemoryManager * mgr, void* data) { +void* MemoryManager_Realloc(MemoryManager * mgr, void * ptr, tvm_index_t size) { + void * data = ptr; + PageTable * ptable = &(mgr->ptable); TLB * pmap = &(mgr->pmap); - if (pmap->count == 0) { return; } - PageEntry * entry = pmap->find(pmap, data); + MultiMap * free_map = &(mgr->free_map); + tvm_index_t start = 0; + tvm_index_t npage = (size + kPageSize - 1) / kPageSize; + if (ptr) { + // get page size for given pointer + CHECK_NE(pmap->count, 0, "invalid translation look-aside buffer."); + PageEntry * entry = pmap->find(pmap, ptr); + CHECK_NE(entry, 0, "no valid page entry found."); + Page * pptr = &(entry->page); + // if the page size is smaller than target page size, + // try allocate new space + if (pptr->num_pages < npage) { + // TODO(liangfu): found out whether we can extend current entry + // + // insert new page entry + IndexedEntry * it = free_map->lower_bound(free_map, npage); + if (it != free_map->end(free_map)) { + data = (void*)it->page.data; + start = it->page.ptable_begin; + npage = it->page.num_pages; + free_map->erase(free_map, it); + } else { + start = ptable->count; + CHECK_LE((start + npage), (sizeof(g_memory_pool) / kPageSize), "insufficient memory."); + Page p = PageCreate(start, npage); + ptable->resize(ptable, start + npage, &p); + data = (void*)p.data; + pmap->set(pmap, data, &p); + } + // copy previous data to the new entry + memcpy(data, ptr, kPageSize * pptr->num_pages); + // release memory + free_map->insert(free_map, pptr->num_pages, pptr); + } else { + start = pptr->ptable_begin; + } + } else { + IndexedEntry * it = free_map->lower_bound(free_map, npage); + if (it != free_map->end(free_map)) { + Page p = it->page; + free_map->erase(free_map, it); + data = (void*)p.data; + start = p.ptable_begin; + npage = 
p.num_pages; + } else { + PageTable * ptable = &(mgr->ptable); + start = ptable->count; + CHECK_LE((start + npage), (sizeof(g_memory_pool) / kPageSize), "insufficient memory."); + /* insert page entry */ + Page p = PageCreate(start, npage); + ptable->resize(ptable, start + npage, &p); + data = (void*)p.data; + TLB * pmap = &(mgr->pmap); + pmap->set(pmap, data, &p); + } + vleak_size++; + } +#if TVM_CRT_DEBUG > 1 + printf("reallocate: addr=%p, start=%d/%d, npage=%d, vleak=%d, size=%d\n", + data, start, TVM_CRT_MAX_PAGES, npage, vleak_size, size); +#endif // TVM_CRT_DEBUG + return data; +} + +/*! + * \brief Free the memory. + * \param ptr The pointer to the memory to deallocate + * \return The virtual address + */ +void MemoryManager_Free(MemoryManager * mgr, void* ptr) { + TLB * pmap = &(mgr->pmap); + CHECK_NE(pmap->count, 0, "invalid translation look-aside buffer."); + PageEntry * entry = pmap->find(pmap, ptr); CHECK_NE(entry, 0, "no valid page entry found."); Page * p = &(entry->page); MultiMap * free_map = &(mgr->free_map); free_map->insert(free_map, p->num_pages, p); vleak_size--; #if TVM_CRT_DEBUG > 1 - printf("release: addr=%p, start=%d, npage=%d, vleak=%d\n", - data, entry->page.ptable_begin, entry->page.num_pages, vleak_size); + printf("release: addr=%p, start=%d/%d, npage=%d, vleak=%d\n", + ptr, entry->page.ptable_begin, TVM_CRT_MAX_PAGES, entry->page.num_pages, vleak_size); #endif // TVM_CRT_DEBUG } @@ -255,6 +337,7 @@ MemoryManager * MemoryManagerCreate() { memset(&mgr, 0, sizeof(MemoryManager)); /* handle MemoryManager member functions */ mgr.Alloc = MemoryManager_Alloc; + mgr.Realloc = MemoryManager_Realloc; mgr.Free = MemoryManager_Free; /* handle PageTable member functions */ mgr.ptable.resize = PageTable_Resize; @@ -287,6 +370,12 @@ void * vmalloc(size_t size) { return mgr->Alloc(mgr, size); } +/** \brief Reallocate memory from manager */ +void * vrealloc(void * ptr, size_t size) { + MemoryManager * mgr = TVMGetGlobalMemoryManager(); + return 
mgr->Realloc(mgr, ptr, size); +} + /** \brief Release memory from manager */ void vfree(void * ptr) { MemoryManager * mgr = TVMGetGlobalMemoryManager(); From 4707191dd949b159db3188ec72a809686b09ee3c Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Mon, 23 Mar 2020 12:46:43 +0800 Subject: [PATCH 05/16] allocate from stack memory for most of the variables Change-Id: I72071289843fff4031c0df8796868a0b9fbc57ee --- apps/bundle_deploy/demo.cc | 10 ++--- apps/bundle_deploy/runtime.c | 12 +----- apps/bundle_deploy/test.cc | 10 ++--- src/runtime/crt/graph_runtime.c | 74 ++++++++++++++++++++++++--------- src/runtime/crt/graph_runtime.h | 26 +++++++----- src/runtime/crt/memory.c | 12 ++++-- 6 files changed, 92 insertions(+), 52 deletions(-) diff --git a/apps/bundle_deploy/demo.cc b/apps/bundle_deploy/demo.cc index 34be27958c91..0de10d7177eb 100644 --- a/apps/bundle_deploy/demo.cc +++ b/apps/bundle_deploy/demo.cc @@ -109,11 +109,11 @@ int main(int argc, char **argv) { max_index, max_iter); printf("timing: %.2f ms (create), %.2f ms (set_input), %.2f ms (run), " "%.2f ms (get_output), %.2f ms (destroy)\n", - (t1.tv_sec-t0.tv_sec)*1000000 + (t1.tv_usec-t0.tv_usec)/1000.f, - (t2.tv_sec-t1.tv_sec)*1000000 + (t2.tv_usec-t1.tv_usec)/1000.f, - (t3.tv_sec-t2.tv_sec)*1000000 + (t3.tv_usec-t2.tv_usec)/1000.f, - (t4.tv_sec-t3.tv_sec)*1000000 + (t4.tv_usec-t3.tv_usec)/1000.f, - (t5.tv_sec-t4.tv_sec)*1000000 + (t5.tv_usec-t4.tv_usec)/1000.f); + (t1.tv_sec-t0.tv_sec)*1000.0f + (t1.tv_usec-t0.tv_usec)/1000.f, + (t2.tv_sec-t1.tv_sec)*1000.0f + (t2.tv_usec-t1.tv_usec)/1000.f, + (t3.tv_sec-t2.tv_sec)*1000.0f + (t3.tv_usec-t2.tv_usec)/1000.f, + (t4.tv_sec-t3.tv_sec)*1000.0f + (t4.tv_usec-t3.tv_usec)/1000.f, + (t5.tv_sec-t4.tv_sec)*1000.0f + (t5.tv_usec-t4.tv_usec)/1000.f); dlclose(bundle); return 0; diff --git a/apps/bundle_deploy/runtime.c b/apps/bundle_deploy/runtime.c index fc905b60a48f..b979fb41ac05 100644 --- a/apps/bundle_deploy/runtime.c +++ b/apps/bundle_deploy/runtime.c @@ -30,19 +30,11 
@@ #define TVM_CRT_MAX_NDIM 6 /*! Maximum supported arguments in generated functions */ #define TVM_CRT_MAX_ARGS 10 -/*! Maximum supported contexts in a GraphRuntime */ -#define GRAPH_RUNTIME_MAX_CONTEXTS 1 +/*! Maximum supported string length in dltype, e.g. "int8", "int16", "float32" */ +#define TVM_CRT_STRLEN_DLTYPE 10 -/*! Maximum inputs in a GraphRuntimeNode */ -#define GRAPH_RUNTIME_NODE_MAX_INPUTS 100 /*! Maximum supported nodes in a GraphRuntime */ #define GRAPH_RUNTIME_MAX_NODES 100 -/*! Maximum input nodes in a GraphRuntime */ -#define GRAPH_RUNTIME_MAX_INPUT_NODES 100 -/*! Maximum nodes in a GraphRuntime for quick entry indexing */ -#define GRAPH_RUNTIME_MAX_NODE_ROW_PTR 100 -/*! Maximum output entries in a GraphRuntime */ -#define GRAPH_RUNTIME_MAX_OUTPUTS 100 /*! * \brief Log memory pool size for virtual memory allocation diff --git a/apps/bundle_deploy/test.cc b/apps/bundle_deploy/test.cc index 643f1adff320..c92400d29516 100644 --- a/apps/bundle_deploy/test.cc +++ b/apps/bundle_deploy/test.cc @@ -122,11 +122,11 @@ int main(int argc, char **argv) { printf("timing: %.2f ms (create), %.2f ms (set_input), %.2f ms (run), " "%.2f ms (get_output), %.2f ms (destroy)\n", - (t1.tv_sec-t0.tv_sec)*1000000 + (t1.tv_usec-t0.tv_usec)/1000.f, - (t2.tv_sec-t1.tv_sec)*1000000 + (t2.tv_usec-t1.tv_usec)/1000.f, - (t3.tv_sec-t2.tv_sec)*1000000 + (t3.tv_usec-t2.tv_usec)/1000.f, - (t4.tv_sec-t3.tv_sec)*1000000 + (t4.tv_usec-t3.tv_usec)/1000.f, - (t5.tv_sec-t4.tv_sec)*1000000 + (t5.tv_usec-t4.tv_usec)/1000.f); + (t1.tv_sec-t0.tv_sec)*1000.0f + (t1.tv_usec-t0.tv_usec)/1000.f, + (t2.tv_sec-t1.tv_sec)*1000.0f + (t2.tv_usec-t1.tv_usec)/1000.f, + (t3.tv_sec-t2.tv_sec)*1000.0f + (t3.tv_usec-t2.tv_usec)/1000.f, + (t4.tv_sec-t3.tv_sec)*1000.0f + (t4.tv_usec-t3.tv_usec)/1000.f, + (t5.tv_sec-t4.tv_sec)*1000.0f + (t5.tv_usec-t4.tv_usec)/1000.f); free(json_data); free(params_data); diff --git a/src/runtime/crt/graph_runtime.c b/src/runtime/crt/graph_runtime.c index 
0b0c95720b54..af99d2c538f0 100644 --- a/src/runtime/crt/graph_runtime.c +++ b/src/runtime/crt/graph_runtime.c @@ -109,11 +109,6 @@ int TVMGraphRuntimeNode_Load(TVMGraphRuntimeNode * node, JSONReader *reader) { bitmask |= 2; } else if (!strcmp(key, "inputs")) { size_t count = node->inputs_count; - if (count >= GRAPH_RUNTIME_NODE_MAX_INPUTS) { - fprintf(stderr, "The number of inputs in graph runtime node is greater than expected.\n"); - status = -1; - break; - } reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { node->inputs = vrealloc(node->inputs, sizeof(TVMGraphRuntimeNodeEntry)*(count+1)); @@ -212,7 +207,8 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r } reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { - reader->ReadString(reader, attr->dltype[dltype_count]); + attr->dltype = vrealloc(attr->dltype, TVM_CRT_STRLEN_DLTYPE * (dltype_count + 1)); + reader->ReadString(reader, attr->dltype + dltype_count * TVM_CRT_STRLEN_DLTYPE); dltype_count++; } attr->dltype_count = dltype_count;; @@ -272,20 +268,22 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r } reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { + attr->shape = vrealloc(attr->shape, sizeof(attr->shape[0])*(shape_count+1)*TVM_CRT_MAX_NDIM); + attr->ndim = vrealloc(attr->ndim, sizeof(attr->ndim[0])*(shape_count+1)); reader->BeginArray(reader); - reader->ReadInteger(reader, &(attr->shape[shape_count][0])); + reader->ReadInteger(reader, &(attr->shape[shape_count*TVM_CRT_MAX_NDIM+0])); uint32_t ndim = 1; if (reader->NextArrayItem(reader)) { if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, &(attr->shape[shape_count][1])); ndim++; + reader->ReadInteger(reader, &(attr->shape[shape_count*TVM_CRT_MAX_NDIM+1])); ndim++; if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, &(attr->shape[shape_count][2])); ndim++; + reader->ReadInteger(reader, 
&(attr->shape[shape_count*TVM_CRT_MAX_NDIM+2])); ndim++; if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, &(attr->shape[shape_count][3])); ndim++; + reader->ReadInteger(reader, &(attr->shape[shape_count*TVM_CRT_MAX_NDIM+3])); ndim++; if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, &(attr->shape[shape_count][4])); ndim++; + reader->ReadInteger(reader, &(attr->shape[shape_count*TVM_CRT_MAX_NDIM+4])); ndim++; if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, &(attr->shape[shape_count][5])); ndim++; + reader->ReadInteger(reader, &(attr->shape[shape_count*TVM_CRT_MAX_NDIM+5])); ndim++; reader->NextArrayItem(reader); } } @@ -345,13 +343,18 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r status = -1; break; } - uint32_t temp[GRAPH_RUNTIME_MAX_NODES]; + uint32_t * temp = 0; uint32_t temp_count = 0; reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { + temp = vrealloc(temp, sizeof(uint32_t) * (temp_count + 1)); reader->ReadUnsignedInteger(reader, &(temp[temp_count])); temp_count++; } + if (temp) { + vfree(temp); + temp = 0; + } } else if (!strcmp(type, "size_t")) { if (!(reader->NextArrayItem(reader))) { fprintf(stderr, "Invalid json format\n"); @@ -389,6 +392,18 @@ void TVMGraphRuntimeGraphAttr_Release(TVMGraphRuntimeGraphAttr * attr) { vfree(attr->device_index); attr->device_index = 0; } + if (attr->dltype) { + vfree(attr->dltype); + attr->dltype = 0; + } + if (attr->shape) { + vfree(attr->shape); + attr->shape = 0; + } + if (attr->ndim) { + vfree(attr->ndim); + attr->ndim = 0; + } } int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) { @@ -400,7 +415,8 @@ int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) { if (!strcmp(key, "nodes")) { reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { - runtime->nodes = vrealloc(runtime->nodes, sizeof(TVMGraphRuntimeNode)*(runtime->nodes_count+1)); + runtime->nodes = + 
vrealloc(runtime->nodes, sizeof(TVMGraphRuntimeNode) * (runtime->nodes_count + 1)); TVMGraphRuntimeNode * node = runtime->nodes + runtime->nodes_count; status = TVMGraphRuntimeNode_Load(node, reader); if (status != 0) { @@ -417,6 +433,8 @@ int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) { } else if (!strcmp(key, "arg_nodes")) { reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { + runtime->input_nodes = + vrealloc(runtime->input_nodes, sizeof(uint32_t) * (runtime->input_nodes_count + 1)); uint32_t * node = runtime->input_nodes + runtime->input_nodes_count; reader->ReadUnsignedInteger(reader, node); runtime->input_nodes_count++; @@ -425,6 +443,8 @@ int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) { } else if (!strcmp(key, "node_row_ptr")) { reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { + runtime->node_row_ptr = + vrealloc(runtime->node_row_ptr, sizeof(uint32_t) * (runtime->node_row_ptr_count + 1)); uint32_t count = runtime->node_row_ptr_count; uint32_t * node = runtime->node_row_ptr + count; reader->ReadUnsignedInteger(reader, node); @@ -434,6 +454,9 @@ int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) { } else if (!strcmp(key, "heads")) { reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { + runtime->outputs = + vrealloc(runtime->outputs, + sizeof(TVMGraphRuntimeNodeEntry) * (runtime->outputs_count + 1)); TVMGraphRuntimeNodeEntry * entry = runtime->outputs + runtime->outputs_count; status = NodeEntry_Load(entry, reader); if (status != 0) { @@ -626,10 +649,10 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime * runtime) { uint32_t idx, dim; // Grab saved optimization plan from graph. 
- DLDataType vtype[GRAPH_RUNTIME_MAX_NODES]; TVMGraphRuntimeGraphAttr * attrs = &(runtime->attrs); + DLDataType * vtype = vmalloc(sizeof(DLDataType) * attrs->dltype_count); for (idx = 0; idx < attrs->dltype_count; idx++) { - vtype[idx] = String2DLDataType(attrs->dltype[idx]); + vtype[idx] = String2DLDataType(attrs->dltype + idx * TVM_CRT_STRLEN_DLTYPE); } // Size and device type of each storage pool entry. @@ -641,7 +664,7 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime * runtime) { int storage_id = attrs->storage_id[idx]; // Use the fallback device if no device index is available. int device_type = runtime->ctxs[0].device_type; - uint32_t size = Shape_Accumulate(attrs->shape[idx], attrs->ndim[idx]); + uint32_t size = Shape_Accumulate(attrs->shape+idx*TVM_CRT_MAX_NDIM, attrs->ndim[idx]); DLDataType t = vtype[idx]; uint32_t bits = t.bits * t.lanes; size_t bytes = ((bits + 7U) / 8U) * size; @@ -656,6 +679,8 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime * runtime) { // Allocate the space. for (idx = 0; idx < pool_entry_count; idx++) { + runtime->storage_pool = + vrealloc(runtime->storage_pool, sizeof(TVMNDArray) * (runtime->storage_pool_count + 1)); TVMGraphRuntimePoolEntry pit = pool_entry[idx]; int64_t shape[TVM_CRT_MAX_NDIM] = {0, }; TVMContext ctx = runtime->ctxs[0]; @@ -671,25 +696,30 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime * runtime) { // memory assignment for each node entry. The allocated memory on each device // is mapped to this pool. 
runtime->data_entry_count = runtime->node_row_ptr[runtime->node_row_ptr_count - 1]; + runtime->data_entry = vmalloc(sizeof(TVMNDArray) * runtime->data_entry_count); for (idx = 0; idx < runtime->data_entry_count; ++idx) { size_t storage_id = attrs->storage_id[idx]; CHECK(storage_id < runtime->storage_pool_count); runtime->data_entry[idx] = TVMNDArray_CreateView(&(runtime->storage_pool[storage_id]), - attrs->shape[idx], attrs->ndim[idx], vtype[idx]); + attrs->shape+idx*TVM_CRT_MAX_NDIM, attrs->ndim[idx], vtype[idx]); CHECK_NE(runtime->data_entry[idx].dl_tensor.data, 0, "fail to create for node with idx=%d, storage_id=%d\n", idx, storage_id); } + + // Release memory + vfree(vtype); } int TVMGraphRuntime_SetupOpExecs(TVMGraphRuntime * runtime) { int status = 0; uint32_t nid, idx; runtime->op_execs_count = runtime->nodes_count; + runtime->op_execs = vmalloc(sizeof(TVMPackedFunc) * runtime->op_execs_count); for (nid = 0; nid < runtime->nodes_count; nid++) { const TVMGraphRuntimeNode * inode = runtime->nodes + nid; if (strcmp(inode->op_type, "null")) { - DLTensorPtr args[GRAPH_RUNTIME_MAX_NODES]; + DLTensorPtr args[TVM_CRT_MAX_ARGS]; uint32_t args_count = 0; for (idx = 0; idx < inode->inputs_count; idx++) { const TVMGraphRuntimeNodeEntry * entry = inode->inputs + idx; @@ -827,6 +857,12 @@ void TVMGraphRuntimeRelease(TVMGraphRuntime ** pptr) { for (idx = 0; idx < runtime->data_entry_count; ++idx) { vfree(runtime->data_entry[idx].dl_tensor.shape); } + vfree((*pptr)->input_nodes); + vfree((*pptr)->node_row_ptr); + vfree((*pptr)->outputs); + vfree((*pptr)->storage_pool); + vfree((*pptr)->data_entry); + vfree((*pptr)->op_execs); vfree(*pptr); CHECK_EQ(vleak_size, 0, "found memory leak, leak size=%d", vleak_size); } diff --git a/src/runtime/crt/graph_runtime.h b/src/runtime/crt/graph_runtime.h index b2e55c895e78..259967a425e3 100644 --- a/src/runtime/crt/graph_runtime.h +++ b/src/runtime/crt/graph_runtime.h @@ -82,10 +82,12 @@ typedef struct TVMGraphRuntimeGraphAttr { uint32_t 
* storage_id; // uint32_t device_index[GRAPH_RUNTIME_MAX_NODES]; // TODO: remove uint32_t * device_index; - char dltype[GRAPH_RUNTIME_MAX_NODES][10]; // "int8", "int16", "float32" + char * dltype; // "int8", "int16", "float32" uint32_t dltype_count; - int64_t shape[GRAPH_RUNTIME_MAX_NODES][TVM_CRT_MAX_NDIM]; - uint32_t ndim[GRAPH_RUNTIME_MAX_NODES]; + // int64_t shape[GRAPH_RUNTIME_MAX_NODES][TVM_CRT_MAX_NDIM]; + // uint32_t ndim[GRAPH_RUNTIME_MAX_NODES]; + int64_t * shape; + uint32_t * ndim; uint32_t shape_count; } TVMGraphRuntimeGraphAttr; @@ -179,13 +181,16 @@ typedef struct TVMGraphRuntime { /*! \brief The graph nodes counter. */ uint32_t nodes_count; /*! \brief The argument nodes. */ - uint32_t input_nodes[GRAPH_RUNTIME_MAX_INPUT_NODES]; + // uint32_t input_nodes[GRAPH_RUNTIME_MAX_INPUT_NODES]; + uint32_t * input_nodes; uint32_t input_nodes_count; /*! \brief Used for quick entry indexing. */ - uint32_t node_row_ptr[GRAPH_RUNTIME_MAX_NODE_ROW_PTR]; + // uint32_t node_row_ptr[GRAPH_RUNTIME_MAX_NODE_ROW_PTR]; + uint32_t * node_row_ptr; uint32_t node_row_ptr_count; /*! \brief Output entries. */ - TVMGraphRuntimeNodeEntry outputs[GRAPH_RUNTIME_MAX_OUTPUTS]; + // TVMGraphRuntimeNodeEntry outputs[GRAPH_RUNTIME_MAX_OUTPUTS]; + TVMGraphRuntimeNodeEntry * outputs; /*! \brief Output entries counter. */ uint32_t outputs_count; /*! \brief Additional graph attributes. */ @@ -193,16 +198,17 @@ typedef struct TVMGraphRuntime { /*! \brief The code module that contains both host and device code. */ TVMModule module; /*! \brief Execution context of all devices including the host. */ - TVMContext ctxs[GRAPH_RUNTIME_MAX_CONTEXTS]; + TVMContext ctxs[1]; uint32_t ctxs_count; /*! \brief Common storage pool for all devices. */ - TVMNDArray storage_pool[GRAPH_RUNTIME_MAX_NODES]; + // TVMNDArray storage_pool[GRAPH_RUNTIME_MAX_NODES]; + TVMNDArray * storage_pool; uint32_t storage_pool_count; /*! \brief Data entry of each node. 
*/ - TVMNDArray data_entry[GRAPH_RUNTIME_MAX_NODES]; + TVMNDArray * data_entry; uint32_t data_entry_count; /*! \brief Operator on each node. */ - TVMPackedFunc op_execs[GRAPH_RUNTIME_MAX_NODES]; + TVMPackedFunc * op_execs; uint32_t op_execs_count; } TVMGraphRuntime; diff --git a/src/runtime/crt/memory.c b/src/runtime/crt/memory.c index ff456db5346f..0f1e3d2cdb46 100644 --- a/src/runtime/crt/memory.c +++ b/src/runtime/crt/memory.c @@ -222,7 +222,9 @@ void* MemoryManager_Alloc(MemoryManager * mgr, tvm_index_t size) { } else { PageTable * ptable = &(mgr->ptable); start = ptable->count; - CHECK_LE((start + npage), (sizeof(g_memory_pool) / kPageSize), "insufficient memory."); + CHECK_LE((start + npage), (sizeof(g_memory_pool) / kPageSize), + "insufficient memory, start=%d, npage=%d, total=%d", + start, npage, start + npage); /* insert page entry */ Page p = PageCreate(start, npage); ptable->resize(ptable, start + npage, &p); @@ -271,7 +273,9 @@ void* MemoryManager_Realloc(MemoryManager * mgr, void * ptr, tvm_index_t size) { free_map->erase(free_map, it); } else { start = ptable->count; - CHECK_LE((start + npage), (sizeof(g_memory_pool) / kPageSize), "insufficient memory."); + CHECK_LE((start + npage), (sizeof(g_memory_pool) / kPageSize), + "insufficient memory, start=%d, npage=%d, total=%d", + start, npage, start + npage); Page p = PageCreate(start, npage); ptable->resize(ptable, start + npage, &p); data = (void*)p.data; @@ -295,7 +299,9 @@ void* MemoryManager_Realloc(MemoryManager * mgr, void * ptr, tvm_index_t size) { } else { PageTable * ptable = &(mgr->ptable); start = ptable->count; - CHECK_LE((start + npage), (sizeof(g_memory_pool) / kPageSize), "insufficient memory."); + CHECK_LE((start + npage), (sizeof(g_memory_pool) / kPageSize), + "insufficient memory, start=%d, npage=%d, total=%d", + start, npage, start + npage); /* insert page entry */ Page p = PageCreate(start, npage); ptable->resize(ptable, start + npage, &p); From c8fc36806ce73676ef7e7075d613017e88ef23f5 
Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Mon, 23 Mar 2020 14:16:13 +0800 Subject: [PATCH 06/16] allocate from stack memory for all of the variables Change-Id: I32dba85ac1660c77f51c2d0d8ab6436ed0c01c74 --- apps/bundle_deploy/runtime.c | 5 ++--- src/runtime/crt/crt_backend_api.c | 1 + src/runtime/crt/graph_runtime.c | 29 ++++++++++++++++++++--------- src/runtime/crt/graph_runtime.h | 10 ---------- src/runtime/crt/module.h | 2 +- src/runtime/crt/packed_func.h | 10 ++++------ 6 files changed, 28 insertions(+), 29 deletions(-) diff --git a/apps/bundle_deploy/runtime.c b/apps/bundle_deploy/runtime.c index b979fb41ac05..a7ffea9bbf91 100644 --- a/apps/bundle_deploy/runtime.c +++ b/apps/bundle_deploy/runtime.c @@ -32,9 +32,8 @@ #define TVM_CRT_MAX_ARGS 10 /*! Maximum supported string length in dltype, e.g. "int8", "int16", "float32" */ #define TVM_CRT_STRLEN_DLTYPE 10 - -/*! Maximum supported nodes in a GraphRuntime */ -#define GRAPH_RUNTIME_MAX_NODES 100 +/*! Maximum supported string length in function names */ +#define TVM_CRT_STRLEN_NAME 80 /*! 
* \brief Log memory pool size for virtual memory allocation diff --git a/src/runtime/crt/crt_backend_api.c b/src/runtime/crt/crt_backend_api.c index 45dd913b5199..52cefafe3980 100644 --- a/src/runtime/crt/crt_backend_api.c +++ b/src/runtime/crt/crt_backend_api.c @@ -47,6 +47,7 @@ int TVMBackendParallelLaunch(FTVMParallelLambda flambda, void* cdata, int num_ta } int TVMBackendRegisterSystemLibSymbol(const char* name, void* ptr) { + g_fexecs = vrealloc(g_fexecs, sizeof(TVMPackedFunc) * (g_fexecs_count + 1)); snprintf(g_fexecs[g_fexecs_count].name, sizeof(g_fexecs[g_fexecs_count].name), name); g_fexecs[g_fexecs_count].fexec = ptr; g_fexecs_count++; diff --git a/src/runtime/crt/graph_runtime.c b/src/runtime/crt/graph_runtime.c index af99d2c538f0..b8c686e93182 100644 --- a/src/runtime/crt/graph_runtime.c +++ b/src/runtime/crt/graph_runtime.c @@ -550,8 +550,8 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime * runtime, const char * param_blo bptr += sizeof(reserved); // read names - char names[GRAPH_RUNTIME_MAX_NODES][80]; - memset(names, 0, sizeof(names)); + char * names = vmalloc(TVM_CRT_STRLEN_NAME * runtime->nodes_count); + memset(names, 0, TVM_CRT_STRLEN_NAME * runtime->nodes_count); uint64_t names_count; int idx; names_count = ((uint64_t*)bptr)[0]; // NOLINT(*) @@ -564,7 +564,7 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime * runtime, const char * param_blo fprintf(stderr, "Error: function name longer than expected.\n"); status = -1; } - memcpy(names[idx], bptr, name_length); + memcpy(names + TVM_CRT_STRLEN_NAME * idx, bptr, name_length); bptr += name_length; } @@ -579,9 +579,9 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime * runtime, const char * param_blo } for (idx = 0; idx < size; idx++) { - int32_t in_idx = runtime->GetInputIndex(runtime, names[idx]); + int32_t in_idx = runtime->GetInputIndex(runtime, names + TVM_CRT_STRLEN_NAME * idx); if (!(in_idx >= 0)) { - fprintf(stderr, "Found param for non-existent input: %s\n", names[idx]); + fprintf(stderr, 
"Found param for non-existent input: %s\n", names + TVM_CRT_STRLEN_NAME * idx); status = -1; } uint32_t eid = runtime->GetEntryId(runtime, runtime->input_nodes[in_idx], 0); @@ -603,11 +603,14 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime * runtime, const char * param_blo #if TVM_CRT_DEBUG TVMNDArray * entry = &(runtime->data_entry[eid]); printf("loading: param %s loaded, in_idx=%d, eid=%d, ndim=%d, data[0]=%f\n", - names[idx], in_idx, eid, entry->dl_tensor.ndim, + names + TVM_CRT_STRLEN_NAME * idx, in_idx, eid, entry->dl_tensor.ndim, ((float*)entry->dl_tensor.data)[0]); // NOLINT(*) #endif // TVM_CRT_DEBUG } + // Release memory + vfree(names); + return status; } @@ -656,8 +659,9 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime * runtime) { } // Size and device type of each storage pool entry. - TVMGraphRuntimePoolEntry pool_entry[GRAPH_RUNTIME_MAX_NODES]; - memset(pool_entry, 0, sizeof(pool_entry)); + TVMGraphRuntimePoolEntry * pool_entry = + vmalloc(sizeof(TVMGraphRuntimePoolEntry) * runtime->nodes_count); + memset(pool_entry, 0, sizeof(TVMGraphRuntimePoolEntry) * runtime->nodes_count); uint32_t pool_entry_count = 0; // Find the maximum space size. 
for (idx = 0; idx < attrs->shape_count; idx++) { @@ -709,6 +713,7 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime * runtime) { // Release memory vfree(vtype); + vfree(pool_entry); } int TVMGraphRuntime_SetupOpExecs(TVMGraphRuntime * runtime) { @@ -797,7 +802,7 @@ int32_t TVMGraphRuntime_CreateTVMOp(TVMGraphRuntime * runtime, const TVMOpParam status = -1; } - runtime->module.GetFunction(param->func_name, pf); + runtime->module.GetFunction(&(runtime->module), param->func_name, pf); TVMArgs targs = TVMArgs_Create(arg_ptr.arg_values, arg_ptr.arg_tcodes, arg_ptr.arg_values_count); pf->SetArgs(pf, &targs); @@ -864,5 +869,11 @@ void TVMGraphRuntimeRelease(TVMGraphRuntime ** pptr) { vfree((*pptr)->data_entry); vfree((*pptr)->op_execs); vfree(*pptr); + + if (g_fexecs) { + vfree(g_fexecs); + g_fexecs = 0; + } + CHECK_EQ(vleak_size, 0, "found memory leak, leak size=%d", vleak_size); } diff --git a/src/runtime/crt/graph_runtime.h b/src/runtime/crt/graph_runtime.h index 259967a425e3..3cb8ba95e0fa 100644 --- a/src/runtime/crt/graph_runtime.h +++ b/src/runtime/crt/graph_runtime.h @@ -63,7 +63,6 @@ typedef struct TVMGraphRuntimeNode { // parameters TVMOpParam param; // inputs - // TVMGraphRuntimeNodeEntry inputs[GRAPH_RUNTIME_NODE_MAX_INPUTS]; // TODO: remove TVMGraphRuntimeNodeEntry * inputs; // number of inputs size_t inputs_count; @@ -78,14 +77,10 @@ typedef struct TVMGraphRuntimeNode { // Graph attribute typedef struct TVMGraphRuntimeGraphAttr { uint32_t storage_num_not_alloctaed; - // uint32_t storage_id[GRAPH_RUNTIME_MAX_NODES]; // TODO: remove uint32_t * storage_id; - // uint32_t device_index[GRAPH_RUNTIME_MAX_NODES]; // TODO: remove uint32_t * device_index; char * dltype; // "int8", "int16", "float32" uint32_t dltype_count; - // int64_t shape[GRAPH_RUNTIME_MAX_NODES][TVM_CRT_MAX_NDIM]; - // uint32_t ndim[GRAPH_RUNTIME_MAX_NODES]; int64_t * shape; uint32_t * ndim; uint32_t shape_count; @@ -176,20 +171,16 @@ typedef struct TVMGraphRuntime { uint32_t (*GetEntryId)(struct 
TVMGraphRuntime * runtime, uint32_t nid, uint32_t index); /*! \brief The graph nodes. */ - // TVMGraphRuntimeNode nodes[GRAPH_RUNTIME_MAX_NODES]; TVMGraphRuntimeNode * nodes; /*! \brief The graph nodes counter. */ uint32_t nodes_count; /*! \brief The argument nodes. */ - // uint32_t input_nodes[GRAPH_RUNTIME_MAX_INPUT_NODES]; uint32_t * input_nodes; uint32_t input_nodes_count; /*! \brief Used for quick entry indexing. */ - // uint32_t node_row_ptr[GRAPH_RUNTIME_MAX_NODE_ROW_PTR]; uint32_t * node_row_ptr; uint32_t node_row_ptr_count; /*! \brief Output entries. */ - // TVMGraphRuntimeNodeEntry outputs[GRAPH_RUNTIME_MAX_OUTPUTS]; TVMGraphRuntimeNodeEntry * outputs; /*! \brief Output entries counter. */ uint32_t outputs_count; @@ -201,7 +192,6 @@ typedef struct TVMGraphRuntime { TVMContext ctxs[1]; uint32_t ctxs_count; /*! \brief Common storage pool for all devices. */ - // TVMNDArray storage_pool[GRAPH_RUNTIME_MAX_NODES]; TVMNDArray * storage_pool; uint32_t storage_pool_count; /*! \brief Data entry of each node. */ diff --git a/src/runtime/crt/module.h b/src/runtime/crt/module.h index ed63c1428f47..e527ec01e5d3 100644 --- a/src/runtime/crt/module.h +++ b/src/runtime/crt/module.h @@ -42,7 +42,7 @@ typedef struct TVMModule { * * This function will return PackedFunc(nullptr) if function do not exist. 
*/ - void (*GetFunction)(const char * name, TVMPackedFunc * pf); + void (*GetFunction)(struct TVMModule * mod, const char * name, TVMPackedFunc * pf); } TVMModule; #endif // TVM_RUNTIME_CRT_MODULE_H_ diff --git a/src/runtime/crt/packed_func.h b/src/runtime/crt/packed_func.h index 21370b69c8c0..93898a436c88 100644 --- a/src/runtime/crt/packed_func.h +++ b/src/runtime/crt/packed_func.h @@ -112,14 +112,12 @@ static inline void TVMPackedFunc_SetArgs(TVMPackedFunc * pf, const TVMArgs * arg memcpy(&(pf->args), args, sizeof(TVMArgs)); } -TVMPackedFunc g_fexecs[GRAPH_RUNTIME_MAX_NODES]; +TVMPackedFunc * g_fexecs = 0; uint32_t g_fexecs_count = 0; -void TVMPackedFunc_SetupExecs(); - // Implement TVMModule::GetFunction // Put implementation in this file so we have seen the TVMPackedFunc -static inline void TVMModule_GetFunction(const char * name, TVMPackedFunc * pf) { +static inline void TVMModule_GetFunction(TVMModule * mod, const char * name, TVMPackedFunc * pf) { int idx; memset(pf, 0, sizeof(TVMPackedFunc)); assert(strlen(name) <= sizeof(pf->name)); @@ -127,13 +125,13 @@ static inline void TVMModule_GetFunction(const char * name, TVMPackedFunc * pf) pf->Call = TVMPackedFunc_Call; pf->SetArgs = TVMPackedFunc_SetArgs; pf->fexec = &TVMNoOperation; - for (idx = 0; idx < GRAPH_RUNTIME_MAX_NODES; idx++) { + for (idx = 0; idx < g_fexecs_count; idx++) { if (!strcmp(g_fexecs[idx].name, name)) { pf->fexec = g_fexecs[idx].fexec; break; } } - if (idx == GRAPH_RUNTIME_MAX_NODES) { + if (idx == g_fexecs_count) { fprintf(stderr, "function handle for %s not found\n", name); } } From e708e6fd29525405e899f4bb4f980b694df34c9c Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Mon, 23 Mar 2020 14:51:42 +0800 Subject: [PATCH 07/16] lint Change-Id: If12cd240685d7791fc60bc0cfb66389cdc186b73 --- apps/bundle_deploy/Makefile | 4 ++-- src/runtime/crt/graph_runtime.c | 28 ++++++++++++++-------------- src/runtime/crt/load_json.c | 10 +++++----- src/runtime/crt/memory.c | 29 
+++++++++++++++-------------- 4 files changed, 36 insertions(+), 35 deletions(-) diff --git a/apps/bundle_deploy/Makefile b/apps/bundle_deploy/Makefile index 2a0f632155a2..d86ba54e1f29 100644 --- a/apps/bundle_deploy/Makefile +++ b/apps/bundle_deploy/Makefile @@ -20,11 +20,11 @@ # Setup build environment TVM_ROOT=$(shell cd ../..; pwd) DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core -PKG_CXXFLAGS = -std=c++14 -O2 -fPIC \ +PKG_CXXFLAGS = -Wall -std=c++14 -O2 -fPIC \ -I${TVM_ROOT}/include \ -I${DMLC_CORE}/include \ -I${TVM_ROOT}/3rdparty/dlpack/include -PKG_CFLAGS = -std=c99 -O2 -fPIC \ +PKG_CFLAGS = -Wall -std=c99 -O2 -fPIC \ -I${TVM_ROOT}/include \ -I${DMLC_CORE}/include \ -I${TVM_ROOT}/3rdparty/dlpack/include diff --git a/src/runtime/crt/graph_runtime.c b/src/runtime/crt/graph_runtime.c index b8c686e93182..471bbc1939cb 100644 --- a/src/runtime/crt/graph_runtime.c +++ b/src/runtime/crt/graph_runtime.c @@ -268,22 +268,24 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r } reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { - attr->shape = vrealloc(attr->shape, sizeof(attr->shape[0])*(shape_count+1)*TVM_CRT_MAX_NDIM); + attr->shape = + vrealloc(attr->shape, sizeof(attr->shape[0])*(shape_count+1)*TVM_CRT_MAX_NDIM); attr->ndim = vrealloc(attr->ndim, sizeof(attr->ndim[0])*(shape_count+1)); reader->BeginArray(reader); - reader->ReadInteger(reader, &(attr->shape[shape_count*TVM_CRT_MAX_NDIM+0])); + int64_t * attr_shape_ptr = attr->shape + shape_count*TVM_CRT_MAX_NDIM; + reader->ReadInteger(reader, attr_shape_ptr + 0); uint32_t ndim = 1; if (reader->NextArrayItem(reader)) { if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, &(attr->shape[shape_count*TVM_CRT_MAX_NDIM+1])); ndim++; + reader->ReadInteger(reader, attr_shape_ptr + 1); ndim++; if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, &(attr->shape[shape_count*TVM_CRT_MAX_NDIM+2])); ndim++; + reader->ReadInteger(reader, attr_shape_ptr + 2); 
ndim++; if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, &(attr->shape[shape_count*TVM_CRT_MAX_NDIM+3])); ndim++; + reader->ReadInteger(reader, attr_shape_ptr + 3); ndim++; if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, &(attr->shape[shape_count*TVM_CRT_MAX_NDIM+4])); ndim++; + reader->ReadInteger(reader, attr_shape_ptr + 4); ndim++; if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, &(attr->shape[shape_count*TVM_CRT_MAX_NDIM+5])); ndim++; + reader->ReadInteger(reader, attr_shape_ptr + 5); ndim++; reader->NextArrayItem(reader); } } @@ -580,10 +582,8 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime * runtime, const char * param_blo for (idx = 0; idx < size; idx++) { int32_t in_idx = runtime->GetInputIndex(runtime, names + TVM_CRT_STRLEN_NAME * idx); - if (!(in_idx >= 0)) { - fprintf(stderr, "Found param for non-existent input: %s\n", names + TVM_CRT_STRLEN_NAME * idx); - status = -1; - } + CHECK_GT(in_idx, 0, + "Found param for non-existent input: %s\n", names + TVM_CRT_STRLEN_NAME * idx); uint32_t eid = runtime->GetEntryId(runtime, runtime->input_nodes[in_idx], 0); if (!(eid < runtime->data_entry_count)) { fprintf(stderr, "`entry_id`=%d is greater than expected(%d).\n", @@ -649,7 +649,7 @@ int TVMGraphRuntime_GetOutput(TVMGraphRuntime * runtime, const int32_t idx, DLTe } void TVMGraphRuntime_SetupStorage(TVMGraphRuntime * runtime) { - uint32_t idx, dim; + uint32_t idx; // Grab saved optimization plan from graph. 
TVMGraphRuntimeGraphAttr * attrs = &(runtime->attrs); @@ -708,7 +708,7 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime * runtime) { TVMNDArray_CreateView(&(runtime->storage_pool[storage_id]), attrs->shape+idx*TVM_CRT_MAX_NDIM, attrs->ndim[idx], vtype[idx]); CHECK_NE(runtime->data_entry[idx].dl_tensor.data, 0, - "fail to create for node with idx=%d, storage_id=%d\n", idx, storage_id); + "fail to create for node with idx=%d, storage_id=%lu\n", idx, storage_id); } // Release memory @@ -874,6 +874,6 @@ void TVMGraphRuntimeRelease(TVMGraphRuntime ** pptr) { vfree(g_fexecs); g_fexecs = 0; } - + CHECK_EQ(vleak_size, 0, "found memory leak, leak size=%d", vleak_size); } diff --git a/src/runtime/crt/load_json.c b/src/runtime/crt/load_json.c index 43e3e06c2e1d..cf9492b8e2fa 100644 --- a/src/runtime/crt/load_json.c +++ b/src/runtime/crt/load_json.c @@ -158,11 +158,11 @@ int JSONReader_ReadString(JSONReader * reader, char * out_str) { if (ch == '\\') { char sch = reader->NextChar(reader); switch (sch) { - case 'r': snprintf(output, sizeof(output), "%s\r", output); break; - case 'n': snprintf(output, sizeof(output), "%s\n", output); break; - case '\\': snprintf(output, sizeof(output), "%s\\", output); break; - case 't': snprintf(output, sizeof(output), "%s\t", output); break; - case '\"': snprintf(output, sizeof(output), "%s\"", output); break; + case 'r': snprintf(output + strlen(output), sizeof(output), "\r"); break; + case 'n': snprintf(output + strlen(output), sizeof(output), "\n"); break; + case '\\': snprintf(output + strlen(output), sizeof(output), "\\"); break; + case 't': snprintf(output + strlen(output), sizeof(output), "\t"); break; + case '\"': snprintf(output + strlen(output), sizeof(output), "\""); break; default: fprintf(stderr, "unknown string escape %c\n", sch); } } else { diff --git a/src/runtime/crt/memory.c b/src/runtime/crt/memory.c index 0f1e3d2cdb46..b6346ecdc1cc 100644 --- a/src/runtime/crt/memory.c +++ b/src/runtime/crt/memory.c @@ -102,7 +102,7 @@ 
void TLB_Set(TLB * tlb, char * data, Page * page) { if (entry == 0) { tlb->entries[tlb->count].addr = data; tlb->entries[tlb->count].page = *page; - tlb->count ++; + tlb->count++; } else { entry->addr = data; entry->page = *page; @@ -130,7 +130,7 @@ typedef struct MultiMap { uint32_t count; IndexedEntry * (*lower_bound)(struct MultiMap * map, uint32_t npage); IndexedEntry * (*end)(struct MultiMap * map); - IndexedEntry * (*erase)(struct MultiMap * map, IndexedEntry * entry); + void (*erase)(struct MultiMap * map, IndexedEntry * entry); void (*insert)(struct MultiMap * map, uint32_t npage, Page * p); } MultiMap; @@ -150,10 +150,11 @@ IndexedEntry * MultiMap_End(struct MultiMap * map) { return entry; } -IndexedEntry * MultiMap_Erase(struct MultiMap * map, IndexedEntry * entry) { +void MultiMap_Erase(struct MultiMap * map, IndexedEntry * entry) { for (uint32_t idx = 0; idx < map->count; idx++) { if ((map->entries + idx) == entry) { - memcpy(map->entries + idx, map->entries + (idx + 1), sizeof(IndexedEntry) * (map->count - idx)); + memcpy(map->entries + idx, map->entries + (idx + 1), + sizeof(IndexedEntry) * (map->count - idx)); map->count--; break; } @@ -193,7 +194,7 @@ typedef struct MemoryManager { * \return The virtual address */ void (*Free)(struct MemoryManager * mgr, void* data); - + // Physical address -> page PageTable ptable; // Virtual address -> page @@ -216,19 +217,19 @@ void* MemoryManager_Alloc(MemoryManager * mgr, tvm_index_t size) { if (it != free_map->end(free_map)) { Page p = it->page; free_map->erase(free_map, it); - data = (void*)p.data; + data = p.data; start = p.ptable_begin; npage = p.num_pages; } else { PageTable * ptable = &(mgr->ptable); start = ptable->count; CHECK_LE((start + npage), (sizeof(g_memory_pool) / kPageSize), - "insufficient memory, start=%d, npage=%d, total=%d", + "insufficient memory, start=%ld, npage=%ld, total=%ld", start, npage, start + npage); /* insert page entry */ Page p = PageCreate(start, npage); ptable->resize(ptable, 
start + npage, &p); - data = (void*)p.data; + data = p.data; TLB * pmap = &(mgr->pmap); pmap->set(pmap, data, &p); } @@ -267,18 +268,18 @@ void* MemoryManager_Realloc(MemoryManager * mgr, void * ptr, tvm_index_t size) { // insert new page entry IndexedEntry * it = free_map->lower_bound(free_map, npage); if (it != free_map->end(free_map)) { - data = (void*)it->page.data; + data = it->page.data; start = it->page.ptable_begin; npage = it->page.num_pages; free_map->erase(free_map, it); } else { start = ptable->count; CHECK_LE((start + npage), (sizeof(g_memory_pool) / kPageSize), - "insufficient memory, start=%d, npage=%d, total=%d", + "insufficient memory, start=%ld, npage=%ld, total=%ld", start, npage, start + npage); Page p = PageCreate(start, npage); ptable->resize(ptable, start + npage, &p); - data = (void*)p.data; + data = p.data; pmap->set(pmap, data, &p); } // copy previous data to the new entry @@ -293,19 +294,19 @@ void* MemoryManager_Realloc(MemoryManager * mgr, void * ptr, tvm_index_t size) { if (it != free_map->end(free_map)) { Page p = it->page; free_map->erase(free_map, it); - data = (void*)p.data; + data = p.data; start = p.ptable_begin; npage = p.num_pages; } else { PageTable * ptable = &(mgr->ptable); start = ptable->count; CHECK_LE((start + npage), (sizeof(g_memory_pool) / kPageSize), - "insufficient memory, start=%d, npage=%d, total=%d", + "insufficient memory, start=%ld, npage=%ld, total=%ld", start, npage, start + npage); /* insert page entry */ Page p = PageCreate(start, npage); ptable->resize(ptable, start + npage, &p); - data = (void*)p.data; + data = p.data; TLB * pmap = &(mgr->pmap); pmap->set(pmap, data, &p); } From c1a118a0e8d02d694a25d017e61a6ab303e718cd Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Mon, 23 Mar 2020 15:29:01 +0800 Subject: [PATCH 08/16] lint Change-Id: I7c9d90c11b60b8edda2427ebd189ebe535af2100 --- include/tvm/runtime/crt/logging.h | 2 +- include/tvm/runtime/crt/memory.h | 2 +- 2 files changed, 2 insertions(+), 2 
deletions(-) diff --git a/include/tvm/runtime/crt/logging.h b/include/tvm/runtime/crt/logging.h index 9e87637ccc6d..ac5b658c628e 100644 --- a/include/tvm/runtime/crt/logging.h +++ b/include/tvm/runtime/crt/logging.h @@ -18,7 +18,7 @@ */ /*! - * \file loggin.h + * \file tvm/runtime/crt/loggin.h * \brief A replacement of the dmlc logging system that avoids * the usage of GLOG and C++ headers */ diff --git a/include/tvm/runtime/crt/memory.h b/include/tvm/runtime/crt/memory.h index 6fb88c5fb3b1..3e47060a86c4 100644 --- a/include/tvm/runtime/crt/memory.h +++ b/include/tvm/runtime/crt/memory.h @@ -18,7 +18,7 @@ */ /*! - * \file memory.h + * \file tvm/runtime/crt/memory.h * \brief The virtual memory manager for micro-controllers */ From 6c0d01244302d1f5cf1c8aaca318e3bc61c9c9db Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Thu, 26 Mar 2020 16:38:23 +0800 Subject: [PATCH 09/16] facilitate the growth of TVM_CRT_MAX_NDIM Change-Id: I939fa43027a5c7529c5c7c6bd8d6e6beb91b7581 --- src/runtime/crt/graph_runtime.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/src/runtime/crt/graph_runtime.c b/src/runtime/crt/graph_runtime.c index 471bbc1939cb..e1bcb026a357 100644 --- a/src/runtime/crt/graph_runtime.c +++ b/src/runtime/crt/graph_runtime.c @@ -276,22 +276,16 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r reader->ReadInteger(reader, attr_shape_ptr + 0); uint32_t ndim = 1; if (reader->NextArrayItem(reader)) { - if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, attr_shape_ptr + 1); ndim++; + for (ndim = 1; ndim < TVM_CRT_MAX_NDIM; ndim++) { if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, attr_shape_ptr + 2); ndim++; - if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, attr_shape_ptr + 3); ndim++; - if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, attr_shape_ptr + 4); ndim++; - if (reader->NextArrayItem(reader)) { - 
reader->ReadInteger(reader, attr_shape_ptr + 5); ndim++; - reader->NextArrayItem(reader); - } - } - } + reader->ReadInteger(reader, attr_shape_ptr + ndim); + } else { + break; } } + if (ndim == TVM_CRT_MAX_NDIM) { + reader->NextArrayItem(reader); + } } attr->ndim[shape_count] = ndim; shape_count++; @@ -862,12 +856,12 @@ void TVMGraphRuntimeRelease(TVMGraphRuntime ** pptr) { for (idx = 0; idx < runtime->data_entry_count; ++idx) { vfree(runtime->data_entry[idx].dl_tensor.shape); } - vfree((*pptr)->input_nodes); - vfree((*pptr)->node_row_ptr); - vfree((*pptr)->outputs); - vfree((*pptr)->storage_pool); - vfree((*pptr)->data_entry); - vfree((*pptr)->op_execs); + vfree(runtime->input_nodes); + vfree(runtime->node_row_ptr); + vfree(runtime->outputs); + vfree(runtime->storage_pool); + vfree(runtime->data_entry); + vfree(runtime->op_execs); vfree(*pptr); if (g_fexecs) { From 364e9e5aa3ff91f9d75a5a699a8262c42f01d173 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Thu, 26 Mar 2020 17:25:36 +0800 Subject: [PATCH 10/16] extend test coverage of vmalloc Change-Id: Ie4ff6b64fdfe6810836cf8fd44dace82a20c4581 --- include/tvm/runtime/crt/logging.h | 21 ++++++++++++ src/runtime/crt/memory.c | 8 ++--- tests/cpp/crt_memory_test.cc | 54 +++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 4 deletions(-) create mode 100644 tests/cpp/crt_memory_test.cc diff --git a/include/tvm/runtime/crt/logging.h b/include/tvm/runtime/crt/logging.h index ac5b658c628e..1ff90fb510fc 100644 --- a/include/tvm/runtime/crt/logging.h +++ b/include/tvm/runtime/crt/logging.h @@ -26,6 +26,7 @@ #ifndef TVM_RUNTIME_CRT_LOGGING_H_ #define TVM_RUNTIME_CRT_LOGGING_H_ +#ifndef CHECK #define CHECK(x) \ do { \ if (!(x)) { \ @@ -33,7 +34,9 @@ exit(-1); \ } \ }while(0) +#endif +#ifndef CHECK_BINARY_OP #define CHECK_BINARY_OP(op, x, y, fmt, ...) \ do { \ if (!(x op y)) { \ @@ -41,12 +44,30 @@ exit(-1); \ } \ }while(0) +#endif +#ifndef CHECK_LT #define CHECK_LT(x, y, fmt, ...) 
CHECK_BINARY_OP(<, x, y, fmt, ##__VA_ARGS__) +#endif + +#ifndef CHECK_GT #define CHECK_GT(x, y, fmt, ...) CHECK_BINARY_OP(>, x, y, fmt, ##__VA_ARGS__) +#endif + +#ifndef CHECK_LE #define CHECK_LE(x, y, fmt, ...) CHECK_BINARY_OP(<=, x, y, fmt, ##__VA_ARGS__) +#endif + +#ifndef CHECK_GE #define CHECK_GE(x, y, fmt, ...) CHECK_BINARY_OP(>=, x, y, fmt, ##__VA_ARGS__) +#endif + +#ifndef CHECK_EQ #define CHECK_EQ(x, y, fmt, ...) CHECK_BINARY_OP(==, x, y, fmt, ##__VA_ARGS__) +#endif + +#ifndef CHECK_NE #define CHECK_NE(x, y, fmt, ...) CHECK_BINARY_OP(!=, x, y, fmt, ##__VA_ARGS__) +#endif #endif // TVM_RUNTIME_CRT_LOGGING_H_ diff --git a/src/runtime/crt/memory.c b/src/runtime/crt/memory.c index b6346ecdc1cc..d73a66249107 100644 --- a/src/runtime/crt/memory.c +++ b/src/runtime/crt/memory.c @@ -209,7 +209,7 @@ typedef struct MemoryManager { * \return The virtual address */ void* MemoryManager_Alloc(MemoryManager * mgr, tvm_index_t size) { - void * data = 0; + char * data = 0; tvm_index_t npage = (size + kPageSize - 1) / kPageSize; MultiMap * free_map = &(mgr->free_map); IndexedEntry * it = free_map->lower_bound(free_map, npage); @@ -248,7 +248,7 @@ void* MemoryManager_Alloc(MemoryManager * mgr, tvm_index_t size) { * \return The virtual address */ void* MemoryManager_Realloc(MemoryManager * mgr, void * ptr, tvm_index_t size) { - void * data = ptr; + char * data = (char*)ptr; // NOLINT(*) PageTable * ptable = &(mgr->ptable); TLB * pmap = &(mgr->pmap); MultiMap * free_map = &(mgr->free_map); @@ -257,7 +257,7 @@ void* MemoryManager_Realloc(MemoryManager * mgr, void * ptr, tvm_index_t size) { if (ptr) { // get page size for given pointer CHECK_NE(pmap->count, 0, "invalid translation look-aside buffer."); - PageEntry * entry = pmap->find(pmap, ptr); + PageEntry * entry = pmap->find(pmap, (char*)ptr); // NOLINT(*) CHECK_NE(entry, 0, "no valid page entry found."); Page * pptr = &(entry->page); // if the page size is smaller than target page size, @@ -327,7 +327,7 @@ void* 
MemoryManager_Realloc(MemoryManager * mgr, void * ptr, tvm_index_t size) { void MemoryManager_Free(MemoryManager * mgr, void* ptr) { TLB * pmap = &(mgr->pmap); CHECK_NE(pmap->count, 0, "invalid translation look-aside buffer."); - PageEntry * entry = pmap->find(pmap, ptr); + PageEntry * entry = pmap->find(pmap, (char*)ptr); // NOLINT(*) CHECK_NE(entry, 0, "no valid page entry found."); Page * p = &(entry->page); MultiMap * free_map = &(mgr->free_map); diff --git a/tests/cpp/crt_memory_test.cc b/tests/cpp/crt_memory_test.cc new file mode 100644 index 000000000000..668208a1b79e --- /dev/null +++ b/tests/cpp/crt_memory_test.cc @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#define TVM_CRT_LOG_VIRT_MEM_SIZE 16 +#define TVM_CRT_PAGE_BYTES 4096 + +#include +#include +#include + +#include "../../src/runtime/crt/memory.c" + +TEST(CRTMemory, Alloc) { + for (int idx = 0; idx < 65536; idx++) { + void * a = vmalloc(1); + EXPECT_EQ(vleak_size, 1); + vfree(a); + EXPECT_EQ(vleak_size, 0); + } +} + +TEST(CRTMemory, Realloc) { + for (int idx = 0; idx < 65536; idx++) { + void * a = vrealloc(0, 1); + EXPECT_EQ(vleak_size, 1); + void * b = vrealloc(a, 1); + EXPECT_EQ(a, b); + EXPECT_EQ(vleak_size, 1); + vfree(a); + EXPECT_EQ(vleak_size, 0); + } +} + +int main(int argc, char ** argv) { + testing::InitGoogleTest(&argc, argv); + testing::FLAGS_gtest_death_test_style = "threadsafe"; + return RUN_ALL_TESTS(); +} From 24bf74d50e8c99d6dc47f4a5a74bcb64623b9f92 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Thu, 26 Mar 2020 17:51:57 +0800 Subject: [PATCH 11/16] lint Change-Id: Ibf3c06619ef296df5c49f3945cb6428777781d69 --- src/runtime/crt/memory.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/runtime/crt/memory.c b/src/runtime/crt/memory.c index d73a66249107..fc5917dca9ae 100644 --- a/src/runtime/crt/memory.c +++ b/src/runtime/crt/memory.c @@ -28,6 +28,8 @@ #include #include +#include + /*! 
Number of bits in a page */ #define TVM_CRT_PAGE_BITS (TVM_CRT_PAGE_BYTES << 3) @@ -223,8 +225,8 @@ void* MemoryManager_Alloc(MemoryManager * mgr, tvm_index_t size) { } else { PageTable * ptable = &(mgr->ptable); start = ptable->count; - CHECK_LE((start + npage), (sizeof(g_memory_pool) / kPageSize), - "insufficient memory, start=%ld, npage=%ld, total=%ld", + CHECK_LE((unsigned)(start + npage), (sizeof(g_memory_pool) / kPageSize), + "insufficient memory, start=%" PRId64 ", npage=%" PRId64 ", total=%" PRId64 "", start, npage, start + npage); /* insert page entry */ Page p = PageCreate(start, npage); @@ -274,8 +276,8 @@ void* MemoryManager_Realloc(MemoryManager * mgr, void * ptr, tvm_index_t size) { free_map->erase(free_map, it); } else { start = ptable->count; - CHECK_LE((start + npage), (sizeof(g_memory_pool) / kPageSize), - "insufficient memory, start=%ld, npage=%ld, total=%ld", + CHECK_LE((unsigned)(start + npage), (sizeof(g_memory_pool) / kPageSize), + "insufficient memory, start=%" PRId64 ", npage=%" PRId64 ", total=%" PRId64 "", start, npage, start + npage); Page p = PageCreate(start, npage); ptable->resize(ptable, start + npage, &p); @@ -300,8 +302,8 @@ void* MemoryManager_Realloc(MemoryManager * mgr, void * ptr, tvm_index_t size) { } else { PageTable * ptable = &(mgr->ptable); start = ptable->count; - CHECK_LE((start + npage), (sizeof(g_memory_pool) / kPageSize), - "insufficient memory, start=%ld, npage=%ld, total=%ld", + CHECK_LE((unsigned)(start + npage), (sizeof(g_memory_pool) / kPageSize), + "insufficient memory, start=%" PRId64 ", npage=%" PRId64 ", total=%" PRId64 "", start, npage, start + npage); /* insert page entry */ Page p = PageCreate(start, npage); From 30a50f681887f39d9ce7758518fd5b5f604c9ce9 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Fri, 3 Apr 2020 18:57:42 +0800 Subject: [PATCH 12/16] move logging.h to src --- src/runtime/crt/graph_runtime.c | 2 +- {include/tvm => src}/runtime/crt/logging.h | 2 +- src/runtime/crt/memory.c | 3 ++- 3 
files changed, 4 insertions(+), 3 deletions(-) rename {include/tvm => src}/runtime/crt/logging.h (98%) diff --git a/src/runtime/crt/graph_runtime.c b/src/runtime/crt/graph_runtime.c index e1bcb026a357..b5ed3b70281b 100644 --- a/src/runtime/crt/graph_runtime.c +++ b/src/runtime/crt/graph_runtime.c @@ -22,9 +22,9 @@ * \brief implement graph runtime in pure C */ -#include #include +#include "logging.h" #include "graph_runtime.h" #ifndef MAX diff --git a/include/tvm/runtime/crt/logging.h b/src/runtime/crt/logging.h similarity index 98% rename from include/tvm/runtime/crt/logging.h rename to src/runtime/crt/logging.h index 1ff90fb510fc..2c58834ca6a9 100644 --- a/include/tvm/runtime/crt/logging.h +++ b/src/runtime/crt/logging.h @@ -18,7 +18,7 @@ */ /*! - * \file tvm/runtime/crt/loggin.h + * \file runtime/crt/loggin.h * \brief A replacement of the dmlc logging system that avoids * the usage of GLOG and C++ headers */ diff --git a/src/runtime/crt/memory.c b/src/runtime/crt/memory.c index fc5917dca9ae..24175f6d6e55 100644 --- a/src/runtime/crt/memory.c +++ b/src/runtime/crt/memory.c @@ -25,11 +25,12 @@ */ #include -#include #include #include +#include "logging.h" + /*! Number of bits in a page */ #define TVM_CRT_PAGE_BITS (TVM_CRT_PAGE_BYTES << 3) From 2eda3f767b7c751fcaef40f9225a82ef761ba3aa Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Fri, 3 Apr 2020 19:11:55 +0800 Subject: [PATCH 13/16] fix an error in macOS --- src/runtime/crt/module.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/runtime/crt/module.h b/src/runtime/crt/module.h index e527ec01e5d3..f640f1e1f503 100644 --- a/src/runtime/crt/module.h +++ b/src/runtime/crt/module.h @@ -28,7 +28,6 @@ #include struct TVMPackedFunc; -typedef struct TVMPackedFunc TVMPackedFunc; /*! * \brief Module container of TVM. 
From 799df935d2ccb198bd984258b554568381747a66 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Mon, 6 Apr 2020 10:45:26 +0800 Subject: [PATCH 14/16] remove logging.h --- tests/cpp/crt_memory_test.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/cpp/crt_memory_test.cc b/tests/cpp/crt_memory_test.cc index 668208a1b79e..1c129166f122 100644 --- a/tests/cpp/crt_memory_test.cc +++ b/tests/cpp/crt_memory_test.cc @@ -21,7 +21,6 @@ #define TVM_CRT_PAGE_BYTES 4096 #include -#include #include #include "../../src/runtime/crt/memory.c" From 837b23399262265d9642f04bd85711807ee868c4 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Mon, 6 Apr 2020 15:17:23 +0800 Subject: [PATCH 15/16] use cflags for gcc --- apps/bundle_deploy/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/bundle_deploy/Makefile b/apps/bundle_deploy/Makefile index d86ba54e1f29..73f9d75b12f8 100644 --- a/apps/bundle_deploy/Makefile +++ b/apps/bundle_deploy/Makefile @@ -57,11 +57,11 @@ $(build_dir)/test_dynamic: test.cc ${build_dir}/test_graph.json ${build_dir}/tes $(build_dir)/demo_static: demo_static.c ${build_dir}/bundle_static.o ${build_dir}/model.o ${build_dir}/graph.json.c ${build_dir}/params.bin.c @mkdir -p $(@D) - gcc $(PKG_CXXFLAGS) -o $@ demo_static.c ${build_dir}/bundle_static.o ${build_dir}/model.o -lm + gcc $(PKG_CFLAGS) -o $@ demo_static.c ${build_dir}/bundle_static.o ${build_dir}/model.o -lm $(build_dir)/test_static: test_static.c ${build_dir}/bundle_static.o ${build_dir}/test_model.o @mkdir -p $(@D) - gcc $(PKG_CXXFLAGS) -o $@ $^ + gcc $(PKG_CFLAGS) -o $@ $^ # Serialize our graph.json file. 
$(build_dir)/graph.json.c: $(build_dir)/graph.json From 1bcd21269b3695cd6b2f60352a5981f885e548a9 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Mon, 6 Apr 2020 15:19:25 +0800 Subject: [PATCH 16/16] fix compilation error --- src/runtime/crt/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/crt/module.h b/src/runtime/crt/module.h index f640f1e1f503..9ef287d650d8 100644 --- a/src/runtime/crt/module.h +++ b/src/runtime/crt/module.h @@ -41,7 +41,7 @@ typedef struct TVMModule { * * This function will return PackedFunc(nullptr) if function do not exist. */ - void (*GetFunction)(struct TVMModule * mod, const char * name, TVMPackedFunc * pf); + void (*GetFunction)(struct TVMModule * mod, const char * name, struct TVMPackedFunc * pf); } TVMModule; #endif // TVM_RUNTIME_CRT_MODULE_H_