apps/bundle_deploy/bundle.c: 3 additions, 1 deletion
@@ -32,8 +32,10 @@

#define CRT_MEMORY_NUM_PAGES 16384
#define CRT_MEMORY_PAGE_SIZE_LOG2 10
#define CRT_MEMORY_PAGE_SIZE_BYTES (1 << CRT_MEMORY_PAGE_SIZE_LOG2)

static uint8_t g_crt_memory[CRT_MEMORY_NUM_PAGES * (1 << CRT_MEMORY_PAGE_SIZE_LOG2)];
static uint8_t g_crt_memory[CRT_MEMORY_NUM_PAGES * CRT_MEMORY_PAGE_SIZE_BYTES]
__attribute__((aligned(CRT_MEMORY_PAGE_SIZE_BYTES)));
static MemoryManagerInterface* g_memory_manager;

/*! \brief macro to do C API call */
apps/bundle_deploy/bundle_static.c: 3 additions, 1 deletion
@@ -33,8 +33,10 @@

#define CRT_MEMORY_NUM_PAGES 16384
#define CRT_MEMORY_PAGE_SIZE_LOG2 10
#define CRT_MEMORY_PAGE_SIZE_BYTES (1 << CRT_MEMORY_PAGE_SIZE_LOG2)

static uint8_t g_crt_memory[CRT_MEMORY_NUM_PAGES * (1 << CRT_MEMORY_PAGE_SIZE_LOG2)];
static uint8_t g_crt_memory[CRT_MEMORY_NUM_PAGES * CRT_MEMORY_PAGE_SIZE_BYTES]
__attribute__((aligned(CRT_MEMORY_PAGE_SIZE_BYTES)));
static MemoryManagerInterface* g_memory_manager;

/*! \brief macro to do C API call */
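A side note on the alignment attribute used in bundle.c and bundle_static.c above: __attribute__((aligned(...))) is a GCC/Clang extension, which is fine for the toolchains these demo apps target. Purely as a hedged sketch (not part of this change), a strictly C11 toolchain that supports extended alignments could express the same thing with the _Alignas specifier:

    static _Alignas(CRT_MEMORY_PAGE_SIZE_BYTES)
        uint8_t g_crt_memory[CRT_MEMORY_NUM_PAGES * CRT_MEMORY_PAGE_SIZE_BYTES];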
apps/bundle_deploy/test.cc: 6 additions, 2 deletions
@@ -23,6 +23,7 @@
#include <sys/time.h>
#include <tvm/runtime/c_runtime_api.h>

#include <cstdlib>
#include <iostream>
#include <random>
#include <vector>
@@ -52,7 +53,7 @@ char* read_all_or_die(const char* name, const char* file_path, size_t* out_size)
*out_size = st.st_size;
}

char* data = (char*)malloc(st.st_size);
char* data = (char*)std::aligned_alloc(64, st.st_size);
FILE* fp = fopen(file_path, "rb");
size_t bytes_to_read = st.st_size;
size_t bytes_read = 0;
@@ -129,7 +130,7 @@ int main(int argc, char** argv) {
ftvm_runtime_run(handle);
gettimeofday(&t3, 0);

float output_storage[10 * 5];
float* output_storage = static_cast<float*>(std::aligned_alloc(64, 10 * 5 * sizeof(float)));
std::vector<int64_t> output_shape = {10, 5};
DLTensor output;
output.data = output_storage;
@@ -162,6 +163,9 @@ int main(int argc, char** argv) {
(t4.tv_sec - t3.tv_sec) * 1000.0f + (t4.tv_usec - t3.tv_usec) / 1000.f,
(t5.tv_sec - t4.tv_sec) * 1000.0f + (t5.tv_usec - t4.tv_usec) / 1000.f);

free(output_storage);
free(result_storage);
free(input_storage);
free(json_data);
free(params_data);
dlclose(bundle);
apps/bundle_deploy/test_static.c: 6 additions, 3 deletions
@@ -54,12 +54,12 @@ int main(int argc, char** argv) {
void* handle = tvm_runtime_create(json_data, params_data, params_size, argv[0]);
gettimeofday(&t1, 0);

float input_storage[10 * 5];
float* input_storage = aligned_alloc(64, 10 * 5 * sizeof(float));
fp = fopen(argv[1], "rb");
fread(input_storage, 10 * 5, 4, fp);
fclose(fp);

float result_storage[10 * 5];
float* result_storage = aligned_alloc(64, 10 * 5 * sizeof(float));
fp = fopen(argv[2], "rb");
fread(result_storage, 10 * 5, 4, fp);
fclose(fp);
@@ -82,7 +82,7 @@ int main(int argc, char** argv) {
tvm_runtime_run(handle);
gettimeofday(&t3, 0);

float output_storage[10 * 5];
float* output_storage = aligned_alloc(64, 10 * 5 * sizeof(float));
DLTensor output;
output.data = output_storage;
DLDevice out_dev = {kDLCPU, 0};
@@ -117,6 +117,9 @@ int main(int argc, char** argv) {
(t4.tv_sec - t3.tv_sec) * 1000 + (t4.tv_usec - t3.tv_usec) / 1000.f,
(t5.tv_sec - t4.tv_sec) * 1000 + (t5.tv_usec - t4.tv_usec) / 1000.f);

free(output_storage);
free(result_storage);
free(input_storage);
free(json_data);
free(params_data);

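A hedged portability note on the aligned_alloc / std::aligned_alloc calls introduced in test.cc and test_static.c above: C11 and C++17 formally expect the requested size to be an integral multiple of the alignment, and 10 * 5 * sizeof(float) = 200 bytes is not a multiple of 64 (nor is st.st_size in test.cc in general). glibc accepts such sizes, which is why these test programs work as written, but a defensive variant would round the request up first. The helper below is an illustrative sketch, not part of the diff; memory from aligned_alloc can still be released with plain free(), so the free() calls added above are correct.

    #include <stdlib.h>

    /* Round nbytes up to the next multiple of a power-of-two alignment
     * before calling aligned_alloc, staying within the letter of C11. */
    static void* aligned_alloc_rounded(size_t alignment, size_t nbytes) {
      size_t rounded = (nbytes + alignment - 1) & ~(alignment - 1);
      return aligned_alloc(alignment, rounded);
    }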
cmake/utils/CRTConfig.cmake: 1 addition, 0 deletions
@@ -25,6 +25,7 @@ function(generate_crt_config platform output_path)
set(TVM_CRT_MAX_STRLEN_DLTYPE 10)
set(TVM_CRT_MAX_STRLEN_FUNCTION_NAME 120)
set(TVM_CRT_MAX_STRLEN_PARAM_NAME 80)
set(TVM_CRT_ALLOC_ALIGNMENT 64)

if("${platform}" STREQUAL "zephyr")
set(TVM_CRT_MAX_PACKET_SIZE_BYTES 512)
rust/tvm-graph-rt/tests/test_tvm_basic/src/build_test_lib.py: 15 additions, 9 deletions
@@ -23,19 +23,25 @@

import tvm
from tvm.relay.backend import Runtime
from tvm import te
from tvm.script import tir as T


def main():
n = te.var("n")
A = te.placeholder((n,), name="A")
B = te.placeholder((n,), name="B")
C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name="C")
s = tvm.te.create_schedule(C.op)
s[C].parallel(s[C].op.axis[0])
@T.prim_func
def func(var_A: T.handle, var_B: T.handle, var_C: T.handle):
T.func_attr({"global_symbol": "main", "tir.noalias": T.bool(True)})
n = T.int32()
A = T.match_buffer(var_A, (n,), align=1)
B = T.match_buffer(var_B, (n,), align=1)
C = T.match_buffer(var_C, (n,), align=1)
for i in T.parallel(n):
with T.block("C"):
vi = T.axis.spatial(n, i)
C[vi] = A[vi] + B[vi]

runtime = Runtime("cpp", {"system-lib": True})
print(tvm.lower(s, [A, B, C], simple_mode=True))
tvm.build(s, [A, B, C], "llvm", runtime=runtime).save(osp.join(sys.argv[1], "test.o"))
print(tvm.lower(func, simple_mode=True))
tvm.build(func, target="llvm", runtime=runtime).save(osp.join(sys.argv[1], "test.o"))


if __name__ == "__main__":
rust/tvm-graph-rt/tests/test_tvm_dso/src/build_test_lib.py: 15 additions, 9 deletions
@@ -22,20 +22,26 @@
import sys

import tvm
from tvm import te
from tvm.contrib import cc
from tvm.script import tir as T


def main():
n = te.var("n")
A = te.placeholder((n,), name="A")
B = te.placeholder((n,), name="B")
C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name="C")
s = tvm.te.create_schedule(C.op)
s[C].parallel(s[C].op.axis[0])
print(tvm.lower(s, [A, B, C], simple_mode=True))
@T.prim_func
def func(var_A: T.handle, var_B: T.handle, var_C: T.handle):
T.func_attr({"global_symbol": "main", "tir.noalias": T.bool(True)})
n = T.int32()
A = T.match_buffer(var_A, (n,), align=1)
B = T.match_buffer(var_B, (n,), align=1)
C = T.match_buffer(var_C, (n,), align=1)
for i in T.parallel(n):
with T.block("C"):
vi = T.axis.spatial(n, i)
C[vi] = A[vi] + B[vi]

print(tvm.lower(func, simple_mode=True))
obj_file = osp.join(sys.argv[1], "test.o")
tvm.build(s, [A, B, C], "llvm").save(obj_file)
tvm.build(func, "llvm").save(obj_file)
cc.create_shared(osp.join(sys.argv[1], "test.so"), [obj_file])


rust/tvm-graph-rt/tests/test_wasm32/src/build_test_lib.py: 15 additions, 9 deletions
@@ -22,20 +22,26 @@
import sys

import tvm
from tvm import te
from tvm.relay.backend import Runtime
from tvm.script import tir as T


def main():
n = te.var("n")
A = te.placeholder((n,), name="A")
B = te.placeholder((n,), name="B")
C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name="C")
s = tvm.te.create_schedule(C.op)
s[C].parallel(s[C].op.axis[0])
print(tvm.lower(s, [A, B, C], simple_mode=True))
@T.prim_func
def func(var_A: T.handle, var_B: T.handle, var_C: T.handle):
T.func_attr({"global_symbol": "main", "tir.noalias": T.bool(True)})
n = T.int32()
A = T.match_buffer(var_A, (n,), align=1)
B = T.match_buffer(var_B, (n,), align=1)
C = T.match_buffer(var_C, (n,), align=1)
for i in T.parallel(n):
with T.block("C"):
vi = T.axis.spatial(n, i)
C[vi] = A[vi] + B[vi]

print(tvm.lower(func, simple_mode=True))
runtime = Runtime("cpp", {"system-lib": True})
tvm.build(s, [A, B, C], "llvm -mtriple=wasm32-unknown-unknown", runtime=runtime).save(
tvm.build(func, target="llvm -mtriple=wasm32-unknown-unknown", runtime=runtime).save(
osp.join(sys.argv[1], "test.o")
)

rust/tvm/tests/basics/src/tvm_add.py: 21 additions, 10 deletions
@@ -20,23 +20,34 @@
import sys

import tvm
from tvm import te
from tvm.contrib import cc
from tvm.script import tir as T


def main(target, out_dir):
n = te.var("n")
A = te.placeholder((n,), name="A")
B = te.placeholder((n,), name="B")
C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")
s = te.create_schedule(C.op)
@T.prim_func
def func(var_A: T.handle, var_B: T.handle, var_C: T.handle):
T.func_attr({"global_symbol": "main", "tir.noalias": T.bool(True)})
n = T.int32()
A = T.match_buffer(var_A, (n,), align=1)
B = T.match_buffer(var_B, (n,), align=1)
C = T.match_buffer(var_C, (n,), align=1)
# with T.block("root"):
for i in range(n):
with T.block("C"):
v_i = T.axis.spatial(n, i)
T.reads(A[v_i], B[v_i])
T.writes(C[v_i])
C[v_i] = A[v_i] + B[v_i]

if target == "cuda":
bx, tx = s[C].split(C.op.axis[0], factor=64)
s[C].bind(bx, te.thread_axis("blockIdx.x"))
s[C].bind(tx, te.thread_axis("threadIdx.x"))
sch = tvm.tir.Schedule(func)
i, j = sch.split(sch.get_loops("C")[0], [None, 64])
sch.bind(i, "blockIdx.x")
sch.bind(j, "threadIdx.x")
func = sch.mod["main"]

fadd = tvm.build(s, [A, B, C], tvm.target.Target(target, host="llvm"), name="myadd")
fadd = tvm.build(func, target=tvm.target.Target(target, host="llvm"), name="myadd")
fadd.save(osp.join(out_dir, "test_add.o"))
if target == "cuda":
fadd.imported_modules[0].save(osp.join(out_dir, "test_add.ptx"))
src/runtime/crt/aot_executor_module/aot_executor_module.c: 1 addition, 1 deletion
@@ -154,7 +154,7 @@ int32_t TVMAotExecutorModule_GetInputName(TVMValue* args, int* tcodes, int nargs
return kTvmErrorFunctionCallNumArguments;
}

char* name;
const char* name;
int ret = TVMAotExecutor_GetInputName(aot_executor.executor, args[0].v_int64, &name);
if (ret < 0) {
return kTvmErrorExecutorModuleNoSuchInput;
src/runtime/crt/common/crt_runtime_api.c: 25 additions, 5 deletions
@@ -24,6 +24,7 @@
#include <math.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -90,10 +91,26 @@ int TVMArrayFree(TVMArrayHandle handle) {

int TVMDeviceAllocDataSpace(DLDevice dev, size_t nbytes, size_t alignment, DLDataType type_hint,
void** out_data) {
if (alignment != 1) {
nbytes = (nbytes + alignment - 1) / alignment * alignment;
}
return TVMPlatformMemoryAllocate(nbytes, dev, out_data);
// TVMPlatformMemoryAllocate does not guarantee any particular alignment
// of the allocation. Therefore, deliberately over-allocate by
// sizeof(void*) + (alignment - 1) bytes and return a suitably aligned
// region inside the larger allocation.
size_t total_bytes = nbytes + sizeof(void*) + (alignment - 1);
void* allocated_buf;
int err = TVMPlatformMemoryAllocate(total_bytes, dev, &allocated_buf);
if (err) return err;

void* first_allowed_data_ptr = ((uint8_t*)allocated_buf) + sizeof(void*);
uintptr_t offset = (alignment - ((uintptr_t)first_allowed_data_ptr) % alignment);
void* data_ptr = first_allowed_data_ptr + offset;

// Must keep a pointer to the original allocation, so that it can be
// passed to TVMPlatformMemoryFree.
((void**)data_ptr)[-1] = allocated_buf;

*out_data = data_ptr;

return err;
}

int TVMDeviceAllocDataSpaceWithScope(DLDevice dev, int ndim, const int64_t* shape, DLDataType dtype,
@@ -110,7 +127,10 @@ int TVMDeviceAllocDataSpaceWithScope(DLDevice dev, int ndim, const int64_t* shap
return TVMDeviceAllocDataSpace(dev, nbytes, align, dtype, out_data);
}

int TVMDeviceFreeDataSpace(DLDevice dev, void* ptr) { return TVMPlatformMemoryFree(ptr, dev); }
int TVMDeviceFreeDataSpace(DLDevice dev, void* ptr) {
void* allocated_buf = ((void**)ptr)[-1];
return TVMPlatformMemoryFree(allocated_buf, dev);
}

TVM_ATTRIBUTE_UNUSED static bool IsContiguous(const DLTensor* arr) {
if (arr->strides == NULL) return true;
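The TVMDeviceAllocDataSpace / TVMDeviceFreeDataSpace pair above relies on a hidden-header convention: the pointer returned to callers is aligned, and the sizeof(void*) bytes immediately before it hold the raw pointer that came back from TVMPlatformMemoryAllocate, so the free path can recover and release it. This is also why ndarray.c and graph_executor.c below switch their frees from TVMPlatformMemoryFree to TVMDeviceFreeDataSpace: a buffer obtained through TVMDeviceAllocDataSpace must be released through the matching path. A minimal standalone sketch of the same pattern, using malloc/free in place of the platform hooks and assuming a power-of-two alignment no smaller than sizeof(void*) (illustrative only, not CRT API):

    #include <stdint.h>
    #include <stdlib.h>

    static void* alloc_aligned(size_t nbytes, size_t alignment) {
      /* Worst-case overhead: sizeof(void*) for the stashed pointer plus
       * (alignment - 1) bytes of padding. */
      void* raw = malloc(nbytes + sizeof(void*) + alignment - 1);
      if (raw == NULL) return NULL;
      uintptr_t base = (uintptr_t)raw + sizeof(void*);
      uintptr_t data = (base + alignment - 1) & ~(uintptr_t)(alignment - 1);
      ((void**)data)[-1] = raw;  /* hidden header: the original allocation */
      return (void*)data;
    }

    static void free_aligned(void* data) {
      free(((void**)data)[-1]);  /* recover and release the raw pointer */
    }

One detail worth noting: this sketch rounds up with a power-of-two mask, so an already-aligned candidate pointer costs no padding, whereas the modulo form in the diff always advances by at least one byte (and by a full alignment when the candidate is already aligned), which is worth keeping in mind when sizing the over-allocation.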
src/runtime/crt/common/ndarray.c: 3 additions, 3 deletions
@@ -63,8 +63,8 @@ int TVMNDArray_Empty(int32_t ndim, const tvm_index_t* shape, DLDataType dtype, D
return status;
}
int total_elem_bytes = TVMNDArray_DataSizeBytes(array);
array->dl_tensor.data =
TVMBackendAllocWorkspace(kDLCPU, 0, total_elem_bytes, dtype.code, dtype.bits);
TVMDeviceAllocDataSpace(dev, total_elem_bytes, TVM_CRT_ALLOC_ALIGNMENT, dtype,
&array->dl_tensor.data);
memset(array->dl_tensor.data, 0, total_elem_bytes);
return 0;
}
@@ -167,7 +167,7 @@ int TVMNDArray_Release(TVMNDArray* arr) {
return 0;
}

err = TVMPlatformMemoryFree(arr->dl_tensor.data, dev);
err = TVMDeviceFreeDataSpace(dev, arr->dl_tensor.data);
if (err != kTvmErrorNoError) {
return err;
}
src/runtime/crt/crt_config.h.template: 3 additions, 0 deletions
@@ -54,6 +54,9 @@
/*! Maximum supported string length in parameter names */
#define TVM_CRT_MAX_STRLEN_PARAM_NAME ${TVM_CRT_MAX_STRLEN_PARAM_NAME}

/*! Alignment (in bytes) for data buffer allocation */
#define TVM_CRT_ALLOC_ALIGNMENT ${TVM_CRT_ALLOC_ALIGNMENT}

/*! Enable checks to enforce the stack allocator with a FIFO ordering. Off by default */
// #define TVM_CRT_STACK_ALLOCATOR_ENABLE_FIFO_CHECK

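With the default added to CRTConfig.cmake above, the generated crt_config.h would expand the template line to, roughly:

    /*! Alignment (in bytes) for data buffer allocation */
    #define TVM_CRT_ALLOC_ALIGNMENT 64

which is the constant that ndarray.c above passes to TVMDeviceAllocDataSpace.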
src/runtime/crt/graph_executor/graph_executor.c: 3 additions, 8 deletions
@@ -841,15 +841,10 @@ int TVMGraphExecutor_LoadParams(TVMGraphExecutor* executor, const char* param_bl
status = -1;
}

if (executor->data_entry[eid].dl_tensor.shape) {
err = TVMPlatformMemoryFree(executor->data_entry[eid].dl_tensor.shape, dev);
if (err != kTvmErrorNoError) {
status = -1;
}
executor->data_entry[eid].dl_tensor.shape = 0;
}
// The memory in the executor->data_entry[eid].dl_tensor.shape is
// owned by attrs->shape, and should not be freed here.
if (executor->data_entry[eid].dl_tensor.data) {
err = TVMPlatformMemoryFree(executor->data_entry[eid].dl_tensor.data, dev);
err = TVMDeviceFreeDataSpace(dev, executor->data_entry[eid].dl_tensor.data);
if (err != kTvmErrorNoError) {
status = -1;
}
src/runtime/crt/host/CMakeLists.txt.template: 10 additions, 2 deletions
@@ -34,18 +34,26 @@ set(CRT_LIBS microtvm_rpc_server
memory
)


add_library(tvm_model)

# Build CRT libraries
foreach(crt_lib_name ${CRT_LIBS})
add_library(${crt_lib_name})
file(GLOB_RECURSE crt_lib_srcs ${CRT_LIB_BASE}/${crt_lib_name}/*.c ${CRT_LIB_BASE}/${crt_lib_name}/*.cc)
target_sources(${crt_lib_name} PRIVATE ${crt_lib_srcs})
target_include_directories(${crt_lib_name} PRIVATE crt_config crt/include)
target_compile_definitions(${crt_lib_name} PRIVATE -DTVM_HOST_USE_GRAPH_EXECUTOR_MODULE)
target_link_libraries(main PRIVATE ${crt_lib_name})
# Linking each CRT library and the model against each other makes CMake
# list the static libraries twice on the link command, which resolves the
# circular dependencies between the libraries and the model.
#
# See https://cmake.org/cmake/help/latest/command/target_link_libraries.html#cyclic-dependencies-of-static-libraries
target_link_libraries(tvm_model PRIVATE ${crt_lib_name})
target_link_libraries(${crt_lib_name} PRIVATE tvm_model)
endforeach(crt_lib_name ${CRT_LIBS})

# Build model files
add_library(tvm_model)
file(GLOB_RECURSE tvm_model_srcs model/codegen/host/src/*.c model/codegen/host/lib/*.o)
target_sources(tvm_model PRIVATE ${tvm_model_srcs})
target_include_directories(tvm_model PRIVATE ${CMAKE_SOURCE_DIR}/include crt_config crt/include)