Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 22 additions & 13 deletions include/tvm/runtime/c_runtime_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,23 @@ TVM_DLL int TVMByteArrayFree(TVMByteArray* arr);
TVM_DLL int TVMDeviceAllocDataSpace(DLContext ctx, size_t nbytes, size_t alignment,
DLDataType type_hint, void** out_data);

/*!
 * \brief Allocate a data space on device with special memory scope.
 * \note The memory could use a special multi-dimensional memory layout.
 *       That is why we pass shape and dtype instead of raw number of bytes.
 * \param ctx The device context to perform operation.
 * \param ndim The number of dimensions of the tensor.
 * \param shape The shape of the tensor.
 * \param dtype The type of elements.
 * \param mem_scope The memory scope of the tensor,
 *        can be nullptr, which indicates the default global DRAM.
 * \param out_data The allocated device pointer.
 * \return 0 when success, -1 when failure happens
 */
TVM_DLL int TVMDeviceAllocDataSpaceWithScope(DLContext ctx, int ndim, const int64_t* shape,
                                             DLDataType dtype, const char* mem_scope,
                                             void** out_data);

/*!
* \brief Free a data space on device.
* \param ctx The device context to perform operation.
Expand All @@ -569,22 +586,14 @@ TVM_DLL int TVMDeviceFreeDataSpace(TVMContext ctx, void* ptr);

/*!
* \brief Copy data from one place to another.
* \param from The source array.
* \param from_offset The byte offeset in the from.
* \param to The target array.
* \param to_offset The byte offset in the to.
* \param num_bytes The size of the memory in bytes
* \param ctx_from The source context
* \param ctx_to The target context
* \param type_hint The type of elements, only neded by certain backends.
* can be useful for cross device endian converison.
* \note This API is designed to support special memory with shape dependent layout.
* We pass in DLTensor* with shape information to support these cases.
* \param from The source tensor.
* \param to The target tensor.
* \param stream Optional stream object.
* \return 0 when success, -1 when failure happens.
*/
TVM_DLL int TVMDeviceCopyDataFromTo(const void* from, size_t from_offset, void* to,
size_t to_offset, size_t num_bytes, TVMContext ctx_from,
TVMContext ctx_to, DLDataType type_hint,
TVMStreamHandle stream);
TVM_DLL int TVMDeviceCopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream);

/*!
* \brief Check that an object is derived from another.
Expand Down
42 changes: 32 additions & 10 deletions include/tvm/runtime/device_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,17 @@ class TVM_DLL DeviceAPI {
*/
virtual void* AllocDataSpace(TVMContext ctx, size_t nbytes, size_t alignment,
DLDataType type_hint) = 0;
  /*!
   * \brief Allocate a data space on device with memory scope support.
   * \param ctx The device context to perform operation.
   * \param ndim The number of dimensions of the allocated tensor.
   * \param shape The shape of the allocated tensor.
   * \param dtype The type of elements.
   * \param mem_scope The memory scope of the allocated tensor.
   *        NullOpt or "global" is served by the base class as a flat
   *        allocation; other scopes require a device-specific override.
   * \return The allocated device pointer.
   */
  virtual void* AllocDataSpace(TVMContext ctx, int ndim, const int64_t* shape, DLDataType dtype,
                               Optional<String> mem_scope = NullOpt);
/*!
* \brief Free a data space on device.
* \param ctx The device context to perform operation.
Expand All @@ -98,20 +109,13 @@ class TVM_DLL DeviceAPI {
virtual void FreeDataSpace(TVMContext ctx, void* ptr) = 0;
/*!
* \brief copy data from one place to another
* \note This API is designed to support special memory with shape dependent layout.
* We pass in DLTensor* with shape information to support these cases.
* \param from The source array.
* \param from_offset The byte offeset in the from.
* \param to The target array.
* \param to_offset The byte offset in the to.
* \param num_bytes The size of the memory in bytes
* \param ctx_from The source context
* \param ctx_to The target context
* \param type_hint The type of elements, only neded by certain backends.
* can be useful for cross device endian converison.
* \param stream Optional stream object.
*/
virtual void CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset,
size_t num_bytes, TVMContext ctx_from, TVMContext ctx_to,
DLDataType type_hint, TVMStreamHandle stream) = 0;
virtual void CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream);
/*!
* \brief Create a new stream of execution.
*
Expand Down Expand Up @@ -194,6 +198,24 @@ class TVM_DLL DeviceAPI {
static bool NeedSetDeviceContext(int device_type) {
return device_type != kDLCPU && device_type != kDLMicroDev;
}

protected:
  /*!
   * \brief copy data from one place to another
   * \param from The source array.
   * \param from_offset The byte offset in the from.
   * \param to The target array.
   * \param to_offset The byte offset in the to.
   * \param num_bytes The size of the memory in bytes
   * \param ctx_from The source context
   * \param ctx_to The target context
   * \param type_hint The type of elements, only needed by certain backends.
   *        can be useful for cross device endian conversion.
   * \param stream Optional stream object.
   */
  virtual void CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset,
                              size_t num_bytes, TVMContext ctx_from, TVMContext ctx_to,
                              DLDataType type_hint, TVMStreamHandle stream);
};

/*! \brief The device type bigger than this is RPC device */
Expand Down
7 changes: 5 additions & 2 deletions include/tvm/runtime/ndarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#define TVM_RUNTIME_NDARRAY_H_

#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/container.h>
#include <tvm/runtime/data_type.h>
#include <tvm/runtime/object.h>
#include <tvm/runtime/serializer.h>
Expand Down Expand Up @@ -133,10 +134,12 @@ class NDArray : public ObjectRef {
* \brief Create an empty NDArray.
* \param shape The shape of the new array.
* \param dtype The data type of the new array.
* \param ctx The context of the Array.
* \param ctx The context of the array.
* \param mem_scope The memory scope of the array.
* \return The created Array
*/
TVM_DLL static NDArray Empty(std::vector<int64_t> shape, DLDataType dtype, DLContext ctx);
TVM_DLL static NDArray Empty(std::vector<int64_t> shape, DLDataType dtype, DLContext ctx,
Optional<String> mem_scope = NullOpt);
/*!
* \brief Create a NDArray backed by a dlpack tensor.
*
Expand Down
38 changes: 19 additions & 19 deletions python/tvm/runtime/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from tvm._ffi.base import _LIB, check_call, c_array, string_types, _FFI_MODE
from tvm._ffi.runtime_ctypes import DataType, TVMContext, TVMArray, TVMArrayHandle
from tvm._ffi.runtime_ctypes import DataTypeCode, tvm_shape_index_t
from . import _ffi_api

try:
# pylint: disable=wrong-import-position
Expand Down Expand Up @@ -253,42 +254,41 @@ def numpyasarray(np_data):
return arr, shape


def empty(shape, dtype="float32", ctx=context(1, 0)):
def empty(shape, dtype="float32", ctx=context(1, 0), mem_scope=None):
"""Create an empty array given shape and device

Parameters
----------
shape : tuple of int
The shape of the array
The shape of the array.

dtype : type or str
The data type of the array.

ctx : TVMContext
The context of the array
The context of the array.

mem_scope : Optional[str]
The memory scope of the array.

Returns
-------
arr : tvm.nd.NDArray
The array tvm supported.
"""
shape = c_array(tvm_shape_index_t, shape)
ndim = ctypes.c_int(len(shape))
handle = TVMArrayHandle()
shape_imm = []
for s in shape:
if isinstance(s, tvm.tir.IntImm):
shape_imm.append(s.value)
else:
shape_imm.append(int(s))
arr = np.array(shape_imm, "int64")
ptr = arr.ctypes.data_as(ctypes.POINTER(ctypes.c_int64))
shape_ptr = ctypes.cast(ptr, ctypes.c_void_p)
ndim = len(shape_imm)
dtype = DataType(dtype)
check_call(
_LIB.TVMArrayAlloc(
shape,
ndim,
ctypes.c_int(dtype.type_code),
ctypes.c_int(dtype.bits),
ctypes.c_int(dtype.lanes),
ctx.device_type,
ctx.device_id,
ctypes.byref(handle),
)
)
return _make_array(handle, False, False)
arr = _ffi_api.TVMArrayAllocWithScope(shape_ptr, ndim, dtype, ctx, mem_scope)
return arr


def from_dlpack(dltensor):
Expand Down
64 changes: 59 additions & 5 deletions src/runtime/c_runtime_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,50 @@ void* DeviceAPI::AllocWorkspace(TVMContext ctx, size_t size, DLDataType type_hin
return AllocDataSpace(ctx, size, kTempAllocaAlignment, type_hint);
}

// Minimum alignment for a buffer of the given element type: the element's
// byte size, but never below the runtime-wide kAllocAlignment floor.
static size_t GetDataAlignment(const DLDataType dtype) {
  const size_t elem_bytes = (dtype.bits / 8) * dtype.lanes;
  return elem_bytes < kAllocAlignment ? kAllocAlignment : elem_bytes;
}

void* DeviceAPI::AllocDataSpace(TVMContext ctx, int ndim, const int64_t* shape, DLDataType dtype,
                                Optional<String> mem_scope) {
  // Only the default flat ("global") scope can be served by the base class;
  // any other scope needs a device-specific override of this method.
  if (mem_scope.defined() && mem_scope.value() != "global") {
    LOG(FATAL) << "Device does not support allocate data space with "
               << "specified memory scope: " << mem_scope.value();
    return nullptr;
  }
  // Describe the request as a compact tensor so GetDataSize can derive the
  // flat byte count, then redirect to the nbytes-based allocator.
  DLTensor desc;
  desc.data = nullptr;
  desc.ctx = ctx;
  desc.ndim = ndim;
  desc.dtype = dtype;
  desc.shape = const_cast<int64_t*>(shape);
  desc.strides = nullptr;
  desc.byte_offset = 0;
  return AllocDataSpace(ctx, GetDataSize(desc), GetDataAlignment(desc.dtype), dtype);
}

void DeviceAPI::CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) {
  // by default, we can always redirect to the flat memory copy operation.
  size_t nbytes = GetDataSize(*from);
  // Source and destination must describe the same number of bytes.
  ICHECK_EQ(nbytes, GetDataSize(*to));

  // Strided (non-compact) tensors would need an element-wise copy, which the
  // flat byte-level copy below cannot express.
  ICHECK(IsContiguous(*from) && IsContiguous(*to))
      << "CopyDataFromTo only support contiguous array for now";
  CopyDataFromTo(from->data, from->byte_offset, to->data, to->byte_offset, nbytes, from->ctx,
                 to->ctx, from->dtype, stream);
}

// Default implementation of the protected byte-level copy hook: a backend
// that supports copies must override this; otherwise any copy is fatal.
void DeviceAPI::CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset,
                               size_t num_bytes, TVMContext ctx_from, TVMContext ctx_to,
                               DLDataType type_hint, TVMStreamHandle stream) {
  LOG(FATAL) << "Device does not support CopyDataFromTo.";
}

// By default a workspace is an ordinary data allocation, so freeing it is
// just freeing its data space.
void DeviceAPI::FreeWorkspace(TVMContext ctx, void* ptr) { FreeDataSpace(ctx, ptr); }

TVMStreamHandle DeviceAPI::CreateStream(TVMContext ctx) {
Expand Down Expand Up @@ -553,19 +597,29 @@ int TVMDeviceAllocDataSpace(DLContext ctx, size_t nbytes, size_t alignment, DLDa
API_END();
}

// C API wrapper: forward a scoped allocation request to the device's
// DeviceAPI implementation. A NULL mem_scope selects the default scope.
int TVMDeviceAllocDataSpaceWithScope(DLContext ctx, int ndim, const int64_t* shape,
                                     DLDataType dtype, const char* mem_scope, void** out_data) {
  API_BEGIN();
  Optional<String> scope = (mem_scope == nullptr)
                               ? Optional<String>()
                               : Optional<String>(String(std::string(mem_scope)));
  *out_data = DeviceAPIManager::Get(ctx)->AllocDataSpace(ctx, ndim, shape, dtype, scope);
  API_END();
}

// C API wrapper: release a device allocation through the owning DeviceAPI.
int TVMDeviceFreeDataSpace(DLContext ctx, void* ptr) {
  API_BEGIN();
  DeviceAPIManager::Get(ctx)->FreeDataSpace(ctx, ptr);
  API_END();
}

int TVMDeviceCopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset,
size_t num_bytes, TVMContext ctx_from, TVMContext ctx_to,
DLDataType type_hint, TVMStreamHandle stream) {
int TVMDeviceCopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) {
API_BEGIN();
TVMContext ctx_from = from->ctx;
TVMContext ctx_to = to->ctx;
TVMContext ctx = ctx_from.device_type != kDLCPU ? ctx_from : ctx_to;
DeviceAPIManager::Get(ctx)->CopyDataFromTo(from, from_offset, to, to_offset, num_bytes, ctx_from,
ctx_to, type_hint, stream);
DeviceAPIManager::Get(ctx)->CopyDataFromTo(from, to, stream);
API_END();
}

Expand Down
13 changes: 7 additions & 6 deletions src/runtime/cpu_device_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,6 @@ class CPUDeviceAPI final : public DeviceAPI {
#endif
}

void CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset, size_t size,
TVMContext ctx_from, TVMContext ctx_to, DLDataType type_hint,
TVMStreamHandle stream) final {
memcpy(static_cast<char*>(to) + to_offset, static_cast<const char*>(from) + from_offset, size);
}

void StreamSync(TVMContext ctx, TVMStreamHandle stream) final {}

void* AllocWorkspace(TVMContext ctx, size_t size, DLDataType type_hint) final;
Expand All @@ -86,6 +80,13 @@ class CPUDeviceAPI final : public DeviceAPI {
static auto* inst = new CPUDeviceAPI();
return inst;
}

protected:
  // Protected byte-level copy hook: on CPU both buffers are host memory, so a
  // plain memcpy with the given byte offsets suffices.
  void CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset, size_t size,
                      TVMContext ctx_from, TVMContext ctx_to, DLDataType type_hint,
                      TVMStreamHandle stream) final {
    memcpy(static_cast<char*>(to) + to_offset, static_cast<const char*>(from) + from_offset, size);
  }
};

struct CPUWorkspacePool : public WorkspacePool {
Expand Down
39 changes: 34 additions & 5 deletions src/runtime/crt/common/crt_runtime_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <assert.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Expand Down Expand Up @@ -87,16 +88,44 @@ int TVMDeviceAllocDataSpace(DLContext ctx, size_t nbytes, size_t alignment, DLDa
if (alignment != 1) {
nbytes = (nbytes + alignment - 1) / alignment * alignment;
}

return TVMPlatformMemoryAllocate(nbytes, ctx, out_data);
}

// Allocate a tensor sized from shape/dtype by forwarding to the flat
// byte-based allocator.
// NOTE(review): mem_scope is currently ignored -- the CRT only provides flat
// ("global") memory, so any special scope silently falls back to it; confirm
// that callers never depend on a non-global scope here.
int TVMDeviceAllocDataSpaceWithScope(DLContext ctx, int ndim, const int64_t* shape,
                                     DLDataType dtype, const char* mem_scope, void** out_data) {
  size_t nbytes = 1;
  for (int i = 0; i < ndim; ++i) {
    nbytes *= shape[i];
  }
  // Round the per-element bit width up to whole bytes.
  nbytes *= (dtype.bits * dtype.lanes + 7) / 8;

  // Use a const size_t floor so the comparison below does not mix signed and
  // unsigned operands (the original used a mutable `int` local).
  const size_t kAllocAlignment = 128;
  size_t align = (dtype.bits / 8) * dtype.lanes;
  if (align < kAllocAlignment) align = kAllocAlignment;
  return TVMDeviceAllocDataSpace(ctx, nbytes, align, dtype, out_data);
}

int TVMDeviceFreeDataSpace(TVMContext ctx, void* ptr) { return TVMPlatformMemoryFree(ptr, ctx); }

int TVMDeviceCopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset,
size_t num_bytes, TVMContext ctx_from, TVMContext ctx_to,
DLDataType type_hint, TVMStreamHandle stream) {
memcpy(((uint8_t*)to) + to_offset, ((uint8_t*)from) + from_offset, num_bytes);
// A tensor is contiguous when strides are absent (NULL) or equal the
// row-major compact strides implied by its shape.
static bool IsContiguous(const DLTensor* arr) {
  if (arr->strides == NULL) {
    return true;
  }
  int64_t expected_stride = 1;
  for (int32_t dim = arr->ndim - 1; dim >= 0; --dim) {
    if (arr->strides[dim] != expected_stride) {
      return false;
    }
    expected_stride *= arr->shape[dim];
  }
  return true;
}

int TVMDeviceCopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) {
assert(IsContiguous(from) && IsContiguous(to));
size_t size = 1;
for (int i = 0; i < from->ndim; ++i) {
size *= from->shape[i];
}
size *= (from->dtype.bits * from->dtype.lanes + 7) / 8;
memcpy(((uint8_t*)to->data) + to->byte_offset, ((uint8_t*)from->data) + from->byte_offset, size);
return 0;
}

Expand Down
2 changes: 2 additions & 0 deletions src/runtime/cuda/cuda_device_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ class CUDADeviceAPI final : public DeviceAPI {
}
}

protected:
void CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset, size_t size,
TVMContext ctx_from, TVMContext ctx_to, DLDataType type_hint,
TVMStreamHandle stream) final {
Expand Down Expand Up @@ -166,6 +167,7 @@ class CUDADeviceAPI final : public DeviceAPI {
}
}

public:
TVMStreamHandle CreateStream(TVMContext ctx) {
CUDA_CALL(cudaSetDevice(ctx.device_id));
cudaStream_t retval;
Expand Down
Loading