33 changes: 33 additions & 0 deletions src/runtime/opencl/opencl_common.h
@@ -162,6 +162,29 @@ inline const char* CLGetErrorString(cl_int error) {
}
}

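/*!
 * \brief Convert a TVM data type to the matching OpenCL image channel data type.
 * \param data_type The tensor element type to convert.
 * \return The corresponding cl_channel_type, e.g. CL_FLOAT for float32.
 */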
inline cl_channel_type DTypeToOpenCLChannelType(DLDataType data_type) {
DataType dtype(data_type);
if (dtype == DataType::Float(32)) {
return CL_FLOAT;
} else if (dtype == DataType::Float(16)) {
return CL_HALF_FLOAT;
} else if (dtype == DataType::Int(8)) {
return CL_SIGNED_INT8;
} else if (dtype == DataType::Int(16)) {
return CL_SIGNED_INT16;
} else if (dtype == DataType::Int(32)) {
return CL_SIGNED_INT32;
} else if (dtype == DataType::UInt(8)) {
return CL_UNSIGNED_INT8;
} else if (dtype == DataType::UInt(16)) {
return CL_UNSIGNED_INT16;
} else if (dtype == DataType::UInt(32)) {
return CL_UNSIGNED_INT32;
}
LOG(FATAL) << "Data type is not supported in the OpenCL runtime yet: " << dtype;
return CL_FLOAT;
}

/*!
* \brief Protected OpenCL call
* \param func Expression to call.
@@ -231,6 +254,8 @@ class OpenCLWorkspace : public DeviceAPI {
void SetDevice(TVMContext ctx) final;
void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) final;
void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment, DLDataType type_hint) final;
void* AllocDataSpace(TVMContext ctx, int ndim, const int64_t* shape, DLDataType dtype,
Optional<String> mem_scope = NullOpt) final;
void FreeDataSpace(TVMContext ctx, void* ptr) final;
void StreamSync(TVMContext ctx, TVMStreamHandle stream) final;
void* AllocWorkspace(TVMContext ctx, size_t size, DLDataType type_hint) final;
@@ -337,6 +362,14 @@ class OpenCLModuleNode : public ModuleNode {
std::vector<cl_kernel> kernels_;
};

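/*!
 * \brief Query whether an OpenCL allocation is a buffer or an image object.
 * \param mem_ptr The opaque cl_mem handle returned by AllocDataSpace.
 * \return The allocation's CL_MEM_TYPE, e.g. CL_MEM_OBJECT_BUFFER or CL_MEM_OBJECT_IMAGE2D.
 */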
inline cl_mem_object_type GetMemObjectType(const void* mem_ptr) {
cl_mem mem = static_cast<cl_mem>(const_cast<void*>(mem_ptr));
cl_mem_info param_name = CL_MEM_TYPE;
cl_mem_object_type mem_type;
OPENCL_CALL(clGetMemObjectInfo(mem, param_name, sizeof(mem_type), &mem_type, NULL));
return mem_type;
}

} // namespace runtime
} // namespace tvm
#endif // TVM_RUNTIME_OPENCL_OPENCL_COMMON_H_
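
The two helpers added above (DTypeToOpenCLChannelType and GetMemObjectType) are what later lets the device API create image allocations and tell them apart from plain buffers. As a rough, illustrative-only sketch against the plain OpenCL 1.2 C API (not part of this patch; the function name and the hard-coded float32/RGBA choice are assumptions made for the example):

#include <CL/cl.h>

// Allocate a float32 RGBA 2D image and confirm that it reports itself as an
// image object -- the same property CopyDataFromTo uses to pick its copy path.
cl_mem AllocFloatRGBAImage(cl_context context, size_t width, size_t height) {
  cl_image_format format;
  format.image_channel_order = CL_RGBA;
  format.image_channel_data_type = CL_FLOAT;  // what DTypeToOpenCLChannelType returns for float32

  cl_image_desc desc = {};  // zero-initialize pitches, mip levels, etc.
  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  desc.image_width = width;
  desc.image_height = height;

  cl_int err = CL_SUCCESS;
  cl_mem img = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, nullptr, &err);
  if (err != CL_SUCCESS) return nullptr;

  cl_mem_object_type type;
  clGetMemObjectInfo(img, CL_MEM_TYPE, sizeof(type), &type, nullptr);
  // type == CL_MEM_OBJECT_IMAGE2D here, so GetMemObjectType(img) would steer
  // CopyDataFromTo onto the clEnqueueCopyImage / ReadImage / WriteImage paths.
  return img;
}
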
165 changes: 152 additions & 13 deletions src/runtime/opencl/opencl_device_api.cc
@@ -126,6 +126,81 @@ void* OpenCLWorkspace::AllocDataSpace(TVMContext ctx, size_t size, size_t alignm
return mptr;
}

static inline size_t GetDataAlignment(const DLDataType dtype) {
size_t align = (dtype.bits / 8) * dtype.lanes;
if (align < kAllocAlignment) return kAllocAlignment;
return align;
}

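// Flatten an N-D tensor shape whose innermost dimension is 4 (the RGBA channels)
// into the (width, height) extents of a 2D image: all leading axes collapse into
// the width and the second-to-last axis becomes the height.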
static std::tuple<int64_t, int64_t> FlatShapeTo2D(std::vector<int64_t> shape) {
ICHECK(shape.size() >= 1 && shape.back() == 4);
while (shape.size() < 3) {
shape.insert(shape.end() - 1, 1);
}
int64_t width = 1;
for (auto it = shape.begin(); it < shape.end() - 2; ++it) {
width *= *it;
}
int64_t height = *(shape.end() - 2);
return std::make_tuple(width, height);
}

void* OpenCLWorkspace::AllocDataSpace(TVMContext ctx, int ndim, const int64_t* shape,
DLDataType dtype, Optional<String> mem_scope) {
if (!mem_scope.defined() || mem_scope.value() == "global") {
// by default, we can always redirect to the flat memory allocations
DLTensor temp;
temp.data = nullptr;
temp.ctx = ctx;
temp.ndim = ndim;
temp.dtype = dtype;
temp.shape = const_cast<int64_t*>(shape);
temp.strides = nullptr;
temp.byte_offset = 0;
size_t size = GetDataSize(temp);
size_t alignment = GetDataAlignment(temp.dtype);
return AllocDataSpace(ctx, size, alignment, dtype);
} else if (mem_scope.value() == "global:texture-act") {
this->Init();
ICHECK(this->context != nullptr) << "No OpenCL device";
cl_image_format image_format;
image_format.image_channel_data_type = DTypeToOpenCLChannelType(dtype);
cl_image_desc image_desc;

// shape must be (?, ..., ?, 4)
ICHECK_GT(ndim, 1);
ICHECK_EQ(shape[ndim - 1], 4);
// prepare descriptors
image_format.image_channel_order = CL_RGBA;
image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
// flatten the tensor shape to a 2D image
size_t width, height;
std::vector<int64_t> vshape(shape, shape + ndim);
std::tie(width, height) = FlatShapeTo2D(vshape);
image_desc.image_width = width;
image_desc.image_height = height;
image_desc.image_depth = 1;
image_desc.image_array_size = 1;
image_desc.image_row_pitch = 0;
image_desc.image_slice_pitch = 0;
image_desc.num_mip_levels = 0;
image_desc.num_samples = 0;
image_desc.buffer = NULL;

cl_int err_code;
cl_mem mptr = clCreateImage(this->context, CL_MEM_READ_WRITE, &image_format, &image_desc,
nullptr, &err_code);
OPENCL_CHECK_ERROR(err_code);
return mptr;
} else {
LOG(FATAL) << "Device does not support allocating data space with "
<< "specified memory scope: " << mem_scope.value();
return nullptr;
}
}

void OpenCLWorkspace::FreeDataSpace(TVMContext ctx, void* ptr) {
// We have to make sure that the memory object is not in the command queue
// for some OpenCL platforms.
@@ -135,28 +210,92 @@ void OpenCLWorkspace::FreeDataSpace(TVMContext ctx, void* ptr) {
OPENCL_CALL(clReleaseMemObject(mptr));
}

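// Query the width and height of a 2D image object and fill a 3-element region
// (width, height, 1) in the form expected by clEnqueueCopyImage/ReadImage/WriteImage.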
static inline void GetImageShape(const void* mem_ptr, size_t* region) {
cl_mem mem = static_cast<cl_mem>(const_cast<void*>(mem_ptr));
size_t width, height;
OPENCL_CALL(clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL));
OPENCL_CALL(clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL));
region[0] = width;
region[1] = height;
region[2] = 1;
}

void OpenCLWorkspace::CopyDataFromTo(const void* from, size_t from_offset, void* to,
size_t to_offset, size_t size, TVMContext ctx_from,
TVMContext ctx_to, DLDataType type_hint,
TVMStreamHandle stream) {
this->Init();
ICHECK(stream == nullptr);
if (IsOpenCLDevice(ctx_from) && IsOpenCLDevice(ctx_to)) {
OPENCL_CALL(clEnqueueCopyBuffer(this->GetQueue(ctx_to),
static_cast<cl_mem>((void*)from), // NOLINT(*)
static_cast<cl_mem>(to), from_offset, to_offset, size, 0,
nullptr, nullptr));
cl_mem_object_type from_type = GetMemObjectType(from);
cl_mem_object_type to_type = GetMemObjectType(to);
if (from_type == CL_MEM_OBJECT_BUFFER && to_type == CL_MEM_OBJECT_BUFFER) {
OPENCL_CALL(clEnqueueCopyBuffer(this->GetQueue(ctx_to),
static_cast<cl_mem>((void*)from), // NOLINT(*)
static_cast<cl_mem>(to), from_offset, to_offset, size, 0,
nullptr, nullptr));
} else if (from_type == CL_MEM_OBJECT_IMAGE2D && to_type == CL_MEM_OBJECT_IMAGE2D) {
size_t from_origin[3] = {0, 0, 0};
size_t to_origin[3] = {0, 0, 0};
size_t region[3];
GetImageShape(from, region);
OPENCL_CALL(clEnqueueCopyImage(this->GetQueue(ctx_to),
static_cast<cl_mem>((void*)from), // NOLINT(*)
static_cast<cl_mem>(to), from_origin, to_origin, region, 0,
nullptr, nullptr));
} else {
LOG(FATAL) << "Expect the source and destination to be the same OpenCL memory object type "
<< "(buffer or image2d).";
}
} else if (IsOpenCLDevice(ctx_from) && ctx_to.device_type == kDLCPU) {
OPENCL_CALL(clEnqueueReadBuffer(this->GetQueue(ctx_from),
static_cast<cl_mem>((void*)from), // NOLINT(*)
CL_FALSE, from_offset, size, static_cast<char*>(to) + to_offset,
0, nullptr, nullptr));
OPENCL_CALL(clFinish(this->GetQueue(ctx_from)));
cl_mem_object_type from_type = GetMemObjectType(from);
switch (from_type) {
case CL_MEM_OBJECT_BUFFER:
OPENCL_CALL(clEnqueueReadBuffer(this->GetQueue(ctx_from),
static_cast<cl_mem>((void*)from), // NOLINT(*)
CL_FALSE, from_offset, size,
static_cast<char*>(to) + to_offset, 0, nullptr, nullptr));
OPENCL_CALL(clFinish(this->GetQueue(ctx_from)));
break;
case CL_MEM_OBJECT_IMAGE2D: {
size_t origin[3] = {0, 0, 0};
size_t region[3];
GetImageShape(from, region);
OPENCL_CALL(clEnqueueReadImage(this->GetQueue(ctx_from),
static_cast<cl_mem>((void*)from), // NOLINT(*)
CL_FALSE, origin, region, 0, 0,
static_cast<char*>(to) + to_offset, 0, nullptr, nullptr));
OPENCL_CALL(clFinish(this->GetQueue(ctx_from)));
break;
}
default:
LOG(FATAL) << "Unsupported OpenCL memory object type for device-to-host copy.";
}
} else if (ctx_from.device_type == kDLCPU && IsOpenCLDevice(ctx_to)) {
OPENCL_CALL(clEnqueueWriteBuffer(this->GetQueue(ctx_to), static_cast<cl_mem>(to), CL_FALSE,
to_offset, size, static_cast<const char*>(from) + from_offset,
0, nullptr, nullptr));
OPENCL_CALL(clFinish(this->GetQueue(ctx_to)));
cl_mem_object_type to_type = GetMemObjectType(to);
switch (to_type) {
case CL_MEM_OBJECT_BUFFER:
OPENCL_CALL(clEnqueueWriteBuffer(
this->GetQueue(ctx_to), static_cast<cl_mem>(to), CL_FALSE, to_offset, size,
static_cast<const char*>(from) + from_offset, 0, nullptr, nullptr));
OPENCL_CALL(clFinish(this->GetQueue(ctx_to)));
break;
case CL_MEM_OBJECT_IMAGE2D: {
size_t origin[3] = {0, 0, 0};
size_t region[3];
GetImageShape(to, region);
OPENCL_CALL(clEnqueueWriteImage(this->GetQueue(ctx_to),
static_cast<cl_mem>(to),
CL_FALSE, origin, region, 0, 0,
static_cast<const char*>(from) + from_offset, 0, nullptr,
nullptr));
OPENCL_CALL(clFinish(this->GetQueue(ctx_to)));
break;
}
default:
LOG(FATAL) << "Unsupported OpenCL memory object type for host-to-device copy.";
}
} else {
LOG(FATAL) << "Expect copy from/to OpenCL or between OpenCL";
}
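
For reference, here is a small standalone sketch (illustrative only, not part of the patch) reproducing the flattening rule used by FlatShapeTo2D above for the shapes the new test exercises: all leading axes multiply into the image width, the second-to-last axis becomes the height, and the trailing dimension of 4 maps onto the RGBA channels.

#include <cassert>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

int main() {
  // (8, 5, 6, 4) -> width = 8 * 5 = 40, height = 6, four channels per texel.
  std::vector<int64_t> shape = {8, 5, 6, 4};
  assert(shape.size() >= 2 && shape.back() == 4);
  int64_t width = std::accumulate(shape.begin(), shape.end() - 2, int64_t{1},
                                  std::multiplies<int64_t>());
  int64_t height = *(shape.end() - 2);
  assert(width == 40 && height == 6);

  // A 2-D shape such as (3, 4) degenerates to a 3 x 1 image: FlatShapeTo2D
  // pads it to (3, 1, 4) before flattening.
  return 0;
}
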
20 changes: 19 additions & 1 deletion tests/python/unittest/test_target_codegen_opencl.py
@@ -15,8 +15,9 @@
# specific language governing permissions and limitations
# under the License.
import tvm
from tvm import te
from tvm import te, nd
import tvm.testing
import numpy as np

target = "opencl"

@@ -120,6 +121,23 @@ def check_max(ctx, n, dtype):
check_max(ctx, 1, "float64")


@tvm.testing.requires_gpu
@tvm.testing.requires_opencl
def test_opencl_texture_memory():
def check_allocate_and_copy(shape):
cpu_arr = nd.array(np.random.rand(*shape).astype("float32"), tvm.cpu(0))
opencl_arr0 = nd.empty(cpu_arr.shape, cpu_arr.dtype, tvm.opencl(0), "global:texture-act")
opencl_arr1 = nd.empty(cpu_arr.shape, cpu_arr.dtype, tvm.opencl(0), "global:texture-act")
cpu_arr.copyto(opencl_arr0)
opencl_arr0.copyto(opencl_arr1)
np.testing.assert_equal(cpu_arr.asnumpy(), opencl_arr1.asnumpy())

check_allocate_and_copy((3, 4))
check_allocate_and_copy((5, 6, 4))
check_allocate_and_copy((8, 5, 6, 4))


if __name__ == "__main__":
test_opencl_ternary_expression()
test_opencl_inf_nan()
test_opencl_texture_memory()