From f5cdd41730f2796e3ba14196c5c3017dac510b8e Mon Sep 17 00:00:00 2001 From: Egor Churaev Date: Thu, 17 Nov 2022 16:28:58 +0300 Subject: [PATCH] [OpenCL] Improve OpenCL version detection As mentioned in #13362, it would be nice to add a check for when the user is using a version of libOpenCL.so that is too old. In this PR we introduce this functionality. In the `init` method, we traverse all OpenCL devices and check their versions. If a device's version is older than the target version in TVM, then we notify the user that we will skip this device. We cannot throw any exception from the `init` method because it is possible that you have compiled host code with OpenCL support, but the host device won't have any OpenCL device which is supported by TVM (e.g. they all have a too old version of libOpenCL.so). From the OpenCL codegen we call the function OpenCLModuleCreate. Inside OpenCLModuleCreate, the init function might be called, and in that case an exception would be generated on the host side even though the target device might be supported by TVM. This is why we don't throw any exceptions in the init function. If at runtime we use some OpenCL methods and the list of devices is empty, then we generate an exception and notify the user that a possible reason is that the version of libOpenCL.so is too old. --- src/runtime/opencl/opencl_common.h | 11 +++- src/runtime/opencl/opencl_device_api.cc | 70 ++++++++++++++++++------- 2 files changed, 61 insertions(+), 20 deletions(-) diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h index 7f7f083cf303..4c51158c29df 100644 --- a/src/runtime/opencl/opencl_common.h +++ b/src/runtime/opencl/opencl_common.h @@ -263,7 +263,7 @@ class OpenCLWorkspace : public DeviceAPI { ICHECK(IsOpenCLDevice(dev)); this->Init(); ICHECK(dev.device_id >= 0 && static_cast(dev.device_id) < queues.size()) - << "Invalid OpenCL device_id=" << dev.device_id; + << "Invalid OpenCL device_id=" << dev.device_id << ". 
" << GetError(); return queues[dev.device_id]; } // get the event queue of the context @@ -271,7 +271,7 @@ class OpenCLWorkspace : public DeviceAPI { ICHECK(IsOpenCLDevice(dev)); this->Init(); ICHECK(dev.device_id >= 0 && static_cast(dev.device_id) < queues.size()) - << "Invalid OpenCL device_id=" << dev.device_id; + << "Invalid OpenCL device_id=" << dev.device_id << ". " << GetError(); return events[dev.device_id]; } // is current clCommandQueue in profiling mode @@ -310,6 +310,13 @@ class OpenCLWorkspace : public DeviceAPI { static OpenCLWorkspace* Global(); void CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) final; + + private: + std::string GetError() { + if (this->devices.size() == 0) return noDevicesErrorMsg; + return ""; + } + std::string noDevicesErrorMsg = ""; }; /*! \brief Thread local workspace */ diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc index d67864287dbc..58744c2cc615 100644 --- a/src/runtime/opencl/opencl_device_api.cc +++ b/src/runtime/opencl/opencl_device_api.cc @@ -25,6 +25,8 @@ #include #include +#include + #include "opencl_common.h" namespace tvm { @@ -33,6 +35,7 @@ namespace cl { std::string GetPlatformInfo(cl_platform_id pid, cl_platform_info param_name); std::string GetDeviceInfo(cl_device_id pid, cl_device_info param_name); +std::string GetOpenCLVersion(cl_device_id pid); struct ImageInfo { size_t origin[3] = {}; @@ -111,7 +114,7 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv) *rv = static_cast(index < devices.size()); return; } - ICHECK_LT(index, devices.size()) << "Invalid device id " << index; + ICHECK_LT(index, devices.size()) << "Invalid device id " << index << ". 
" << GetError(); switch (kind) { case kExist: break; @@ -139,17 +142,9 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv) *rv = static_cast(value); break; } - case kComputeVersion: { - // String returned is "OpenCL $MAJOR.$MINOR $VENDOR_INFO". To - // match other implementations, we want to return "$MAJOR.$MINOR" - std::string ret = GetDeviceInfo(devices[index], CL_DEVICE_VERSION); - - const size_t version_start = 7; // Length of initial "OpenCL " prefix to skip - const size_t version_end = ret.find(' ', version_start); - *rv = ret.substr(version_start, version_end - version_start); + case kComputeVersion: + *rv = GetOpenCLVersion(devices[index]); break; - } - return; case kDeviceName: *rv = GetDeviceInfo(devices[index], CL_DEVICE_NAME); break; @@ -200,7 +195,7 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv) void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t size, size_t alignment, DLDataType type_hint) { this->Init(); - ICHECK(context != nullptr) << "No OpenCL device"; + ICHECK(context != nullptr) << "No OpenCL device. " << GetError(); cl_int err_code; cl::BufferDescriptor* desc = new cl::BufferDescriptor; // CL_INVALID_BUFFER_SIZE if size is 0. @@ -245,7 +240,7 @@ void OpenCLWorkspace::FreeDataSpace(Device dev, void* ptr) { cl_mem OpenCLWorkspace::AllocTexture(Device dev, size_t width, size_t height, DLDataType type_hint) { this->Init(); - ICHECK(context != nullptr) << "No OpenCL device"; + ICHECK(context != nullptr) << "No OpenCL device. 
" << GetError(); cl_int err_code; cl_channel_type cl_type = DTypeToOpenCLChannelType(type_hint); cl_image_format format = {CL_RGBA, cl_type}; @@ -373,12 +368,23 @@ std::string GetPlatformInfo(cl_platform_id pid, cl_platform_info param_name) { std::string GetDeviceInfo(cl_device_id pid, cl_device_info param_name) { size_t ret_size; OPENCL_CALL(clGetDeviceInfo(pid, param_name, 0, nullptr, &ret_size)); - std::string ret; - ret.resize(ret_size); - OPENCL_CALL(clGetDeviceInfo(pid, param_name, ret_size, &ret[0], nullptr)); + char* info = new char[ret_size]; + OPENCL_CALL(clGetDeviceInfo(pid, param_name, ret_size, info, nullptr)); + std::string ret = info; + delete[] info; return ret; } +std::string GetOpenCLVersion(cl_device_id pid) { + // String returned is "OpenCL $MAJOR.$MINOR $VENDOR_INFO". To + // match other implementations, we want to return "$MAJOR.$MINOR" + std::string ret = GetDeviceInfo(pid, CL_DEVICE_VERSION); + + const size_t version_start = 7; // Length of initial "OpenCL " prefix to skip + const size_t version_end = ret.find(' ', version_start); + return ret.substr(version_start, version_end - version_start); +} + std::vector GetPlatformIDs() { cl_uint ret_size; cl_int code = clGetPlatformIDs(0, nullptr, &ret_size); @@ -432,16 +438,44 @@ void OpenCLWorkspace::Init(const std::string& type_key, const std::string& devic LOG(WARNING) << "Using CPU OpenCL device"; devices_matched = cl::GetDeviceIDs(platform_id, "cpu"); } - if (devices_matched.size() > 0) { + std::vector supported_devices = {}; + auto get_version_str = [](int version) { + std::ostringstream out; + out.precision(1); + out << std::fixed << version / 100.f; + return out.str(); + }; + for (auto& device : devices_matched) { + std::string ver = GetOpenCLVersion(device); + int opencl_version = std::stod(ver) * 100; + if (opencl_version >= CL_TARGET_OPENCL_VERSION) { + supported_devices.push_back(device); + } else { + std::string dev_msg = GetDeviceInfo(device, CL_DEVICE_NAME) + + " has OpenCL version 
== " + get_version_str(opencl_version); + LOG(WARNING) << "TVM supports devices with OpenCL version >= " + << get_version_str(CL_TARGET_OPENCL_VERSION) << ", device " << dev_msg + << ". This device will be ignored."; + + if (noDevicesErrorMsg.empty()) { + noDevicesErrorMsg = + "Probably this error happen because TVM supports devices with OpenCL version >= " + + get_version_str(CL_TARGET_OPENCL_VERSION) + ". We found the following devices:\n"; + } + noDevicesErrorMsg += "\t" + dev_msg + "\n"; + } + } + if (supported_devices.size() > 0) { this->platform_id = platform_id; this->platform_name = cl::GetPlatformInfo(platform_id, CL_PLATFORM_NAME); this->device_type = device_type; - this->devices = devices_matched; + this->devices = supported_devices; break; } } if (this->platform_id == nullptr) { LOG(WARNING) << "No OpenCL device"; + initialized_ = true; return; } cl_int err_code;