From cb98da4db499e8f3d29375bdc202b4c508104d70 Mon Sep 17 00:00:00 2001 From: Siva Date: Fri, 27 Sep 2024 06:20:38 +0530 Subject: [PATCH] Introduce qualcomm extension support "cl_qcom_perf_hint", "cl_qcom_priority_hint" extension support added over the workspace interface. OpenCL version will be picked up from SDK headers. CI fixes for build without Adreno OpenCL SDK. Extensions are activated based on their availability in the SDK. New workspace API "SetNativePtr" defined that releases the existing cl_mem and creates a new mem object backed by the given host ptr. Works for cl_qcom_ion_host_ptr, cl_qcom_android_ahardwarebuffer_host_ptr, cl_qcom_android_native_buffer_host_ptr and cl_qcom_dmabuf_host_ptr. Preparing the host_ptr objects is the responsibility of the application. Sometimes the application needs the device id to query device-related information. Use the snippet below to get the cl_device_id from the workspace. OpenCLWorkspace* workspace = OpenCLWorkspace::Global(); cl_device_id device_id = workspace->GetCLDeviceID(0); --- cmake/config.cmake | 3 ++ cmake/modules/OpenCL.cmake | 13 ++++- cmake/modules/contrib/CLML.cmake | 2 +- cmake/utils/FindOpenCL.cmake | 2 +- src/runtime/opencl/opencl_common.h | 14 +++++- src/runtime/opencl/opencl_device_api.cc | 50 +++++++++++++++++-- src/support/libinfo.cc | 5 ++ .../cpp-runtime/opencl/aa_opencl_qcom_extn.cc | 50 +++++++++++++++++++ tests/scripts/task_build_adreno_bins.sh | 2 + 9 files changed, 133 insertions(+), 8 deletions(-) create mode 100644 tests/cpp-runtime/opencl/aa_opencl_qcom_extn.cc diff --git a/cmake/config.cmake b/cmake/config.cmake index 26d50630f7d3..0d912c0c75de 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -483,3 +483,6 @@ SET(CMAKE_VS_PLATFORM_NAME_DEFAULT "x64") # Set Windows Visual Studio default host (equivalent to -Thost=x64) SET(CMAKE_VS_PLATFORM_TOOLSET_HOST_ARCHITECTURE "x64") + +# Enable Qualcomm OpenCL extension support +set(USE_OPENCL_EXTN_QCOM OFF) diff --git a/cmake/modules/OpenCL.cmake 
b/cmake/modules/OpenCL.cmake index ddcd1e4190d1..67d739bb63a0 100644 --- a/cmake/modules/OpenCL.cmake +++ b/cmake/modules/OpenCL.cmake @@ -84,7 +84,7 @@ if(USE_OPENCL) "tests/cpp-runtime/opencl/*.cc" ) add_executable(opencl-cpptest ${OPENCL_TEST_SRCS}) - target_link_libraries(opencl-cpptest PRIVATE gtest_main tvm_runtime) + target_link_libraries(opencl-cpptest PRIVATE gtest_main tvm_runtime ${OpenCL_LIBRARIES}) else() message(STATUS "Couldn't build OpenCL-Gtests") endif() @@ -93,6 +93,17 @@ if(USE_OPENCL) if(USE_OPENCL_ENABLE_HOST_PTR) add_definitions(-DOPENCL_ENABLE_HOST_PTR) endif(USE_OPENCL_ENABLE_HOST_PTR) + if(USE_OPENCL_EXTN_QCOM) + add_definitions(-DUSE_OPENCL_EXTN_QCOM) + find_path(ocl_header cl.h HINTS ${OpenCL_INCLUDE_DIRS} PATH_SUFFIXES CL) + set(OCL_VERSION_HEADER "${ocl_header}/cl.h") + if(EXISTS ${OCL_VERSION_HEADER}) + file(READ ${OCL_VERSION_HEADER} ver) + string(REGEX MATCH "CL_TARGET_OPENCL_VERSION ([0-9]*)" _ ${ver}) + add_definitions(-DCL_TARGET_OPENCL_VERSION=${CMAKE_MATCH_1}) + message(STATUS "Set OpenCL Target version to " ${CMAKE_MATCH_1}) + endif() + endif(USE_OPENCL_EXTN_QCOM) else() list(APPEND COMPILER_SRCS src/target/opt/build_opencl_off.cc) endif(USE_OPENCL) diff --git a/cmake/modules/contrib/CLML.cmake b/cmake/modules/contrib/CLML.cmake index e658f15865df..118091696a9f 100644 --- a/cmake/modules/contrib/CLML.cmake +++ b/cmake/modules/contrib/CLML.cmake @@ -77,7 +77,7 @@ if(USE_CLML_GRAPH_EXECUTOR) message(STATUS "Enable OpenCL as fallback to CLML") file(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc) list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS}) - set(USE_OPENCL ON) + set(USE_OPENCL ${CLML_PATH}) if(USE_OPENCL_ENABLE_HOST_PTR) add_definitions(-DOPENCL_ENABLE_HOST_PTR) endif(USE_OPENCL_ENABLE_HOST_PTR) diff --git a/cmake/utils/FindOpenCL.cmake b/cmake/utils/FindOpenCL.cmake index 8eb35ab3993e..13ffa7159381 100644 --- a/cmake/utils/FindOpenCL.cmake +++ b/cmake/utils/FindOpenCL.cmake @@ -46,7 +46,7 @@ macro(find_opencl use_opencl) 
endif() if(__opencl_sdk) - set(OpenCL_INCLUDE_DIRS ${__opencl_sdk}/include) + set(OpenCL_INCLUDE_DIRS ${__opencl_sdk}/include ${__opencl_sdk}) if (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY STREQUAL "ONLY") set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) endif() diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h index f752a487ea7e..e0abd1841b64 100644 --- a/src/runtime/opencl/opencl_common.h +++ b/src/runtime/opencl/opencl_common.h @@ -50,12 +50,17 @@ * files. This also allows us to expose the OpenCL version through * tvm.runtime.Device. */ +#if !defined(CL_TARGET_OPENCL_VERSION) #define CL_TARGET_OPENCL_VERSION 120 +#endif #ifdef __APPLE__ #include #else #include +#ifdef USE_OPENCL_EXTN_QCOM +#include +#endif #endif #include @@ -254,8 +259,13 @@ class OpenCLWorkspace : public DeviceAPI { } // Initialize the device. void Init(const std::string& type_key, const std::string& device_type, - const std::string& platform_name = ""); + const std::string& platform_name = "", cl_context_properties properties[] = nullptr); virtual void Init() { Init(this->type_key, "gpu"); } + virtual bool Init(cl_context_properties ctx_props[]) { + if (!contexts.empty()) return false; + Init(this->type_key, "gpu", "", ctx_props); + return true; + } // Check whether the context is OpenCL or not. 
virtual bool IsOpenCLDevice(Device dev) { return dev.device_type == kDLOpenCL; } // get the queue of the device @@ -314,6 +324,8 @@ class OpenCLWorkspace : public DeviceAPI { void* AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype, Optional mem_scope = NullOpt) final; void* GetNativePtr(const tvm::runtime::NDArray& narr); + void SetNativePtr(const tvm::runtime::NDArray& narr, void* host_ptr, size_t buf_size); + void SetPerfHint(Device dev, cl_uint perf_hint); void FreeDataSpace(Device dev, void* ptr) final; void StreamSync(Device dev, TVMStreamHandle stream) final; void* AllocWorkspace(Device dev, size_t size, DLDataType type_hint) final; diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc index 0057d0a10102..7b161e8932be 100644 --- a/src/runtime/opencl/opencl_device_api.cc +++ b/src/runtime/opencl/opencl_device_api.cc @@ -277,6 +277,47 @@ void* OpenCLWorkspace::GetNativePtr(const tvm::runtime::NDArray& narr) { return desc->host_ptr; } +void OpenCLWorkspace::SetNativePtr(const tvm::runtime::NDArray& narr, void* host_ptr, + size_t buf_size) { + cl::BufferDescriptor* desc = static_cast(narr.operator->()->data); + + this->Init(); + if (desc->layout == cl::BufferDescriptor::MemoryLayout::kBuffer1D) { +#ifdef USE_OPENCL_EXTN_QCOM + Device dev = narr.operator->()->device; + cl_device_id device_id = GetCLDeviceID(dev.device_id); + auto platform = device_info[device_id].platform_id; + + OPENCL_CALL(clFinish(this->GetQueue(dev))); + if (desc->host_ptr) { + OPENCL_CALL(clEnqueueUnmapMemObject(this->GetQueue(dev), desc->buffer, + reinterpret_cast(desc->host_ptr), 0, nullptr, + nullptr)); + desc->host_ptr = nullptr; + } + OPENCL_CALL(clReleaseMemObject(desc->buffer)); + + cl_int err_code; + desc->buffer = + clCreateBuffer(this->contexts[platform], + CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, buf_size, + host_ptr, &err_code); + desc->layout = 
cl::BufferDescriptor::MemoryLayout::kBuffer1D; + OPENCL_CHECK_ERROR(err_code); +#endif + } else { + LOG(FATAL) << "Native Ptr not enabled over image objects"; + } +} + +void OpenCLWorkspace::SetPerfHint(Device dev, cl_uint perf_hint) { +#ifdef CL_CONTEXT_PERF_HINT_QCOM + cl_device_id device_id = GetCLDeviceID(dev.device_id); + auto platform = device_info[device_id].platform_id; + OPENCL_CALL(clSetPerfHintQCOM(this->contexts[platform], perf_hint)); +#endif +} + void OpenCLWorkspace::FreeDataSpace(Device dev, void* ptr) { // We have to make sure that the memory object is not in the command queue // for some OpenCL platforms. @@ -284,8 +325,9 @@ void OpenCLWorkspace::FreeDataSpace(Device dev, void* ptr) { cl::BufferDescriptor* desc = static_cast(ptr); if (desc->host_ptr) { - clEnqueueUnmapMemObject(this->GetQueue(dev), desc->buffer, - reinterpret_cast(desc->host_ptr), 0, nullptr, nullptr); + OPENCL_CALL(clEnqueueUnmapMemObject(this->GetQueue(dev), desc->buffer, + reinterpret_cast(desc->host_ptr), 0, nullptr, + nullptr)); } OPENCL_CALL(clReleaseMemObject(desc->buffer)); delete desc; @@ -473,7 +515,7 @@ bool MatchPlatformInfo(cl_platform_id pid, cl_platform_info param_name, std::str } void OpenCLWorkspace::Init(const std::string& type_key, const std::string& device_type, - const std::string& platform_name) { + const std::string& platform_name, cl_context_properties ctx_props[]) { if (initialized_) return; std::lock_guard lock(this->mu); if (initialized_) return; @@ -539,7 +581,7 @@ void OpenCLWorkspace::Init(const std::string& type_key, const std::string& devic for (auto& [platform, devices] : device_map) { this->platform_ids.push_back(platform); this->contexts[platform] = - clCreateContext(nullptr, devices.size(), &(devices[0]), nullptr, nullptr, &err_code); + clCreateContext(ctx_props, devices.size(), &(devices[0]), nullptr, nullptr, &err_code); this->devices.insert(this->devices.end(), devices.begin(), devices.end()); for (size_t i = 0; i < devices.size(); ++i) { 
cl_device_id did = devices[i]; diff --git a/src/support/libinfo.cc b/src/support/libinfo.cc index 2d1c33cbf282..f1768dfd77a8 100644 --- a/src/support/libinfo.cc +++ b/src/support/libinfo.cc @@ -63,6 +63,10 @@ #define TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR "NOT-FOUND" #endif +#ifndef TVM_INFO_USE_OPENCL_EXTN_QCOM +#define TVM_INFO_USE_OPENCL_EXTN_QCOM "NOT-FOUND" +#endif + #ifndef TVM_INFO_USE_OPENCL_GTEST #define TVM_INFO_USE_OPENCL_GTEST "NOT-FOUND" #endif @@ -362,6 +366,7 @@ TVM_DLL Map GetLibInfo() { {"USE_NNPACK", TVM_INFO_USE_NNPACK}, {"USE_OPENCL", TVM_INFO_USE_OPENCL}, {"USE_OPENCL_ENABLE_HOST_PTR", TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR}, + {"USE_OPENCL_EXTN_QCOM", TVM_INFO_USE_OPENCL_EXTN_QCOM}, {"USE_OPENCL_GTEST", TVM_INFO_USE_OPENCL_GTEST}, {"USE_OPENMP", TVM_INFO_USE_OPENMP}, {"USE_PAPI", TVM_INFO_USE_PAPI}, diff --git a/tests/cpp-runtime/opencl/aa_opencl_qcom_extn.cc b/tests/cpp-runtime/opencl/aa_opencl_qcom_extn.cc new file mode 100644 index 000000000000..1f3dc2057aee --- /dev/null +++ b/tests/cpp-runtime/opencl/aa_opencl_qcom_extn.cc @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +// Note:: This should be first tests to be executed. 
+// hence, crafted the filename accordingly + +#include +#include + +#include "../src/runtime/opencl/opencl_common.h" + +using namespace tvm::runtime; +using namespace tvm::runtime::cl; + +#ifdef USE_OPENCL_EXTN_QCOM +#pragma message("Qualcomm OpenCL Extn GTests: enabled") +TEST(QCOMExtn, ContextPriorityHint) { + OpenCLWorkspace* workspace = OpenCLWorkspace::Global(); + cl_context_properties properties[] = {CL_CONTEXT_PRIORITY_HINT_QCOM, CL_PRIORITY_HINT_LOW_QCOM, + 0}; + // Only allow one time + ASSERT_EQ(workspace->Init(properties), true); + // Subsequent calls will be failure + ASSERT_EQ(workspace->Init(properties), false); +} + +TEST(QCOMExtn, ContextPerfHint) { + OpenCLWorkspace* workspace = OpenCLWorkspace::Global(); + auto dev = DLDevice{kDLOpenCL, 0}; + workspace->SetPerfHint(dev, CL_PERF_HINT_HIGH_QCOM); +} +#else +#pragma message("Qualcomm OpenCL Extn GTests: disabled") +#endif diff --git a/tests/scripts/task_build_adreno_bins.sh b/tests/scripts/task_build_adreno_bins.sh index 38eefd93a692..412af4928123 100755 --- a/tests/scripts/task_build_adreno_bins.sh +++ b/tests/scripts/task_build_adreno_bins.sh @@ -50,6 +50,8 @@ echo set\(MACHINE_NAME aarch64-linux-gnu\) >> config.cmake echo set\(USE_OPENCL_GTEST ON\) >> config.cmake +echo set\(USE_OPENCL_EXTN_QCOM ON\) >> config.cmake + cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake" \ -DANDROID_ABI=arm64-v8a \ -DANDROID_PLATFORM=android-28 \