10 changes: 10 additions & 0 deletions docker/bash.sh
@@ -247,6 +247,16 @@ while (( $# )); do
shift 2
;;

-e)
DOCKER_ENV+=( --env "$2" )
shift 2
;;

-v)
DOCKER_FLAGS+=( --volume "$2" )
shift 2
;;

--dry-run)
DRY_RUN=true
shift
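(Note: the new -e and -v switches forward directly to docker's own --env and --volume flags, so callers can inject environment variables and bind mounts without editing the script, e.g. docker/bash.sh -v /path/to/adreno-sdk:/adreno-opencl -e ADRENO_TARGET_CLML_VERSION=3 <image>; paths and image name are illustrative.)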
8 changes: 8 additions & 0 deletions python/tvm/relay/op/contrib/clml.py
@@ -556,6 +556,14 @@ def check_depth_to_space(extract):
return False
return True

pass_context = tvm.get_global_func("transform.GetCurrentPassContext")()
target_version = (
pass_context.config["relay.ext.clml.target_version"]
if "relay.ext.clml.target_version" in pass_context.config
else 3
)
print("CLML Target Version: ", target_version)

return [
("clml.pad_conv2d", pad_conv_pattern(), check_conv),
("clml.conv2d", conv_pattern(), check_conv),
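(Note: the pattern table now keys off the PassContext option relay.ext.clml.target_version, defaulting to 3 when unset; a user would select, say, a v2 target with tvm.transform.PassContext(config={"relay.ext.clml.target_version": 2}) around partitioning. Usage sketch, not part of this diff.)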
4 changes: 4 additions & 0 deletions src/relay/backend/contrib/clml/codegen.cc
@@ -35,6 +35,10 @@
#include "../codegen_json/codegen_json.h"

namespace tvm {

constexpr const char* kCLMLTargetVersion = "relay.ext.clml.target_version";
TVM_REGISTER_PASS_CONFIG_OPTION(kCLMLTargetVersion, Integer);

namespace relay {
namespace contrib {

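(With the option registered, C++ passes can query it from the current PassContext as well; a minimal sketch, assuming the same default of 3 used on the Python side:

  // Sketch: read the registered CLML target-version option, falling
  // back to 3 when the user did not set it in the PassContext.
  int GetCLMLTargetVersion() {
    auto ctx = tvm::transform::PassContext::Current();
    return ctx->GetConfig<tvm::Integer>(kCLMLTargetVersion, tvm::Integer(3)).value()->value;
  }

Not part of the diff; shown only to illustrate how the option is consumed.)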
346 changes: 151 additions & 195 deletions src/runtime/contrib/clml/clml_runtime.cc

Large diffs are not rendered by default.

119 changes: 115 additions & 4 deletions src/runtime/contrib/clml/clml_runtime.h
@@ -23,6 +23,12 @@
*/
#ifndef TVM_RUNTIME_CONTRIB_CLML_CLML_RUNTIME_H_
#define TVM_RUNTIME_CONTRIB_CLML_CLML_RUNTIME_H_

#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#if !defined(CL_TARGET_OPENCL_VERSION)
#define CL_TARGET_OPENCL_VERSION 300
#endif
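(Note: CL_USE_DEPRECATED_OPENCL_1_2_APIS keeps the OpenCL 1.2 entry points visible in newer headers, and pinning CL_TARGET_OPENCL_VERSION to 300, unless the build already defines one, fixes the header feature set and silences the headers' target-version warning.)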

#include <CL/cl.h>
#include <CL/opencl.h>
#include <stdlib.h>
@@ -48,8 +54,110 @@

#define CAT_I(a, b) a##b
#define CAT(a, b) CAT_I(a, b)
#define GET_ML_INTERFACE CAT(CAT(clGetMLInterfaceV, CL_QCOM_ML_OPS_H_MAJOR_VERSION), QCOM)
#define GET_ML_API_INTERFACE CAT(CAT(CLMLInterfaceV, CL_QCOM_ML_OPS_H_MAJOR_VERSION), QCOM)

#define CLML_CHECK_ERROR(e, API) \
{ ICHECK(e == CL_SUCCESS) << "CLML Error:" #API " code=" << e; }

#if CL_QCOM_ML_OPS_H_MAJOR_VERSION > 3
#define V4_API(API, ...) \
e = (reinterpret_cast<CLMLInterfaceV4QCOM*>(CLMLWorkspace::Global()->h_ClmlIntf)) \
->API(__VA_ARGS__); \
CLML_CHECK_ERROR(e, API);
#else
#define V4_API(API, ...) LOG(FATAL) << "CLML Error:" #API " - Incompatible V4 API call\n";
#endif

#if CL_QCOM_ML_OPS_H_MAJOR_VERSION > 2
#define V3_API(API, ...) \
e = (reinterpret_cast<CLMLInterfaceV3QCOM*>(CLMLWorkspace::Global()->h_ClmlIntf)) \
->API(__VA_ARGS__); \
CLML_CHECK_ERROR(e, API);
#else
#define V3_API(API, ...) LOG(FATAL) << "CLML Error:" #API " - Incompatible V3 API call\n";
#endif

#if CL_QCOM_ML_OPS_H_MAJOR_VERSION > 1
#define V2_API(API, ...) \
e = (reinterpret_cast<CLMLInterfaceV2QCOM*>(CLMLWorkspace::Global()->h_ClmlIntf)) \
->API(__VA_ARGS__); \
CLML_CHECK_ERROR(e, API);
#else
#define V2_API(API, ...) LOG(FATAL) << "CLML Error:" #API " - Incompatible V2 API call\n";
#endif

#define V1_API(API, ...) \
e = (reinterpret_cast<CLMLInterfaceV1QCOM*>(CLMLWorkspace::Global()->h_ClmlIntf)) \
->API(__VA_ARGS__); \
CLML_CHECK_ERROR(e, API);

#define CLML_CALL(API, ...) \
{ \
cl_int e; \
switch (CLMLWorkspace::Global()->target_major) { \
case 1: \
V1_API(API, __VA_ARGS__); \
break; \
case 2: \
V2_API(API, __VA_ARGS__); \
break; \
case 3: \
V3_API(API, __VA_ARGS__); \
break; \
case 4: \
V4_API(API, __VA_ARGS__); \
break; \
default: \
LOG(FATAL) << "CLML Error:" #API " - Unsupported target version \n"; \
} \
}
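(Call-site sketch, not part of the diff: CLML_CALL hides the versioned-interface cast, so runtime code invokes any CLML entry point uniformly and the matching CLMLInterfaceVxQCOM function pointer is selected at run time. For example, the tensor write in clml_utils.cc below becomes:

  CLML_CALL(clEnqueueWriteMLTensorDataQCOM, CLML_QUEUE, data, layout, tensor->tensor,
            tensor->memory, 0, nullptr, &evt);

Error checking happens inside the macro via CLML_CHECK_ERROR.)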

#define CLML_CALL_VERSIONED(APICALL, VERSION, ...) CAT(CAT(V, VERSION), _API)(APICALL, __VA_ARGS__)

#define CALL_CASE(VERSION, API, ...) \
case VERSION: \
CLML_CALL_VERSIONED(API, VERSION, __VA_ARGS__); \
break;

// clCreateMLOpClipQCOM
#define CLML_CALL_clCreateMLOpClipQCOM(...) \
cl_int e; \
switch (CLMLWorkspace::Global()->target_major) { \
CALL_CASE(2, clCreateMLOpClipQCOM, __VA_ARGS__) \
CALL_CASE(3, clCreateMLOpClipQCOM, __VA_ARGS__) \
CALL_CASE(4, clCreateMLOpClipQCOM, __VA_ARGS__) \
default: \
LOG(FATAL) << "CLML Error: - Unsupported target version \n"; \
}

// clCreateMLTensorQCOM and clCreateMLTensorWithUsageQCOM
#define CALL_clCreateMLTensorQCOM(VERSION, CONTEXT, TENSORPROPS, TENSORDESC, USAGE, TENSOR) \
CALL_CASE(VERSION, clCreateMLTensorQCOM, CONTEXT, TENSORPROPS, TENSORDESC, TENSOR)

#define CALL_clCreateMLTensorWithUsageQCOM(VERSION, CONTEXT, TENSORPROPS, TENSORDESC, USAGE, \
TENSOR) \
CALL_CASE(VERSION, clCreateMLTensorWithUsageQCOM, CONTEXT, TENSORPROPS, TENSORDESC, USAGE, TENSOR)

#define CLML_CALL_clCreateMLTensorQCOM(...) \
cl_int e; \
switch (CLMLWorkspace::Global()->target_major) { \
CALL_clCreateMLTensorQCOM(1, __VA_ARGS__); \
CALL_clCreateMLTensorQCOM(2, __VA_ARGS__); \
CALL_clCreateMLTensorQCOM(3, __VA_ARGS__); \
CALL_clCreateMLTensorWithUsageQCOM(4, __VA_ARGS__); \
default: \
LOG(FATAL) << "CLML Error: - Unsupported target version \n"; \
}
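(Note: for interface versions 1-3 the usage argument is intentionally dropped, since clCreateMLTensorQCOM predates usage hints, while version 4 forwards it to clCreateMLTensorWithUsageQCOM; callers can therefore pass a usage value unconditionally.)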

/* Version compatibility for CLML Tensor creation */
#if CL_QCOM_ML_OPS_H_MAJOR_VERSION < 4
typedef enum _cl_ml_tensor_usage_qcom {
CL_TENSOR_USAGE_INVALID_QCOM = 0,
CL_TENSOR_USAGE_UNUSED_QCOM = 1,
CL_TENSOR_USAGE_PARAMETER_QCOM = 2,
CL_TENSOR_USAGE_CNN_QCOM = 3,
CL_TENSOR_USAGE_TNN_QCOM = 4,
} cl_ml_tensor_usage_qcom;
#endif
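(Defining the usage enum locally for pre-v4 SDKs lets the usage-aware signatures below compile against older headers; the enumerator values are presumably chosen to match the v4 header.)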

/*! \brief Magic number for CLML Tuning cache entry */
static const uint64_t kTVMCLMLTuningCacheMagic = 0x434C4D4C54554E45;
@@ -81,7 +189,7 @@ class CLMLWorkspace {
virtual CLMLThreadEntry* GetThreadEntry();

/* CLML Context */
GET_ML_API_INTERFACE* h_ClmlIntf = nullptr;
void* h_ClmlIntf = nullptr;
cl::OpenCLWorkspace* workspace = nullptr;
cl::OpenCLThreadEntry* tentry = nullptr;
cl_device_id device_id;
Expand All @@ -107,6 +215,10 @@ class CLMLWorkspace {

/* DDR memory management */
std::map<cl_mem, std::pair<int, int>> ddr_global_pool; // buf, size and ref count

/* Device API version information */
int target_major;
int target_minor;
};
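(Since the V*_API macros cast h_ClmlIntf according to target_major, the handle is now an opaque void* holding whichever CLMLInterfaceVxQCOM struct was obtained at load time; target_major/target_minor are presumably populated during workspace initialization from the device's reported CLML version.)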

/*! \brief Thread local workspace */
@@ -172,7 +284,6 @@ struct tensor_dims_t {
uint32_t n, c, h, w;
};

#define CLML_INTF CLMLWorkspace::Global()->h_ClmlIntf
#define CLML_QUEUE \
CLMLWorkspace::Global()->workspace->GetQueue(CLMLWorkspace::Global()->tentry->device)
#define CLML_CTX CLMLWorkspace::Global()->workspace->contexts[CLMLWorkspace::Global()->platform_id]
43 changes: 16 additions & 27 deletions src/runtime/contrib/clml/clml_utils.cc
@@ -40,14 +40,10 @@ using JSONGraphNode = tvm::runtime::json::JSONGraphNode;
*/
void CopyDataToCLMLTensor(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> tensor, void* data,
cl_ml_tensor_layout_qcom layout) {
cl_int result = 0;
cl_event evt = nullptr;
result = CLML_INTF->clEnqueueWriteMLTensorDataQCOM(CLML_QUEUE, data, layout, tensor->tensor,
tensor->memory,
0, // n waitlist
nullptr, // waitlist
&evt); // event
ICHECK((evt != nullptr) && result == CL_SUCCESS) << "clEnqueueWriteMLTensorDataQCOM:" << result;
CLML_CALL(clEnqueueWriteMLTensorDataQCOM, CLML_QUEUE, data, layout, tensor->tensor,
tensor->memory, 0, nullptr, &evt);
ICHECK(evt != nullptr) << "clEnqueueWriteMLTensorDataQCOM";
}

/*!
@@ -62,13 +58,8 @@ void CopyDataFromCLMLTensor(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> tenso
cl_int result = 0;
cl_event readEvent = nullptr;
// Read the output tensor
result = CLML_INTF->clEnqueueReadMLTensorDataQCOM(CLML_QUEUE, tensor->tensor, tensor->memory,
data, layout,
0, // n waitlist
nullptr, // waitlist
&readEvent); // event
ICHECK(result == CL_SUCCESS) << "clEnqueueReadMLTensorDataQCOM:" << result;

CLML_CALL(clEnqueueReadMLTensorDataQCOM, CLML_QUEUE, tensor->tensor, tensor->memory, data, layout,
0, nullptr, &readEvent);
result = clWaitForEvents(1, &readEvent);
ICHECK(result == CL_SUCCESS) << "clWaitForEvents:" << result;
}
@@ -83,14 +74,14 @@ void CopyDataFromCLMLTensor(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> tenso
* \return CLML tensor
*/
cl_ml_tensor_qcom DeviceMakeCLMLTensor(cl_context context, tensor_dims_t dims,
cl_ml_tensor_layout_qcom layout, cl_channel_type dtype) {
cl_ml_tensor_layout_qcom layout, cl_channel_type dtype,
cl_ml_tensor_usage_qcom usage) {
cl_ml_tensor_qcom tensor;
cl_int result = CL_OUT_OF_RESOURCES;

cl_ml_tensor_desc_qcom desc = {
dtype, layout, dims.n, dims.c, dims.h, dims.w, 0, CL_TENSOR_DIMENSIONS_4D_QCOM, {0}};
result = CLML_INTF->clCreateMLTensorQCOM(CLML_CTX, nullptr, &desc, &tensor);
ICHECK(tensor && result == CL_SUCCESS) << "clCreateMLTensorQCOM:" << result;
CLML_CALL_clCreateMLTensorQCOM(CLML_CTX, nullptr, &desc, usage, &tensor);
ICHECK(tensor) << "clCreateMLTensorQCOM";
return tensor;
}

@@ -195,11 +186,9 @@ cl_arithmetic_mode_qcom MakeCLArithMode(const cl_channel_type& data_type,
* \param dtype tensor data type
* \return CLML Tensor descriptor.
*/
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> MakeCLMLTensor(const JSONGraphNode& tensor_rep,
void* data,
std::vector<size_t> c_shape,
cl_ml_tensor_layout_qcom layout,
cl_uint dtype) {
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> MakeCLMLTensor(
const JSONGraphNode& tensor_rep, void* data, std::vector<size_t> c_shape,
cl_ml_tensor_layout_qcom layout, cl_uint dtype, cl_ml_tensor_usage_qcom usage) {
std::vector<int64_t> shape = tensor_rep.GetOpShape()[0];
std::vector<size_t> clml_shape(shape.begin(), shape.end());
if (c_shape.size() > 0) {
@@ -217,7 +206,7 @@ std::shared_ptr<cl_ml_tensor_memory_desc_qcom> MakeCLMLTensor(const JSONGraphNod
dims.w = clml_shape[3];

auto tensor_dsc = std::make_shared<cl_ml_tensor_memory_desc_qcom>();
tensor_dsc->tensor = DeviceMakeCLMLTensor(CLML_CTX, dims, layout, dtype);
tensor_dsc->tensor = DeviceMakeCLMLTensor(CLML_CTX, dims, layout, dtype, usage);
return tensor_dsc;
}

Expand All @@ -232,9 +221,9 @@ std::shared_ptr<cl_ml_tensor_memory_desc_qcom> MakeCLMLTensor(const JSONGraphNod
* \return CLML Tensor descriptor.
*/
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> MakeCLMLTensorFromJSONNode(
const JSONGraphNode& node, cl_ml_tensor_layout_qcom layout, cl_uint dtype, void* data,
std::vector<size_t> shape) {
return MakeCLMLTensor(node, data, shape, layout, dtype);
const JSONGraphNode& node, cl_ml_tensor_layout_qcom layout, cl_ml_tensor_usage_qcom usage,
cl_uint dtype, void* data, std::vector<size_t> shape) {
return MakeCLMLTensor(node, data, shape, layout, dtype, usage);
}
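(Call sites change accordingly; a sketch of the updated shape, with arguments illustrative:

  auto input = MakeCLMLTensorFromJSONNode(node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM,
                                          CL_TENSOR_USAGE_CNN_QCOM, CL_FLOAT);

Data and shape keep their defaults of nullptr and {}.)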

/*!
14 changes: 6 additions & 8 deletions src/runtime/contrib/clml/clml_utils.h
@@ -45,7 +45,7 @@ void CopyDataFromCLMLTensor(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> tenso
cl_ml_tensor_qcom DeviceMakeCLMLTensor(
cl_context context, tensor_dims_t dims,
cl_ml_tensor_layout_qcom layout = CL_TENSOR_LAYOUT_OPTIMAL_QCOM,
cl_channel_type dtype = CL_FLOAT);
cl_channel_type dtype = CL_FLOAT, cl_ml_tensor_usage_qcom usage = CL_TENSOR_USAGE_INVALID_QCOM);

cl_mem AllocateOnChipTensorMemory(size_t size, cl_uint on_chip_mem_offset);

@@ -58,15 +58,13 @@ cl_channel_type MakeCLDataType(const DLDataType& data_type);
cl_arithmetic_mode_qcom MakeCLArithMode(const cl_channel_type& data_type,
const cl_channel_type& acc_type = CL_FLOAT);

std::shared_ptr<cl_ml_tensor_memory_desc_qcom> MakeCLMLTensor(const JSONGraphNode& tensor_rep,
void* data,
std::vector<size_t> c_shape,
cl_ml_tensor_layout_qcom layout,
cl_uint dtype);
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> MakeCLMLTensor(
const JSONGraphNode& tensor_rep, void* data, std::vector<size_t> c_shape,
cl_ml_tensor_layout_qcom layout, cl_uint dtype, cl_ml_tensor_usage_qcom usage);

std::shared_ptr<cl_ml_tensor_memory_desc_qcom> MakeCLMLTensorFromJSONNode(
const JSONGraphNode& node, cl_ml_tensor_layout_qcom layout, cl_uint dtype, void* data = nullptr,
std::vector<size_t> shape = {});
const JSONGraphNode& node, cl_ml_tensor_layout_qcom layout, cl_ml_tensor_usage_qcom usage,
cl_uint dtype, void* data = nullptr, std::vector<size_t> shape = {});

std::vector<cl_uint> GetVectorValues(const std::vector<std::string>& val);

27 changes: 18 additions & 9 deletions tests/scripts/ci.py
@@ -370,6 +370,7 @@ def generate_command(
precheck: Optional[Callable[[], None]] = None,
post_build: Optional[List[str]] = None,
additional_flags: Optional[Dict[str, str]] = None,
env: Optional[Dict[str, str]] = None,
):
"""
Helper to generate CLIs that:
@@ -424,17 +425,22 @@ def fn(
if kwargs.get(option_name, False):
scripts.extend(script.format(build_dir=build_dir) for script in extra_scripts)

docker_env = {
# Need to specify the library path manually or else TVM can't
# determine which build directory to use (i.e. if there are
# multiple copies of libtvm.so laying around)
"TVM_LIBRARY_PATH": str(REPO_ROOT / get_build_dir(name)),
"VERBOSE": "true" if verbose else "false",
}

if env is not None:
docker_env.update(env)

docker(
name=gen_name(f"ci-{name}"),
image=f"ci_{name}" if docker_image is None else docker_image,
scripts=scripts,
env={
# Need to specify the library path manually or else TVM can't
# determine which build directory to use (i.e. if there are
# multiple copies of libtvm.so laying around)
"TVM_LIBRARY_PATH": str(REPO_ROOT / get_build_dir(name)),
"VERBOSE": "true" if verbose else "false",
},
env=docker_env,
interactive=interactive,
additional_flags=additional_flags,
)
@@ -719,10 +725,13 @@ def add_subparser(
help="Run Adreno build and test(s)",
post_build=["./tests/scripts/task_build_adreno_bins.sh"],
additional_flags={
"--volume": os.environ.get("ADRENO_OPENCL", "") + ":/adreno-opencl",
"--env": "ADRENO_OPENCL=/adreno-opencl",
"--volume": os.environ.get("ADRENO_OPENCL", "/tmp/") + ":/adreno-opencl",
"--net": "host",
},
env={
"ADRENO_OPENCL": "/adreno-opencl",
"ADRENO_TARGET_CLML_VERSION": os.environ.get("ADRENO_TARGET_CLML_VERSION", "3"),
},
options={
"test": (
"run Adreno API/Python tests",
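(Taken together, ci.py now threads a per-target env dict into the docker invocation, so Adreno runs can pin the CLML target version from the host shell, e.g. ADRENO_TARGET_CLML_VERSION=2 python tests/scripts/ci.py adreno, with 3 as the default; the invocation is illustrative. ADRENO_OPENCL likewise falls back to /tmp/ when unset, so the volume mount never receives an empty host path.)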