Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion python/tvm/relay/op/contrib/clml.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,18 @@ def check_softmax_op(extract):
return False
return True

def check_upsampling_op(extract):
    """Predicate for offloading nn.upsampling to CLML.

    CLML only implements bilinear resize, so any other interpolation
    method (e.g. "nearest_neighbor") must stay on the fallback target.

    Parameters
    ----------
    extract : relay.Call
        The matched call node; its "method" attribute is inspected.

    Returns
    -------
    bool
        True only when the upsampling method is "bilinear".
    """
    # TVM string attributes compare as plain str, so a direct boolean
    # return replaces the if/return-True/False boilerplate.
    return extract.attrs["method"] == "bilinear"

def check_concat_op(extract):
    """Predicate for offloading concatenate to CLML.

    The CLML concat op is wired for the channel dimension of NCHW
    tensors, so only axis == 1 is accepted; anything else falls back.

    Parameters
    ----------
    extract : relay.Call
        The matched call node; its "axis" attribute is inspected.

    Returns
    -------
    bool
        True only when concatenation happens on axis 1.
    """
    # Keep the `!=` comparison: the attribute is a TVM IntImm whose
    # truthiness under `!=` is what the partitioner relies on.
    if extract.attrs["axis"] != 1:
        return False
    return True

def check_default_op(extract):
    """Fallback predicate: accept the matched pattern unconditionally."""
    return True

Expand All @@ -324,7 +336,7 @@ def check_default_op(extract):
("clml.conv2d", conv_pattern(), check_conv),
("clml.dense", dense_pattern(), check_default_op),
("clml.pad", pad_pattern(), check_pad_op),
("clml.concat", concat_pattern(), check_default_op),
("clml.concat", concat_pattern(), check_concat_op),
("clml.batch_norm", batch_norm_pattern(), check_default_op),
("clml.add", is_op("add")(wildcard(), wildcard()), check_binary_op),
("clml.subtract", is_op("subtract")(wildcard(), wildcard()), check_binary_op),
Expand All @@ -341,6 +353,8 @@ def check_default_op(extract):
("clml.relu", is_op("nn.relu")(wildcard()), check_default_op),
("clml.clip", is_op("clip")(wildcard()), check_default_op),
("clml.batch_flatten", is_op("nn.batch_flatten")(wildcard()), check_default_op),
("clml.depth_to_space", is_op("nn.depth_to_space")(wildcard()), check_default_op),
("clml.upsampling", is_op("nn.upsampling")(wildcard()), check_upsampling_op),
]


Expand Down
67 changes: 66 additions & 1 deletion src/runtime/contrib/clml/clml_runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,14 @@ class CLMLRuntime : public JSONRuntimeBase {
auto out = CreateBinaryLayer(&layer_, node);
this->layer_.storage_map.insert({nid, std::make_pair(out, node)});
this->layer_.func_outs.push_back(out);
} else if ("nn.depth_to_space" == op_name) {
auto out = CreateDepthToSpaceLayer(&layer_, node);
this->layer_.storage_map.insert({nid, std::make_pair(out, node)});
this->layer_.func_outs.push_back(out);
} else if ("nn.upsampling" == op_name) {
auto out = CreateResizeLayer(&layer_, node);
this->layer_.storage_map.insert({nid, std::make_pair(out, node)});
this->layer_.func_outs.push_back(out);
} else {
LOG(FATAL) << "Unsupported op: " << op_name;
}
Expand Down Expand Up @@ -1151,13 +1159,14 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_arithmetic_mode_qcom cl_arithmetic_mode = MakeCLArithMode(cl_dtype);
int inputSize = input_.size();
auto output = MakeCLMLTensorFromJSONNode(node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);
cl_uint axis = std::stoi(node.GetAttr<std::vector<std::string>>("axis")[0]);
cl_ml_tensor_qcom* concatInputs = new cl_ml_tensor_qcom[inputSize];
for (int i = 0; i < inputSize; i++) {
auto input = MakeCLMLTensorFromJSONEntry(node.GetInputs()[i], {},
CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);
concatInputs[i] = input->tensor;
}
cl_ml_op_concat_desc_qcom concatDesc = {1, (cl_uint)inputSize, cl_arithmetic_mode};
cl_ml_op_concat_desc_qcom concatDesc = {axis, (cl_uint)inputSize, cl_arithmetic_mode};

result = h_ClmlIntf->clCreateMLOpConcatQCOM(workspace->context, 0, &concatDesc, concatInputs,
output->tensor, &op, tuning_cache);
Expand Down Expand Up @@ -1301,6 +1310,62 @@ class CLMLRuntime : public JSONRuntimeBase {
return output;
}

/*!
* \brief Create a DepthToSpace(X) layer.
*
* \param layer The CLML layer to build. Containing inputs, outputs and the CLML output.
* \param node The JSON representation of the operator.
*/
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> CreateDepthToSpaceLayer(
CachedLayer* layer, const JSONGraphNode& node) {
cl_int result = 0;
cl_ml_op_qcom op = NULL;
DLDataType tvm_dtype = node.GetOpDataType()[0];
cl_channel_type cl_dtype = MakeCLDataType(tvm_dtype);
cl_arithmetic_mode_qcom cl_arithmetic_mode = MakeCLArithMode(cl_dtype);
auto input = MakeCLMLTensorFromJSONEntry(node.GetInputs()[0], {}, CL_TENSOR_LAYOUT_OPTIMAL_QCOM,
cl_dtype);
auto output = MakeCLMLTensorFromJSONNode(node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);
cl_uint block_size = std::stoi(node.GetAttr<std::vector<std::string>>("block_size")[0]);

cl_ml_op_depthtospace_desc_qcom dtos_desc = {block_size, cl_arithmetic_mode};
result = h_ClmlIntf->clCreateMLOpDepthToSpaceQCOM(
workspace->context, 0, &dtos_desc, input->tensor, output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "DepthToSpace Layer Error:" << result;

layer_.func_ins.push_back(input);
layer->function.push_back(op);
return output;
}

/*!
* \brief Create a Resize(X) layer.
*
* \param layer The CLML layer to build. Containing inputs, outputs and the CLML output.
* \param node The JSON representation of the operator.
*/
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> CreateResizeLayer(CachedLayer* layer,
const JSONGraphNode& node) {
cl_int result = 0;
cl_ml_op_qcom op = NULL;
DLDataType tvm_dtype = node.GetOpDataType()[0];
cl_channel_type cl_dtype = MakeCLDataType(tvm_dtype);
cl_arithmetic_mode_qcom cl_arithmetic_mode = MakeCLArithMode(cl_dtype);
auto input = MakeCLMLTensorFromJSONEntry(node.GetInputs()[0], {}, CL_TENSOR_LAYOUT_OPTIMAL_QCOM,
cl_dtype);
auto output = MakeCLMLTensorFromJSONNode(node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);
cl_bool align_corners = std::stoi(node.GetAttr<std::vector<std::string>>("align_corners")[0]);

cl_ml_op_resize_bilinear_desc_qcom resize_desc = {align_corners, false, cl_arithmetic_mode};
result = h_ClmlIntf->clCreateMLOpResizeBilinearQCOM(
workspace->context, 0, &resize_desc, input->tensor, output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Resize Layer Error:" << result;

layer_.func_ins.push_back(input);
layer->function.push_back(op);
return output;
}

/*!
* \brief The network layers represented by acl functions.
* \note Currently only supports a single layer.
Expand Down
102 changes: 102 additions & 0 deletions tests/python/contrib/test_clml/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,5 +574,107 @@ def _verify(out, params, inputs):
_verify(*(_get_model((1, 16), relay.nn.relu)))


@pytest.mark.parametrize("dtype", ["float32", "float16"])
@tvm.testing.requires_openclml
def test_depth_to_space(device, dtype):
    """Compare nn.depth_to_space between the plain OpenCL and CLML paths."""

    def _build(shape, block_size):
        # Single-input model: depth_to_space over a random NCHW tensor.
        data = relay.var("a", shape=shape, dtype=dtype)
        expr = relay.nn.depth_to_space(data, block_size)
        feed = {"a": tvm.nd.array(np.random.uniform(-1, 1, shape).astype(dtype))}
        return expr, {}, feed

    def _check(expr, params, feed):
        mod = IRModule.from_expr(expr)
        # Reference run without CLML vs. the offloaded CLML run.
        opencl_out = build_and_run(mod, feed, 1, params, device, enable_clml=False)[0]
        clml_out = build_and_run(mod, feed, 1, params, device, enable_clml=True)[0]
        tvm.testing.assert_allclose(
            clml_out[0].asnumpy(), opencl_out[0].asnumpy(), rtol=1e-3, atol=1e-3
        )

        # Check to make sure these ops are offloaded to CLML instead of TVM.
        exp_codegen = [
            {
                "attrs": {
                    "dtype": [[dtype]],
                    "shape": [[list(feed["a"].shape)]],
                },
                "name": "",
                "op": "input",
            },
            {
                "attrs": {
                    "block_size": [[str(int(expr.attrs.block_size))]],
                    "layout": [["NCHW"]],
                    "mode": [["DCR"]],
                    "dtype": [[dtype]],
                    "num_inputs": "1",
                    "num_outputs": "1",
                    "shape": [[list(clml_out[0].shape)]],
                },
                "inputs": [[0, 0, 0]],
                "name": "nn.depth_to_space",
                "op": "kernel",
            },
        ]
        verify_codegen(expr, exp_codegen, device, params)

    for shape, block in (((1, 64, 8, 8), 4), ((1, 64, 8, 8), 8)):
        _check(*_build(shape, block))


@pytest.mark.parametrize("dtype", ["float32", "float16"])
@tvm.testing.requires_openclml
def test_resize_bilinear(device, dtype):
    """End-to-end check of nn.upsampling (bilinear) offload to CLML."""

    def _get_model(a_shape, scale, align_corners):
        # Bilinear only: the CLML partitioner rejects other resize methods.
        a = relay.var("a", shape=(a_shape), dtype=dtype)
        out = relay.nn.upsampling(
            a, scale_h=scale[0], scale_w=scale[1], method="bilinear", align_corners=align_corners
        )
        inputs = {"a": tvm.nd.array(np.random.uniform(-1, 1, a_shape).astype(dtype))}
        params = {}
        return out, params, inputs

    def _verify(out, params, inputs):
        mod = IRModule.from_expr(out)
        # Reference run without CLML vs. the offloaded CLML run.
        opencl_out = build_and_run(mod, inputs, 1, params, device, enable_clml=False)[0]
        clml_out = build_and_run(mod, inputs, 1, params, device, enable_clml=True)[0]
        tvm.testing.assert_allclose(
            clml_out[0].asnumpy(), opencl_out[0].asnumpy(), rtol=1e-3, atol=1e-3
        )

        # Check to make sure these ops are offloaded to CLML instead of TVM.
        # NOTE: keys/values must match the CLML JSON codegen output exactly.
        exp_codegen = [
            {
                "attrs": {
                    "dtype": [[dtype]],
                    "shape": [[list(inputs["a"].shape)]],
                },
                "name": "",
                "op": "input",
            },
            {
                "attrs": {
                    "scale_h": [[str(int(out.attrs.scale_h))]],
                    "scale_w": [[str(int(out.attrs.scale_w))]],
                    "layout": [["NCHW"]],
                    "method": [[out.attrs.method]],
                    "align_corners": [[str(out.attrs.align_corners)]],
                    "dtype": [[dtype]],
                    "num_inputs": "1",
                    "num_outputs": "1",
                    "shape": [[list(clml_out[0].shape)]],
                },
                "inputs": [[0, 0, 0]],
                "name": "nn.upsampling",
                "op": "kernel",
            },
        ]
        verify_codegen(out, exp_codegen, device, params)

    # Even and odd spatial sizes; odd + align_corners exercises edge sampling.
    _verify(*(_get_model((1, 16, 8, 8), (2, 2), False)))
    _verify(*(_get_model((1, 16, 7, 7), (2, 2), True)))


if __name__ == "__main__":
tvm.testing.main()