From dd16798faea6b6f6ee225384a208a3d80205f806 Mon Sep 17 00:00:00 2001 From: valmat07 Date: Thu, 24 Nov 2022 12:35:51 +0000 Subject: [PATCH 01/12] introduced support for weights on buffers --- python/tvm/topi/adreno/conv2d_nchw.py | 5 +- python/tvm/topi/adreno/conv2d_nhwc.py | 5 +- .../tvm/topi/adreno/depthwise_conv2d_nchw.py | 5 +- .../tvm/topi/adreno/depthwise_conv2d_nhwc.py | 5 +- .../transforms/annotate_texture_storage.cc | 79 ++++++++++++++----- 5 files changed, 72 insertions(+), 27 deletions(-) diff --git a/python/tvm/topi/adreno/conv2d_nchw.py b/python/tvm/topi/adreno/conv2d_nchw.py index b1f229ebe5dc..bd128ed7bf75 100644 --- a/python/tvm/topi/adreno/conv2d_nchw.py +++ b/python/tvm/topi/adreno/conv2d_nchw.py @@ -305,8 +305,9 @@ def schedule_conv2d_NCHWc_KCRSk(cfg, s, output): if autotvm.GLOBAL_SCOPE.in_tuning or filter_pack_rt: if not autotvm.GLOBAL_SCOPE.in_tuning: bind_data_copy(s[kernel]) - WT = s.cache_read(kernel, get_texture_storage(kernel.shape), [conv]) - bind_data_copy(s[WT]) + if kernel.shape[2] == 1 and kernel.shape[3] == 1: + WT = s.cache_read(kernel, get_texture_storage(kernel.shape), [conv]) + bind_data_copy(s[WT]) s[conv].set_scope("local") if latest_blocked == latest and output != latest: diff --git a/python/tvm/topi/adreno/conv2d_nhwc.py b/python/tvm/topi/adreno/conv2d_nhwc.py index 644978743b4d..e391495b5384 100644 --- a/python/tvm/topi/adreno/conv2d_nhwc.py +++ b/python/tvm/topi/adreno/conv2d_nhwc.py @@ -303,8 +303,9 @@ def schedule_conv2d_NHWC(cfg, s, output): if autotvm.GLOBAL_SCOPE.in_tuning or filter_pack_rt: if not autotvm.GLOBAL_SCOPE.in_tuning: bind_data_copy(s[kernel]) - WT = s.cache_read(kernel, get_texture_storage(kernel.shape), [conv]) - bind_data_copy(s[WT]) + if kernel.shape[0] == 1 and kernel.shape[1] == 1: + WT = s.cache_read(kernel, get_texture_storage(kernel.shape), [conv]) + bind_data_copy(s[WT]) s[conv].set_scope("local") if latest_blocked == latest and output != latest: diff --git a/python/tvm/topi/adreno/depthwise_conv2d_nchw.py b/python/tvm/topi/adreno/depthwise_conv2d_nchw.py index 8549399fb0d0..7fae354dee0e 100644 --- a/python/tvm/topi/adreno/depthwise_conv2d_nchw.py +++ b/python/tvm/topi/adreno/depthwise_conv2d_nchw.py @@ -254,8 +254,9 @@ def schedule_depthwise_conv2d_NCHWc_KCRSk(cfg, s, output): # create cache stage for tuning only or in case of 4d case AT = s.cache_read(pad_data, get_texture_storage(pad_data.shape), [conv]) bind_data_copy(s[AT]) - WT = s.cache_read(kernel, get_texture_storage(kernel.shape), [conv]) - bind_data_copy(s[WT]) + if kernel.shape[2] == 1 and kernel.shape[3] == 1: + WT = s.cache_read(kernel, get_texture_storage(kernel.shape), [conv]) + bind_data_copy(s[WT]) # tile and bind spatial axes n, fc, y, x, fb = s[latest_blocked].op.axis diff --git a/python/tvm/topi/adreno/depthwise_conv2d_nhwc.py b/python/tvm/topi/adreno/depthwise_conv2d_nhwc.py index 82e128443e85..f224fe3c88dc 100644 --- a/python/tvm/topi/adreno/depthwise_conv2d_nhwc.py +++ b/python/tvm/topi/adreno/depthwise_conv2d_nhwc.py @@ -250,8 +250,9 @@ def schedule_depthwise_conv2d_NHWC_HWOI(cfg, s, output): # create cache stage for tuning only or in case of 4d case AT = s.cache_read(pad_data, get_texture_storage(pad_data.shape), [conv]) bind_data_copy(s[AT]) - WT = s.cache_read(kernel, get_texture_storage(kernel.shape), [conv]) - bind_data_copy(s[WT]) + if kernel.shape[0] == 1 and kernel.shape[1] == 1: + WT = s.cache_read(kernel, get_texture_storage(kernel.shape), [conv]) + bind_data_copy(s[WT]) # tile and bind spatial axes n, y, x, fc, fb = s[latest_blocked].op.axis diff --git a/src/relay/transforms/annotate_texture_storage.cc b/src/relay/transforms/annotate_texture_storage.cc index 277c5e1da424..6f545d707a5a 100644 --- a/src/relay/transforms/annotate_texture_storage.cc +++ b/src/relay/transforms/annotate_texture_storage.cc @@ -90,25 +90,33 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { for (const auto& a : storage_info.args_to_vars_) { if (storage_map.count(a.first)) { for (const auto& v : a.second) { - storage_map.Set(v, storage_map[a.first]); - if (storage_map[a.first][Expr()][0] == "global" && - storage_info.accept_textures_.count(v)) { - Map> ent; - ent.Set(Expr(), storage_info.accept_textures_[v][Expr()]); - storage_map.Set(v, ent); - for (const auto& calls : storage_info.accept_textures_[v]) { - if (calls.first != Expr()) { - if (storage_map.count(a.first)) { - Map> ent_call = storage_map[a.first]; - ent_call.Set(calls.first, calls.second); - storage_map.Set(a.first, ent_call); - } else { - Map> ent_call; - ent_call.Set(calls.first, calls.second); - storage_map.Set(a.first, ent_call); + if (std::find(storage_info.const_to_buffers.begin(), storage_info.const_to_buffers.end(), v) != storage_info.const_to_buffers.end()) + { + Map> ent; + ent.Set(Expr(), Array{"global"}); + storage_map.Set(v, ent); + } + else + storage_map.Set(v, storage_map[a.first]); + + if (storage_map[a.first][Expr()][0] == "global" && + storage_info.accept_textures_.count(v)) { + Map> ent; + ent.Set(Expr(), storage_info.accept_textures_[v][Expr()]); + storage_map.Set(v, ent); + for (const auto& calls : storage_info.accept_textures_[v]) { + if (calls.first != Expr()) { + if (storage_map.count(a.first)) { + Map> ent_call = storage_map[a.first]; + ent_call.Set(calls.first, calls.second); + storage_map.Set(a.first, ent_call); + } else { + Map> ent_call; + ent_call.Set(calls.first, calls.second); + storage_map.Set(a.first, ent_call); + } } } - } } } } @@ -177,6 +185,12 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { // adding info about arguments if they can be converted to texture for (const auto& ttype : FlattenTupleType(fn->params[i]->checked_type())) { std::string scope = Scope(ttype->shape, GetVirtualDevice(GetRef(call))); + if (CanUseBuffers(call->args[i], ttype->shape, fn->attrs)) + { + const_to_buffers.push_back(fn->params[i]); + const_to_buffers_args.push_back(call->args[i]); + scope = "global"; + } if (scope.find("global.texture") != std::string::npos) { if (accept_textures_.count(fn->params[i])) { Map> ent = accept_textures_[fn->params[i]]; @@ -192,7 +206,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { } } } - } + } // Add consumer storage scope information for call arguments for (auto& arg : call->args) { if (storage_scope_.count(call)) { @@ -211,7 +225,8 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { } for (auto& arg : call->args) { - Visit(arg); + if (std::find(const_to_buffers_args.begin(), const_to_buffers_args.end(), arg) == const_to_buffers_args.end()) + Visit(arg); } // We have all callees filled into storage_scope_ if they support textures // We need to verify if this call expects texture and if it does not, remove from @@ -410,6 +425,29 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { return supports_texture_storage; } + bool CanUseBuffers(const Expr param, const Array shape, const tvm::DictAttrs param_attrs) const { + bool use_buffer = false; + int a0 = shape[0].as()->value; + int a1 = shape[1].as()->value; + int a2 = shape[2].as()->value; + int a3 = shape[3].as()->value; + if (param.as()) { + auto kernel_layout = param_attrs.GetAttr("kernel_layout"); + if (kernel_layout == "HWOI4o" || kernel_layout == "HWIO4o") + { + if (a0 != 1 && a1 != 1) + use_buffer = true; + } + else if (kernel_layout == "OIHW4o") + { + if (a0 != 1 && a2 != 1 && a3 != 1) + use_buffer = true; + } + } + + return use_buffer; + } + /*! \brief Temporary state for marking whether a visited function * primitive supports texture storage scope */ bool primitive_supports_texture_ = false; @@ -421,6 +459,9 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { std::unordered_map, ObjectPtrHash, ObjectPtrEqual> args_to_vars_; /*! \brief mapping of arguments that can be converted to texture*/ Map>> accept_textures_; + + std::vector const_to_buffers; + std::vector const_to_buffers_args; }; } // namespace From a157d0632c5278ea568e5dfc490b807a3576e620 Mon Sep 17 00:00:00 2001 From: valmat07 Date: Thu, 24 Nov 2022 16:29:52 +0000 Subject: [PATCH 02/12] update winograd schedule for support buffers on weights --- python/tvm/topi/adreno/conv2d_winograd_common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/tvm/topi/adreno/conv2d_winograd_common.py b/python/tvm/topi/adreno/conv2d_winograd_common.py index 8c62f11c2fe5..3f8d86f720ee 100644 --- a/python/tvm/topi/adreno/conv2d_winograd_common.py +++ b/python/tvm/topi/adreno/conv2d_winograd_common.py @@ -451,6 +451,7 @@ def schedule_conv2d_winograd(cfg, s, output, pre_computed): autotvm.GLOBAL_SCOPE.in_tuning or isinstance(kernel.op, tvm.te.ComputeOp) and "filter_pack" in kernel.op.tag + and kernel.shape[2] == 1 and kernel.shape[3] == 1 ): BB = s.cache_read(kernel_pack, get_texture_storage(kernel_pack.shape), [OL]) bind_data_copy(s[BB]) From 2af385efce722d9607878a8d69297b2968429c29 Mon Sep 17 00:00:00 2001 From: valmat07 Date: Tue, 29 Nov 2022 11:26:40 +0000 Subject: [PATCH 03/12] Updated the logic of using buffers --- .../transforms/annotate_texture_storage.cc | 46 +++++++++++-------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/src/relay/transforms/annotate_texture_storage.cc b/src/relay/transforms/annotate_texture_storage.cc index 6f545d707a5a..58f041b504cd 100644 --- a/src/relay/transforms/annotate_texture_storage.cc +++ b/src/relay/transforms/annotate_texture_storage.cc @@ -90,15 +90,15 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { for (const auto& a : storage_info.args_to_vars_) { if (storage_map.count(a.first)) { for (const auto& v : a.second) { - if (std::find(storage_info.const_to_buffers.begin(), storage_info.const_to_buffers.end(), v) != storage_info.const_to_buffers.end()) - { - Map> ent; - ent.Set(Expr(), Array{"global"}); - storage_map.Set(v, ent); - } - else - storage_map.Set(v, storage_map[a.first]); - + if (std::find(storage_info.const_to_buffers.begin(), storage_info.const_to_buffers.end(), v) != storage_info.const_to_buffers.end()) + { + Map> ent; + ent.Set(Expr(), Array{"global"}); + storage_map.Set(v, ent); + } + else + { + storage_map.Set(v, storage_map[a.first]); if (storage_map[a.first][Expr()][0] == "global" && storage_info.accept_textures_.count(v)) { Map> ent; @@ -117,6 +117,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { } } } + } } } } @@ -185,11 +186,14 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { // adding info about arguments if they can be converted to texture for (const auto& ttype : FlattenTupleType(fn->params[i]->checked_type())) { std::string scope = Scope(ttype->shape, GetVirtualDevice(GetRef(call))); - if (CanUseBuffers(call->args[i], ttype->shape, fn->attrs)) + if (expr_attrib[Expr()].as() || expr_attrib[Expr()].as()) { - const_to_buffers.push_back(fn->params[i]); - const_to_buffers_args.push_back(call->args[i]); - scope = "global"; + if ((i == 1) && CanUseBuffers(call->args[i], ttype->shape, fn->attrs)) + { + const_to_buffers.push_back(fn->params[i]); + const_to_buffers_args.push_back(call->args[i]); + scope = "global"; + } } if (scope.find("global.texture") != std::string::npos) { if (accept_textures_.count(fn->params[i])) { @@ -219,8 +223,8 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { } } } - if (!primitive_supports_texture_) { + expr_attrib.Set(Expr(), call->attrs); primitive_supports_texture_ = SupportsTextureStorage(call); } @@ -378,6 +382,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { } bool SupportsTextureStorage(const CallNode* call) const { + bool supports_texture_storage = false; // we need to verify only entry functions since one of entry op defines main schedule for (const auto& arg : call->args) { @@ -427,20 +432,20 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { bool CanUseBuffers(const Expr param, const Array shape, const tvm::DictAttrs param_attrs) const { bool use_buffer = false; - int a0 = shape[0].as()->value; - int a1 = shape[1].as()->value; - int a2 = shape[2].as()->value; - int a3 = shape[3].as()->value; - if (param.as()) { + if (param.as() && shape.size() == 5) { auto kernel_layout = param_attrs.GetAttr("kernel_layout"); if (kernel_layout == "HWOI4o" || kernel_layout == "HWIO4o") { + int a0 = shape[0].as()->value; + int a1 = shape[1].as()->value; if (a0 != 1 && a1 != 1) use_buffer = true; } else if (kernel_layout == "OIHW4o") { - if (a0 != 1 && a2 != 1 && a3 != 1) + int a2 = shape[2].as()->value; + int a3 = shape[3].as()->value; + if (a2 != 1 && a3 != 1) use_buffer = true; } } @@ -460,6 +465,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { /*! \brief mapping of arguments that can be converted to texture*/ Map>> accept_textures_; + Map expr_attrib; std::vector const_to_buffers; std::vector const_to_buffers_args; }; From b4aa16b41fbe2bdda2458b80c7f8f3496f2b03ed Mon Sep 17 00:00:00 2001 From: valmat07 Date: Wed, 30 Nov 2022 12:04:24 +0000 Subject: [PATCH 04/12] Update texture annotation pass --- src/relay/transforms/annotate_texture_storage.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/relay/transforms/annotate_texture_storage.cc b/src/relay/transforms/annotate_texture_storage.cc index 58f041b504cd..04de67a8872c 100644 --- a/src/relay/transforms/annotate_texture_storage.cc +++ b/src/relay/transforms/annotate_texture_storage.cc @@ -186,7 +186,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { // adding info about arguments if they can be converted to texture for (const auto& ttype : FlattenTupleType(fn->params[i]->checked_type())) { std::string scope = Scope(ttype->shape, GetVirtualDevice(GetRef(call))); - if (expr_attrib[Expr()].as() || expr_attrib[Expr()].as()) + if (expr_attrib.as() || expr_attrib.as()) { if ((i == 1) && CanUseBuffers(call->args[i], ttype->shape, fn->attrs)) { @@ -224,7 +224,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { } } if (!primitive_supports_texture_) { - expr_attrib.Set(Expr(), call->attrs); + expr_attrib = call->attrs; primitive_supports_texture_ = SupportsTextureStorage(call); } @@ -465,7 +465,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { /*! \brief mapping of arguments that can be converted to texture*/ Map>> accept_textures_; - Map expr_attrib; + tvm::Attrs expr_attrib; std::vector const_to_buffers; std::vector const_to_buffers_args; }; From 3422d239ac4084f03251df85056fb5431ce8ecfe Mon Sep 17 00:00:00 2001 From: valmat07 Date: Mon, 5 Dec 2022 15:45:05 +0000 Subject: [PATCH 05/12] now weights on buffers support only in fp32 case --- .../transforms/annotate_texture_storage.cc | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/relay/transforms/annotate_texture_storage.cc b/src/relay/transforms/annotate_texture_storage.cc index 04de67a8872c..084ba630b8ac 100644 --- a/src/relay/transforms/annotate_texture_storage.cc +++ b/src/relay/transforms/annotate_texture_storage.cc @@ -90,7 +90,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { for (const auto& a : storage_info.args_to_vars_) { if (storage_map.count(a.first)) { for (const auto& v : a.second) { - if (std::find(storage_info.const_to_buffers.begin(), storage_info.const_to_buffers.end(), v) != storage_info.const_to_buffers.end()) + if (std::find(storage_info.buffers_params.begin(), storage_info.buffers_params.end(), v) != storage_info.buffers_params.end()) { Map> ent; ent.Set(Expr(), Array{"global"}); @@ -188,10 +188,12 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { std::string scope = Scope(ttype->shape, GetVirtualDevice(GetRef(call))); if (expr_attrib.as() || expr_attrib.as()) { - if ((i == 1) && CanUseBuffers(call->args[i], ttype->shape, fn->attrs)) + if ((i == 1) && + !ttype->dtype.is_float16() && + CanUseBuffers(call->args[i], ttype->shape, fn->attrs)) { - const_to_buffers.push_back(fn->params[i]); - const_to_buffers_args.push_back(call->args[i]); + buffers_params.push_back(fn->params[i]); + buffers_args.push_back(call->args[i]); scope = "global"; } } @@ -229,7 +231,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { } for (auto& arg : call->args) { - if (std::find(const_to_buffers_args.begin(), const_to_buffers_args.end(), arg) == const_to_buffers_args.end()) + if (std::find(buffers_args.begin(), buffers_args.end(), arg) == buffers_args.end()) Visit(arg); } // We have all callees filled into storage_scope_ if they support textures @@ -464,10 +466,12 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { std::unordered_map, ObjectPtrHash, ObjectPtrEqual> args_to_vars_; /*! \brief mapping of arguments that can be converted to texture*/ Map>> accept_textures_; - + /*! \brief main attribute for expression*/ tvm::Attrs expr_attrib; - std::vector const_to_buffers; - std::vector const_to_buffers_args; + /*! \brief parameters that filter out from storage_map to use buffers*/ + std::vector buffers_params; + /*! \brief arguments in expression that will use buffers*/ + std::vector buffers_args; }; } // namespace From c3bf0f96b21b7f28134b5154f9ce310591479f6b Mon Sep 17 00:00:00 2001 From: valmat07 Date: Tue, 6 Dec 2022 19:27:20 +0300 Subject: [PATCH 06/12] update opencl tests for weights on buffers --- .../test_conv2d_nchw_texture.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py b/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py index a0ca8423478e..d62cd9df7308 100644 --- a/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py +++ b/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py @@ -592,12 +592,12 @@ def test_residual_block(remote, target, dtype): static_memory_scope = [ "global", "global.texture", - "global.texture-weight", + "global", "global.texture-weight", "global.texture", "global.texture-weight", "global.texture", - "global.texture-weight", + "global", "", "", ] @@ -834,11 +834,11 @@ def test_pooling_branching_texture_params(remote, target, dtype): "global.texture-weight", "global.texture", "global.texture", - "global.texture-weight", + "global", "global.texture-weight", "global.texture-weight", "global.texture", - "global.texture-weight", + "global", "global.texture", "", "", @@ -960,11 +960,11 @@ def test_branching_texture_params(remote, target, dtype): "global.texture", "global.texture-weight", "global.texture", - "global.texture-weight", + "global", "global.texture-weight", "global.texture-weight", "global.texture", - "global.texture-weight", + "global", "global.texture", "", "", @@ -1179,9 +1179,9 @@ def test_injective_nwo_inputs1(remote, target, dtype): static_memory_scope = [ "global", "global.texture", - "global.texture-nhwc", + "global", "global.texture", - "global.texture-nhwc", + "global", "global.texture", "global", "global", @@ -1277,10 +1277,10 @@ def test_injective_nwo_inputs2(remote, target, dtype): static_memory_scope = [ "global", "global.texture", - "global.texture-nhwc", + "global", "global.texture", "global", - "global.texture-nhwc", + "global", "global.texture", "global", ] From ae49139ca6d2c23687cd12c34c4919bf70492e95 Mon Sep 17 00:00:00 2001 From: valmat07 Date: Tue, 6 Dec 2022 20:11:41 +0300 Subject: [PATCH 07/12] fix lint --- python/tvm/topi/adreno/conv2d_winograd_common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/tvm/topi/adreno/conv2d_winograd_common.py b/python/tvm/topi/adreno/conv2d_winograd_common.py index 3f8d86f720ee..3ff64295c6e0 100644 --- a/python/tvm/topi/adreno/conv2d_winograd_common.py +++ b/python/tvm/topi/adreno/conv2d_winograd_common.py @@ -451,7 +451,8 @@ def schedule_conv2d_winograd(cfg, s, output, pre_computed): autotvm.GLOBAL_SCOPE.in_tuning or isinstance(kernel.op, tvm.te.ComputeOp) and "filter_pack" in kernel.op.tag - and kernel.shape[2] == 1 and kernel.shape[3] == 1 + and kernel.shape[2] == 1 + and kernel.shape[3] == 1 ): BB = s.cache_read(kernel_pack, get_texture_storage(kernel_pack.shape), [OL]) bind_data_copy(s[BB]) From 4343cc2581cfe57fc2991e2166baf38aad2f340b Mon Sep 17 00:00:00 2001 From: valmat07 Date: Tue, 6 Dec 2022 20:21:08 +0300 Subject: [PATCH 08/12] fix lint --- python/tvm/topi/adreno/conv2d_winograd_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/topi/adreno/conv2d_winograd_common.py b/python/tvm/topi/adreno/conv2d_winograd_common.py index 3ff64295c6e0..d10acb73123d 100644 --- a/python/tvm/topi/adreno/conv2d_winograd_common.py +++ b/python/tvm/topi/adreno/conv2d_winograd_common.py @@ -451,7 +451,7 @@ def schedule_conv2d_winograd(cfg, s, output, pre_computed): autotvm.GLOBAL_SCOPE.in_tuning or isinstance(kernel.op, tvm.te.ComputeOp) and "filter_pack" in kernel.op.tag - and kernel.shape[2] == 1 + and kernel.shape[2] == 1 and kernel.shape[3] == 1 ): BB = s.cache_read(kernel_pack, get_texture_storage(kernel_pack.shape), [OL]) From f29093ac457f45d04dd9356ed6b0b2688c3f64f5 Mon Sep 17 00:00:00 2001 From: valmat07 Date: Tue, 6 Dec 2022 20:39:54 +0300 Subject: [PATCH 09/12] fix lint --- src/relay/transforms/annotate_texture_storage.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/relay/transforms/annotate_texture_storage.cc b/src/relay/transforms/annotate_texture_storage.cc index 084ba630b8ac..1aac0255c04f 100644 --- a/src/relay/transforms/annotate_texture_storage.cc +++ b/src/relay/transforms/annotate_texture_storage.cc @@ -186,7 +186,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { // adding info about arguments if they can be converted to texture for (const auto& ttype : FlattenTupleType(fn->params[i]->checked_type())) { std::string scope = Scope(ttype->shape, GetVirtualDevice(GetRef(call))); - if (expr_attrib.as() || expr_attrib.as()) + if (expr_attrib.as() || expr_attrib.as()) { if ((i == 1) && !ttype->dtype.is_float16() && @@ -212,7 +212,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { } } } - } + } // Add consumer storage scope information for call arguments for (auto& arg : call->args) { if (storage_scope_.count(call)) { @@ -434,7 +434,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { bool CanUseBuffers(const Expr param, const Array shape, const tvm::DictAttrs param_attrs) const { bool use_buffer = false; - if (param.as() && shape.size() == 5) { + if (param.as() && shape.size() == 5){ auto kernel_layout = param_attrs.GetAttr("kernel_layout"); if (kernel_layout == "HWOI4o" || kernel_layout == "HWIO4o") { @@ -451,7 +451,6 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { use_buffer = true; } } - return use_buffer; } From f428267b6c08b3eab70c36ce9bc1afb0131b840f Mon Sep 17 00:00:00 2001 From: valmat07 Date: Wed, 7 Dec 2022 16:24:39 +0300 Subject: [PATCH 10/12] apply comments --- .../transforms/annotate_texture_storage.cc | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/relay/transforms/annotate_texture_storage.cc b/src/relay/transforms/annotate_texture_storage.cc index 1aac0255c04f..81573ceb4be2 100644 --- a/src/relay/transforms/annotate_texture_storage.cc +++ b/src/relay/transforms/annotate_texture_storage.cc @@ -41,6 +41,7 @@ #include #include +#include #include "../op/memory/device_copy.h" #include "../op/memory/memory.h" @@ -90,7 +91,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { for (const auto& a : storage_info.args_to_vars_) { if (storage_map.count(a.first)) { for (const auto& v : a.second) { - if (std::find(storage_info.buffers_params.begin(), storage_info.buffers_params.end(), v) != storage_info.buffers_params.end()) + if (storage_info.buffers_params.find(v) != storage_info.buffers_params.end()) { Map> ent; ent.Set(Expr(), Array{"global"}); @@ -181,6 +182,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { storage_scope_[call].push_back("global.texture"); } } + const int weights_pos = 1; for (size_t i = 0; i < fn->params.size(); i++) { args_to_vars_[call->args[i]].push_back(fn->params[i]); // adding info about arguments if they can be converted to texture @@ -188,12 +190,12 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { std::string scope = Scope(ttype->shape, GetVirtualDevice(GetRef(call))); if (expr_attrib.as() || expr_attrib.as()) { - if ((i == 1) && + if ((i == weights_pos) && !ttype->dtype.is_float16() && CanUseBuffers(call->args[i], ttype->shape, fn->attrs)) { - buffers_params.push_back(fn->params[i]); - buffers_args.push_back(call->args[i]); + buffers_params.insert(fn->params[i]); + buffers_args.insert(call->args[i]); scope = "global"; } } @@ -231,7 +233,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { } for (auto& arg : call->args) { - if (std::find(buffers_args.begin(), buffers_args.end(), arg) == buffers_args.end()) + if (buffers_args.find(arg) == buffers_args.end()) Visit(arg); } // We have all callees filled into storage_scope_ if they support textures @@ -434,7 +436,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { bool CanUseBuffers(const Expr param, const Array shape, const tvm::DictAttrs param_attrs) const { bool use_buffer = false; - if (param.as() && shape.size() == 5){ + if (param.as() && shape.size() == 5) { auto kernel_layout = param_attrs.GetAttr("kernel_layout"); if (kernel_layout == "HWOI4o" || kernel_layout == "HWIO4o") { @@ -468,9 +470,9 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { /*! \brief main attribute for expression*/ tvm::Attrs expr_attrib; /*! \brief parameters that filter out from storage_map to use buffers*/ - std::vector buffers_params; + std::unordered_set buffers_params; /*! \brief arguments in expression that will use buffers*/ - std::vector buffers_args; + std::unordered_set buffers_args; }; } // namespace From 3d62ce484a1bdf588fb6729dd0169a395a00c416 Mon Sep 17 00:00:00 2001 From: valmat07 Date: Wed, 7 Dec 2022 17:53:43 +0300 Subject: [PATCH 11/12] fix lint --- .../transforms/annotate_texture_storage.cc | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/relay/transforms/annotate_texture_storage.cc b/src/relay/transforms/annotate_texture_storage.cc index 3b30d097da96..543b0d858457 100644 --- a/src/relay/transforms/annotate_texture_storage.cc +++ b/src/relay/transforms/annotate_texture_storage.cc @@ -91,14 +91,11 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { for (const auto& a : storage_info.args_to_vars_) { if (storage_map.count(a.first)) { for (const auto& v : a.second) { - if (storage_info.buffers_params.find(v) != storage_info.buffers_params.end()) - { + if (storage_info.buffers_params.find(v) != storage_info.buffers_params.end()) { Map> ent; ent.Set(Expr(), Array{"global"}); storage_map.Set(v, ent); - } - else - { + } else { storage_map.Set(v, storage_map[a.first]); if (storage_map[a.first][Expr()][0] == "global" && storage_info.accept_textures_.count(v)) { @@ -176,12 +173,10 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { // adding info about arguments if they can be converted to texture for (const auto& ttype : FlattenTupleType(fn->params[i]->checked_type())) { std::string scope = Scope(ttype->shape, GetVirtualDevice(GetRef(call))); - if (expr_attrib.as() || expr_attrib.as()) - { + if (expr_attrib.as() || expr_attrib.as()) { if ((i == weights_pos) && !ttype->dtype.is_float16() && - CanUseBuffers(call->args[i], ttype->shape, fn->attrs)) - { + CanUseBuffers(call->args[i], ttype->shape, fn->attrs)) { buffers_params.insert(fn->params[i]); buffers_args.insert(call->args[i]); scope = "global"; @@ -374,7 +369,6 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { } bool SupportsTextureStorage(const CallNode* call) const { - bool supports_texture_storage = false; // we need to verify only entry functions since one of entry op defines main schedule for (const auto& arg : call->args) { @@ -422,19 +416,17 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { return supports_texture_storage; } - bool CanUseBuffers(const Expr param, const Array shape, const tvm::DictAttrs param_attrs) const { + bool CanUseBuffers(const Expr param, const Array shape, + const tvm::DictAttrs param_attrs) const { bool use_buffer = false; if (param.as() && shape.size() == 5) { auto kernel_layout = param_attrs.GetAttr("kernel_layout"); - if (kernel_layout == "HWOI4o" || kernel_layout == "HWIO4o") - { + if (kernel_layout == "HWOI4o" || kernel_layout == "HWIO4o") { int a0 = shape[0].as()->value; int a1 = shape[1].as()->value; if (a0 != 1 && a1 != 1) use_buffer = true; - } - else if (kernel_layout == "OIHW4o") - { + } else if (kernel_layout == "OIHW4o") { int a2 = shape[2].as()->value; int a3 = shape[3].as()->value; if (a2 != 1 && a3 != 1) From 5b12f5bece5370fecde9d0447674ce35167b7ad8 Mon Sep 17 00:00:00 2001 From: valmat07 Date: Wed, 7 Dec 2022 19:12:07 +0300 Subject: [PATCH 12/12] fix lint --- .../transforms/annotate_texture_storage.cc | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/relay/transforms/annotate_texture_storage.cc b/src/relay/transforms/annotate_texture_storage.cc index 543b0d858457..9dbd631ad32d 100644 --- a/src/relay/transforms/annotate_texture_storage.cc +++ b/src/relay/transforms/annotate_texture_storage.cc @@ -174,8 +174,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { for (const auto& ttype : FlattenTupleType(fn->params[i]->checked_type())) { std::string scope = Scope(ttype->shape, GetVirtualDevice(GetRef(call))); if (expr_attrib.as() || expr_attrib.as()) { - if ((i == weights_pos) && - !ttype->dtype.is_float16() && + if ((i == weights_pos) && !ttype->dtype.is_float16() && CanUseBuffers(call->args[i], ttype->shape, fn->attrs)) { buffers_params.insert(fn->params[i]); buffers_args.insert(call->args[i]); @@ -216,8 +215,9 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { } for (auto& arg : call->args) { - if (buffers_args.find(arg) == buffers_args.end()) + if (buffers_args.find(arg) == buffers_args.end()) { Visit(arg); + } } // We have all callees filled into storage_scope_ if they support textures // We need to verify if this call expects texture and if it does not, remove from @@ -420,18 +420,20 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { const tvm::DictAttrs param_attrs) const { bool use_buffer = false; if (param.as() && shape.size() == 5) { - auto kernel_layout = param_attrs.GetAttr("kernel_layout"); - if (kernel_layout == "HWOI4o" || kernel_layout == "HWIO4o") { - int a0 = shape[0].as()->value; - int a1 = shape[1].as()->value; - if (a0 != 1 && a1 != 1) - use_buffer = true; - } else if (kernel_layout == "OIHW4o") { - int a2 = shape[2].as()->value; - int a3 = shape[3].as()->value; - if (a2 != 1 && a3 != 1) - use_buffer = true; + auto kernel_layout = param_attrs.GetAttr("kernel_layout"); + if (kernel_layout == "HWOI4o" || kernel_layout == "HWIO4o") { + int a0 = shape[0].as()->value; + int a1 = shape[1].as()->value; + if (a0 != 1 && a1 != 1) { + use_buffer = true; + } + } else if (kernel_layout == "OIHW4o") { + int a2 = shape[2].as()->value; + int a3 = shape[3].as()->value; + if (a2 != 1 && a3 != 1) { + use_buffer = true; } + } } return use_buffer; }