From aea4d957f0e146cc859ba23a62da952932b74503 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Sat, 27 Feb 2021 22:31:40 -0800 Subject: [PATCH 01/19] Add Adreno specific impl. of CollectStorageInfo. --- .../transforms/adreno_memory_annotation.cc | 130 ++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 src/relay/transforms/adreno_memory_annotation.cc diff --git a/src/relay/transforms/adreno_memory_annotation.cc b/src/relay/transforms/adreno_memory_annotation.cc new file mode 100644 index 000000000000..fb386f055ecf --- /dev/null +++ b/src/relay/transforms/adreno_memory_annotation.cc @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file adreno_memory_annotation.cc + * \brief Adreno-specific implementation of CollectStorageInfo. + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace tvm { +namespace relay { + +class StorageInfo { + public: + static Map GetStorageMap(const Expr& expr) { + StorageInfo storage_info; + storage_info.pre_visitor_ = PreDfsOrderVisitor(); + storage_info.pre_visitor_.Visit(expr); + for (auto& it : storage_info.pre_visitor_.storage_scope_) { + storage_info.storage_map_.Set(GetRef(it.first), String(it.second)); + } + return storage_info.storage_map_; + } + + private: + class PreDfsOrderVisitor : private ExprVisitor { + public: + void Visit(const Expr& expr) { + if (const auto* fn = expr.as()) { + this->VisitExpr(fn->body); + for (const auto& param : fn->params) { + this->VisitExpr(param); + } + } else { + this->VisitExpr(expr); + } + } + + private: + void BackwardPropagateConsumerScope(const ExprNode* expr) { + auto consumer_scopes_it = consumer_storage_scopes_.find(expr); + if (consumer_scopes_it != consumer_storage_scopes_.end()) + { + bool all_consumers_support_texture = true; + for (auto& consumer_scope : consumer_scopes_it->second) { + if (consumer_scope != "texture") { + all_consumers_support_texture = false; + } + } + if (all_consumers_support_texture) + { + storage_scope_[expr] = "texture"; + } + } + } + + void VisitExpr_(const ConstantNode* cn) final { + BackwardPropagateConsumerScope(cn); + } + + void VisitExpr_(const CallNode* call) final { + // Check the contents of this primitive function + if (const auto* fn = call->op.as()) { + if (fn->HasNonzeroAttr(attr::kPrimitive)) { + primitive_supports_texture_ = false; + Visit(call->op); + if (primitive_supports_texture_) + { + storage_scope_[call] = "texture"; + for (auto& arg : call->args) { + consumer_storage_scopes_[arg.get()].push_back("texture"); + } + } + } + } + + if (call->attrs.as()) { + primitive_supports_texture_ = true; + } + for (auto& arg : call->args) { + Visit(arg); + } + } + + void VisitExpr_(const 
VarNode* vn) final { + BackwardPropagateConsumerScope(vn); + } + + bool primitive_supports_texture_ = false; + std::unordered_map storage_scope_; + std::unordered_map> consumer_storage_scopes_; + friend StorageInfo; + }; + + PreDfsOrderVisitor pre_visitor_; + Map storage_map_; +}; + +Map CollectStorageInfo(const Expr& expr) { return StorageInfo::GetStorageMap(expr); } + +TVM_REGISTER_GLOBAL("relay.analysis.CollectStorageInfo").set_body_typed(CollectStorageInfo); + +} // namespace relay +} // namespace tvm From 3b9df0898023e9b5d7aa236e523faa7270f961c4 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Mon, 1 Mar 2021 11:25:33 -0800 Subject: [PATCH 02/19] Populate consumer_scopes with any assigned storage scope. --- src/relay/transforms/adreno_memory_annotation.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/relay/transforms/adreno_memory_annotation.cc b/src/relay/transforms/adreno_memory_annotation.cc index fb386f055ecf..706205e69ae2 100644 --- a/src/relay/transforms/adreno_memory_annotation.cc +++ b/src/relay/transforms/adreno_memory_annotation.cc @@ -71,6 +71,7 @@ class StorageInfo { for (auto& consumer_scope : consumer_scopes_it->second) { if (consumer_scope != "texture") { all_consumers_support_texture = false; + break; } } if (all_consumers_support_texture) @@ -93,9 +94,13 @@ class StorageInfo { if (primitive_supports_texture_) { storage_scope_[call] = "texture"; - for (auto& arg : call->args) { - consumer_storage_scopes_[arg.get()].push_back("texture"); - } + } + else + { + storage_scope_[call] = "global"; + } + for (auto& arg : call->args) { + consumer_storage_scopes_[arg.get()].push_back(storage_scope_[call]); } } } From 7c14af463cfad6936a7ce8cfa10239ac5203fe9b Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Mon, 1 Mar 2021 11:26:24 -0800 Subject: [PATCH 03/19] Add legalization to ensure producer expr only produces texture if all consumers support reading from texture. 
--- .../transforms/adreno_memory_annotation.cc | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/relay/transforms/adreno_memory_annotation.cc b/src/relay/transforms/adreno_memory_annotation.cc index 706205e69ae2..7db3a159504c 100644 --- a/src/relay/transforms/adreno_memory_annotation.cc +++ b/src/relay/transforms/adreno_memory_annotation.cc @@ -42,6 +42,8 @@ class StorageInfo { StorageInfo storage_info; storage_info.pre_visitor_ = PreDfsOrderVisitor(); storage_info.pre_visitor_.Visit(expr); + // TODO(csullivan): A unit test for legalization + storage_info.pre_visitor_.LegalizeProducerStorage(); for (auto& it : storage_info.pre_visitor_.storage_scope_) { storage_info.storage_map_.Set(GetRef(it.first), String(it.second)); } @@ -117,6 +119,33 @@ class StorageInfo { BackwardPropagateConsumerScope(vn); } + void LegalizeProducerStorage() { + for (auto& kv : consumer_storage_scopes_) { + const ExprNode* producer = kv.first; + // For any producers which have multiple consumers we + // must ensure that all of those consumers expect the + // same storage type. If not, default to global scope + // for the producer + if (kv.second.size() > 1) { + bool all_consumers_support_texture = true; + for (auto& consumer_scope : kv.second) { + if (consumer_scope != "texture") { + all_consumers_support_texture = false; + break; + } + } + if (all_consumers_support_texture) + { + storage_scope_[producer] = "texture"; + } + else + { + storage_scope_[producer] = "global"; + } + } + } + } + bool primitive_supports_texture_ = false; std::unordered_map storage_scope_; std::unordered_map> consumer_storage_scopes_; From 2a1ce0e5b33f48641031a7449252e9f7b12777a3 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Mon, 1 Mar 2021 15:11:31 -0800 Subject: [PATCH 04/19] Tighten texture assignment to only apply to primitives containing conv2d in NCHW4c/OIHW4o layout. 
--- src/relay/transforms/adreno_memory_annotation.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/relay/transforms/adreno_memory_annotation.cc b/src/relay/transforms/adreno_memory_annotation.cc index 7db3a159504c..1e3a25fec92c 100644 --- a/src/relay/transforms/adreno_memory_annotation.cc +++ b/src/relay/transforms/adreno_memory_annotation.cc @@ -107,8 +107,11 @@ class StorageInfo { } } - if (call->attrs.as()) { - primitive_supports_texture_ = true; + if (auto attrs = call->attrs.as()) { + if (attrs->data_layout == "NCHW4c" && attrs->kernel_layout == "OIHW4o") + { + primitive_supports_texture_ = true; + } } for (auto& arg : call->args) { Visit(arg); From 2630bea198683ac1dd40860a9b37f1059cb308f3 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Mon, 1 Mar 2021 21:23:15 -0800 Subject: [PATCH 05/19] Refactor consumer storage homogeneity check. --- .../transforms/adreno_memory_annotation.cc | 38 ++++++------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/src/relay/transforms/adreno_memory_annotation.cc b/src/relay/transforms/adreno_memory_annotation.cc index 1e3a25fec92c..5ea0ae221823 100644 --- a/src/relay/transforms/adreno_memory_annotation.cc +++ b/src/relay/transforms/adreno_memory_annotation.cc @@ -65,21 +65,21 @@ class StorageInfo { } private: + std::string GetConsumerScope(const std::vector& consumer_scopes) const { + std::string ref_scope = consumer_scopes.front(); + for (auto& consumer_scope : consumer_scopes) { + if (consumer_scope != ref_scope) { + return "global"; + } + } + return ref_scope; + } + void BackwardPropagateConsumerScope(const ExprNode* expr) { auto consumer_scopes_it = consumer_storage_scopes_.find(expr); if (consumer_scopes_it != consumer_storage_scopes_.end()) { - bool all_consumers_support_texture = true; - for (auto& consumer_scope : consumer_scopes_it->second) { - if (consumer_scope != "texture") { - all_consumers_support_texture = false; - break; - } - } - if 
(all_consumers_support_texture) - { - storage_scope_[expr] = "texture"; - } + storage_scope_[expr] = GetConsumerScope(consumer_scopes_it->second); } } @@ -130,21 +130,7 @@ class StorageInfo { // same storage type. If not, default to global scope // for the producer if (kv.second.size() > 1) { - bool all_consumers_support_texture = true; - for (auto& consumer_scope : kv.second) { - if (consumer_scope != "texture") { - all_consumers_support_texture = false; - break; - } - } - if (all_consumers_support_texture) - { - storage_scope_[producer] = "texture"; - } - else - { - storage_scope_[producer] = "global"; - } + storage_scope_[producer] = GetConsumerScope(kv.second); } } } From 345bafb64404f1bd560992b5f50376eeca6ecf62 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Tue, 2 Mar 2021 14:50:50 -0800 Subject: [PATCH 06/19] Apply distinct storage scope annotation for weights in adreno storage scope annotation (CollectStorageInfo). --- src/relay/transforms/adreno_memory_annotation.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/relay/transforms/adreno_memory_annotation.cc b/src/relay/transforms/adreno_memory_annotation.cc index 5ea0ae221823..aa43a198f48a 100644 --- a/src/relay/transforms/adreno_memory_annotation.cc +++ b/src/relay/transforms/adreno_memory_annotation.cc @@ -75,16 +75,22 @@ class StorageInfo { return ref_scope; } - void BackwardPropagateConsumerScope(const ExprNode* expr) { + void BackwardPropagateConsumerScope(const ExprNode* expr, std::string scope_suffix = "") { auto consumer_scopes_it = consumer_storage_scopes_.find(expr); if (consumer_scopes_it != consumer_storage_scopes_.end()) { storage_scope_[expr] = GetConsumerScope(consumer_scopes_it->second); + if (storage_scope_[expr] == "texture") + { + if (!scope_suffix.empty()) { + storage_scope_[expr] += (":" + scope_suffix); + } + } } } void VisitExpr_(const ConstantNode* cn) final { - BackwardPropagateConsumerScope(cn); + BackwardPropagateConsumerScope(cn, "weight"); } 
void VisitExpr_(const CallNode* call) final { From 517326c70c721e86c0957ee6efe3af698c7513a3 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Wed, 17 Mar 2021 13:24:00 -0700 Subject: [PATCH 07/19] Only set output storage scope if non-global --- src/relay/transforms/adreno_memory_annotation.cc | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/relay/transforms/adreno_memory_annotation.cc b/src/relay/transforms/adreno_memory_annotation.cc index aa43a198f48a..1c5e01fb878b 100644 --- a/src/relay/transforms/adreno_memory_annotation.cc +++ b/src/relay/transforms/adreno_memory_annotation.cc @@ -99,23 +99,18 @@ class StorageInfo { if (fn->HasNonzeroAttr(attr::kPrimitive)) { primitive_supports_texture_ = false; Visit(call->op); - if (primitive_supports_texture_) - { + if (primitive_supports_texture_) { storage_scope_[call] = "texture"; } - else - { - storage_scope_[call] = "global"; - } for (auto& arg : call->args) { - consumer_storage_scopes_[arg.get()].push_back(storage_scope_[call]); + std::string scope = storage_scope_.count(call) ? 
storage_scope_[call] : "global"; + consumer_storage_scopes_[arg.get()].push_back(scope); } } } if (auto attrs = call->attrs.as()) { - if (attrs->data_layout == "NCHW4c" && attrs->kernel_layout == "OIHW4o") - { + if (attrs->data_layout == "NCHW4c" && attrs->kernel_layout == "OIHW4o") { primitive_supports_texture_ = true; } } From d7021910c5ccabe3b8097cd29373fda43305d915 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Wed, 17 Mar 2021 13:24:50 -0700 Subject: [PATCH 08/19] Add CollectBufferBinds packed func for opencl adreno target --- .../transforms/adreno_memory_annotation.cc | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/src/relay/transforms/adreno_memory_annotation.cc b/src/relay/transforms/adreno_memory_annotation.cc index 1c5e01fb878b..0708fdcf3224 100644 --- a/src/relay/transforms/adreno_memory_annotation.cc +++ b/src/relay/transforms/adreno_memory_annotation.cc @@ -148,7 +148,71 @@ class StorageInfo { Map CollectStorageInfo(const Expr& expr) { return StorageInfo::GetStorageMap(expr); } +namespace { +String GetStorageScope(const Expr& expr, const Map& storage_map, size_t output_index) { + if (!storage_map.count(expr)) { return String{}; } + auto storage_info = Downcast>(storage_map[expr][2]); + if (output_index >= storage_info.size()) { + return String{}; + } + std::string scope = storage_info[output_index]; + auto pos = scope.find(":"); + if (pos != std::string::npos) { + scope = scope.substr(0, pos); + } + return String(scope); +} +} + +Array CollectBufferBinds(const Call& call, const Map& storage_map) { + const auto* primfn = call->op.as(); + ICHECK(primfn); + ICHECK(primfn->HasNonzeroAttr(attr::kPrimitive)) << "Can only collect buffer binds for primitive functions"; + ICHECK_EQ(call->args.size(), primfn->params.size()) << "Call arguments and function parameters do not match"; + + auto make_buffer = [&storage_map](const Expr& expr, const TensorTypeNode* ttype, const std::string& name, size_t index = 0) { + String scope = 
GetStorageScope(expr, storage_map, index); + PrimType storage_type(ttype->dtype); + tir::Var var = scope == "texture" ? tir::Var(name, TextureType(storage_type)) : tir::Var(name, PointerType(storage_type)); + return tir::Buffer(var, ttype->dtype, ttype->shape, Array{}, Integer(0), name, scope, -1, 0, tir::BufferType::kDefault); + }; + + // Make input buffers + Array buffers; + for (size_t i = 0; i < call->args.size(); i++) { + const Expr& arg = call->args[i]; + if (const auto* ttype = primfn->params[i]->checked_type().as()) { + buffers.push_back(make_buffer(arg, ttype, "placeholder" + std::to_string(i))); + } else { + const auto* tuple_type = primfn->params[i]->type_as(); + ICHECK(tuple_type); + for (size_t j = 0; j < tuple_type->fields.size(); j++) { + const auto* ttype = tuple_type->fields[j].as(); + ICHECK(ttype); + buffers.push_back(make_buffer(arg, ttype, "placeholder" + std::to_string(i) + "_" + std::to_string(j), j)); + } + } + } + + // Make output buffers + if (const auto* ttype = call->checked_type().as()) { + buffers.push_back(make_buffer(call, ttype, "compute")); + } else { + const auto* tuple_type = call->type_as(); + ICHECK(tuple_type); + for (size_t i = 0; i < tuple_type->fields.size(); i++) { + const auto* ttype = tuple_type->fields[i].as(); + ICHECK(ttype); + buffers.push_back(make_buffer(call, ttype, "compute" + std::to_string(i), i)); + } + } + + return buffers; +} + TVM_REGISTER_GLOBAL("relay.analysis.CollectStorageInfo").set_body_typed(CollectStorageInfo); +TVM_REGISTER_GLOBAL("relay.backend.opencl.adreno._CollectBufferBinds").set_body_typed(CollectBufferBinds); + } // namespace relay } // namespace tvm From fb25dfa487070d108374a14d33dbe3d4c3e504d9 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Mon, 8 Mar 2021 21:28:44 -0800 Subject: [PATCH 09/19] Bug fix in storage scope legalization: Ensure that output is marked as global if single consumer requires global input. 
Previous legalization only ensured this if multiple consumers had differing storage requirements. --- src/relay/transforms/adreno_memory_annotation.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/relay/transforms/adreno_memory_annotation.cc b/src/relay/transforms/adreno_memory_annotation.cc index 0708fdcf3224..239d15233fa2 100644 --- a/src/relay/transforms/adreno_memory_annotation.cc +++ b/src/relay/transforms/adreno_memory_annotation.cc @@ -66,6 +66,7 @@ class StorageInfo { private: std::string GetConsumerScope(const std::vector& consumer_scopes) const { + if (!consumer_scopes.size()) { return "global"; } std::string ref_scope = consumer_scopes.front(); for (auto& consumer_scope : consumer_scopes) { if (consumer_scope != ref_scope) { @@ -126,12 +127,11 @@ class StorageInfo { void LegalizeProducerStorage() { for (auto& kv : consumer_storage_scopes_) { const ExprNode* producer = kv.first; - // For any producers which have multiple consumers we - // must ensure that all of those consumers expect the - // same storage type. If not, default to global scope - // for the producer - if (kv.second.size() > 1) { - storage_scope_[producer] = GetConsumerScope(kv.second); + std::string legal_scope = GetConsumerScope(kv.second); + if (storage_scope_.count(producer)) { + if (storage_scope_[producer] != legal_scope) { + storage_scope_[producer] = legal_scope; + } } } } From fc9343d7b1c861275f4b3f72ed85a2a32d687191 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Tue, 9 Mar 2021 11:46:36 -0800 Subject: [PATCH 10/19] Slight fix to storage scope legalization. 
--- src/relay/transforms/adreno_memory_annotation.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/relay/transforms/adreno_memory_annotation.cc b/src/relay/transforms/adreno_memory_annotation.cc index 239d15233fa2..1adf416349c2 100644 --- a/src/relay/transforms/adreno_memory_annotation.cc +++ b/src/relay/transforms/adreno_memory_annotation.cc @@ -129,7 +129,7 @@ class StorageInfo { const ExprNode* producer = kv.first; std::string legal_scope = GetConsumerScope(kv.second); if (storage_scope_.count(producer)) { - if (storage_scope_[producer] != legal_scope) { + if (storage_scope_[producer].find(legal_scope) == std::string::npos) { storage_scope_[producer] = legal_scope; } } From a9de458083e43c94e297107f79c5633e5f082153 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Wed, 17 Mar 2021 10:55:13 -0700 Subject: [PATCH 11/19] [Part 3/3] Support texture:weight lowering convention for externally provided texture buffers. Need to propagate this to allocated textures when cache_read(texture) is used for weights. 
--- src/relay/transforms/adreno_memory_annotation.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/relay/transforms/adreno_memory_annotation.cc b/src/relay/transforms/adreno_memory_annotation.cc index 1adf416349c2..e04461a791da 100644 --- a/src/relay/transforms/adreno_memory_annotation.cc +++ b/src/relay/transforms/adreno_memory_annotation.cc @@ -171,9 +171,15 @@ Array CollectBufferBinds(const Call& call, const Mapargs.size(), primfn->params.size()) << "Call arguments and function parameters do not match"; auto make_buffer = [&storage_map](const Expr& expr, const TensorTypeNode* ttype, const std::string& name, size_t index = 0) { - String scope = GetStorageScope(expr, storage_map, index); + //String scope = GetStorageScope(expr, storage_map, index); + auto storage_info = Downcast>(storage_map[expr][2]); + std::string scope = ""; + if (storage_info.size()) { + scope = storage_info[index]; + } + PrimType storage_type(ttype->dtype); - tir::Var var = scope == "texture" ? tir::Var(name, TextureType(storage_type)) : tir::Var(name, PointerType(storage_type)); + tir::Var var = GetStorageScope(expr, storage_map, index) == "texture" ? tir::Var(name, TextureType(storage_type)) : tir::Var(name, PointerType(storage_type)); return tir::Buffer(var, ttype->dtype, ttype->shape, Array{}, Integer(0), name, scope, -1, 0, tir::BufferType::kDefault); }; From 9c039dfe3aee399a534aa1106901175a600d1fe8 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Wed, 17 Mar 2021 13:27:59 -0700 Subject: [PATCH 12/19] Update adreno collect storage to support multiple outputs. Note for now the Adreno impl. assumes the output storage types are uniform for multi-output nodes. 
--- .../transforms/adreno_memory_annotation.cc | 78 ++++++++++++++----- 1 file changed, 57 insertions(+), 21 deletions(-) diff --git a/src/relay/transforms/adreno_memory_annotation.cc b/src/relay/transforms/adreno_memory_annotation.cc index e04461a791da..46d3729ac229 100644 --- a/src/relay/transforms/adreno_memory_annotation.cc +++ b/src/relay/transforms/adreno_memory_annotation.cc @@ -38,16 +38,19 @@ namespace relay { class StorageInfo { public: - static Map GetStorageMap(const Expr& expr) { + static Map> GetStorageMap(const Expr& expr) { StorageInfo storage_info; storage_info.pre_visitor_ = PreDfsOrderVisitor(); storage_info.pre_visitor_.Visit(expr); // TODO(csullivan): A unit test for legalization storage_info.pre_visitor_.LegalizeProducerStorage(); - for (auto& it : storage_info.pre_visitor_.storage_scope_) { - storage_info.storage_map_.Set(GetRef(it.first), String(it.second)); + Map> storage_map; + for (auto& kv : storage_info.pre_visitor_.storage_scope_) { + std::vector storage_scopes; + std::copy(kv.second.begin(), kv.second.end(), std::back_inserter(storage_scopes)); + storage_map.Set(GetRef(kv.first), Array{storage_scopes}); } - return storage_info.storage_map_; + return storage_map; } private: @@ -76,22 +79,35 @@ class StorageInfo { return ref_scope; } - void BackwardPropagateConsumerScope(const ExprNode* expr, std::string scope_suffix = "") { + bool HasMixedStorageOutputs(const ExprNode* expr) { + if (storage_scope_.count(expr)) { + std::string ref_scope = storage_scope_[expr].front(); + for (std::string& scope : storage_scope_[expr]) { + if (scope != ref_scope) { + return true; + } + } + } + return false; + } + + void ApplyConsumerScopeToInputs(const ExprNode* expr, std::string scope_suffix = "") { auto consumer_scopes_it = consumer_storage_scopes_.find(expr); - if (consumer_scopes_it != consumer_storage_scopes_.end()) - { - storage_scope_[expr] = GetConsumerScope(consumer_scopes_it->second); - if (storage_scope_[expr] == "texture") - { + if 
(consumer_scopes_it != consumer_storage_scopes_.end()) { + std::string consumer_scope = GetConsumerScope(consumer_scopes_it->second); + ICHECK(!storage_scope_.count(expr)) + << "Already propagated consumer scopes to input: " << GetRef(expr); + storage_scope_[expr].push_back(consumer_scope); + if (consumer_scope == "texture") { if (!scope_suffix.empty()) { - storage_scope_[expr] += (":" + scope_suffix); + storage_scope_[expr][0] += (":" + scope_suffix); } } } } void VisitExpr_(const ConstantNode* cn) final { - BackwardPropagateConsumerScope(cn, "weight"); + ApplyConsumerScopeToInputs(cn, "weight"); } void VisitExpr_(const CallNode* call) final { @@ -101,11 +117,28 @@ class StorageInfo { primitive_supports_texture_ = false; Visit(call->op); if (primitive_supports_texture_) { - storage_scope_[call] = "texture"; + if (call->checked_type().as()) { + storage_scope_[call].push_back("texture"); + } else { + const auto* tuple_type = call->type_as(); + ICHECK(tuple_type); + // TODO(csullivan): Add support for mixed output storage scope. + // In current adreno storage planner all outputs of a + // primitive function are assumed to be of the same storage + // type. This should be easy to extend in the future. + for (size_t i = 0; i < tuple_type->fields.size(); i++) { + storage_scope_[call].push_back("texture"); + } + } } + // Add consumer storage scope information for call arguments for (auto& arg : call->args) { - std::string scope = storage_scope_.count(call) ? 
storage_scope_[call] : "global"; - consumer_storage_scopes_[arg.get()].push_back(scope); + if (storage_scope_.count(call)) { + ICHECK(!HasMixedStorageOutputs(call)) << "Mixed output storage scopes are not currently supported"; + consumer_storage_scopes_[arg.operator->()].push_back(storage_scope_[call][0]); + } else { + consumer_storage_scopes_[arg.operator->()].push_back("global"); + } } } } @@ -121,7 +154,7 @@ class StorageInfo { } void VisitExpr_(const VarNode* vn) final { - BackwardPropagateConsumerScope(vn); + ApplyConsumerScopeToInputs(vn); } void LegalizeProducerStorage() { @@ -129,24 +162,27 @@ class StorageInfo { const ExprNode* producer = kv.first; std::string legal_scope = GetConsumerScope(kv.second); if (storage_scope_.count(producer)) { - if (storage_scope_[producer].find(legal_scope) == std::string::npos) { - storage_scope_[producer] = legal_scope; + ICHECK(!HasMixedStorageOutputs(producer)) << "Mixed output storage scopes are not currently supported"; + if (storage_scope_[producer].front().find(legal_scope) == std::string::npos) { + for (size_t i = 0; i < storage_scope_[producer].size(); i++) { + // Only support uniform storage scope accross all outputs for now + storage_scope_[producer][i] = legal_scope; + } } } } } bool primitive_supports_texture_ = false; - std::unordered_map storage_scope_; + std::unordered_map> storage_scope_; std::unordered_map> consumer_storage_scopes_; friend StorageInfo; }; PreDfsOrderVisitor pre_visitor_; - Map storage_map_; }; -Map CollectStorageInfo(const Expr& expr) { return StorageInfo::GetStorageMap(expr); } +Map> CollectStorageInfo(const Expr& expr) { return StorageInfo::GetStorageMap(expr); } namespace { String GetStorageScope(const Expr& expr, const Map& storage_map, size_t output_index) { From dfd3fbefb8a8a4690431f3e196b359c536695e2f Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Wed, 17 Mar 2021 11:14:30 -0700 Subject: [PATCH 13/19] Force the output storage of top level relay function outputs to be global 
storage to avoid need for image row pitch calc. --- .../transforms/adreno_memory_annotation.cc | 37 +++++++++++++++++-- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/src/relay/transforms/adreno_memory_annotation.cc b/src/relay/transforms/adreno_memory_annotation.cc index 46d3729ac229..a4b8d701fe28 100644 --- a/src/relay/transforms/adreno_memory_annotation.cc +++ b/src/relay/transforms/adreno_memory_annotation.cc @@ -42,8 +42,16 @@ class StorageInfo { StorageInfo storage_info; storage_info.pre_visitor_ = PreDfsOrderVisitor(); storage_info.pre_visitor_.Visit(expr); - // TODO(csullivan): A unit test for legalization storage_info.pre_visitor_.LegalizeProducerStorage(); + // TODO(csullivan): The below cann be removed if either of the following are true: + // * Function outputs are persistent (can_realloc = False) + // * Runtime support is added for passing tensor shape through CopyFromTo API + // so that image pitch can be determined allowing the correct read to be + // enqueued from a texture pool. + // For now we force write to global for the outputs of the function over which + // memory planning will be performed. This should incur only a trivial change + // in performance. 
+ storage_info.pre_visitor_.ForceGlobalOutputStorage(expr); Map> storage_map; for (auto& kv : storage_info.pre_visitor_.storage_scope_) { std::vector storage_scopes; @@ -70,7 +78,7 @@ class StorageInfo { private: std::string GetConsumerScope(const std::vector& consumer_scopes) const { if (!consumer_scopes.size()) { return "global"; } - std::string ref_scope = consumer_scopes.front(); + std::string ref_scope = consumer_scopes[0]; for (auto& consumer_scope : consumer_scopes) { if (consumer_scope != ref_scope) { return "global"; @@ -81,7 +89,7 @@ class StorageInfo { bool HasMixedStorageOutputs(const ExprNode* expr) { if (storage_scope_.count(expr)) { - std::string ref_scope = storage_scope_[expr].front(); + std::string ref_scope = storage_scope_[expr][0]; for (std::string& scope : storage_scope_[expr]) { if (scope != ref_scope) { return true; @@ -163,7 +171,7 @@ class StorageInfo { std::string legal_scope = GetConsumerScope(kv.second); if (storage_scope_.count(producer)) { ICHECK(!HasMixedStorageOutputs(producer)) << "Mixed output storage scopes are not currently supported"; - if (storage_scope_[producer].front().find(legal_scope) == std::string::npos) { + if (storage_scope_[producer][0].find(legal_scope) == std::string::npos) { for (size_t i = 0; i < storage_scope_[producer].size(); i++) { // Only support uniform storage scope accross all outputs for now storage_scope_[producer][i] = legal_scope; @@ -173,6 +181,27 @@ class StorageInfo { } } + void ForceGlobalOutputStorage(const Expr& expr) { + // Mark function outputs as global scope + if (const auto* func = expr.as()) { + if (auto* tuple = func->body.as()) { + for (auto& field : tuple->fields) { + if (storage_scope_.count(field.operator->())) { + for (size_t i = 0; i < storage_scope_[field.operator->()].size(); i++) { + storage_scope_[field.operator->()][i] = "global"; + } + } + } + } else { + if (storage_scope_.count(func->body.operator->())) { + for (size_t i = 0; i < 
storage_scope_[func->body.operator->()].size(); i++) { + storage_scope_[func->body.operator->()][i] = "global"; + } + } + } + } + } + bool primitive_supports_texture_ = false; std::unordered_map> storage_scope_; std::unordered_map> consumer_storage_scopes_; From ec0880fb763349ecd89aa6a904a2ccaa9bd4c6d3 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Wed, 17 Mar 2021 13:32:37 -0700 Subject: [PATCH 14/19] Register opencl adreno target specific CollectStorageInfo impl. --- .../transforms/adreno_memory_annotation.cc | 241 ++++++++++-------- 1 file changed, 136 insertions(+), 105 deletions(-) diff --git a/src/relay/transforms/adreno_memory_annotation.cc b/src/relay/transforms/adreno_memory_annotation.cc index a4b8d701fe28..b72dd2f6c334 100644 --- a/src/relay/transforms/adreno_memory_annotation.cc +++ b/src/relay/transforms/adreno_memory_annotation.cc @@ -36,14 +36,17 @@ namespace tvm { namespace relay { -class StorageInfo { +class StorageInfo : private ExprVisitor{ public: - static Map> GetStorageMap(const Expr& expr) { - StorageInfo storage_info; - storage_info.pre_visitor_ = PreDfsOrderVisitor(); - storage_info.pre_visitor_.Visit(expr); - storage_info.pre_visitor_.LegalizeProducerStorage(); - // TODO(csullivan): The below cann be removed if either of the following are true: + StorageInfo(const Map& dev_map, const Map& target_map) + : device_ids_(dev_map), targets_(target_map) {;} + static Map> GetStorageMap(const Expr& expr, + const Map& dev_map, + const Map& target_map) { + StorageInfo storage_info(dev_map, target_map); + storage_info.Visit(expr); + storage_info.LegalizeProducerStorage(); + // TODO(csullivan): The below can be removed if either of the following are true: // * Function outputs are persistent (can_realloc = False) // * Runtime support is added for passing tensor shape through CopyFromTo API // so that image pitch can be determined allowing the correct read to be @@ -51,9 +54,9 @@ class StorageInfo { // For now we force write to global for the 
outputs of the function over which // memory planning will be performed. This should incur only a trivial change // in performance. - storage_info.pre_visitor_.ForceGlobalOutputStorage(expr); + storage_info.ForceGlobalOutputStorage(expr); Map> storage_map; - for (auto& kv : storage_info.pre_visitor_.storage_scope_) { + for (auto& kv : storage_info.storage_scope_) { std::vector storage_scopes; std::copy(kv.second.begin(), kv.second.end(), std::back_inserter(storage_scopes)); storage_map.Set(GetRef(kv.first), Array{storage_scopes}); @@ -62,64 +65,30 @@ class StorageInfo { } private: - class PreDfsOrderVisitor : private ExprVisitor { - public: - void Visit(const Expr& expr) { - if (const auto* fn = expr.as()) { - this->VisitExpr(fn->body); - for (const auto& param : fn->params) { - this->VisitExpr(param); - } - } else { - this->VisitExpr(expr); + void Visit(const Expr& expr) { + // Pre-order traversal to enable upward propagation + // of consumer storage scopes to producers when desirable. 
+ if (const auto* fn = expr.as()) { + this->VisitExpr(fn->body); + for (const auto& param : fn->params) { + this->VisitExpr(param); } + } else { + this->VisitExpr(expr); } + } - private: - std::string GetConsumerScope(const std::vector& consumer_scopes) const { - if (!consumer_scopes.size()) { return "global"; } - std::string ref_scope = consumer_scopes[0]; - for (auto& consumer_scope : consumer_scopes) { - if (consumer_scope != ref_scope) { - return "global"; - } - } - return ref_scope; - } - - bool HasMixedStorageOutputs(const ExprNode* expr) { - if (storage_scope_.count(expr)) { - std::string ref_scope = storage_scope_[expr][0]; - for (std::string& scope : storage_scope_[expr]) { - if (scope != ref_scope) { - return true; - } - } - } - return false; - } - - void ApplyConsumerScopeToInputs(const ExprNode* expr, std::string scope_suffix = "") { - auto consumer_scopes_it = consumer_storage_scopes_.find(expr); - if (consumer_scopes_it != consumer_storage_scopes_.end()) { - std::string consumer_scope = GetConsumerScope(consumer_scopes_it->second); - ICHECK(!storage_scope_.count(expr)) - << "Already propagated consumer scopes to input: " << GetRef(expr); - storage_scope_[expr].push_back(consumer_scope); - if (consumer_scope == "texture") { - if (!scope_suffix.empty()) { - storage_scope_[expr][0] += (":" + scope_suffix); - } - } - } - } + void VisitExpr_(const VarNode* vn) final { + ApplyConsumerScopeToInputs(vn); + } - void VisitExpr_(const ConstantNode* cn) final { - ApplyConsumerScopeToInputs(cn, "weight"); - } + void VisitExpr_(const ConstantNode* cn) final { + ApplyConsumerScopeToInputs(cn, "weight"); + } - void VisitExpr_(const CallNode* call) final { - // Check the contents of this primitive function + void VisitExpr_(const CallNode* call) final { + // Check the contents of this primitive function + if (IsAdrenoExpr(GetRef(call))) { if (const auto* fn = call->op.as()) { if (fn->HasNonzeroAttr(attr::kPrimitive)) { primitive_supports_texture_ = false; @@ -150,68 
+119,124 @@ class StorageInfo { } } } + } - if (auto attrs = call->attrs.as()) { - if (attrs->data_layout == "NCHW4c" && attrs->kernel_layout == "OIHW4o") { - primitive_supports_texture_ = true; - } - } - for (auto& arg : call->args) { - Visit(arg); + if (auto attrs = call->attrs.as()) { + if (attrs->data_layout == "NCHW4c" && attrs->kernel_layout == "OIHW4o") { + primitive_supports_texture_ = true; } } + for (auto& arg : call->args) { + Visit(arg); + } + } - void VisitExpr_(const VarNode* vn) final { - ApplyConsumerScopeToInputs(vn); + void ApplyConsumerScopeToInputs(const ExprNode* expr, std::string scope_suffix = "") { + auto consumer_scopes_it = consumer_storage_scopes_.find(expr); + if (consumer_scopes_it != consumer_storage_scopes_.end()) { + std::string consumer_scope = GetConsumerScope(consumer_scopes_it->second); + ICHECK(!storage_scope_.count(expr)) + << "Already propagated consumer scopes to input: " << GetRef(expr); + storage_scope_[expr].push_back(consumer_scope); + if (consumer_scope == "texture") { + if (!scope_suffix.empty()) { + storage_scope_[expr][0] += (":" + scope_suffix); + } + } } + } - void LegalizeProducerStorage() { - for (auto& kv : consumer_storage_scopes_) { - const ExprNode* producer = kv.first; - std::string legal_scope = GetConsumerScope(kv.second); - if (storage_scope_.count(producer)) { - ICHECK(!HasMixedStorageOutputs(producer)) << "Mixed output storage scopes are not currently supported"; - if (storage_scope_[producer][0].find(legal_scope) == std::string::npos) { - for (size_t i = 0; i < storage_scope_[producer].size(); i++) { - // Only support uniform storage scope accross all outputs for now - storage_scope_[producer][i] = legal_scope; - } + void LegalizeProducerStorage() { + for (auto& kv : consumer_storage_scopes_) { + const ExprNode* producer = kv.first; + std::string legal_scope = GetConsumerScope(kv.second); + if (storage_scope_.count(producer)) { + ICHECK(!HasMixedStorageOutputs(producer)) << "Mixed output storage scopes 
are not currently supported"; + if (storage_scope_[producer][0].find(legal_scope) == std::string::npos) { + for (size_t i = 0; i < storage_scope_[producer].size(); i++) { + // Only support uniform storage scope accross all outputs for now + storage_scope_[producer][i] = legal_scope; } } } } + } - void ForceGlobalOutputStorage(const Expr& expr) { - // Mark function outputs as global scope - if (const auto* func = expr.as()) { - if (auto* tuple = func->body.as()) { - for (auto& field : tuple->fields) { - if (storage_scope_.count(field.operator->())) { - for (size_t i = 0; i < storage_scope_[field.operator->()].size(); i++) { - storage_scope_[field.operator->()][i] = "global"; - } + void ForceGlobalOutputStorage(const Expr& expr) { + // Mark function outputs as global scope + if (const auto* func = expr.as()) { + if (auto* tuple = func->body.as()) { + for (auto& field : tuple->fields) { + if (storage_scope_.count(field.operator->())) { + for (size_t i = 0; i < storage_scope_[field.operator->()].size(); i++) { + storage_scope_[field.operator->()][i] = "global"; } } - } else { - if (storage_scope_.count(func->body.operator->())) { - for (size_t i = 0; i < storage_scope_[func->body.operator->()].size(); i++) { - storage_scope_[func->body.operator->()][i] = "global"; - } + } + } else { + if (storage_scope_.count(func->body.operator->())) { + for (size_t i = 0; i < storage_scope_[func->body.operator->()].size(); i++) { + storage_scope_[func->body.operator->()][i] = "global"; } } } } + } - bool primitive_supports_texture_ = false; - std::unordered_map> storage_scope_; - std::unordered_map> consumer_storage_scopes_; - friend StorageInfo; - }; + bool IsAdrenoExpr(const Expr& expr) { + Target target; + Integer dev_id{-1}; + if (device_ids_.count(expr) && targets_.count(device_ids_[expr])) { + dev_id = device_ids_[expr]; + target = targets_[dev_id]; + } else if (targets_.size() == 1) { + const auto& kv = targets_.begin(); + dev_id = (*kv).first; + target = (*kv).second; + } + 
ICHECK(dev_id->value != -1) << "Error inferring target device, device mapping and targets do not match"; + Optional t_device = target->GetAttr("device"); + if (target->kind->device_type == kDLOpenCL && t_device.defined()) { + if (t_device.value() == "adreno") { return true; } + } + return false; + } - PreDfsOrderVisitor pre_visitor_; -}; + std::string GetConsumerScope(const std::vector& consumer_scopes) const { + if (!consumer_scopes.size()) { return "global"; } + std::string ref_scope = consumer_scopes[0]; + for (auto& consumer_scope : consumer_scopes) { + if (consumer_scope != ref_scope) { + return "global"; + } + } + return ref_scope; + } + + bool HasMixedStorageOutputs(const ExprNode* expr) { + if (storage_scope_.count(expr)) { + std::string ref_scope = storage_scope_[expr][0]; + for (std::string& scope : storage_scope_[expr]) { + if (scope != ref_scope) { + return true; + } + } + } + return false; + } -Map> CollectStorageInfo(const Expr& expr) { return StorageInfo::GetStorageMap(expr); } + /*! \brief expr device mapping */ + Map device_ids_; + /*! \brief device id to target mapping */ + Map targets_; + /*! \brief Temporary state for marking whether a visited function + primitive supports texture storage scope */ + bool primitive_supports_texture_ = false; + /*! \brief expr storage scope mapping for each output */ + std::unordered_map> storage_scope_; + /*! 
\brief output storage scopes used by consumers of expr key */ + std::unordered_map> consumer_storage_scopes_; +}; namespace { String GetStorageScope(const Expr& expr, const Map& storage_map, size_t output_index) { @@ -281,7 +306,13 @@ Array CollectBufferBinds(const Call& call, const Map> CollectTextureStorage(const Expr& expr, + const Map& dev_map, + const Map& target_map) { + return StorageInfo::GetStorageMap(expr, dev_map, target_map); +} + +TVM_REGISTER_GLOBAL("relay.backend.opencl.adreno._CollectStorageInfo").set_body_typed(CollectTextureStorage); TVM_REGISTER_GLOBAL("relay.backend.opencl.adreno._CollectBufferBinds").set_body_typed(CollectBufferBinds); From 20c5de5df8a6302147f81622ee326c65fe26164b Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Tue, 16 Mar 2021 17:04:12 -0700 Subject: [PATCH 15/19] More renaming --- ...reno_memory_annotation.cc => annotate_texture_storage.cc} | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) rename src/relay/transforms/{adreno_memory_annotation.cc => annotate_texture_storage.cc} (98%) diff --git a/src/relay/transforms/adreno_memory_annotation.cc b/src/relay/transforms/annotate_texture_storage.cc similarity index 98% rename from src/relay/transforms/adreno_memory_annotation.cc rename to src/relay/transforms/annotate_texture_storage.cc index b72dd2f6c334..a3b54b49bf20 100644 --- a/src/relay/transforms/adreno_memory_annotation.cc +++ b/src/relay/transforms/annotate_texture_storage.cc @@ -88,7 +88,7 @@ class StorageInfo : private ExprVisitor{ void VisitExpr_(const CallNode* call) final { // Check the contents of this primitive function - if (IsAdrenoExpr(GetRef(call))) { + if (DeviceSupportsTextureStorage(GetRef(call))) { if (const auto* fn = call->op.as()) { if (fn->HasNonzeroAttr(attr::kPrimitive)) { primitive_supports_texture_ = false; @@ -183,7 +183,7 @@ class StorageInfo : private ExprVisitor{ } } - bool IsAdrenoExpr(const Expr& expr) { + bool DeviceSupportsTextureStorage(const Expr& expr) { Target target; Integer 
dev_id{-1}; if (device_ids_.count(expr) && targets_.count(device_ids_[expr])) { @@ -196,6 +196,7 @@ class StorageInfo : private ExprVisitor{ } ICHECK(dev_id->value != -1) << "Error inferring target device, device mapping and targets do not match"; Optional t_device = target->GetAttr("device"); + // Currently only `target = opencl --device=adreno` supports texture storage if (target->kind->device_type == kDLOpenCL && t_device.defined()) { if (t_device.value() == "adreno") { return true; } } From 95b18cdaa7d4917492184a0940dbb0a9ff1b56b4 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Tue, 16 Mar 2021 17:22:29 -0700 Subject: [PATCH 16/19] Use anonymous namespace --- src/relay/transforms/annotate_texture_storage.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/relay/transforms/annotate_texture_storage.cc b/src/relay/transforms/annotate_texture_storage.cc index a3b54b49bf20..f5abd1f20eb7 100644 --- a/src/relay/transforms/annotate_texture_storage.cc +++ b/src/relay/transforms/annotate_texture_storage.cc @@ -35,6 +35,7 @@ namespace tvm { namespace relay { +namespace { class StorageInfo : private ExprVisitor{ public: @@ -231,7 +232,7 @@ class StorageInfo : private ExprVisitor{ /*! \brief device id to target mapping */ Map targets_; /*! \brief Temporary state for marking whether a visited function - primitive supports texture storage scope */ + * primitive supports texture storage scope */ bool primitive_supports_texture_ = false; /*! 
\brief expr storage scope mapping for each output */ std::unordered_map> storage_scope_; @@ -239,7 +240,6 @@ class StorageInfo : private ExprVisitor{ std::unordered_map> consumer_storage_scopes_; }; -namespace { String GetStorageScope(const Expr& expr, const Map& storage_map, size_t output_index) { if (!storage_map.count(expr)) { return String{}; } auto storage_info = Downcast>(storage_map[expr][2]); From 5c1ad886db56f1e8179a5d4b596ebc4bdee234bb Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Wed, 17 Mar 2021 14:28:29 -0700 Subject: [PATCH 17/19] Add annotate_texture_storage.cc docstring. --- .../transforms/annotate_texture_storage.cc | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/relay/transforms/annotate_texture_storage.cc b/src/relay/transforms/annotate_texture_storage.cc index f5abd1f20eb7..42d7f239aab2 100644 --- a/src/relay/transforms/annotate_texture_storage.cc +++ b/src/relay/transforms/annotate_texture_storage.cc @@ -18,8 +18,21 @@ */ /*! - * \file deivce_annotation.cc - * \brief + * \file annotate_texture_storage.cc + * \brief Collection of target specific relay passes which collect + * storage scope related information. + * + * - CollectStorageInfo returns a mapping from relay expr + * to a list of output storage scopes for each output. + * These scopes are used during memory planning as well + * as downstream when doing codegen (see CollectBufferBinds) + * and in the graph runtime when doing runtime dataspace + * allocations. + * + * - CollectBufferBinds returns an array of tir::Buffer given + * the storage info yielded from CollectStorageInfo. These + * buffers are bound to tensors created by the compile engine + * and are used as binds when calling tvm::lower/build.
* */ From ef5ff26b9435c38c9fb633b170b08c93843e78b0 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Sat, 10 Apr 2021 21:25:08 -0700 Subject: [PATCH 18/19] Increase texture operator coverage: - Max/Avg/Global Pooling - Concatenate - LayoutTransform (NCHW -> NCHW4c) --- .../transforms/annotate_texture_storage.cc | 49 +++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/src/relay/transforms/annotate_texture_storage.cc b/src/relay/transforms/annotate_texture_storage.cc index 42d7f239aab2..1a47a1463b64 100644 --- a/src/relay/transforms/annotate_texture_storage.cc +++ b/src/relay/transforms/annotate_texture_storage.cc @@ -140,6 +140,32 @@ class StorageInfo : private ExprVisitor{ primitive_supports_texture_ = true; } } + else if (auto attrs = call->attrs.as()) { + if (attrs->layout == "NCHW4c") { + primitive_supports_texture_ = true; + } + } + else if (auto attrs = call->attrs.as()) { + if (attrs->layout == "NCHW4c") { + primitive_supports_texture_ = true; + } + } + else if (auto attrs = call->attrs.as()) { + if (attrs->layout == "NCHW4c") { + primitive_supports_texture_ = true; + } + } else if (call->attrs.as()) { + primitive_supports_texture_ = true; + } else if (auto attrs = call->attrs.as()) { + // Enable if either the source or destination layout is packed with vector length == 4. + // Disabled for layout contraction due to a bug when writing from texture to global buffer. + // TODO(csullivan): Enable proper code generation when emitting non-coalesced writes + // of elements from a coalesced texture read. 
+ if ((attrs->dst_layout.find("4") == 4) /* || (attrs->src_layout.find("4") == 4) */) { + primitive_supports_texture_ = true; + } + } + for (auto& arg : call->args) { Visit(arg); } @@ -151,11 +177,28 @@ class StorageInfo : private ExprVisitor{ std::string consumer_scope = GetConsumerScope(consumer_scopes_it->second); ICHECK(!storage_scope_.count(expr)) << "Already propagated consumer scopes to input: " << GetRef(expr); - storage_scope_[expr].push_back(consumer_scope); + + bool expr_is_rgba_vectorizable = false; + if (const auto* ttype = expr->checked_type().as()) { + auto inner_dim = ttype->shape.back().as(); + if (inner_dim && inner_dim->value == 4) { + expr_is_rgba_vectorizable = true; + } + } + + // Only propagate texture scope from consumers to input expr if + // the input shape of the input expr is rgba vectorizable. if (consumer_scope == "texture") { - if (!scope_suffix.empty()) { - storage_scope_[expr][0] += (":" + scope_suffix); + if (expr_is_rgba_vectorizable) { + std::string scope = consumer_scope; + // Apply any provided storage scope suffix before assignment + if (!scope_suffix.empty()) { + scope += (":" + scope_suffix); + } + storage_scope_[expr].push_back(scope); } + } else { + storage_scope_[expr].push_back(consumer_scope); } } } From 817b1862fb6265682ba1278c867890af5514529f Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Wed, 14 Apr 2021 15:16:39 -0700 Subject: [PATCH 19/19] Refactor out texture storage support for each call into helper method. 
--- .../transforms/annotate_texture_storage.cc | 64 ++++++++++--------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/src/relay/transforms/annotate_texture_storage.cc b/src/relay/transforms/annotate_texture_storage.cc index 1a47a1463b64..4b9a12898954 100644 --- a/src/relay/transforms/annotate_texture_storage.cc +++ b/src/relay/transforms/annotate_texture_storage.cc @@ -135,36 +135,7 @@ class StorageInfo : private ExprVisitor{ } } - if (auto attrs = call->attrs.as()) { - if (attrs->data_layout == "NCHW4c" && attrs->kernel_layout == "OIHW4o") { - primitive_supports_texture_ = true; - } - } - else if (auto attrs = call->attrs.as()) { - if (attrs->layout == "NCHW4c") { - primitive_supports_texture_ = true; - } - } - else if (auto attrs = call->attrs.as()) { - if (attrs->layout == "NCHW4c") { - primitive_supports_texture_ = true; - } - } - else if (auto attrs = call->attrs.as()) { - if (attrs->layout == "NCHW4c") { - primitive_supports_texture_ = true; - } - } else if (call->attrs.as()) { - primitive_supports_texture_ = true; - } else if (auto attrs = call->attrs.as()) { - // Enable if either the source or destination layout is packed with vector length == 4. - // Disabled for layout contraction due to a bug when writing from texture to global buffer. - // TODO(csullivan): Enable proper code generation when emitting non-coalesced writes - // of elements from a coalesced texture read. 
- if ((attrs->dst_layout.find("4") == 4) /* || (attrs->src_layout.find("4") == 4) */) { - primitive_supports_texture_ = true; - } - } + primitive_supports_texture_ = SupportsTextureStorage(call); for (auto& arg : call->args) { Visit(arg); @@ -283,6 +254,39 @@ class StorageInfo : private ExprVisitor{ return false; } + bool SupportsTextureStorage(const CallNode* call) const { + bool supports_texture_storage = false; + if (auto attrs = call->attrs.as()) { + if (attrs->data_layout == "NCHW4c" && attrs->kernel_layout == "OIHW4o") { + supports_texture_storage = true; + } + } else if (auto attrs = call->attrs.as()) { + if (attrs->layout == "NCHW4c") { + supports_texture_storage = true; + } + } else if (auto attrs = call->attrs.as()) { + if (attrs->layout == "NCHW4c") { + supports_texture_storage = true; + } + } else if (auto attrs = call->attrs.as()) { + if (attrs->layout == "NCHW4c") { + supports_texture_storage = true; + } + } else if (call->attrs.as()) { + supports_texture_storage = true; + } else if (auto attrs = call->attrs.as()) { + // Enable if either the source or destination layout is packed with vector length == 4. + // Disabled for layout contraction due to a bug when writing from texture to global buffer. + // TODO(csullivan): Enable proper code generation when emitting non-coalesced writes + // of elements from a coalesced texture read. + if ((attrs->dst_layout.find("4") == 4) /* || (attrs->src_layout.find("4") == 4) */) { + supports_texture_storage = true; + } + } + + return supports_texture_storage; + } + /*! \brief expr device mapping */ Map device_ids_; /*! \brief device id to target mapping */