diff --git a/python/tvm/contrib/ethosu/cascader/proposal.py b/python/tvm/contrib/ethosu/cascader/proposal.py
new file mode 100644
index 000000000000..13184108120e
--- /dev/null
+++ b/python/tvm/contrib/ethosu/cascader/proposal.py
@@ -0,0 +1,106 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Proposal class to hold graph scheduling information."""
+from typing import Dict, FrozenSet, List
+
+import tvm._ffi
+from tvm.runtime import Object
+
+from . import _ffi_api
+from .graph import Tensor, Part, CascaderGraph
+from .plan import Plan
+from .tensor_config import TensorConfig, MemoryRegion
+
+
+@tvm._ffi.register_object("contrib.ethosu.cascader.Proposal")
+class Proposal(Object):
+    """A class which describes how to schedule a CascaderGraph as a series of disjoint Plans.
+
+    Attributes
+    ----------
+    graph : CascaderGraph
+        The CascaderGraph to which the Proposal applies.
+    part_group : FrozenSet[Part]
+        The Parts which are covered by the Proposal.
+    plans : List[Plan]
+        The Plans used in the Proposal.
+    input_tensor_configs : Dict[Tensor, TensorConfig]
+        The TensorConfigs indexed by Tensor in the Proposal which aren't produced by a Plan.
+    cascade_region : MemoryRegion
+        The MemoryRegion where cascading buffers should be homed.
+    memory_usage : int
+        The memory required to execute the Proposal in the cascading MemoryRegion.
+    cycles : int
+        The estimated cycles taken to execute the Proposal.
+ + """ + + def __init__( + self, + graph: CascaderGraph, + part_group: FrozenSet[Part], + plans: List[Plan], + input_tensor_configs: Dict[Tensor, TensorConfig], + cascade_region: MemoryRegion, + memory_usage: Dict[MemoryRegion, int], + cycles: int, + ): + self.__init_handle_by_constructor__( + _ffi_api.Proposal, + graph, + list(part_group), + plans, + input_tensor_configs, + cascade_region, + memory_usage, + cycles, + ) + + @property + def graph(self) -> CascaderGraph: + """The CascaderGraph to which the Proposal applies.""" + return self._graph + + @property + def part_group(self) -> FrozenSet[Part]: + """The Parts which are covered by the Proposal.""" + return frozenset(self._part_group) + + @property + def plans(self) -> List[Plan]: + """The Plans used in the Proposal.""" + return list(self._plans) + + @property + def input_tensor_configs(self) -> Dict[Tensor, TensorConfig]: + """The TensorConfigs indexed by Tensor in the Proposal which aren't produced by a Plan.""" + return dict(self._input_tensor_configs) + + @property + def cascade_region(self) -> MemoryRegion: + """The MemoryRegion where cascading buffers should be homed.""" + return self._cascade_region + + @property + def memory_usage(self) -> int: + """The memory required to execute the Proposal in the cascading MemoryRegion.""" + return int(self._memory_usage) + + @property + def cycles(self) -> int: + """The estimated cycles taken to execute the Proposal.""" + return int(self._cycles) diff --git a/python/tvm/contrib/ethosu/cascader/proposal_generator.py b/python/tvm/contrib/ethosu/cascader/proposal_generator.py new file mode 100644 index 000000000000..d79021a20539 --- /dev/null +++ b/python/tvm/contrib/ethosu/cascader/proposal_generator.py @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Algorithms to generate Proposals for a Graph.""" +from typing import List, Dict, FrozenSet + +from . import _ffi_api +from .cascader_options import CascaderOptions +from .plan import Plan +from .proposal import Proposal +from .graph import CascaderGraph, Part + + +def generate_proposals( + graph: CascaderGraph, + home_map: Dict[FrozenSet[Part], List[Plan]], + options: CascaderOptions, +) -> List[Proposal]: + """Generate Pareto optimal Proposals for a CascaderGraph. + + This algorithm takes a top-down dynamic programming approach to determining how + to optimally combine Plans into Proposals. + + Parameters + ---------- + graph : CascaderGraph + The CascaderGraph to generate Proposals for. + home_map : Dict[FrozenSet[Part], List[Plan]] + The Tensor homing map defining valid memory homes for Tensors. + options : CascaderOptions + The configuration options with which to run the generator. + + Returns + ------ + List[Proposal] + A list of Pareto optimal Proposals. 
+ + """ + return list( + _ffi_api.GenerateProposals( + graph, + home_map, + options, + ) + ) diff --git a/src/contrib/ethosu/cascader/pareto.cc b/src/contrib/ethosu/cascader/pareto.cc index 255719088cb0..52ea729bffa2 100644 --- a/src/contrib/ethosu/cascader/pareto.cc +++ b/src/contrib/ethosu/cascader/pareto.cc @@ -29,6 +29,8 @@ #include "common.h" #include "plan.h" +#include "proposal.h" +#include "tensor_config.h" namespace tvm { namespace contrib { @@ -106,6 +108,31 @@ std::vector ParetoCullPlans(std::vector plans, size_t max_plans) { return ThinVector(optimal_plans, max_plans); } +std::vector ParetoCullProposals(std::vector proposals, size_t max_proposals) { + std::sort(proposals.begin(), proposals.end(), [](const Proposal& a, const Proposal& b) -> bool { + return a->GetMemoryUsage() < b->GetMemoryUsage(); + }); + std::vector> costs; + for (const auto& proposal : proposals) { + std::array cost = {static_cast(proposal->GetMemoryUsage()), + static_cast(proposal->GetCycles())}; + costs.emplace_back(cost); + } + std::vector is_optimal = GetParetoFrontier<2>(costs); + std::vector optimal_proposals; + size_t i = 0; + for (bool optimal : is_optimal) { + if (optimal) { + optimal_proposals.push_back(proposals[i]); + } + i++; + } + if (optimal_proposals.size() <= max_proposals) { + return optimal_proposals; + } + return ThinVector(optimal_proposals, max_proposals); +} + TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GetParetoFrontier") .set_body_typed([](Array> tcosts) { std::vector> costs; diff --git a/src/contrib/ethosu/cascader/pareto.h b/src/contrib/ethosu/cascader/pareto.h index 6b0cd4e294ac..511da6c2712f 100644 --- a/src/contrib/ethosu/cascader/pareto.h +++ b/src/contrib/ethosu/cascader/pareto.h @@ -37,6 +37,8 @@ namespace ethosu { namespace cascader { class Plan; +class MemoryRegion; +class Proposal; /*! * \brief Determine the Pareto optimal points. @@ -65,6 +67,8 @@ std::vector ThinVector(const std::vector& vec, size_t max_size); */ std::vector ParetoCullPlans(std::vector plans, size_t max_plans); +std::vector ParetoCullProposals(std::vector proposals, size_t max_proposals); + } // namespace cascader } // namespace ethosu } // namespace contrib diff --git a/src/contrib/ethosu/cascader/proposal.cc b/src/contrib/ethosu/cascader/proposal.cc new file mode 100644 index 000000000000..e96be3466e10 --- /dev/null +++ b/src/contrib/ethosu/cascader/proposal.cc @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
diff --git a/src/contrib/ethosu/cascader/proposal.cc b/src/contrib/ethosu/cascader/proposal.cc
new file mode 100644
index 000000000000..e96be3466e10
--- /dev/null
+++ b/src/contrib/ethosu/cascader/proposal.cc
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "proposal.h"
+
+#include <tvm/runtime/container/array.h>
+#include <tvm/runtime/container/map.h>
+#include <tvm/runtime/object.h>
+#include <tvm/runtime/registry.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "plan.h"
+
+namespace tvm {
+namespace contrib {
+namespace ethosu {
+namespace cascader {
+
+void ProposalNode::VisitAttrs(AttrVisitor* v) {
+  v->Visit("_graph", &graph_);
+  Array<Part> tmp_parts(part_group_.begin(), part_group_.end());
+  v->Visit("_part_group", &tmp_parts);
+  Array<Plan> tmp_plans(plans_.begin(), plans_.end());
+  v->Visit("_plans", &tmp_plans);
+  Map<Tensor, TensorConfig> tmp_tmap(input_tensor_configs_.begin(), input_tensor_configs_.end());
+  v->Visit("_input_tensor_configs", &tmp_tmap);
+  v->Visit("_cascade_region", &cascade_region_);
+  v->Visit("_memory_usage", &memory_usage_);
+  v->Visit("_cycles", &cycles_);
+}
+
+Proposal::Proposal(const CascaderGraph& graph, const std::vector<Part>& part_group,
+                   const std::vector<Plan>& plans, const TensorConfigMap& input_tensor_configs,
+                   const MemoryRegion& cascade_region, int memory_usage, int cycles) {
+  auto n = make_object<ProposalNode>();
+  n->graph_ = graph;
+  n->part_group_ = part_group;
+  std::sort(n->part_group_.begin(), n->part_group_.end());
+  n->plans_ = plans;
+  n->input_tensor_configs_ = input_tensor_configs;
+  n->cascade_region_ = cascade_region;
+  n->memory_usage_ = memory_usage;
+  n->cycles_ = cycles;
+  data_ = std::move(n);
+}
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.Proposal")
+    .set_body_typed([](CascaderGraph graph, Array<Part> part_group, Array<Plan> plans,
+                       Map<Tensor, TensorConfig> input_tensor_configs, MemoryRegion cascade_region,
+                       int memory_usage, int cycles) {
+      std::vector<Part> spart_group(part_group.begin(), part_group.end());
+      std::vector<Plan> vplans(plans.begin(), plans.end());
+      TensorConfigMap minput_tensor_configs(input_tensor_configs.begin(),
+                                            input_tensor_configs.end());
+      return Proposal(graph, spart_group, vplans, minput_tensor_configs, cascade_region,
+                      memory_usage, cycles);
+    });
+
+TVM_REGISTER_NODE_TYPE(ProposalNode);
+
+}  // namespace cascader
+}  // namespace ethosu
+}  // namespace contrib
+}  // namespace tvm
diff --git a/src/contrib/ethosu/cascader/proposal.h b/src/contrib/ethosu/cascader/proposal.h
new file mode 100644
index 000000000000..e5db0328b731
--- /dev/null
+++ b/src/contrib/ethosu/cascader/proposal.h
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/contrib/ethosu/cascader/proposal.h
+ * \brief Proposal object for the NPU cascader
+ */
+#ifndef TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_H_
+#define TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_H_
+
+#include <tvm/runtime/object.h>
+#include <tvm/runtime/registry.h>
+
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "graph.h"
+#include "plan.h"
+#include "tensor_config.h"
+
+namespace tvm {
+namespace contrib {
+namespace ethosu {
+namespace cascader {
+
+using MemoryUsageMap = std::unordered_map<MemoryRegion, int, ObjectPtrHash, ObjectPtrEqual>;
+using TensorConfigMap = std::unordered_map<Tensor, TensorConfig, ObjectPtrHash, ObjectPtrEqual>;
+
+/*! \brief Node to represent a Proposal */
+class ProposalNode : public Object {
+ public:
+  void VisitAttrs(AttrVisitor* v);
+
+  /*! \return The CascaderGraph to which the Proposal applies */
+  const CascaderGraph GetGraph() const { return graph_; }
+  /*! \return The Parts which are covered by the Proposal */
+  const std::vector<Part> GetPartGroup() const { return part_group_; }
+  /*! \return The Plans used in the Proposal */
+  const std::vector<Plan> GetPlans() const { return plans_; }
+  /*! \return The TensorConfigs indexed by Tensor in the Proposal which aren't produced by a Plan */
+  const TensorConfigMap GetInputTensorConfigs() const { return input_tensor_configs_; }
+  /*! \return The MemoryRegion where cascading buffers should be homed */
+  const MemoryRegion GetCascadeRegion() const { return cascade_region_; }
+  /*! \return The memory required to execute the Proposal in the cascading MemoryRegion */
+  int GetMemoryUsage() const { return memory_usage_; }
+  /*! \return The estimated cycles taken to execute the Proposal */
+  int GetCycles() const { return cycles_; }
+
+  static constexpr const char* _type_key = "contrib.ethosu.cascader.Proposal";
+  TVM_DECLARE_FINAL_OBJECT_INFO(ProposalNode, Object);
+
+ protected:
+  friend class Proposal;
+
+  /*! \brief The CascaderGraph to which the Proposal applies */
+  CascaderGraph graph_;
+  /*! \brief The Parts which are covered by the Proposal */
+  std::vector<Part> part_group_;
+  /*! \brief The Plans used in the Proposal */
+  std::vector<Plan> plans_;
+  /*! \brief The TensorConfigs indexed by Tensor in the Proposal which aren't produced by a Plan */
+  TensorConfigMap input_tensor_configs_;
+  /*! \brief The MemoryRegion where cascading buffers should be homed */
+  MemoryRegion cascade_region_;
+  /*! \brief The memory required to execute the Proposal in the cascading MemoryRegion */
+  int memory_usage_;
+  /*! \brief The estimated cycles taken to execute the Proposal */
+  int cycles_;
+};
+
+/*!
+ * \brief A class which describes how to schedule a CascaderGraph as a series of disjoint Plans.
+ */
+class Proposal : public ObjectRef {
+ public:
+  Proposal(const CascaderGraph& graph, const std::vector<Part>& part_group,
+           const std::vector<Plan>& plans, const TensorConfigMap& input_tensor_configs,
+           const MemoryRegion& cascade_region, int memory_usage, int cycles);
+
+  TVM_DEFINE_OBJECT_REF_METHODS(Proposal, ObjectRef, ProposalNode);
+};
+
+}  // namespace cascader
+}  // namespace ethosu
+}  // namespace contrib
+}  // namespace tvm
+
+#endif  // TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_H_
diff --git a/src/contrib/ethosu/cascader/proposal_generator.cc b/src/contrib/ethosu/cascader/proposal_generator.cc
new file mode 100644
index 000000000000..ce709cbaa657
--- /dev/null
+++ b/src/contrib/ethosu/cascader/proposal_generator.cc
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/runtime/container/array.h>
+#include <tvm/runtime/container/map.h>
+#include <tvm/runtime/container/string.h>
+#include <tvm/runtime/object.h>
+#include <tvm/runtime/registry.h>
+
+#include <algorithm>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "cascader_options.h"
+#include "graph.h"
+#include "pareto.h"
+#include "plan.h"
+#include "plan_generator.h"
+#include "proposal.h"
+#include "stripe_config.h"
+#include "tensor_config.h"
+
+namespace tvm {
+namespace contrib {
+namespace ethosu {
+namespace cascader {
+
+std::unordered_set<TensorConfig, ObjectPtrHash, ObjectPtrEqual> GetPlanBoundaryConfigs(
+    const Plan& plan) {
+  std::unordered_set<TensorConfig, ObjectPtrHash, ObjectPtrEqual> boundary_configs;
+  for (const auto& config : plan->GetTensorConfigs()) {
+    if (config->GetState() == TensorConfigState::BOUNDARY) {
+      boundary_configs.insert(config);
+    }
+  }
+  return boundary_configs;
+}
+
+bool IsPlanCompatible(
+    const Proposal& proposal, const std::vector<Part>& plan_part_group,
+    const std::unordered_set<TensorConfig, ObjectPtrHash, ObjectPtrEqual>& plan_boundary_configs) {
+  // Check the Plan Part group is disjoint with the Proposal Part group
+  for (const auto& plan_part : plan_part_group) {
+    for (const auto& proposal_part : proposal->GetPartGroup()) {
+      if (plan_part == proposal_part) {
+        return false;
+      }
+    }
+  }
+  // If the Plan and Proposal disagree on the memory home of a Tensor, they
+  // are incompatible and can't be used to create a new Proposal
+  auto tensor_configs = proposal->GetInputTensorConfigs();
+  for (const auto& plan_config : plan_boundary_configs) {
+    if (tensor_configs.find(plan_config->GetTensor()) != tensor_configs.end()) {
+      auto proposal_config = tensor_configs.at(plan_config->GetTensor());
+      if (proposal_config->GetHomeRegion() != plan_config->GetHomeRegion()) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+std::unordered_map<Part, std::vector<Plan>, ObjectPtrHash, ObjectPtrEqual> CreatePlansByPart(
+    const std::unordered_map<std::vector<Part>, std::vector<Plan>>& plans_by_group,
+    const CascaderGraph& graph) {
+  std::unordered_map<Part, std::vector<Plan>, ObjectPtrHash, ObjectPtrEqual> plans_by_part;
+  for (const auto& it : plans_by_group) {
+    auto part_group = it.first;
+    auto plans = it.second;
+    int highest_index = 0;
+    Part& index_part = part_group.front();
+    // Determine the Part in the Part group with the highest ID - this will be used to index
+    // the Plans
+    for (const auto& part : part_group) {
+      int pid = graph->GetPartID(part);
+      if (pid >= highest_index) {
+        index_part = part;
+        highest_index = pid;
+      }
+    }
+    plans_by_part[index_part].insert(plans_by_part[index_part].begin(), plans.begin(), plans.end());
+  }
+  return plans_by_part;
+}
+
+Proposal AddPlanToProposal(
+    const Proposal& proposal, const Plan& plan,
+    const std::unordered_set<TensorConfig, ObjectPtrHash, ObjectPtrEqual>& plan_boundary_configs) {
+  std::vector<Plan> new_plans = proposal->GetPlans();
+  new_plans.push_back(plan);
+  TensorConfigMap new_configs = proposal->GetInputTensorConfigs();
+  // Add input configs from the Plan if they're homed in the cascade region
+  for (const auto& config : plan_boundary_configs) {
+    if (config->GetHomeRegion() == proposal->GetCascadeRegion()) {
+      new_configs[config->GetTensor()] = config;
+    }
+  }
+  // Remove the Plan's output config from the new_configs if it's present because
+  // it won't be an input to the Proposal any more
+  if (new_configs.find(plan->GetOutputConfig()->GetTensor()) != new_configs.end()) {
+    new_configs.erase(plan->GetOutputConfig()->GetTensor());
+  }
+  // The updated memory usage is the memory required to run the Plan plus the
+  // non-local memory that's required in the Proposal at that point in time
+  int new_memory_usage = plan->GetMemoryUsage();
+  for (const auto& it : new_configs) {
+    if (plan_boundary_configs.find(it.second) == plan_boundary_configs.end()) {
+      new_memory_usage += it.first->GetSize();
+    }
+  }
+  new_memory_usage = std::max(new_memory_usage, proposal->GetMemoryUsage());
+  int new_cycles = proposal->GetCycles() + plan->GetCycles();
+  std::vector<Part> new_part_group = proposal->GetPartGroup();
+  new_part_group.insert(new_part_group.end(), plan->GetPartGroup().begin(),
+                        plan->GetPartGroup().end());
+  std::sort(new_part_group.begin(), new_part_group.end());
+  return Proposal(proposal->GetGraph(), new_part_group, new_plans, new_configs,
+                  proposal->GetCascadeRegion(), new_memory_usage, new_cycles);
+}
+
+std::vector<Proposal> GeneratePartialProposals(
+    const CascaderGraph& graph, const HomeMap& home_map, const CascaderOptions& options,
+    const std::unordered_map<Part, std::vector<Plan>, ObjectPtrHash, ObjectPtrEqual>&
+        plans_by_part,
+    const std::vector<Part>& partial_proposal_group,
+    std::unordered_map<std::vector<Part>, std::vector<Proposal>>* proposals_by_group) {
+  if (proposals_by_group->find(partial_proposal_group) != proposals_by_group->end()) {
+    return proposals_by_group->at(partial_proposal_group);
+  }
+  if (partial_proposal_group.size() == 0) {
+    (*proposals_by_group)[partial_proposal_group] =
+        std::vector<Proposal>{Proposal(graph, std::vector<Part>(), std::vector<Plan>(),
+                                       TensorConfigMap(), options->cascade_region, 0, 0)};
+  } else {
+    Part part = partial_proposal_group.back();
+    const auto& plans = plans_by_part.at(part);
+    for (const auto& plan : plans) {
+      if (plan->GetInteriorRegion() == options->cascade_region) {
+        // Doing this isn't very efficient, but it improves the performance of the Plan
+        // generator
+        std::unordered_set<TensorConfig, ObjectPtrHash, ObjectPtrEqual> plan_boundary_configs =
+            GetPlanBoundaryConfigs(plan);
+        // The residual_proposal_group is a Part group indicating the Parts which aren't
+        // covered by the current Plan. It's the group for which we must find 'residual
+        // Proposals', meaning Proposals which cover the rest of the CascaderGraph assuming we
+        // pick the current Plan.
+        std::vector<Part> residual_proposal_group;
+        std::copy_if(partial_proposal_group.begin(), partial_proposal_group.end(),
+                     std::back_inserter(residual_proposal_group), [&plan](Part value) {
+                       return std::find(plan->GetPartGroup().begin(), plan->GetPartGroup().end(),
+                                        value) == plan->GetPartGroup().end();
+                     });
+        const auto& residual_proposals = GeneratePartialProposals(
+            graph, home_map, options, plans_by_part, residual_proposal_group, proposals_by_group);
+        auto plan_output_tensor = plan->GetOutputConfig()->GetTensor();
+        ICHECK_LE(plan_output_tensor->GetProducers().size(), 1)
+            << "All tensors must have at most one producer.";
+        for (const auto& residual_proposal : residual_proposals) {
+          if (IsPlanCompatible(residual_proposal, plan->GetPartGroup(), plan_boundary_configs)) {
+            (*proposals_by_group)[partial_proposal_group].push_back(
+                AddPlanToProposal(residual_proposal, plan, plan_boundary_configs));
+          }
+        }
+      }
+    }
+    (*proposals_by_group)[partial_proposal_group] = ParetoCullProposals(
+        proposals_by_group->at(partial_proposal_group), options->max_proposals);
+  }
+  return proposals_by_group->at(partial_proposal_group);
+}
+
+std::vector<Proposal> GenerateProposals(const CascaderGraph& graph, const HomeMap& home_map,
+                                        const CascaderOptions& options) {
+  // First generate all the Pareto optimal Plans for the CascaderGraph
+  auto plans_by_group = GenerateGraphPlans(graph, home_map, options);
+  // Then create a map between every Part in the CascaderGraph and all the Plans for which that
+  // Part is the highest ID Part within the Plan's Part group
+  std::unordered_map<Part, std::vector<Plan>, ObjectPtrHash, ObjectPtrEqual> plans_by_part =
+      CreatePlansByPart(plans_by_group, graph);
+  // The Part group that partial Proposals are currently being generated for
+  std::vector<Part> partial_proposal_group = graph->GetPartOrder();
+  // A map of Proposals indexed by the Part group they cover
+  std::unordered_map<std::vector<Part>, std::vector<Proposal>> proposals_by_group;
+  return GeneratePartialProposals(graph, home_map, options, plans_by_part, partial_proposal_group,
+                                  &proposals_by_group);
+}
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GenerateProposals")
+    .set_body_typed([](CascaderGraph graph, Map<Tensor, Array<MemoryRegion>> home_map,
+                       CascaderOptions options) {
+      std::unordered_map<Tensor, std::vector<MemoryRegion>, ObjectPtrHash, ObjectPtrEqual>
+          mhome_map;
+      for (const auto& it : home_map) {
+        std::vector<MemoryRegion> home_regions;
+        for (const auto& i : it.second) {
+          home_regions.push_back(i);
+        }
+        mhome_map[it.first] = home_regions;
+      }
+      return Array<Proposal>(GenerateProposals(graph, mhome_map, options));
+    });
+
+}  // namespace cascader
+}  // namespace ethosu
+}  // namespace contrib
+}  // namespace tvm
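Reviewer note: the `TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GenerateProposals")` packed function above is what `_ffi_api.GenerateProposals` in proposal_generator.py resolves to. It can also be looked up directly, which helps when debugging registration issues:

```python
import tvm

# PackedFunc registered by proposal_generator.cc; None if this build was
# compiled without the Ethos-U cascader.
fgen = tvm.get_global_func("contrib.ethosu.cascader.GenerateProposals", allow_missing=True)
print(fgen is not None)
```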
diff --git a/src/contrib/ethosu/cascader/proposal_generator.h b/src/contrib/ethosu/cascader/proposal_generator.h
new file mode 100644
index 000000000000..624dc5b98a85
--- /dev/null
+++ b/src/contrib/ethosu/cascader/proposal_generator.h
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/contrib/ethosu/cascader/proposal_generator.h
+ * \brief Algorithm to generate possible Proposals in the NPU cascader
+ */
+#ifndef TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_GENERATOR_H_
+#define TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_GENERATOR_H_
+
+#include <tvm/runtime/object.h>
+#include <tvm/runtime/registry.h>
+
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+namespace tvm {
+namespace contrib {
+namespace ethosu {
+namespace cascader {
+
+class CascaderGraph;
+class MemoryRegion;
+class Tensor;
+class Proposal;
+class CascaderOptions;
+
+using HomeMap =
+    std::unordered_map<Tensor, std::vector<MemoryRegion>, ObjectPtrHash, ObjectPtrEqual>;
+
+/*!
+ * \brief Generate Pareto optimal Proposals for a CascaderGraph.
+ * \param graph The CascaderGraph to generate Proposals for.
+ * \param home_map The Tensor homing map defining valid memory homes for Tensors.
+ * \param options The configuration options with which to run the generator.
+ * \return A vector of Pareto optimal Proposals.
+ * \note This algorithm takes a top-down dynamic programming approach to determining how
+ * to optimally combine Plans into Proposals. It does the following:
+ *
+ * First, run GenerateGraphPlans to generate the Pareto optimal Plans that cover all the
+ * Part groups in the CascaderGraph.
+ *
+ * Solve the problem recursively, generating optimal Proposals for increasingly small
+ * portions of the overall graph.
+ *
+ * Take the first Part in the graph:
+ * 1. Find all the Plans for which the Part is both in the Plan's Part group and has the
+ *    highest Part ID of any Part in the Part group (i.e. it's the 'first' Part in the
+ *    group).
+ * For each Plan:
+ * 2. Get the Part group covered by the Plan and subtract it from the 'total Part group'
+ *    covering all the Parts. This forms a 'residual Part group'.
+ * 3. Recursively, determine the optimal Proposals for the 'residual Part group' (the graph
+ *    minus the Parts included in the Plan). Memoize the results.
+ * For each residual Proposal:
+ * 4. Create a new Proposal by adding the current Plan to the residual Proposal.
+ * 5. Pareto cull all the newly created Proposals (which all share the same Part group).
+ * 6. Return the Proposals which cover all the Parts in the CascaderGraph.
+ *
+ */
+std::vector<Proposal> GenerateProposals(const CascaderGraph& graph, const HomeMap& home_map,
+                                        const CascaderOptions& options);
+
+}  // namespace cascader
+}  // namespace ethosu
+}  // namespace contrib
+}  // namespace tvm
+
+#endif  // TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_GENERATOR_H_
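The note above describes the recursion in prose; the following Python sketch shows the same memoized scheme. It is illustrative only: `plans_by_part` and `pareto_cull` stand in for the real structures, and the compatibility checks and memory accounting done by `IsPlanCompatible`/`AddPlanToProposal` are elided.

```python
def generate_partial_proposals(part_group, plans_by_part, pareto_cull, memo=None):
    """Return Pareto optimal 'proposals' (lists of plans) covering part_group."""
    memo = {} if memo is None else memo
    key = tuple(part_group)
    if key in memo:
        return memo[key]
    if not part_group:
        return [[]]  # the empty proposal: no plans, zero memory, zero cycles
    candidates = []
    # Only consider Plans indexed by the last Part of the group (the one with
    # the highest Part ID), mirroring CreatePlansByPart
    for plan in plans_by_part[part_group[-1]]:
        residual = [p for p in part_group if p not in plan.part_group]
        for residual_proposal in generate_partial_proposals(
            residual, plans_by_part, pareto_cull, memo
        ):
            candidates.append(residual_proposal + [plan])
    memo[key] = pareto_cull(candidates)
    return memo[key]
```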
diff --git a/tests/python/contrib/test_ethosu/cascader/conftest.py b/tests/python/contrib/test_ethosu/cascader/conftest.py
index 21ed401994c1..cffaf83df0bc 100644
--- a/tests/python/contrib/test_ethosu/cascader/conftest.py
+++ b/tests/python/contrib/test_ethosu/cascader/conftest.py
@@ -203,3 +203,400 @@ def _get_func():
     @pytest.fixture
     def MobileNetv2DiamondTE():
         return make_MobileNetv2DiamondTE()
+
+    @pytest.fixture
+    def MobileNetv2DiamondGraph():
+        _, te_graph, const_dict = make_MobileNetv2DiamondTE()
+        device_config = cs.EthosuDeviceConfig("ethos-u55-256")
+        return cs.create_cascader_graph(te_graph, const_dict, device_config)
+
+    def make_BinaryTE():
+        def _get_func():
+            ifm_a = relay.var("ifm_a", shape=(1, 8, 8, 8), dtype="int8")
+            ifm_b = relay.var("ifm_b", shape=(1, 8, 8, 8), dtype="int8")
+            conv1 = make_ethosu_conv2d(
+                ifm=ifm_a,
+                ifm_channels=8,
+                ofm_channels=8,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            conv2 = make_ethosu_conv2d(
+                ifm=ifm_b,
+                ifm_channels=8,
+                ofm_channels=8,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            add1 = make_ethosu_binary_elementwise(
+                ifm=conv1,
+                ifm2=conv2,
+                ifm_channels=8,
+                ifm2_channels=8,
+                operator_type="ADD",
+                ofm_dtype="int8",
+            )
+            func = relay.Function(relay.analysis.free_vars(add1), add1)
+            func = run_opt_pass(func, relay.transform.InferType())
+            return func
+
+        func = _get_func()
+        te_graph, const_dict = create_te_graph(func)
+        sch = tvm.te.create_schedule([t.op for t in te_graph.outputs])
+        return sch, te_graph, const_dict
+
+    @pytest.fixture
+    def BinaryTE():
+        return make_BinaryTE()
+
+    @pytest.fixture
+    def BinaryGraph():
+        _, te_graph, const_dict = make_BinaryTE()
+        device_config = cs.EthosuDeviceConfig("ethos-u55-256")
+        return cs.create_cascader_graph(te_graph, const_dict, device_config)
+
+    def make_MobileNetv1StartTE():
+        def _get_func():
+            ifm = relay.var("ifm", shape=(1, 224, 224, 3), dtype="int8")
+            conv1 = make_ethosu_conv2d(
+                ifm=ifm,
+                ifm_channels=3,
+                ofm_channels=32,
+                kernel_shape=(3, 3),
+                padding=(0, 0, 1, 1),
+                strides=(2, 2),
+                dilation=(1, 1),
+            )
+            depth1 = make_ethosu_depthwise_conv2d(
+                ifm=conv1,
+                channels=32,
+                kernel_shape=(3, 3),
+                padding=(1, 1, 1, 1),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            conv2 = make_ethosu_conv2d(
+                ifm=depth1,
+                ifm_channels=32,
+                ofm_channels=64,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            depth2 = make_ethosu_depthwise_conv2d(
+                ifm=conv2,
+                channels=64,
+                kernel_shape=(3, 3),
+                padding=(0, 0, 1, 1),
+                strides=(2, 2),
+                dilation=(1, 1),
+            )
+            conv3 = make_ethosu_conv2d(
+                ifm=depth2,
+                ifm_channels=64,
+                ofm_channels=128,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            depth3 = make_ethosu_depthwise_conv2d(
+                ifm=conv3,
+                channels=128,
+                kernel_shape=(3, 3),
+                padding=(1, 1, 1, 1),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            conv4 = make_ethosu_conv2d(
+                ifm=depth3,
+                ifm_channels=128,
+                ofm_channels=128,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            depth4 = make_ethosu_depthwise_conv2d(
+                ifm=conv4,
+                channels=128,
+                kernel_shape=(3, 3),
+                padding=(0, 0, 1, 1),
+                strides=(2, 2),
+                dilation=(1, 1),
+            )
+            func = relay.Function(relay.analysis.free_vars(depth4), depth4)
+            func = run_opt_pass(func, relay.transform.InferType())
+            return func
+
+        func = _get_func()
+        te_graph, const_dict = create_te_graph(func)
+        sch = tvm.te.create_schedule([t.op for t in te_graph.outputs])
+        return sch, te_graph, const_dict
+
+    @pytest.fixture
+    def MobileNetv1StartTE():
+        return make_MobileNetv1StartTE()
+
+    @pytest.fixture
+    def MobileNetv1StartGraph():
+        _, te_graph, const_dict = make_MobileNetv1StartTE()
+        device_config = cs.EthosuDeviceConfig("ethos-u55-256")
+        return cs.create_cascader_graph(te_graph, const_dict, device_config)
+
+    def make_MobileNetv1TE():
+        def _get_func():
+            ifm = relay.var("ifm", shape=(1, 224, 224, 3), dtype="int8")
+            conv1 = make_ethosu_conv2d(
+                ifm=ifm,
+                ifm_channels=3,
+                ofm_channels=32,
+                kernel_shape=(3, 3),
+                padding=(0, 0, 1, 1),
+                strides=(2, 2),
+                dilation=(1, 1),
+            )
+            depth1 = make_ethosu_depthwise_conv2d(
+                ifm=conv1,
+                channels=32,
+                kernel_shape=(3, 3),
+                padding=(1, 1, 1, 1),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            conv2 = make_ethosu_conv2d(
+                ifm=depth1,
+                ifm_channels=32,
+                ofm_channels=64,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            depth2 = make_ethosu_depthwise_conv2d(
+                ifm=conv2,
+                channels=64,
+                kernel_shape=(3, 3),
+                padding=(0, 0, 1, 1),
+                strides=(2, 2),
+                dilation=(1, 1),
+            )
+            conv3 = make_ethosu_conv2d(
+                ifm=depth2,
+                ifm_channels=64,
+                ofm_channels=128,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            depth3 = make_ethosu_depthwise_conv2d(
+                ifm=conv3,
+                channels=128,
+                kernel_shape=(3, 3),
+                padding=(1, 1, 1, 1),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            conv4 = make_ethosu_conv2d(
+                ifm=depth3,
+                ifm_channels=128,
+                ofm_channels=128,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            depth4 = make_ethosu_depthwise_conv2d(
+                ifm=conv4,
+                channels=128,
+                kernel_shape=(3, 3),
+                padding=(0, 0, 1, 1),
+                strides=(2, 2),
+                dilation=(1, 1),
+            )
+            conv5 = make_ethosu_conv2d(
+                ifm=depth4,
+                ifm_channels=128,
+                ofm_channels=256,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            depth5 = make_ethosu_depthwise_conv2d(
+                ifm=conv5,
+                channels=256,
+                kernel_shape=(3, 3),
+                padding=(1, 1, 1, 1),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            conv6 = make_ethosu_conv2d(
+                ifm=depth5,
+                ifm_channels=256,
+                ofm_channels=256,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            depth6 = make_ethosu_depthwise_conv2d(
+                ifm=conv6,
+                channels=256,
+                kernel_shape=(3, 3),
+                padding=(0, 0, 1, 1),
+                strides=(2, 2),
+                dilation=(1, 1),
+            )
+            conv7 = make_ethosu_conv2d(
+                ifm=depth6,
+                ifm_channels=256,
+                ofm_channels=512,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            depth7 = make_ethosu_depthwise_conv2d(
+                ifm=conv7,
+                channels=512,
+                kernel_shape=(3, 3),
+                padding=(1, 1, 1, 1),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            conv8 = make_ethosu_conv2d(
+                ifm=depth7,
+                ifm_channels=512,
+                ofm_channels=512,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            depth8 = make_ethosu_depthwise_conv2d(
+                ifm=conv8,
+                channels=512,
+                kernel_shape=(3, 3),
+                padding=(1, 1, 1, 1),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            conv9 = make_ethosu_conv2d(
+                ifm=depth8,
+                ifm_channels=512,
+                ofm_channels=512,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            depth9 = make_ethosu_depthwise_conv2d(
+                ifm=conv9,
+                channels=512,
+                kernel_shape=(3, 3),
+                padding=(1, 1, 1, 1),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            conv10 = make_ethosu_conv2d(
+                ifm=depth9,
+                ifm_channels=512,
+                ofm_channels=512,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            depth10 = make_ethosu_depthwise_conv2d(
+                ifm=conv10,
+                channels=512,
+                kernel_shape=(3, 3),
+                padding=(1, 1, 1, 1),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            conv11 = make_ethosu_conv2d(
+                ifm=depth10,
+                ifm_channels=512,
+                ofm_channels=512,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            depth11 = make_ethosu_depthwise_conv2d(
+                ifm=conv11,
+                channels=512,
+                kernel_shape=(3, 3),
+                padding=(1, 1, 1, 1),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            conv12 = make_ethosu_conv2d(
+                ifm=depth11,
+                ifm_channels=512,
+                ofm_channels=512,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            depth12 = make_ethosu_depthwise_conv2d(
+                ifm=conv12,
+                channels=512,
+                kernel_shape=(3, 3),
+                padding=(0, 0, 1, 1),
+                strides=(2, 2),
+                dilation=(1, 1),
+            )
+            conv13 = make_ethosu_conv2d(
+                ifm=depth12,
+                ifm_channels=512,
+                ofm_channels=1024,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            depth13 = make_ethosu_depthwise_conv2d(
+                ifm=conv13,
+                channels=1024,
+                kernel_shape=(3, 3),
+                padding=(1, 1, 1, 1),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            conv14 = make_ethosu_conv2d(
+                ifm=depth13,
+                ifm_channels=1024,
+                ofm_channels=1024,
+                kernel_shape=(1, 1),
+                padding=(0, 0, 0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+            )
+            func = relay.Function(relay.analysis.free_vars(conv14), conv14)
+            func = run_opt_pass(func, relay.transform.InferType())
+            return func
+
+        func = _get_func()
+        te_graph, const_dict = create_te_graph(func)
+        sch = tvm.te.create_schedule([t.op for t in te_graph.outputs])
+        return sch, te_graph, const_dict
+
+    @pytest.fixture
+    def MobileNetv1TE():
+        return make_MobileNetv1TE()
+
+    @pytest.fixture
+    def MobileNetv1Graph():
+        _, te_graph, const_dict = make_MobileNetv1TE()
+        device_config = cs.EthosuDeviceConfig("ethos-u55-256")
+        return cs.create_cascader_graph(te_graph, const_dict, device_config)
diff --git a/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py b/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py
new file mode 100644
index 000000000000..5af89a415978
--- /dev/null
+++ b/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py
@@ -0,0 +1,139 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import pytest
+
+from tvm.contrib.ethosu.cascader.proposal_generator import generate_proposals
+
+from .infra import make_simple_home_map, make_options, ethosu_enabled
+
+
+if ethosu_enabled:
+
+    def test_generate_proposals(FLASH, SRAM, TwoConv2DGraph):
+        graph = TwoConv2DGraph
+        min_sram = 3700
+        max_sram = 11700
+        input_configs = 1
+        parts = 2
+        home_map = make_simple_home_map(graph, SRAM, FLASH)
+        options = make_options(
+            cascade_region=SRAM,
+            max_proposals=32,
+            stripe_factors=4,
+            max_plan_size=10,
+        )
+
+        proposals = generate_proposals(graph, home_map, options)
+
+        for proposal in proposals:
+            assert 0 < len(proposal.plans) <= parts
+            assert len(proposal.input_tensor_configs) == input_configs
+            assert len(proposal.part_group) == parts
+            assert min_sram < proposal.memory_usage < max_sram
+            assert proposal.cycles > 0
+
+    def test_generate_proposals_binary(FLASH, SRAM, BinaryGraph):
+        graph = BinaryGraph
+        input_configs = 2
+        parts = 3
+        home_map = make_simple_home_map(graph, SRAM, FLASH)
+        options = make_options(
+            cascade_region=SRAM,
+            max_proposals=32,
+            stripe_factors=4,
+            max_plan_size=10,
+        )
+
+        proposals = generate_proposals(graph, home_map, options)
+
+        for proposal in proposals:
+            assert 0 < len(proposal.plans) <= parts
+            assert len(proposal.input_tensor_configs) == input_configs
+            assert len(proposal.part_group) == parts
+            assert proposal.cycles > 0
+
+    def test_generate_proposals_mobilenetv1_start(FLASH, SRAM, MobileNetv1StartGraph):
+        graph = MobileNetv1StartGraph
+        min_sram = 200000
+        max_sram = 1300000
+        input_configs = 1
+        parts = 8
+        home_map = make_simple_home_map(graph, SRAM, FLASH)
+        options = make_options(
+            cascade_region=SRAM,
+            max_proposals=32,
+            stripe_factors=5,
+            max_plan_size=10,
+        )
+
+        proposals = generate_proposals(graph, home_map, options)
+
+        for proposal in proposals:
+            assert 0 < len(proposal.plans) <= parts
+            assert len(proposal.input_tensor_configs) == input_configs
+            assert len(proposal.part_group) == parts
+            assert min_sram < proposal.memory_usage < max_sram
+            assert proposal.cycles > 0
+
+    def test_generate_proposals_mobilenetv1(FLASH, SRAM, MobileNetv1Graph):
+        graph = MobileNetv1Graph
+        min_sram = 200000
+        max_sram = 1300000
+        input_configs = 1
+        parts = 27
+        home_map = make_simple_home_map(graph, SRAM, FLASH)
+        options = make_options(
+            cascade_region=SRAM,
+            max_proposals=32,
+            stripe_factors=5,
+            max_plan_size=10,
+        )
+
+        proposals = generate_proposals(graph, home_map, options)
+
+        for proposal in proposals:
+            assert 0 < len(proposal.plans) <= parts
+            assert len(proposal.input_tensor_configs) == input_configs
+            assert len(proposal.part_group) == parts
+            assert min_sram < proposal.memory_usage < max_sram
+            assert proposal.cycles > 0
+
+    def test_generate_proposals_mobilenetv2diamond(FLASH, SRAM, MobileNetv2DiamondGraph):
+        graph = MobileNetv2DiamondGraph
+        min_sram = 370000
+        max_sram = 990000
+        input_configs = 1
+        parts = 5
+        home_map = make_simple_home_map(graph, SRAM, FLASH)
+        options = make_options(
+            cascade_region=SRAM,
+            max_proposals=64,
+            stripe_factors=5,
+            max_plan_size=10,
+        )
+
+        proposals = generate_proposals(graph, home_map, options)
+
+        for proposal in proposals:
+            assert 0 < len(proposal.plans) <= parts
+            assert len(proposal.input_tensor_configs) == input_configs
+            assert len(proposal.part_group) == parts
+            assert min_sram < proposal.memory_usage < max_sram
+            assert proposal.cycles > 0
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])
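Reviewer note: for anyone wanting to step through this outside pytest, the fixtures above compose roughly as follows. This is a sketch assuming an Ethos-U enabled build; `make_MobileNetv1TE`, `make_simple_home_map`, `make_options`, `SRAM` and `FLASH` are the helpers/fixtures defined in conftest.py and infra, not public API.

```python
import tvm.contrib.ethosu.cascader as cs
from tvm.contrib.ethosu.cascader.proposal_generator import generate_proposals

# Build a TE graph and lower it to a CascaderGraph, as the fixtures do
_, te_graph, const_dict = make_MobileNetv1TE()
device_config = cs.EthosuDeviceConfig("ethos-u55-256")
graph = cs.create_cascader_graph(te_graph, const_dict, device_config)

# Home every tensor as the tests do, then generate the Pareto frontier
home_map = make_simple_home_map(graph, SRAM, FLASH)
options = make_options(
    cascade_region=SRAM, max_proposals=32, stripe_factors=5, max_plan_size=10
)
proposals = generate_proposals(graph, home_map, options)
```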