From 31f9aa0c5ed4a33f1e170af4ae231429a138675e Mon Sep 17 00:00:00 2001 From: Matthew Barrett Date: Thu, 7 Oct 2021 11:36:04 +0100 Subject: [PATCH 1/2] [microNPU][4] Add the cascader Proposal generator The Proposal generator takes optimal Plans and combines them to find optimal 'Proposals' - sets of disjoint Plans that cover every Part in a CascaderGraph. It ultimately produces a Pareto-frontier of 'optimal' Proposals in terms of estimated cycles and memory usage. Change-Id: Id42099819a596496a5769bae22f08eeb75ec69b6 --- python/tvm/contrib/ethosu/cascader/pareto.py | 8 + .../tvm/contrib/ethosu/cascader/proposal.py | 72 ++++ .../ethosu/cascader/proposal_generator.py | 38 ++ src/contrib/ethosu/cascader/pareto.cc | 33 ++ src/contrib/ethosu/cascader/pareto.h | 4 + src/contrib/ethosu/cascader/proposal.cc | 81 ++++ src/contrib/ethosu/cascader/proposal.h | 105 +++++ .../ethosu/cascader/proposal_generator.cc | 218 ++++++++++ .../ethosu/cascader/proposal_generator.h | 86 ++++ .../contrib/test_ethosu/cascader/conftest.py | 397 ++++++++++++++++++ .../cascader/test_proposal_generator.py | 141 +++++++ 11 files changed, 1183 insertions(+) create mode 100644 python/tvm/contrib/ethosu/cascader/proposal.py create mode 100644 python/tvm/contrib/ethosu/cascader/proposal_generator.py create mode 100644 src/contrib/ethosu/cascader/proposal.cc create mode 100644 src/contrib/ethosu/cascader/proposal.h create mode 100644 src/contrib/ethosu/cascader/proposal_generator.cc create mode 100644 src/contrib/ethosu/cascader/proposal_generator.h create mode 100644 tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py diff --git a/python/tvm/contrib/ethosu/cascader/pareto.py b/python/tvm/contrib/ethosu/cascader/pareto.py index 3c4dcbc88a45..b282cfcbaa43 100644 --- a/python/tvm/contrib/ethosu/cascader/pareto.py +++ b/python/tvm/contrib/ethosu/cascader/pareto.py @@ -21,6 +21,8 @@ from . import _ffi_api from .plan import Plan +from .proposal import Proposal +from .tensor_config import MemoryRegion def _get_pareto_frontier(costs: List[List[float]]) -> List[bool]: @@ -37,3 +39,9 @@ def _thin_vector(vec: List[Object], max_size: int) -> List[Object]: def _pareto_cull_plans(plans: List[Plan], max_plans: int) -> List[Plan]: return list(_ffi_api.ParetoCullPlans(plans, max_plans)) + + +def pareto_cull_proposals( + proposals: List[Proposal], cascade_region: MemoryRegion, max_proposals: int +) -> List[Proposal]: + return list(_ffi_api.ParetoCullProposals(proposals, cascade_region, max_proposals)) diff --git a/python/tvm/contrib/ethosu/cascader/proposal.py b/python/tvm/contrib/ethosu/cascader/proposal.py new file mode 100644 index 000000000000..c72653a10831 --- /dev/null +++ b/python/tvm/contrib/ethosu/cascader/proposal.py @@ -0,0 +1,72 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""Proposal class to hold graph scheduling information.""" +from typing import Dict, FrozenSet, List +import tvm._ffi +from tvm.contrib.ethosu.cascader.plan import Plan + +from tvm.runtime import Object + +from . import _ffi_api +from .graph import Tensor, Part +from .tensor_config import TensorConfig, MemoryRegion + + +@tvm._ffi.register_object("contrib.ethosu.cascader.Proposal") +class Proposal(Object): + """Proposal class""" + + def __init__( + self, + part_group: FrozenSet[Part], + plans: List[Plan], + input_tensor_configs: Dict[Tensor, TensorConfig], + memory_usage: Dict[MemoryRegion, int], + cycles: int, + ): + self.__init_handle_by_constructor__( + _ffi_api.Proposal, + list(part_group), + plans, + input_tensor_configs, + memory_usage, + cycles, + ) + + @property + def graph(self): + return self._graph + + @property + def part_group(self): + return frozenset(self._part_group) + + @property + def plans(self): + return list(self._plans) + + @property + def input_tensor_configs(self): + return dict(self._input_tensor_configs) + + @property + def memory_usage(self): + return int(self._memory_usage) + + @property + def cycles(self): + return int(self._cycles) diff --git a/python/tvm/contrib/ethosu/cascader/proposal_generator.py b/python/tvm/contrib/ethosu/cascader/proposal_generator.py new file mode 100644 index 000000000000..99361cb60cc7 --- /dev/null +++ b/python/tvm/contrib/ethosu/cascader/proposal_generator.py @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Algorithms to generate Proposals for a Graph.""" +from typing import List, Dict, FrozenSet + +from . 
import _ffi_api
+from .cascader_options import CascaderOptions
+from .proposal import Proposal
+from .graph import CascaderGraph, Tensor
+from .tensor_config import MemoryRegion
+
+
+def generate_proposals(
+    graph: CascaderGraph,
+    home_map: Dict[Tensor, List[MemoryRegion]],
+    options: CascaderOptions,
+) -> List[Proposal]:
+    return list(
+        _ffi_api.GenerateProposals(
+            graph,
+            home_map,
+            options,
+        )
+    )
diff --git a/src/contrib/ethosu/cascader/pareto.cc b/src/contrib/ethosu/cascader/pareto.cc
index 255719088cb0..21f0994ba316 100644
--- a/src/contrib/ethosu/cascader/pareto.cc
+++ b/src/contrib/ethosu/cascader/pareto.cc
@@ -29,6 +29,8 @@
 #include "common.h"
 #include "plan.h"
+#include "proposal.h"
+#include "tensor_config.h"

 namespace tvm {
 namespace contrib {
@@ -106,6 +108,31 @@ std::vector<Plan> ParetoCullPlans(std::vector<Plan> plans, size_t max_plans) {
   return ThinVector(optimal_plans, max_plans);
 }

+std::vector<Proposal> ParetoCullProposals(std::vector<Proposal> proposals, size_t max_proposals) {
+  std::sort(proposals.begin(), proposals.end(), [](const Proposal& a, const Proposal& b) -> bool {
+    return a->GetMemoryUsage() < b->GetMemoryUsage();
+  });
+  std::vector<std::array<float, 2>> costs;
+  for (const auto& proposal : proposals) {
+    std::array<float, 2> cost = {static_cast<float>(proposal->GetMemoryUsage()),
+                                 static_cast<float>(proposal->GetCycles())};
+    costs.emplace_back(cost);
+  }
+  std::vector<bool> is_optimal = GetParetoFrontier<2>(costs);
+  std::vector<Proposal> optimal_proposals;
+  size_t i = 0;
+  for (bool optimal : is_optimal) {
+    if (optimal) {
+      optimal_proposals.push_back(proposals[i]);
+    }
+    i++;
+  }
+  if (optimal_proposals.size() <= max_proposals) {
+    return optimal_proposals;
+  }
+  return ThinVector(optimal_proposals, max_proposals);
+}
+
 TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GetParetoFrontier")
     .set_body_typed([](Array<Array<FloatImm>> tcosts) {
       std::vector<std::array<float, 2>> costs;
@@ -134,6 +161,12 @@ TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.ParetoCullPlans")
       return Array<Plan>(ParetoCullPlans(vplans, max_size));
     });

+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.ParetoCullProposals")
+    .set_body_typed([](Array<Proposal> proposals, int max_size) {
+      std::vector<Proposal> vproposals(proposals.begin(), proposals.end());
+      return Array<Proposal>(ParetoCullProposals(vproposals, max_size));
+    });
+
 }  // namespace cascader
 }  // namespace ethosu
 }  // namespace contrib
diff --git a/src/contrib/ethosu/cascader/pareto.h b/src/contrib/ethosu/cascader/pareto.h
index 6b0cd4e294ac..511da6c2712f 100644
--- a/src/contrib/ethosu/cascader/pareto.h
+++ b/src/contrib/ethosu/cascader/pareto.h
@@ -37,6 +37,8 @@ namespace ethosu {
 namespace cascader {

 class Plan;
+class MemoryRegion;
+class Proposal;

 /*!
  * \brief Determine the Pareto optimal points.
@@ -65,6 +67,8 @@ std::vector<T> ThinVector(const std::vector<T>& vec, size_t max_size);
  */
 std::vector<Plan> ParetoCullPlans(std::vector<Plan> plans, size_t max_plans);

+std::vector<Proposal> ParetoCullProposals(std::vector<Proposal> proposals, size_t max_proposals);
+
 }  // namespace cascader
 }  // namespace ethosu
 }  // namespace contrib
diff --git a/src/contrib/ethosu/cascader/proposal.cc b/src/contrib/ethosu/cascader/proposal.cc
new file mode 100644
index 000000000000..5a7c88a1a7a7
--- /dev/null
+++ b/src/contrib/ethosu/cascader/proposal.cc
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include "proposal.h" + +#include +#include +#include +#include + +#include +#include + +#include "plan.h" + +namespace tvm { +namespace contrib { +namespace ethosu { +namespace cascader { + +void ProposalNode::VisitAttrs(AttrVisitor* v) { + v->Visit("_graph", &graph_); + Array tmp_parts(part_group_.begin(), part_group_.end()); + v->Visit("_part_group", &tmp_parts); + Array tmp_plans(plans_.begin(), plans_.end()); + v->Visit("_plans", &tmp_plans); + Map tmp_tmap(input_tensor_configs_.begin(), input_tensor_configs_.end()); + v->Visit("_input_tensor_configs", &tmp_tmap); + v->Visit("_cascade_region", &cascade_region_); + v->Visit("_memory_usage", &memory_usage_); + v->Visit("_cycles", &cycles_); +} + +Proposal::Proposal(const CascaderGraph& graph, const std::vector& part_group, + const std::vector& plans, const TensorConfigMap& input_tensor_configs, + const MemoryRegion& cascade_region, int memory_usage, int cycles) { + auto n = make_object(); + n->graph_ = std::move(graph); + n->part_group_ = std::move(part_group); + std::sort(n->part_group_.begin(), n->part_group_.end()); + n->plans_ = std::move(plans); + n->input_tensor_configs_ = std::move(input_tensor_configs); + n->cascade_region_ = std::move(cascade_region); + n->memory_usage_ = std::move(memory_usage); + n->cycles_ = cycles; + data_ = std::move(n); +} + +TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.Proposal") + .set_body_typed([](CascaderGraph graph, Array part_group, Array plans, + Map input_tensor_configs, MemoryRegion cascade_region, + int memory_usage, int cycles) { + std::vector spart_group(part_group.begin(), part_group.end()); + std::vector vplans(plans.begin(), plans.end()); + TensorConfigMap minput_tensor_configs(input_tensor_configs.begin(), + input_tensor_configs.end()); + return Proposal(graph, spart_group, vplans, minput_tensor_configs, cascade_region, + memory_usage, cycles); + }); + +TVM_REGISTER_NODE_TYPE(ProposalNode); + +} // namespace cascader +} // namespace ethosu +} // namespace contrib +} // namespace tvm diff --git a/src/contrib/ethosu/cascader/proposal.h b/src/contrib/ethosu/cascader/proposal.h new file mode 100644 index 000000000000..e5db0328b731 --- /dev/null +++ b/src/contrib/ethosu/cascader/proposal.h @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/contrib/ethosu/cascader/proposal.h + * \brief Proposal object for the NPU cascader + */ +#ifndef TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_H_ +#define TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_H_ + +#include +#include + +#include +#include +#include + +#include "graph.h" +#include "plan.h" +#include "tensor_config.h" + +namespace tvm { +namespace contrib { +namespace ethosu { +namespace cascader { + +using MemoryUsageMap = std::unordered_map; +using TensorConfigMap = std::unordered_map; + +/*! \brief Node to represent a Proposal */ +class ProposalNode : public Object { + public: + void VisitAttrs(AttrVisitor* v); + + /*! \return The CascaderGraph to which the Proposal applies */ + const CascaderGraph GetGraph() const { return graph_; } + /*! \return The Parts which are covered by the Proposal */ + const std::vector GetPartGroup() const { return part_group_; } + /*! \return The Plans used in the Proposal */ + const std::vector GetPlans() const { return plans_; } + /*! \return The TensorConfigs indexed by Tensor in the Proposal which aren't produced by a Plan */ + const TensorConfigMap GetInputTensorConfigs() const { return input_tensor_configs_; } + /*! \return The MemoryRegion where cascading buffers should be homed */ + const MemoryRegion GetCascadeRegion() const { return cascade_region_; } + /*! \return The memory required to execute the Proposal in the cascading MemoryRegion */ + const int GetMemoryUsage() const { return memory_usage_; } + /*! \return The estimated cycles taken to execute the Proposal */ + int GetCycles() const { return cycles_; } + + static constexpr const char* _type_key = "contrib.ethosu.cascader.Proposal"; + TVM_DECLARE_FINAL_OBJECT_INFO(ProposalNode, Object); + + protected: + friend class Proposal; + + /*! \brief The CascaderGraph to which the Proposal applies */ + CascaderGraph graph_; + /*! \brief The Parts which are covered by the Proposal */ + std::vector part_group_; + /*! \brief The Plans used in the Proposal */ + std::vector plans_; + /*! \brief The TensorConfigs indexed by Tensor in the Proposal which aren't produced by a Plan */ + TensorConfigMap input_tensor_configs_; + /*! \brief The MemoryRegion where cascading buffers should be homed */ + MemoryRegion cascade_region_; + /*! \brief The memory required to execute the Proposal in the cascading MemoryRegion */ + int memory_usage_; + /*! \brief The estimated cycles taken to execute the Proposal */ + int cycles_; +}; + +/*! + * \brief A class which describes how to schedule a CascaderGraph as a series of disjoint Plans. + */ +class Proposal : public ObjectRef { + public: + Proposal(const CascaderGraph& graph, const std::vector& part_group, + const std::vector& plans, const TensorConfigMap& input_tensor_configs, + const MemoryRegion& cascade_region, int memory_usage, int cycles); + + TVM_DEFINE_OBJECT_REF_METHODS(Proposal, ObjectRef, ProposalNode); +}; + +} // namespace cascader +} // namespace ethosu +} // namespace contrib +} // namespace tvm + +#endif // TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_H_ diff --git a/src/contrib/ethosu/cascader/proposal_generator.cc b/src/contrib/ethosu/cascader/proposal_generator.cc new file mode 100644 index 000000000000..3fda0d9921b5 --- /dev/null +++ b/src/contrib/ethosu/cascader/proposal_generator.cc @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "cascader_options.h" +#include "graph.h" +#include "pareto.h" +#include "plan.h" +#include "plan_generator.h" +#include "proposal.h" +#include "stripe_config.h" +#include "tensor_config.h" + +namespace tvm { +namespace contrib { +namespace ethosu { +namespace cascader { + +std::unordered_set GetPlanBoundaryConfigs(const Plan& plan) { + std::unordered_set boundary_configs; + for (const auto& config : plan->GetTensorConfigs()) { + if (config->GetState() == TensorConfigState::BOUNDARY) { + boundary_configs.insert(config); + } + } + return boundary_configs; +} + +bool IsPlanCompatible(const Proposal& proposal, + const std::vector& plan_part_group, + const std::unordered_set& plan_boundary_configs) { + // Check the Plan Part group is disjoint with the Proposal Part group + for(const auto& plan_part : plan_part_group) { + for(const auto& proposal_part : proposal->GetPartGroup()) { + if(plan_part == proposal_part) { + return false; + } + } + } + // If the Plan and Proposal disagree on the memory home of a Tensor, they + // are incompatible and can't be used to create a new Proposal + auto tensor_configs = proposal->GetInputTensorConfigs(); + for (const auto& plan_config : plan_boundary_configs) { + if (tensor_configs.find(plan_config->GetTensor()) != tensor_configs.end()) { + auto proposal_config = tensor_configs.at(plan_config->GetTensor()); + if (proposal_config->GetHomeRegion() != plan_config->GetHomeRegion()) { + return false; + } + } + } + return true; +} + +std::unordered_map, ObjectPtrHash, ObjectPtrEqual> CreatePlansByPart( + const std::unordered_map, std::vector>& plans_by_group, + const CascaderGraph& graph) { + std::unordered_map, ObjectPtrHash, ObjectPtrEqual> plans_by_part; + for (const auto& it : plans_by_group) { + auto part_group = it.first; + auto plans = it.second; + int highest_index = 0; + Part& index_part = part_group.front(); + // Determine the Part in the Part group with the highest ID - this will be used to index + // the Plans + for (const auto& part : part_group) { + int pid = graph->GetPartID(part); + if (pid >= highest_index) { + index_part = part; + highest_index = pid; + } + } + plans_by_part[index_part].insert(plans_by_part[index_part].begin(), plans.begin(), plans.end()); + } + return plans_by_part; +} + +Proposal AddPlanToProposal(const Proposal& proposal, const Plan& plan, + const std::unordered_set& plan_boundary_configs) { + std::vector new_plans = proposal->GetPlans(); + new_plans.push_back(plan); + TensorConfigMap new_configs = proposal->GetInputTensorConfigs(); + // Add input configs from the Plan if they're homed in the cascade region + for (const auto& config : plan_boundary_configs) { + if (config->GetHomeRegion() == proposal->GetCascadeRegion()) { + 
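+      // Record the Plan's boundary config as an input TensorConfig of the combined Proposal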
new_configs[config->GetTensor()] = config;
+    }
+  }
+  // Remove the Plan's output config from the new_configs if it's present because
+  // it won't be an input to the Proposal any more
+  if (new_configs.find(plan->GetOutputConfig()->GetTensor()) != new_configs.end()) {
+    new_configs.erase(plan->GetOutputConfig()->GetTensor());
+  }
+  // The updated memory usage is the memory required to run the Plan plus the
+  // non-local memory that's required in the Proposal at that point in time
+  int new_memory_usage = plan->GetMemoryUsage();
+  for (const auto& it : new_configs) {
+    if (plan_boundary_configs.find(it.second) == plan_boundary_configs.end()) {
+      new_memory_usage += it.first->GetSize();
+    }
+  }
+  new_memory_usage = std::max(new_memory_usage, proposal->GetMemoryUsage());
+  int new_cycles = proposal->GetCycles() + plan->GetCycles();
+  std::vector<Part> new_part_group = proposal->GetPartGroup();
+  new_part_group.insert(new_part_group.end(), plan->GetPartGroup().begin(), plan->GetPartGroup().end());
+  std::sort(new_part_group.begin(), new_part_group.end());
+  return Proposal(proposal->GetGraph(), new_part_group, new_plans, new_configs,
+                  proposal->GetCascadeRegion(), new_memory_usage, new_cycles);
+}
+
+std::vector<Proposal> GeneratePartialProposals(const CascaderGraph& graph, const HomeMap& home_map,
+    const CascaderOptions options,
+    const std::unordered_map<Part, std::vector<Plan>, ObjectPtrHash, ObjectPtrEqual>& plans_by_part,
+    const std::vector<Part>& partial_proposal_group,
+    std::unordered_map<std::vector<Part>, std::vector<Proposal>>* proposals_by_group) {
+  // Return the memoized Proposals if this Part group has already been solved
+  if (proposals_by_group->find(partial_proposal_group) != proposals_by_group->end()) {
+    return proposals_by_group->at(partial_proposal_group);
+  }
+  if (partial_proposal_group.size() == 0) {
+    // Base case: the empty Part group is covered by an empty Proposal with no cost
+    (*proposals_by_group)[partial_proposal_group] =
+        std::vector<Proposal>{Proposal(graph, std::vector<Part>(), std::vector<Plan>(),
+                                       TensorConfigMap(), options->cascade_region, 0, 0)};
+  } else {
+    Part part = partial_proposal_group.back();
+    const auto& plans = plans_by_part.at(part);
+    for (const auto& plan : plans) {
+      if (plan->GetInteriorRegion() == options->cascade_region) {
+        // Collecting the boundary configs for every candidate Plan here isn't very
+        // efficient, but keeping this work out of the Plan generator improves the
+        // Plan generator's performance
+        std::unordered_set<TensorConfig> plan_boundary_configs = GetPlanBoundaryConfigs(plan);
+        // The residual_proposal_group is a Part group indicating the Parts which aren't
+        // covered by the current Plan. It's the group for which we must find 'residual
+        // Proposals', meaning Proposals which cover the rest of the CascaderGraph assuming we
+        // pick the current Plan.
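+        // For example (with purely illustrative Part IDs): if the partial Part group is
+        // {P0, P1, P2} and the current Plan covers {P1, P2}, then the residual Proposal
+        // group is {P0}.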
+        std::vector<Part> residual_proposal_group;
+        std::copy_if(partial_proposal_group.begin(), partial_proposal_group.end(),
+                     std::back_inserter(residual_proposal_group), [&plan](Part value) {
+                       return std::find(plan->GetPartGroup().begin(),
+                                        plan->GetPartGroup().end(),
+                                        value) == plan->GetPartGroup().end();
+                     });
+        // std::sort(residual_proposal_group.begin(), residual_proposal_group.end());
+        const auto& residual_proposals = GeneratePartialProposals(graph, home_map, options, plans_by_part, residual_proposal_group, proposals_by_group);
+        auto plan_output_tensor = plan->GetOutputConfig()->GetTensor();
+        ICHECK_LE(plan_output_tensor->GetProducers().size(), 1)
+            << "All tensors must have at most one producer.";
+        for (const auto& residual_proposal : residual_proposals) {
+          if (IsPlanCompatible(residual_proposal, plan->GetPartGroup(), plan_boundary_configs)) {
+            (*proposals_by_group)[partial_proposal_group].push_back(AddPlanToProposal(
+                residual_proposal, plan, plan_boundary_configs));
+          }
+        }
+      }
+    }
+    (*proposals_by_group)[partial_proposal_group] = ParetoCullProposals(
+        proposals_by_group->at(partial_proposal_group), options->max_proposals);
+  }
+  return proposals_by_group->at(partial_proposal_group);
+}
+
+std::vector<Proposal> GenerateProposals(const CascaderGraph& graph, const HomeMap& home_map,
+                                        const CascaderOptions& options) {
+  // First, generate all the Pareto optimal Plans for the CascaderGraph
+  auto plans_by_group = GenerateGraphPlans(graph, home_map, options);
+  // Then create a map between every Part in the CascaderGraph and all the Plans for which that
+  // Part is the highest ID Part within the Plan's Part group
+  std::unordered_map<Part, std::vector<Plan>, ObjectPtrHash, ObjectPtrEqual> plans_by_part =
+      CreatePlansByPart(plans_by_group, graph);
+  // The Part group that partial Proposals are currently being generated for
+  std::vector<Part> partial_proposal_group = graph->GetPartOrder();
+  // A map of Proposals indexed by the Part group they cover
+  std::unordered_map<std::vector<Part>, std::vector<Proposal>> proposals_by_group;
+  return GeneratePartialProposals(graph, home_map, options, plans_by_part, partial_proposal_group, &proposals_by_group);
+}
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GenerateProposals")
+    .set_body_typed([](CascaderGraph graph, Map<Tensor, Array<MemoryRegion>> home_map,
+                       CascaderOptions options) {
+      std::unordered_map<Tensor, std::vector<MemoryRegion>, ObjectPtrHash, ObjectPtrEqual>
+          mhome_map;
+      for (const auto& it : home_map) {
+        std::vector<MemoryRegion> home_regions;
+        for (const auto& i : it.second) {
+          home_regions.push_back(i);
+        }
+        mhome_map[it.first] = home_regions;
+      }
+      return Array<Proposal>(GenerateProposals(graph, mhome_map, options));
+    });
+
+}  // namespace cascader
+}  // namespace ethosu
+}  // namespace contrib
+}  // namespace tvm
diff --git a/src/contrib/ethosu/cascader/proposal_generator.h b/src/contrib/ethosu/cascader/proposal_generator.h
new file mode 100644
index 000000000000..624dc5b98a85
--- /dev/null
+++ b/src/contrib/ethosu/cascader/proposal_generator.h
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/contrib/ethosu/cascader/proposal_generator.h + * \brief Algorithm to generate possible Proposals in the NPU cascader + */ +#ifndef TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_GENERATOR_H_ +#define TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_GENERATOR_H_ + +#include +#include + +#include +#include +#include + +namespace tvm { +namespace contrib { +namespace ethosu { +namespace cascader { + +class CascaderGraph; +class MemoryRegion; +class Tensor; +class Proposal; +class CascaderOptions; + +using HomeMap = + std::unordered_map, ObjectPtrHash, ObjectPtrEqual>; + +/*! + * \brief Generate Pareto optimal Proposals for a CascaderGraph. + * \param graph The CascaderGraph to generate Proposals for. + * \param home_map The Tensor homing map defining valid memory homes for Tensors. + * \param options The configuration options with which to run the generator. + * \return A vector of Pareto optimal Proposals. + * \note This algorithm takes a top-down dynamic programming approach to determining how + * to optimally combine Plans into Proposals. It does the following: + * + * First, run GenerateGraphPlans to generate the Pareto optimal Plans that cover all the + * Part groups in the CascaderGraph. + * + * Solve the problem recursively, generating optimal Proposals for increasingly small + * portions of the overall graph. + * + * Take the first Part in the graph: + * 1. Find all the Plans for which the Part is both in the Plan's Part group and has the + * highest Part ID of any Part in the Part group (i.e. it's the 'first' Part in the + * group). + * For each Plan: + * 2. Get the Part group covered by the Plan and subtract it from the 'total Part group' + * covering all the Parts. This forms a 'residual Part group'. + * 3. Recursively, determine the optimal Proposals for the 'residual Part group' (the graph + * minus the Parts included in the Plan). Memoize the results. + * For each residual Proposal: + * 4. Create a new Proposal by adding the current Plan to the residual Proposal. + * 5. Pareto cull all the newly created Proposals (which all share the same Part group). + * 6. Return the Proposals which cover all the Parts in the CascaderGraph. 
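+ *
+ * As a purely illustrative example: for a linear graph with Parts {P0, P1, P2} and Plans
+ * covering {P2} and {P1, P2}, starting from P2 we can either pick the {P2} Plan, leaving
+ * the residual Part group {P0, P1}, or the {P1, P2} Plan, leaving {P0}. The optimal
+ * Proposals for each residual group are determined recursively and memoized, so no Part
+ * group is solved more than once.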
+ * + */ +std::vector GenerateProposals(const CascaderGraph& graph, const HomeMap& home_map, + const CascaderOptions& options); + +} // namespace cascader +} // namespace ethosu +} // namespace contrib +} // namespace tvm + +#endif // TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_GENERATOR_H_ diff --git a/tests/python/contrib/test_ethosu/cascader/conftest.py b/tests/python/contrib/test_ethosu/cascader/conftest.py index 21ed401994c1..cffaf83df0bc 100644 --- a/tests/python/contrib/test_ethosu/cascader/conftest.py +++ b/tests/python/contrib/test_ethosu/cascader/conftest.py @@ -203,3 +203,400 @@ def _get_func(): @pytest.fixture def MobileNetv2DiamondTE(): return make_MobileNetv2DiamondTE() + + @pytest.fixture + def MobileNetv2DiamondGraph(): + _, te_graph, const_dict = make_MobileNetv2DiamondTE() + device_config = cs.EthosuDeviceConfig("ethos-u55-256") + return cs.create_cascader_graph(te_graph, const_dict, device_config) + + def make_BinaryTE(): + def _get_func(): + ifm_a = relay.var("ifm_a", shape=(1, 8, 8, 8), dtype="int8") + ifm_b = relay.var("ifm_b", shape=(1, 8, 8, 8), dtype="int8") + conv1 = make_ethosu_conv2d( + ifm=ifm_a, + ifm_channels=8, + ofm_channels=8, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + conv2 = make_ethosu_conv2d( + ifm=ifm_b, + ifm_channels=8, + ofm_channels=8, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + add1 = make_ethosu_binary_elementwise( + ifm=conv1, + ifm2=conv2, + ifm_channels=8, + ifm2_channels=8, + operator_type="ADD", + ofm_dtype="int8", + ) + func = relay.Function(relay.analysis.free_vars(add1), add1) + func = run_opt_pass(func, relay.transform.InferType()) + return func + + func = _get_func() + te_graph, const_dict = create_te_graph(func) + sch = tvm.te.create_schedule([t.op for t in te_graph.outputs]) + return sch, te_graph, const_dict + + @pytest.fixture + def BinaryTE(): + return make_BinaryTE() + + @pytest.fixture + def BinaryGraph(): + _, te_graph, const_dict = make_BinaryTE() + device_config = cs.EthosuDeviceConfig("ethos-u55-256") + return cs.create_cascader_graph(te_graph, const_dict, device_config) + + def make_MobileNetv1StartTE(): + def _get_func(): + ifm = relay.var("ifm", shape=(1, 224, 224, 3), dtype="int8") + conv1 = make_ethosu_conv2d( + ifm=ifm, + ifm_channels=3, + ofm_channels=32, + kernel_shape=(3, 3), + padding=(0, 0, 1, 1), + strides=(2, 2), + dilation=(1, 1), + ) + depth1 = make_ethosu_depthwise_conv2d( + ifm=conv1, + channels=32, + kernel_shape=(3, 3), + padding=(1, 1, 1, 1), + strides=(1, 1), + dilation=(1, 1), + ) + conv2 = make_ethosu_conv2d( + ifm=depth1, + ifm_channels=32, + ofm_channels=64, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + depth2 = make_ethosu_depthwise_conv2d( + ifm=conv2, + channels=64, + kernel_shape=(3, 3), + padding=(0, 0, 1, 1), + strides=(2, 2), + dilation=(1, 1), + ) + conv3 = make_ethosu_conv2d( + ifm=depth2, + ifm_channels=64, + ofm_channels=128, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + depth3 = make_ethosu_depthwise_conv2d( + ifm=conv3, + channels=128, + kernel_shape=(3, 3), + padding=(1, 1, 1, 1), + strides=(1, 1), + dilation=(1, 1), + ) + conv4 = make_ethosu_conv2d( + ifm=depth3, + ifm_channels=128, + ofm_channels=128, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + depth4 = make_ethosu_depthwise_conv2d( + ifm=conv4, + channels=128, + kernel_shape=(3, 3), + padding=(0, 0, 1, 1), + 
strides=(2, 2), + dilation=(1, 1), + ) + func = relay.Function(relay.analysis.free_vars(depth4), depth4) + func = run_opt_pass(func, relay.transform.InferType()) + return func + + func = _get_func() + te_graph, const_dict = create_te_graph(func) + sch = tvm.te.create_schedule([t.op for t in te_graph.outputs]) + return sch, te_graph, const_dict + + @pytest.fixture + def MobileNetv1StartTE(): + return make_MobileNetv1StartTE() + + @pytest.fixture + def MobileNetv1StartGraph(): + _, te_graph, const_dict = make_MobileNetv1StartTE() + device_config = cs.EthosuDeviceConfig("ethos-u55-256") + return cs.create_cascader_graph(te_graph, const_dict, device_config) + + def make_MobileNetv1TE(): + def _get_func(): + ifm = relay.var("ifm", shape=(1, 224, 224, 3), dtype="int8") + conv1 = make_ethosu_conv2d( + ifm=ifm, + ifm_channels=3, + ofm_channels=32, + kernel_shape=(3, 3), + padding=(0, 0, 1, 1), + strides=(2, 2), + dilation=(1, 1), + ) + depth1 = make_ethosu_depthwise_conv2d( + ifm=conv1, + channels=32, + kernel_shape=(3, 3), + padding=(1, 1, 1, 1), + strides=(1, 1), + dilation=(1, 1), + ) + conv2 = make_ethosu_conv2d( + ifm=depth1, + ifm_channels=32, + ofm_channels=64, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + depth2 = make_ethosu_depthwise_conv2d( + ifm=conv2, + channels=64, + kernel_shape=(3, 3), + padding=(0, 0, 1, 1), + strides=(2, 2), + dilation=(1, 1), + ) + conv3 = make_ethosu_conv2d( + ifm=depth2, + ifm_channels=64, + ofm_channels=128, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + depth3 = make_ethosu_depthwise_conv2d( + ifm=conv3, + channels=128, + kernel_shape=(3, 3), + padding=(1, 1, 1, 1), + strides=(1, 1), + dilation=(1, 1), + ) + conv4 = make_ethosu_conv2d( + ifm=depth3, + ifm_channels=128, + ofm_channels=128, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + depth4 = make_ethosu_depthwise_conv2d( + ifm=conv4, + channels=128, + kernel_shape=(3, 3), + padding=(0, 0, 1, 1), + strides=(2, 2), + dilation=(1, 1), + ) + conv5 = make_ethosu_conv2d( + ifm=depth4, + ifm_channels=128, + ofm_channels=256, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + depth5 = make_ethosu_depthwise_conv2d( + ifm=conv5, + channels=256, + kernel_shape=(3, 3), + padding=(1, 1, 1, 1), + strides=(1, 1), + dilation=(1, 1), + ) + conv6 = make_ethosu_conv2d( + ifm=depth5, + ifm_channels=256, + ofm_channels=256, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + depth6 = make_ethosu_depthwise_conv2d( + ifm=conv6, + channels=256, + kernel_shape=(3, 3), + padding=(0, 0, 1, 1), + strides=(2, 2), + dilation=(1, 1), + ) + conv7 = make_ethosu_conv2d( + ifm=depth6, + ifm_channels=256, + ofm_channels=512, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + depth7 = make_ethosu_depthwise_conv2d( + ifm=conv7, + channels=512, + kernel_shape=(3, 3), + padding=(1, 1, 1, 1), + strides=(1, 1), + dilation=(1, 1), + ) + conv8 = make_ethosu_conv2d( + ifm=depth7, + ifm_channels=512, + ofm_channels=512, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + depth8 = make_ethosu_depthwise_conv2d( + ifm=conv8, + channels=512, + kernel_shape=(3, 3), + padding=(1, 1, 1, 1), + strides=(1, 1), + dilation=(1, 1), + ) + conv9 = make_ethosu_conv2d( + ifm=depth8, + ifm_channels=512, + ofm_channels=512, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + 
dilation=(1, 1), + ) + depth9 = make_ethosu_depthwise_conv2d( + ifm=conv9, + channels=512, + kernel_shape=(3, 3), + padding=(1, 1, 1, 1), + strides=(1, 1), + dilation=(1, 1), + ) + conv10 = make_ethosu_conv2d( + ifm=depth9, + ifm_channels=512, + ofm_channels=512, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + depth10 = make_ethosu_depthwise_conv2d( + ifm=conv10, + channels=512, + kernel_shape=(3, 3), + padding=(1, 1, 1, 1), + strides=(1, 1), + dilation=(1, 1), + ) + conv11 = make_ethosu_conv2d( + ifm=depth10, + ifm_channels=512, + ofm_channels=512, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + depth11 = make_ethosu_depthwise_conv2d( + ifm=conv11, + channels=512, + kernel_shape=(3, 3), + padding=(1, 1, 1, 1), + strides=(1, 1), + dilation=(1, 1), + ) + conv12 = make_ethosu_conv2d( + ifm=depth11, + ifm_channels=512, + ofm_channels=512, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + depth12 = make_ethosu_depthwise_conv2d( + ifm=conv12, + channels=512, + kernel_shape=(3, 3), + padding=(0, 0, 1, 1), + strides=(2, 2), + dilation=(1, 1), + ) + conv13 = make_ethosu_conv2d( + ifm=depth12, + ifm_channels=512, + ofm_channels=1024, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + depth13 = make_ethosu_depthwise_conv2d( + ifm=conv13, + channels=1024, + kernel_shape=(3, 3), + padding=(1, 1, 1, 1), + strides=(1, 1), + dilation=(1, 1), + ) + conv14 = make_ethosu_conv2d( + ifm=depth13, + ifm_channels=1024, + ofm_channels=1024, + kernel_shape=(1, 1), + padding=(0, 0, 0, 0), + strides=(1, 1), + dilation=(1, 1), + ) + func = relay.Function(relay.analysis.free_vars(conv14), conv14) + func = run_opt_pass(func, relay.transform.InferType()) + return func + + func = _get_func() + te_graph, const_dict = create_te_graph(func) + sch = tvm.te.create_schedule([t.op for t in te_graph.outputs]) + return sch, te_graph, const_dict + + @pytest.fixture + def MobileNetv1TE(): + return make_MobileNetv1TE() + + @pytest.fixture + def MobileNetv1Graph(): + _, te_graph, const_dict = make_MobileNetv1TE() + device_config = cs.EthosuDeviceConfig("ethos-u55-256") + return cs.create_cascader_graph(te_graph, const_dict, device_config) diff --git a/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py b/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py new file mode 100644 index 000000000000..eecf7b561ee2 --- /dev/null +++ b/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py @@ -0,0 +1,141 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
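+
+# The tests below run the Proposal generator end-to-end on small Tensor Expression graphs
+# and check structural invariants of the returned Pareto frontier: the number of Plans
+# used, the number of input TensorConfigs, the Parts covered, and bounds on memory usage
+# and cycle counts.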
+import pytest +from tvm.contrib.ethosu.cascader.proposal_generator import generate_proposals + +from .infra import make_simple_home_map, make_options + + +def test_generate_proposals(FLASH, SRAM, TwoConv2DGraph): + graph = TwoConv2DGraph + min_sram = 3700 + max_sram = 11700 + input_configs = 1 + parts = 2 + home_map = make_simple_home_map(graph, SRAM, FLASH) + options = make_options( + cascade_region=SRAM, + max_proposals=32, + stripe_factors=4, + max_plan_size=10, + ) + + proposals = generate_proposals(graph, home_map, options) + + for proposal in proposals: + assert 0 < len(proposal.plans) <= parts + assert len(proposal.input_tensor_configs) == input_configs + assert len(proposal.part_group) == parts + assert min_sram < proposal.memory_usage < max_sram + assert proposal.cycles > 0 + + +def test_generate_proposals_binary(FLASH, SRAM, BinaryGraph): + graph = BinaryGraph + input_configs = 2 + parts = 3 + home_map = make_simple_home_map(graph, SRAM, FLASH) + options = make_options( + cascade_region=SRAM, + max_proposals=32, + stripe_factors=4, + max_plan_size=10, + ) + + proposals = generate_proposals(graph, home_map, options) + + for proposal in proposals: + assert 0 < len(proposal.plans) <= parts + # assert len(proposal.input_tensor_configs) == input_configs + assert len(proposal.part_group) == parts + assert proposal.cycles > 0 + + +def test_generate_proposals_mobilenetv1_start(FLASH, SRAM, MobileNetv1StartGraph): + graph = MobileNetv1StartGraph + min_sram = 200000 + max_sram = 1300000 + input_configs = 1 + parts = 8 + home_map = make_simple_home_map(graph, SRAM, FLASH) + options = make_options( + cascade_region=SRAM, + max_proposals=32, + stripe_factors=5, + max_plan_size=10, + ) + + proposals = generate_proposals(graph, home_map, options) + + for proposal in proposals: + assert 0 < len(proposal.plans) <= parts + assert len(proposal.input_tensor_configs) == input_configs + assert len(proposal.part_group) == parts + assert min_sram < proposal.memory_usage < max_sram + assert proposal.cycles > 0 + + +def test_generate_proposals_mobilenetv1(FLASH, SRAM, MobileNetv1Graph): + graph = MobileNetv1Graph + min_sram = 200000 + max_sram = 1300000 + input_configs = 1 + parts = 27 + home_map = make_simple_home_map(graph, SRAM, FLASH) + options = make_options( + cascade_region=SRAM, + max_proposals=32, + stripe_factors=5, + max_plan_size=10, + ) + + proposals = generate_proposals(graph, home_map, options) + + for proposal in proposals: + assert 0 < len(proposal.plans) <= parts + assert len(proposal.input_tensor_configs) == input_configs + assert len(proposal.part_group) == parts + assert min_sram < proposal.memory_usage < max_sram + assert proposal.cycles > 0 + + +def test_generate_proposals_mobilenetv2diamond(FLASH, SRAM, MobileNetv2DiamondGraph): + graph = MobileNetv2DiamondGraph + min_sram = 370000 + max_sram = 990000 + input_configs = 1 + parts = 5 + home_map = make_simple_home_map(graph, SRAM, FLASH) + options = make_options( + cascade_region=SRAM, + max_proposals=64, + stripe_factors=5, + max_plan_size=10, + ) + + proposals = generate_proposals(graph, home_map, options) + + for proposal in proposals: + assert 0 < len(proposal.plans) <= parts + assert len(proposal.input_tensor_configs) == input_configs + assert len(proposal.part_group) == parts + assert min_sram < proposal.memory_usage < max_sram + assert proposal.cycles > 0 + + +if __name__ == "__main__": + pytest.main([__file__]) From a272e8a0051169a765c83a3ab2f47641f28dbc96 Mon Sep 17 00:00:00 2001 From: Matthew Barrett Date: Thu, 10 Feb 2022 
17:19:24 +0000 Subject: [PATCH 2/2] Fixes Change-Id: I4f5f2a298bd3bb379c7c8d179150358923b0dd66 --- python/tvm/contrib/ethosu/cascader/pareto.py | 8 - .../tvm/contrib/ethosu/cascader/proposal.py | 50 +++- .../ethosu/cascader/proposal_generator.py | 20 ++ src/contrib/ethosu/cascader/pareto.cc | 6 - src/contrib/ethosu/cascader/proposal.cc | 1 + .../ethosu/cascader/proposal_generator.cc | 47 ++-- .../cascader/test_proposal_generator.py | 234 +++++++++--------- 7 files changed, 203 insertions(+), 163 deletions(-) diff --git a/python/tvm/contrib/ethosu/cascader/pareto.py b/python/tvm/contrib/ethosu/cascader/pareto.py index b282cfcbaa43..3c4dcbc88a45 100644 --- a/python/tvm/contrib/ethosu/cascader/pareto.py +++ b/python/tvm/contrib/ethosu/cascader/pareto.py @@ -21,8 +21,6 @@ from . import _ffi_api from .plan import Plan -from .proposal import Proposal -from .tensor_config import MemoryRegion def _get_pareto_frontier(costs: List[List[float]]) -> List[bool]: @@ -39,9 +37,3 @@ def _thin_vector(vec: List[Object], max_size: int) -> List[Object]: def _pareto_cull_plans(plans: List[Plan], max_plans: int) -> List[Plan]: return list(_ffi_api.ParetoCullPlans(plans, max_plans)) - - -def pareto_cull_proposals( - proposals: List[Proposal], cascade_region: MemoryRegion, max_proposals: int -) -> List[Proposal]: - return list(_ffi_api.ParetoCullProposals(proposals, cascade_region, max_proposals)) diff --git a/python/tvm/contrib/ethosu/cascader/proposal.py b/python/tvm/contrib/ethosu/cascader/proposal.py index c72653a10831..13184108120e 100644 --- a/python/tvm/contrib/ethosu/cascader/proposal.py +++ b/python/tvm/contrib/ethosu/cascader/proposal.py @@ -22,51 +22,85 @@ from tvm.runtime import Object from . import _ffi_api -from .graph import Tensor, Part +from .graph import Tensor, Part, CascaderGraph from .tensor_config import TensorConfig, MemoryRegion @tvm._ffi.register_object("contrib.ethosu.cascader.Proposal") class Proposal(Object): - """Proposal class""" + """A class which describes how to schedule a CascaderGraph as a series of disjoint Plans. + + Attributes + ---------- + graph : CascaderGraph + The CascaderGraph to which the Proposal applies. + part_group : FrozenSet[Part] + The Parts which are covered by the Proposal. + plans : List[Plan] + The Plans used in the Proposal. + input_tensor_configs : Dict[Tensor, TensorConfig] + The TensorConfigs indexed by Tensor in the Proposal which aren't produced by a Plan. + cascade_region : MemoryRegion + The MemoryRegion where cascading buffers should be homed. + memory_usage : int + The memory required to execute the Proposal in the cascading MemoryRegion. + cycles : int + The estimated cycles taken to execute the Proposal. 
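+
+    Proposals are not typically constructed directly; they are produced by
+    generate_proposals and culled to a Pareto frontier in terms of memory usage
+    and estimated cycles.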
+
+    """

     def __init__(
         self,
+        graph: CascaderGraph,
         part_group: FrozenSet[Part],
         plans: List[Plan],
         input_tensor_configs: Dict[Tensor, TensorConfig],
+        cascade_region: MemoryRegion,
-        memory_usage: Dict[MemoryRegion, int],
+        memory_usage: int,
         cycles: int,
     ):
         self.__init_handle_by_constructor__(
             _ffi_api.Proposal,
+            graph,
             list(part_group),
             plans,
             input_tensor_configs,
+            cascade_region,
             memory_usage,
             cycles,
         )

     @property
-    def graph(self):
+    def graph(self) -> CascaderGraph:
+        """The CascaderGraph to which the Proposal applies."""
         return self._graph

     @property
-    def part_group(self):
+    def part_group(self) -> FrozenSet[Part]:
+        """The Parts which are covered by the Proposal."""
         return frozenset(self._part_group)

     @property
-    def plans(self):
+    def plans(self) -> List[Plan]:
+        """The Plans used in the Proposal."""
         return list(self._plans)

     @property
-    def input_tensor_configs(self):
+    def input_tensor_configs(self) -> Dict[Tensor, TensorConfig]:
+        """The TensorConfigs indexed by Tensor in the Proposal which aren't produced by a Plan."""
         return dict(self._input_tensor_configs)

     @property
-    def memory_usage(self):
+    def cascade_region(self) -> MemoryRegion:
+        """The MemoryRegion where cascading buffers should be homed."""
+        return self._cascade_region
+
+    @property
+    def memory_usage(self) -> int:
+        """The memory required to execute the Proposal in the cascading MemoryRegion."""
         return int(self._memory_usage)

     @property
-    def cycles(self):
+    def cycles(self) -> int:
+        """The estimated cycles taken to execute the Proposal."""
         return int(self._cycles)
diff --git a/python/tvm/contrib/ethosu/cascader/proposal_generator.py b/python/tvm/contrib/ethosu/cascader/proposal_generator.py
index 99361cb60cc7..d79021a20539 100644
--- a/python/tvm/contrib/ethosu/cascader/proposal_generator.py
+++ b/python/tvm/contrib/ethosu/cascader/proposal_generator.py
@@ -29,6 +29,26 @@ def generate_proposals(
     home_map: Dict[Tensor, List[MemoryRegion]],
     options: CascaderOptions,
 ) -> List[Proposal]:
+    """Generate Pareto optimal Proposals for a CascaderGraph.
+
+    This algorithm takes a top-down dynamic programming approach to determining how
+    to optimally combine Plans into Proposals.
+
+    Parameters
+    ----------
+    graph : CascaderGraph
+        The CascaderGraph to generate Proposals for.
+    home_map : Dict[Tensor, List[MemoryRegion]]
+        The Tensor homing map defining valid memory homes for Tensors.
+    options : CascaderOptions
+        The configuration options with which to run the generator.
+
+    Returns
+    -------
+    List[Proposal]
+        A list of Pareto optimal Proposals.
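+
+    Examples
+    --------
+    A minimal sketch, assuming ``graph``, ``sram``, ``flash`` and ``options`` have been
+    constructed elsewhere; homing every Tensor to both regions is purely illustrative::
+
+        home_map = {tensor: [sram, flash] for tensor in graph.tensor_order}
+        proposals = generate_proposals(graph, home_map, options)
+        fastest = min(proposals, key=lambda p: p.cycles)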
+ + """ return list( _ffi_api.GenerateProposals( graph, diff --git a/src/contrib/ethosu/cascader/pareto.cc b/src/contrib/ethosu/cascader/pareto.cc index 21f0994ba316..52ea729bffa2 100644 --- a/src/contrib/ethosu/cascader/pareto.cc +++ b/src/contrib/ethosu/cascader/pareto.cc @@ -161,12 +161,6 @@ TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.ParetoCullPlans") return Array(ParetoCullPlans(vplans, max_size)); }); -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.ParetoCullProposals") - .set_body_typed([](Array proposals, int max_size) { - std::vector vproposals(proposals.begin(), proposals.end()); - return Array(ParetoCullProposals(vproposals, max_size)); - }); - } // namespace cascader } // namespace ethosu } // namespace contrib diff --git a/src/contrib/ethosu/cascader/proposal.cc b/src/contrib/ethosu/cascader/proposal.cc index 5a7c88a1a7a7..e96be3466e10 100644 --- a/src/contrib/ethosu/cascader/proposal.cc +++ b/src/contrib/ethosu/cascader/proposal.cc @@ -23,6 +23,7 @@ #include #include +#include #include #include diff --git a/src/contrib/ethosu/cascader/proposal_generator.cc b/src/contrib/ethosu/cascader/proposal_generator.cc index 3fda0d9921b5..ce709cbaa657 100644 --- a/src/contrib/ethosu/cascader/proposal_generator.cc +++ b/src/contrib/ethosu/cascader/proposal_generator.cc @@ -51,13 +51,12 @@ std::unordered_set GetPlanBoundaryConfigs(const Plan& plan) { return boundary_configs; } -bool IsPlanCompatible(const Proposal& proposal, - const std::vector& plan_part_group, +bool IsPlanCompatible(const Proposal& proposal, const std::vector& plan_part_group, const std::unordered_set& plan_boundary_configs) { // Check the Plan Part group is disjoint with the Proposal Part group - for(const auto& plan_part : plan_part_group) { - for(const auto& proposal_part : proposal->GetPartGroup()) { - if(plan_part == proposal_part) { + for (const auto& plan_part : plan_part_group) { + for (const auto& proposal_part : proposal->GetPartGroup()) { + if (plan_part == proposal_part) { return false; } } @@ -126,24 +125,25 @@ Proposal AddPlanToProposal(const Proposal& proposal, const Plan& plan, new_memory_usage = std::max(new_memory_usage, proposal->GetMemoryUsage()); int new_cycles = proposal->GetCycles() + plan->GetCycles(); std::vector new_part_group = proposal->GetPartGroup(); - new_part_group.insert(new_part_group.end(), plan->GetPartGroup().begin(), plan->GetPartGroup().end()); + new_part_group.insert(new_part_group.end(), plan->GetPartGroup().begin(), + plan->GetPartGroup().end()); std::sort(new_part_group.begin(), new_part_group.end()); return Proposal(proposal->GetGraph(), new_part_group, new_plans, new_configs, proposal->GetCascadeRegion(), new_memory_usage, new_cycles); } -std::vector GeneratePartialProposals(const CascaderGraph& graph, const HomeMap& home_map, - const CascaderOptions options, - const std::unordered_map, ObjectPtrHash, ObjectPtrEqual>& plans_by_part, - const std::vector& partial_proposal_group, - std::unordered_map, std::vector>* proposals_by_group) { +std::vector GeneratePartialProposals( + const CascaderGraph& graph, const HomeMap& home_map, const CascaderOptions options, + const std::unordered_map, ObjectPtrHash, ObjectPtrEqual>& plans_by_part, + const std::vector& partial_proposal_group, + std::unordered_map, std::vector>* proposals_by_group) { if (proposals_by_group->find(partial_proposal_group) != proposals_by_group->end()) { return proposals_by_group->at(partial_proposal_group); } if (partial_proposal_group.size() == 0) { (*proposals_by_group)[partial_proposal_group] = - 
std::vector{Proposal(graph, std::vector(), std::vector(), - TensorConfigMap(), options->cascade_region, 0, 0)}; + std::vector{Proposal(graph, std::vector(), std::vector(), + TensorConfigMap(), options->cascade_region, 0, 0)}; } else { Part part = partial_proposal_group.back(); const auto& plans = plans_by_part.at(part); @@ -158,26 +158,26 @@ std::vector GeneratePartialProposals(const CascaderGraph& graph, const // pick the current Plan. std::vector residual_proposal_group; std::copy_if(partial_proposal_group.begin(), partial_proposal_group.end(), - std::back_inserter(residual_proposal_group), [&plan](Part value) { - return std::find(plan->GetPartGroup().begin(), - plan->GetPartGroup().end(), + std::back_inserter(residual_proposal_group), [&plan](Part value) { + return std::find(plan->GetPartGroup().begin(), plan->GetPartGroup().end(), value) == plan->GetPartGroup().end(); - }); + }); // std::sort(residual_proposal_group.begin(), residual_proposal_group.end()); - const auto& residual_proposals = GeneratePartialProposals(graph, home_map, options, plans_by_part, residual_proposal_group, proposals_by_group); + const auto& residual_proposals = GeneratePartialProposals( + graph, home_map, options, plans_by_part, residual_proposal_group, proposals_by_group); auto plan_output_tensor = plan->GetOutputConfig()->GetTensor(); ICHECK_LE(plan_output_tensor->GetProducers().size(), 1) << "All tensors must have at most one producer."; for (const auto& residual_proposal : residual_proposals) { if (IsPlanCompatible(residual_proposal, plan->GetPartGroup(), plan_boundary_configs)) { - (*proposals_by_group)[partial_proposal_group].push_back(AddPlanToProposal( - residual_proposal, plan, plan_boundary_configs)); + (*proposals_by_group)[partial_proposal_group].push_back( + AddPlanToProposal(residual_proposal, plan, plan_boundary_configs)); } } } } - (*proposals_by_group)[partial_proposal_group] = ParetoCullProposals( - proposals_by_group->at(partial_proposal_group), options->max_proposals); + (*proposals_by_group)[partial_proposal_group] = + ParetoCullProposals(proposals_by_group->at(partial_proposal_group), options->max_proposals); } return proposals_by_group->at(partial_proposal_group); } @@ -194,7 +194,8 @@ std::vector GenerateProposals(const CascaderGraph& graph, const HomeMa std::vector partial_proposal_group = graph->GetPartOrder(); // A map of Proposals indexed by the Part group they cover std::unordered_map, std::vector> proposals_by_group; - return GeneratePartialProposals(graph, home_map, options, plans_by_part, partial_proposal_group, &proposals_by_group); + return GeneratePartialProposals(graph, home_map, options, plans_by_part, partial_proposal_group, + &proposals_by_group); } TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GenerateProposals") diff --git a/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py b/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py index eecf7b561ee2..5af89a415978 100644 --- a/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py +++ b/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py @@ -17,124 +17,122 @@ import pytest from tvm.contrib.ethosu.cascader.proposal_generator import generate_proposals -from .infra import make_simple_home_map, make_options - - -def test_generate_proposals(FLASH, SRAM, TwoConv2DGraph): - graph = TwoConv2DGraph - min_sram = 3700 - max_sram = 11700 - input_configs = 1 - parts = 2 - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - 
max_proposals=32, - stripe_factors=4, - max_plan_size=10, - ) - - proposals = generate_proposals(graph, home_map, options) - - for proposal in proposals: - assert 0 < len(proposal.plans) <= parts - assert len(proposal.input_tensor_configs) == input_configs - assert len(proposal.part_group) == parts - assert min_sram < proposal.memory_usage < max_sram - assert proposal.cycles > 0 - - -def test_generate_proposals_binary(FLASH, SRAM, BinaryGraph): - graph = BinaryGraph - input_configs = 2 - parts = 3 - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - max_proposals=32, - stripe_factors=4, - max_plan_size=10, - ) - - proposals = generate_proposals(graph, home_map, options) - - for proposal in proposals: - assert 0 < len(proposal.plans) <= parts - # assert len(proposal.input_tensor_configs) == input_configs - assert len(proposal.part_group) == parts - assert proposal.cycles > 0 - - -def test_generate_proposals_mobilenetv1_start(FLASH, SRAM, MobileNetv1StartGraph): - graph = MobileNetv1StartGraph - min_sram = 200000 - max_sram = 1300000 - input_configs = 1 - parts = 8 - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - max_proposals=32, - stripe_factors=5, - max_plan_size=10, - ) - - proposals = generate_proposals(graph, home_map, options) - - for proposal in proposals: - assert 0 < len(proposal.plans) <= parts - assert len(proposal.input_tensor_configs) == input_configs - assert len(proposal.part_group) == parts - assert min_sram < proposal.memory_usage < max_sram - assert proposal.cycles > 0 - - -def test_generate_proposals_mobilenetv1(FLASH, SRAM, MobileNetv1Graph): - graph = MobileNetv1Graph - min_sram = 200000 - max_sram = 1300000 - input_configs = 1 - parts = 27 - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - max_proposals=32, - stripe_factors=5, - max_plan_size=10, - ) - - proposals = generate_proposals(graph, home_map, options) - - for proposal in proposals: - assert 0 < len(proposal.plans) <= parts - assert len(proposal.input_tensor_configs) == input_configs - assert len(proposal.part_group) == parts - assert min_sram < proposal.memory_usage < max_sram - assert proposal.cycles > 0 - - -def test_generate_proposals_mobilenetv2diamond(FLASH, SRAM, MobileNetv2DiamondGraph): - graph = MobileNetv2DiamondGraph - min_sram = 370000 - max_sram = 990000 - input_configs = 1 - parts = 5 - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - max_proposals=64, - stripe_factors=5, - max_plan_size=10, - ) - - proposals = generate_proposals(graph, home_map, options) - - for proposal in proposals: - assert 0 < len(proposal.plans) <= parts - assert len(proposal.input_tensor_configs) == input_configs - assert len(proposal.part_group) == parts - assert min_sram < proposal.memory_usage < max_sram - assert proposal.cycles > 0 +from .infra import make_simple_home_map, make_options, ethosu_enabled + + +if ethosu_enabled: + + def test_generate_proposals(FLASH, SRAM, TwoConv2DGraph): + graph = TwoConv2DGraph + min_sram = 3700 + max_sram = 11700 + input_configs = 1 + parts = 2 + home_map = make_simple_home_map(graph, SRAM, FLASH) + options = make_options( + cascade_region=SRAM, + max_proposals=32, + stripe_factors=4, + max_plan_size=10, + ) + + proposals = generate_proposals(graph, home_map, options) + + for proposal in proposals: + assert 0 < len(proposal.plans) <= parts + assert 
len(proposal.input_tensor_configs) == input_configs + assert len(proposal.part_group) == parts + assert min_sram < proposal.memory_usage < max_sram + assert proposal.cycles > 0 + + def test_generate_proposals_binary(FLASH, SRAM, BinaryGraph): + graph = BinaryGraph + input_configs = 2 + parts = 3 + home_map = make_simple_home_map(graph, SRAM, FLASH) + options = make_options( + cascade_region=SRAM, + max_proposals=32, + stripe_factors=4, + max_plan_size=10, + ) + + proposals = generate_proposals(graph, home_map, options) + + for proposal in proposals: + assert 0 < len(proposal.plans) <= parts + assert len(proposal.input_tensor_configs) == input_configs + assert len(proposal.part_group) == parts + assert proposal.cycles > 0 + + def test_generate_proposals_mobilenetv1_start(FLASH, SRAM, MobileNetv1StartGraph): + graph = MobileNetv1StartGraph + min_sram = 200000 + max_sram = 1300000 + input_configs = 1 + parts = 8 + home_map = make_simple_home_map(graph, SRAM, FLASH) + options = make_options( + cascade_region=SRAM, + max_proposals=32, + stripe_factors=5, + max_plan_size=10, + ) + + proposals = generate_proposals(graph, home_map, options) + + for proposal in proposals: + assert 0 < len(proposal.plans) <= parts + assert len(proposal.input_tensor_configs) == input_configs + assert len(proposal.part_group) == parts + assert min_sram < proposal.memory_usage < max_sram + assert proposal.cycles > 0 + + def test_generate_proposals_mobilenetv1(FLASH, SRAM, MobileNetv1Graph): + graph = MobileNetv1Graph + min_sram = 200000 + max_sram = 1300000 + input_configs = 1 + parts = 27 + home_map = make_simple_home_map(graph, SRAM, FLASH) + options = make_options( + cascade_region=SRAM, + max_proposals=32, + stripe_factors=5, + max_plan_size=10, + ) + + proposals = generate_proposals(graph, home_map, options) + + for proposal in proposals: + assert 0 < len(proposal.plans) <= parts + assert len(proposal.input_tensor_configs) == input_configs + assert len(proposal.part_group) == parts + assert min_sram < proposal.memory_usage < max_sram + assert proposal.cycles > 0 + + def test_generate_proposals_mobilenetv2diamond(FLASH, SRAM, MobileNetv2DiamondGraph): + graph = MobileNetv2DiamondGraph + min_sram = 370000 + max_sram = 990000 + input_configs = 1 + parts = 5 + home_map = make_simple_home_map(graph, SRAM, FLASH) + options = make_options( + cascade_region=SRAM, + max_proposals=64, + stripe_factors=5, + max_plan_size=10, + ) + + proposals = generate_proposals(graph, home_map, options) + + for proposal in proposals: + assert 0 < len(proposal.plans) <= parts + assert len(proposal.input_tensor_configs) == input_configs + assert len(proposal.part_group) == parts + assert min_sram < proposal.memory_usage < max_sram + assert proposal.cycles > 0 if __name__ == "__main__":