From 629ec505b405f8e0c9523c578567107e9f91861b Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Fri, 25 Jun 2021 17:26:42 +0800 Subject: [PATCH 01/60] optimize resize vector --- src/auto_scheduler/search_policy/utils.cc | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/auto_scheduler/search_policy/utils.cc b/src/auto_scheduler/search_policy/utils.cc index ce8dc39922e0..76c6a549a8ea 100644 --- a/src/auto_scheduler/search_policy/utils.cc +++ b/src/auto_scheduler/search_policy/utils.cc @@ -158,17 +158,15 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo std::vector space_outer, space_inner, reduce_outer, reduce_inner; Array split_res; - for (const auto c : format) { - if (tolower(c) == 's') { - space_levels.emplace_back(); - } else if (tolower(c) == 'r') { - reduce_levels.emplace_back(); - } else { - LOG(FATAL) << "Invalid multi-level tiling format: " << format; - } + std::string format_lower; + std::transform(format.begin(), format.end(), format_lower.begin(), ::tolower); + size_t n_space = std::count(format_lower.begin(), format_lower.end(), 's'); + size_t n_reduce = std::count(format_lower.begin(), format_lower.end(), 'r'); + if (n_space + n_reduce != format.size()) { + LOG(FATAL) << "Invalid multi-level tiling format: " << format; } - size_t n_space = space_levels.size(); - size_t n_reduce = reduce_levels.size(); + space_levels.resize(n_space); + reduce_levels.resize(n_reduce); spatial_split_step_ids->clear(); From 50053731cad83e064e662032fadc9506dac583ef Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Fri, 25 Jun 2021 17:35:09 +0800 Subject: [PATCH 02/60] tmp --- src/auto_scheduler/search_policy/utils.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/auto_scheduler/search_policy/utils.cc b/src/auto_scheduler/search_policy/utils.cc index 76c6a549a8ea..6ac3daa3ba49 100644 --- a/src/auto_scheduler/search_policy/utils.cc +++ b/src/auto_scheduler/search_policy/utils.cc 
@@ -177,7 +177,9 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo ? GetIterNameSetParam(stage->op->attrs, SearchPolicyKey::no_split_at_inner) : std::set(); - for (const auto& iter : state->stages[stage_id]->iters) { + auto func = [&](std::vector> & levels, const) + + for (const auto& iter : state->stages[stage_id]->iters) { if (!no_split_at_inner_name_set.count(iter->name)) { if (iter->iter_kind == IteratorKind::kSpatial) { ICHECK_GE(n_space, 1); From a1b749f6139f3d92d4735fb0be566c6844501b49 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sat, 26 Jun 2021 10:10:29 +0800 Subject: [PATCH 03/60] DoMultiLevelTiling --- src/auto_scheduler/search_policy/utils.cc | 45 ++++++++++------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/src/auto_scheduler/search_policy/utils.cc b/src/auto_scheduler/search_policy/utils.cc index 6ac3daa3ba49..2cd7d09c046f 100644 --- a/src/auto_scheduler/search_policy/utils.cc +++ b/src/auto_scheduler/search_policy/utils.cc @@ -156,7 +156,6 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo std::vector> space_levels; std::vector> reduce_levels; std::vector space_outer, space_inner, reduce_outer, reduce_inner; - Array split_res; std::string format_lower; std::transform(format.begin(), format.end(), format_lower.begin(), ::tolower); @@ -165,8 +164,6 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo if (n_space + n_reduce != format.size()) { LOG(FATAL) << "Invalid multi-level tiling format: " << format; } - space_levels.resize(n_space); - reduce_levels.resize(n_reduce); spatial_split_step_ids->clear(); @@ -177,33 +174,29 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo ? 
GetIterNameSetParam(stage->op->attrs, SearchPolicyKey::no_split_at_inner) : std::set(); - auto func = [&](std::vector> & levels, const) + auto srlevels = [&](int size, const Iterator& iter, std::vector>& levels) { + ICHECK_GE(size, 1); + levels.resize(size); + if (size == 1) { + levels[0].push_back(iter); + } else { + Array split_res = + tmp_s.split(stage_id, iter, Array>(size - 1, NullOpt)); + for (size_t i = 0; i < size; i++) { + levels[i].push_back(split_res[i]); + } + if (iter->iter_kind == IteratorKind::kSpatial) { + spatial_split_step_ids->push_back(tmp_s->transform_steps.size() - 1); + } + } + }; - for (const auto& iter : state->stages[stage_id]->iters) { + for (const auto& iter : state->stages[stage_id]->iters) { if (!no_split_at_inner_name_set.count(iter->name)) { if (iter->iter_kind == IteratorKind::kSpatial) { - ICHECK_GE(n_space, 1); - - if (n_space == 1) { - space_levels[0].push_back(iter); - } else { - split_res = tmp_s.split(stage_id, iter, Array>(n_space - 1, NullOpt)); - for (size_t i = 0; i < n_space; i++) { - space_levels[i].push_back(split_res[i]); - } - spatial_split_step_ids->push_back(tmp_s->transform_steps.size() - 1); - } + srlevels(n_space, iter, space_levels); } else if (iter->iter_kind == IteratorKind::kReduction) { - ICHECK_GE(n_reduce, 1); - - if (n_reduce == 1) { - reduce_levels[0].push_back(iter); - } else { - split_res = tmp_s.split(stage_id, iter, Array>(n_reduce - 1, NullOpt)); - for (size_t i = 0; i < n_reduce; i++) { - reduce_levels[i].push_back(split_res[i]); - } - } + srlevels(n_reduce, iter, reduce_levels); } else { LOG(FATAL) << "Invalid iter type: " << int(iter->iter_kind); } From f1fc3139688a068cd673ad92f220d3865c96067f Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sat, 26 Jun 2021 10:30:43 +0800 Subject: [PATCH 04/60] modify size_t to int --- src/auto_scheduler/search_policy/utils.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/auto_scheduler/search_policy/utils.cc 
b/src/auto_scheduler/search_policy/utils.cc index 2cd7d09c046f..f6ead3f1d058 100644 --- a/src/auto_scheduler/search_policy/utils.cc +++ b/src/auto_scheduler/search_policy/utils.cc @@ -159,8 +159,8 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo std::string format_lower; std::transform(format.begin(), format.end(), format_lower.begin(), ::tolower); - size_t n_space = std::count(format_lower.begin(), format_lower.end(), 's'); - size_t n_reduce = std::count(format_lower.begin(), format_lower.end(), 'r'); + int n_space = std::count(format_lower.begin(), format_lower.end(), 's'); + int n_reduce = std::count(format_lower.begin(), format_lower.end(), 'r'); if (n_space + n_reduce != format.size()) { LOG(FATAL) << "Invalid multi-level tiling format: " << format; } @@ -182,7 +182,7 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo } else { Array split_res = tmp_s.split(stage_id, iter, Array>(size - 1, NullOpt)); - for (size_t i = 0; i < size; i++) { + for (int i = 0; i < size; i++) { levels[i].push_back(split_res[i]); } if (iter->iter_kind == IteratorKind::kSpatial) { From 65a7a0019772c5e6194f57e93b3df58bf2fea649 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sat, 26 Jun 2021 10:56:28 +0800 Subject: [PATCH 05/60] modify --- src/auto_scheduler/search_policy/utils.cc | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/auto_scheduler/search_policy/utils.cc b/src/auto_scheduler/search_policy/utils.cc index f6ead3f1d058..629ca3829e19 100644 --- a/src/auto_scheduler/search_policy/utils.cc +++ b/src/auto_scheduler/search_policy/utils.cc @@ -159,8 +159,8 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo std::string format_lower; std::transform(format.begin(), format.end(), format_lower.begin(), ::tolower); - int n_space = std::count(format_lower.begin(), format_lower.end(), 's'); - int n_reduce = std::count(format_lower.begin(), 
format_lower.end(), 'r'); + size_t n_space = std::count(format_lower.begin(), format_lower.end(), 's'); + size_t n_reduce = std::count(format_lower.begin(), format_lower.end(), 'r'); if (n_space + n_reduce != format.size()) { LOG(FATAL) << "Invalid multi-level tiling format: " << format; } @@ -174,7 +174,7 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo ? GetIterNameSetParam(stage->op->attrs, SearchPolicyKey::no_split_at_inner) : std::set(); - auto srlevels = [&](int size, const Iterator& iter, std::vector>& levels) { + auto sr_levels = [&](int size, const Iterator& iter, std::vector>& levels) { ICHECK_GE(size, 1); levels.resize(size); if (size == 1) { @@ -194,9 +194,9 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo for (const auto& iter : state->stages[stage_id]->iters) { if (!no_split_at_inner_name_set.count(iter->name)) { if (iter->iter_kind == IteratorKind::kSpatial) { - srlevels(n_space, iter, space_levels); + sr_levels(n_space, iter, space_levels); } else if (iter->iter_kind == IteratorKind::kReduction) { - srlevels(n_reduce, iter, reduce_levels); + sr_levels(n_reduce, iter, reduce_levels); } else { LOG(FATAL) << "Invalid iter type: " << int(iter->iter_kind); } @@ -211,6 +211,10 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo } } + auto fill_levels = [&](std::vector>& levels, + std::vector& fill, ) { + + }; if (!space_outer.empty()) { ICHECK(!space_levels.empty()); space_levels.front().insert(space_levels.front().begin(), @@ -239,12 +243,13 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo Array order; int space_ct = 0, reduce_ct = 0; - for (const auto c : format) { - if (tolower(c) == 's') { + + for (const auto c : format_lower) { + if (c == 's') { order.insert(order.end(), std::make_move_iterator(space_levels[space_ct].begin()), std::make_move_iterator(space_levels[space_ct].end())); space_ct++; - } else if 
(tolower(c) == 'r') { + } else if (c == 'r') { order.insert(order.end(), std::make_move_iterator(reduce_levels[reduce_ct].begin()), std::make_move_iterator(reduce_levels[reduce_ct].end())); reduce_ct++; From 2368df96ceb60a4bb547f0a5a76ae84a8a881a0f Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sat, 26 Jun 2021 11:27:43 +0800 Subject: [PATCH 06/60] modify level fill --- src/auto_scheduler/search_policy/utils.cc | 40 ++++++++--------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/src/auto_scheduler/search_policy/utils.cc b/src/auto_scheduler/search_policy/utils.cc index 629ca3829e19..fdb82b41ff62 100644 --- a/src/auto_scheduler/search_policy/utils.cc +++ b/src/auto_scheduler/search_policy/utils.cc @@ -164,6 +164,8 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo if (n_space + n_reduce != format.size()) { LOG(FATAL) << "Invalid multi-level tiling format: " << format; } + space_levels.resize(n_space); + reduce_levels.resize(n_reduce); spatial_split_step_ids->clear(); @@ -176,7 +178,6 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo auto sr_levels = [&](int size, const Iterator& iter, std::vector>& levels) { ICHECK_GE(size, 1); - levels.resize(size); if (size == 1) { levels[0].push_back(iter); } else { @@ -211,34 +212,19 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo } } - auto fill_levels = [&](std::vector>& levels, - std::vector& fill, ) { - + auto fill_levels = [&](std::vector& levels_iter, std::vector& fill) { + if (!fill.empty()) { + levels_iter.insert(levels_iter.begin(), std::make_move_iterator(fill.begin()), + std::make_move_iterator(fill.end())); + } }; - if (!space_outer.empty()) { - ICHECK(!space_levels.empty()); - space_levels.front().insert(space_levels.front().begin(), - std::make_move_iterator(space_outer.begin()), - std::make_move_iterator(space_outer.end())); - } - if (!space_inner.empty()) { - 
ICHECK(!space_levels.empty()); - space_levels.back().insert(space_levels.back().begin(), - std::make_move_iterator(space_inner.begin()), - std::make_move_iterator(space_inner.end())); - } - - if (!reduce_outer.empty()) { - ICHECK(!reduce_levels.empty()); - reduce_levels.front().insert(reduce_levels.front().begin(), - std::make_move_iterator(reduce_outer.begin()), - std::make_move_iterator(reduce_outer.end())); + if (!space_levels.empty()) { + fill_levels(space_levels.front(), space_outer); + fill_levels(space_levels.back(), space_inner); } - if (!reduce_inner.empty()) { - ICHECK(!reduce_levels.empty()); - reduce_levels.back().insert(reduce_levels.back().begin(), - std::make_move_iterator(reduce_inner.begin()), - std::make_move_iterator(reduce_inner.end())); + if (!reduce_levels.empty()) { + fill_levels(reduce_levels.front(), reduce_outer); + fill_levels(reduce_levels.back(), reduce_inner); } Array order; From e8ba850d320539781a8415e9b217fd49d0908f82 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sat, 26 Jun 2021 16:14:32 +0800 Subject: [PATCH 07/60] Update utils.cc modify format_lower --- src/auto_scheduler/search_policy/utils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auto_scheduler/search_policy/utils.cc b/src/auto_scheduler/search_policy/utils.cc index fdb82b41ff62..8ef2cb13b327 100644 --- a/src/auto_scheduler/search_policy/utils.cc +++ b/src/auto_scheduler/search_policy/utils.cc @@ -157,7 +157,7 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo std::vector> reduce_levels; std::vector space_outer, space_inner, reduce_outer, reduce_inner; - std::string format_lower; + std::string format_lower = format; std::transform(format.begin(), format.end(), format_lower.begin(), ::tolower); size_t n_space = std::count(format_lower.begin(), format_lower.end(), 's'); size_t n_reduce = std::count(format_lower.begin(), format_lower.end(), 'r'); From a832739f25dff86b644df16eefae6b83f70dddc7 Mon Sep 17 00:00:00 
2001 From: "Swift.Sun" Date: Sat, 26 Jun 2021 16:21:19 +0800 Subject: [PATCH 08/60] format lower count --- src/auto_scheduler/search_policy/utils.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/auto_scheduler/search_policy/utils.cc b/src/auto_scheduler/search_policy/utils.cc index fdb82b41ff62..773b3010d3d6 100644 --- a/src/auto_scheduler/search_policy/utils.cc +++ b/src/auto_scheduler/search_policy/utils.cc @@ -157,10 +157,10 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo std::vector> reduce_levels; std::vector space_outer, space_inner, reduce_outer, reduce_inner; - std::string format_lower; - std::transform(format.begin(), format.end(), format_lower.begin(), ::tolower); - size_t n_space = std::count(format_lower.begin(), format_lower.end(), 's'); - size_t n_reduce = std::count(format_lower.begin(), format_lower.end(), 'r'); + size_t n_space = + std::count(format.begin(), format.end(), 's') + std::count(format.begin(), format.end(), 'S'); + size_t n_reduce = + std::count(format.begin(), format.end(), 'r') + std::count(format.begin(), format.end(), 'R'); if (n_space + n_reduce != format.size()) { LOG(FATAL) << "Invalid multi-level tiling format: " << format; } @@ -230,12 +230,12 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo Array order; int space_ct = 0, reduce_ct = 0; - for (const auto c : format_lower) { - if (c == 's') { + for (const auto c : format) { + if (c == 's' || c == 'S') { order.insert(order.end(), std::make_move_iterator(space_levels[space_ct].begin()), std::make_move_iterator(space_levels[space_ct].end())); space_ct++; - } else if (c == 'r') { + } else if (c == 'r' || c == 'R') { order.insert(order.end(), std::make_move_iterator(reduce_levels[reduce_ct].begin()), std::make_move_iterator(reduce_levels[reduce_ct].end())); reduce_ct++; From 2de6c995b1bec1f1145f56fc71a57ca534ec2dd6 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sat, 26 Jun 
2021 16:31:25 +0800 Subject: [PATCH 09/60] delete blank lines --- src/auto_scheduler/search_policy/utils.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/auto_scheduler/search_policy/utils.cc b/src/auto_scheduler/search_policy/utils.cc index 773b3010d3d6..ac1cf2dd82c9 100644 --- a/src/auto_scheduler/search_policy/utils.cc +++ b/src/auto_scheduler/search_policy/utils.cc @@ -153,6 +153,8 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo if (spatial_split_step_ids == nullptr) { spatial_split_step_ids = &temp_split_step_ids; } + spatial_split_step_ids->clear(); + std::vector> space_levels; std::vector> reduce_levels; std::vector space_outer, space_inner, reduce_outer, reduce_inner; @@ -167,8 +169,6 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo space_levels.resize(n_space); reduce_levels.resize(n_reduce); - spatial_split_step_ids->clear(); - State tmp_s = state; const Stage& stage = state->stages[stage_id]; const std::set& no_split_at_inner_name_set = @@ -229,7 +229,6 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo Array order; int space_ct = 0, reduce_ct = 0; - for (const auto c : format) { if (c == 's' || c == 'S') { order.insert(order.end(), std::make_move_iterator(space_levels[space_ct].begin()), From cb9938827a0dfd988f64a0188587f1f13e93ab2f Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sat, 26 Jun 2021 16:31:25 +0800 Subject: [PATCH 10/60] delete blank lines --- src/auto_scheduler/search_policy/utils.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/auto_scheduler/search_policy/utils.cc b/src/auto_scheduler/search_policy/utils.cc index 773b3010d3d6..ac1cf2dd82c9 100644 --- a/src/auto_scheduler/search_policy/utils.cc +++ b/src/auto_scheduler/search_policy/utils.cc @@ -153,6 +153,8 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo if (spatial_split_step_ids == nullptr) 
{ spatial_split_step_ids = &temp_split_step_ids; } + spatial_split_step_ids->clear(); + std::vector> space_levels; std::vector> reduce_levels; std::vector space_outer, space_inner, reduce_outer, reduce_inner; @@ -167,8 +169,6 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo space_levels.resize(n_space); reduce_levels.resize(n_reduce); - spatial_split_step_ids->clear(); - State tmp_s = state; const Stage& stage = state->stages[stage_id]; const std::set& no_split_at_inner_name_set = @@ -229,7 +229,6 @@ State DoMultiLevelTiling(const State& state, int stage_id, const std::string& fo Array order; int space_ct = 0, reduce_ct = 0; - for (const auto c : format) { if (c == 's' || c == 'S') { order.insert(order.end(), std::make_move_iterator(space_levels[space_ct].begin()), From 9da6fa3b969907fe68620bf5d805df818ad7011e Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sun, 27 Jun 2021 12:15:14 +0800 Subject: [PATCH 11/60] re-commit message From 7377e432776df5f3b0e7d51bdec4280d8e74cf23 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Thu, 15 Jul 2021 16:22:09 +0800 Subject: [PATCH 12/60] Update graph_executor.h add set_output_zero_copy --- src/runtime/graph_executor/graph_executor.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h index 631605f630da..6ae8803f801c 100644 --- a/src/runtime/graph_executor/graph_executor.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -138,6 +138,12 @@ class TVM_DLL GraphExecutor : public ModuleNode { * \return NDArray corresponding to given input node index. */ NDArray GetInput(int index) const; +/*! + * \brief set index-th output to the graph without copying the data. + * \param index The output index. + * \param data_ref The output data that is referred. + */ + void SetOutputZeroCopy(int index, DLTensor* data_ref); /*! * \brief Return NDArray for given output index. * \param index The output index. 
From 4a007ab1fa9b64e0acbdd1d51bd4a2e30d4c7bee Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Mon, 19 Jul 2021 15:18:24 +0800 Subject: [PATCH 13/60] add setoutputzero --- src/runtime/graph_executor/graph_executor.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h index 6ae8803f801c..8464d12c3f6d 100644 --- a/src/runtime/graph_executor/graph_executor.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -119,6 +119,12 @@ class TVM_DLL GraphExecutor : public ModuleNode { * \param data_ref The input data that is referred. */ void SetInputZeroCopy(int index, DLTensor* data_ref); + /*! + * \brief set index-th output to the graph without copying the data. + * \param index The output index. + * \param data_ref The output data that is referred. + */ + void SetOutputZeroCopy(int index, DLTensor* data_ref); /*! * \brief Get the number of outputs * @@ -138,12 +144,6 @@ class TVM_DLL GraphExecutor : public ModuleNode { * \return NDArray corresponding to given input node index. */ NDArray GetInput(int index) const; -/*! - * \brief set index-th output to the graph without copying the data. - * \param index The output index. - * \param data_ref The output data that is referred. - */ - void SetOutputZeroCopy(int index, DLTensor* data_ref); /*! * \brief Return NDArray for given output index. * \param index The output index. @@ -406,6 +406,8 @@ class TVM_DLL GraphExecutor : public ModuleNode { std::unordered_map input_map_; /*! \brief Used for quick node input DLTensor* lookup given an input eid. */ std::vector> input_dltensors_; + /*! \brief Used for quick node output DLTensor* lookup given an input eid. */ + std::vector output_dltensors_; /*! \brief Used for quick entry indexing. */ std::vector node_row_ptr_; /*! \brief Output entries. 
*/ From 8ca606fda840c6807e22ec99c85e8e2275aa30b6 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Mon, 19 Jul 2021 15:19:25 +0800 Subject: [PATCH 14/60] add set output zero --- src/runtime/graph_executor/graph_executor.cc | 37 ++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc index 1084b4ee3ec4..831e3235cda4 100644 --- a/src/runtime/graph_executor/graph_executor.cc +++ b/src/runtime/graph_executor/graph_executor.cc @@ -139,6 +139,28 @@ void GraphExecutor::SetInputZeroCopy(int index, DLTensor* data_ref) { t->data = data_ref->data; } } +/*! + * \brief set index-th output to the graph without copying the data. + * \param index The output index. + * \param data_ref The output data that is referred. + */ +void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) { + ICHECK_LT(static_cast(index), outputs_.size()); + uint32_t eid = this->entry_id(outputs_[index]); + DLTensor* old_t = const_cast(data_entry_[eid].operator->()); + // check the consistency of output + ICHECK_EQ(data_alignment_[eid], details::GetDataAlignment(*data_ref)); + ICHECK_EQ(reinterpret_cast(data_ref->data) % kAllocAlignment, 0); + ICHECK_EQ(old_t->ndim, static_cast(data_ref->ndim)); + ICHECK_EQ(old_t->device.device_type, data_ref->device.device_type); + ICHECK_EQ(old_t->device.device_id, data_ref->device.device_id); + for (auto i = 0; i < data_ref->ndim; ++i) { + ICHECK_EQ(old_t->shape[i], data_ref->shape[i]); + } + // Update the data pointer for output op + ICHECK_LT(static_cast(index), output_dltensors_.size()); + output_dltensors_[index]->data = data_ref->data; +} /*! 
* \brief Get the number of outputs * @@ -363,6 +385,10 @@ void GraphExecutor::SetupOpExecs() { uint32_t nid = input_nodes_[i]; input_node_eids.insert(entry_id(nid, 0)); } + std::unordered_set output_node_id; + for (size_t i = 0; i < outputs_.size(); i++) { + output_node_id.insert(outputs_[i].node_id); + } // setup the array and requirements. for (uint32_t nid = 0; nid < this->GetNumOfNodes(); ++nid) { @@ -389,6 +415,13 @@ void GraphExecutor::SetupOpExecs() { input_dltensors_[eid].push_back(static_cast(op_args->arg_values[i].v_handle)); } } + // check if op output is model output + if (output_node_id.count(nid) > 0) { + for (uint32_t index = inode.inputs.size(); + index < inode.param.num_outputs + inode.param.num_inputs; ++index) { + output_dltensors_.push_back(static_cast(op_args->arg_values[index].v_handle)); + } + } } } @@ -463,6 +496,10 @@ PackedFunc GraphExecutor::GetFunction(const std::string& name, this->SetInputZeroCopy(args[0], args[1]); } }); + } else if (name == "set_output_zero_copy") { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + this->SetOutputZeroCopy(args[0], args[1]); + }); } else if (name == "get_output") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { if (args.num_args == 2) { From 6afb6099852c5203739853d5f007b78c7a99c12b Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Mon, 19 Jul 2021 15:30:27 +0800 Subject: [PATCH 15/60] Update graph_executor.cc --- src/runtime/graph_executor/graph_executor.cc | 24 +++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc index 831e3235cda4..4ee014953d97 100644 --- a/src/runtime/graph_executor/graph_executor.cc +++ b/src/runtime/graph_executor/graph_executor.cc @@ -91,6 +91,11 @@ void GraphExecutor::Init(const std::string& graph_json, tvm::runtime::Module mod std::string& name = nodes_[nid].name; input_map_[name] = i; } + for (size_t i = 0; 
i < outputs_.size(); i++) { + const uint32_t nid = outputs_[i].node_id; + std::string& name = nodes_[nid].name; + output_map_[name] = i; + } } /*! * \brief Get the input index given the name of input. @@ -104,6 +109,18 @@ int GraphExecutor::GetInputIndex(const std::string& name) { } return -1; } +/*! + * \brief Get the output index given the name of output. + * \param name The name of the output. + * \return The index of output. + */ +int GraphExecutor::GetOutputIndex(const std::string& name) { + auto it = output_map_.find(name); + if (it != output_map_.end()) { + return it->second; + } + return -1; +} /*! * \brief set index-th input to the graph. * \param index The input index. @@ -498,7 +515,12 @@ PackedFunc GraphExecutor::GetFunction(const std::string& name, }); } else if (name == "set_output_zero_copy") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - this->SetOutputZeroCopy(args[0], args[1]); + if (String::CanConvertFrom(args[0])) { + int in_idx = this->GetOutputIndex(args[0].operator String()); + if (in_idx >= 0) this->SetOutputZeroCopy(in_idx, args[1]); + } else { + this->SetOutputZeroCopy(args[0], args[1]); + } }); } else if (name == "get_output") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { From d71decead8e96d901deee0045ba1e0517a1478ff Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Mon, 19 Jul 2021 15:30:40 +0800 Subject: [PATCH 16/60] Update graph_executor.h --- src/runtime/graph_executor/graph_executor.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h index 8464d12c3f6d..892916dee190 100644 --- a/src/runtime/graph_executor/graph_executor.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -107,6 +107,13 @@ class TVM_DLL GraphExecutor : public ModuleNode { */ int GetInputIndex(const std::string& name); + /*! + * \brief Get the output index given the name of output. 
+ * \param name The name of the output. + * \return The index of output. + */ + int GetOutputIndex(const std::string& name); + /*! * \brief set index-th input to the graph. * \param index The input index. @@ -404,6 +411,8 @@ class TVM_DLL GraphExecutor : public ModuleNode { std::vector input_nodes_; /*! \brief Map of input names to input indices. */ std::unordered_map input_map_; + /*! \brief Map of input names to output indices. */ + std::unordered_map output_map_; /*! \brief Used for quick node input DLTensor* lookup given an input eid. */ std::vector> input_dltensors_; /*! \brief Used for quick node output DLTensor* lookup given an input eid. */ From 145219cfcb4da1f2ae770c166ee57273642d9602 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Tue, 20 Jul 2021 09:37:09 +0800 Subject: [PATCH 17/60] delete const_cast --- src/runtime/graph_executor/graph_executor.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc index 4ee014953d97..0c00c96882dc 100644 --- a/src/runtime/graph_executor/graph_executor.cc +++ b/src/runtime/graph_executor/graph_executor.cc @@ -164,7 +164,8 @@ void GraphExecutor::SetInputZeroCopy(int index, DLTensor* data_ref) { void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) { ICHECK_LT(static_cast(index), outputs_.size()); uint32_t eid = this->entry_id(outputs_[index]); - DLTensor* old_t = const_cast(data_entry_[eid].operator->()); + const DLTensor* old_t = data_entry_[eid].operator->(); + // check the consistency of output ICHECK_EQ(data_alignment_[eid], details::GetDataAlignment(*data_ref)); ICHECK_EQ(reinterpret_cast(data_ref->data) % kAllocAlignment, 0); @@ -174,6 +175,7 @@ void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) { for (auto i = 0; i < data_ref->ndim; ++i) { ICHECK_EQ(old_t->shape[i], data_ref->shape[i]); } + // Update the data pointer for output op ICHECK_LT(static_cast(index), 
output_dltensors_.size()); output_dltensors_[index]->data = data_ref->data; From e45c77b736e2bd124c0014d58dd8b5edeedcec2a Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Tue, 20 Jul 2021 14:25:41 +0800 Subject: [PATCH 18/60] add common function chechDltensor --- src/runtime/graph_executor/graph_executor.cc | 44 ++++++++++---------- src/runtime/graph_executor/graph_executor.h | 6 +++ 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc index 0c00c96882dc..2d9fb5a4f238 100644 --- a/src/runtime/graph_executor/graph_executor.cc +++ b/src/runtime/graph_executor/graph_executor.cc @@ -121,6 +121,24 @@ int GraphExecutor::GetOutputIndex(const std::string& name) { } return -1; } +/*! + * \brief Check the legality of DLTensor* of external DLTensor*. + * \param exeternal The exeternal DLTensor* + * \param eid The data_enrty_ index + */ +void GraphExecutor::CheckExeternalDltensor(const DLTensor* exeternal, uint32_t eid) const { + const DLTensor* internal = data_entry_[eid].operator->(); + + // check the consistency of input + ICHECK_EQ(data_alignment_[eid], details::GetDataAlignment(*exeternal)); + ICHECK_EQ(reinterpret_cast(exeternal->data) % kAllocAlignment, 0); + ICHECK_EQ(internal->ndim, static_cast(exeternal->ndim)); + ICHECK_EQ(internal->device.device_type, exeternal->device.device_type); + ICHECK_EQ(internal->device.device_id, exeternal->device.device_id); + for (auto i = 0; i < exeternal->ndim; ++i) { + ICHECK_EQ(internal->shape[i], exeternal->shape[i]); + } +} /*! * \brief set index-th input to the graph. * \param index The input index. 
@@ -139,17 +157,9 @@ void GraphExecutor::SetInput(int index, DLTensor* data_in) { void GraphExecutor::SetInputZeroCopy(int index, DLTensor* data_ref) { ICHECK_LT(static_cast(index), input_nodes_.size()); uint32_t eid = this->entry_id(input_nodes_[index], 0); - const DLTensor* old_t = data_entry_[eid].operator->(); // check the consistency of input - ICHECK_EQ(data_alignment_[eid], details::GetDataAlignment(*data_ref)); - ICHECK_EQ(reinterpret_cast(data_ref->data) % kAllocAlignment, 0); - ICHECK_EQ(old_t->ndim, static_cast(data_ref->ndim)); - ICHECK_EQ(old_t->device.device_type, data_ref->device.device_type); - ICHECK_EQ(old_t->device.device_id, data_ref->device.device_id); - for (auto i = 0; i < data_ref->ndim; ++i) { - ICHECK_EQ(old_t->shape[i], data_ref->shape[i]); - } + CheckExeternalDltensor(data_ref, eid); // Update the data pointer for each argument of each op for (DLTensor* t : input_dltensors_[eid]) { @@ -163,21 +173,13 @@ void GraphExecutor::SetInputZeroCopy(int index, DLTensor* data_ref) { */ void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) { ICHECK_LT(static_cast(index), outputs_.size()); + ICHECK_LT(static_cast(index), output_dltensors_.size()); uint32_t eid = this->entry_id(outputs_[index]); - const DLTensor* old_t = data_entry_[eid].operator->(); // check the consistency of output - ICHECK_EQ(data_alignment_[eid], details::GetDataAlignment(*data_ref)); - ICHECK_EQ(reinterpret_cast(data_ref->data) % kAllocAlignment, 0); - ICHECK_EQ(old_t->ndim, static_cast(data_ref->ndim)); - ICHECK_EQ(old_t->device.device_type, data_ref->device.device_type); - ICHECK_EQ(old_t->device.device_id, data_ref->device.device_id); - for (auto i = 0; i < data_ref->ndim; ++i) { - ICHECK_EQ(old_t->shape[i], data_ref->shape[i]); - } + CheckExeternalDltensor(data_ref, eid); // Update the data pointer for output op - ICHECK_LT(static_cast(index), output_dltensors_.size()); output_dltensors_[index]->data = data_ref->data; } /*! 
@@ -518,8 +520,8 @@ PackedFunc GraphExecutor::GetFunction(const std::string& name, } else if (name == "set_output_zero_copy") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { if (String::CanConvertFrom(args[0])) { - int in_idx = this->GetOutputIndex(args[0].operator String()); - if (in_idx >= 0) this->SetOutputZeroCopy(in_idx, args[1]); + int out_idx = this->GetOutputIndex(args[0].operator String()); + if (out_idx >= 0) this->SetOutputZeroCopy(out_idx, args[1]); } else { this->SetOutputZeroCopy(args[0], args[1]); } diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h index 892916dee190..99a3b369a0e0 100644 --- a/src/runtime/graph_executor/graph_executor.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -390,6 +390,12 @@ class TVM_DLL GraphExecutor : public ModuleNode { void SetupStorage(); /*! \brief Setup the executors. */ void SetupOpExecs(); + /*! + * \brief Check the legality of DLTensor* of external DLTensor*. + * \param exeternal The exeternal DLTensor* + * \param eid The data_enrty_ index + */ + void CheckExeternalDltensor(const DLTensor* external, uint32_t eid) const; /*! * \brief Create an execution function given input. * \param attrs The node attributes. 
From b7a27c530266baff17e805cb117179526aee5bd3 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Tue, 20 Jul 2021 16:18:21 +0800 Subject: [PATCH 19/60] Update graph_executor.h --- src/runtime/graph_executor/graph_executor.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h index 99a3b369a0e0..8dd47091f493 100644 --- a/src/runtime/graph_executor/graph_executor.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -206,6 +206,9 @@ class TVM_DLL GraphExecutor : public ModuleNode { uint32_t node_id; uint32_t index; uint32_t version; + inline bool operator==(const NodeEntry& other) const { + return node_id == other.node_id && index == other.index && version == other.version; + } // JSON Loader void Load(dmlc::JSONReader* reader) { reader->BeginArray(); @@ -391,11 +394,11 @@ class TVM_DLL GraphExecutor : public ModuleNode { /*! \brief Setup the executors. */ void SetupOpExecs(); /*! - * \brief Check the legality of DLTensor* of external DLTensor*. - * \param exeternal The exeternal DLTensor* - * \param eid The data_enrty_ index + * \brief Check the legality of external DLTensor*. + * \param external The external DLTensor*. + * \param eid The data_enrty_ index. */ - void CheckExeternalDltensor(const DLTensor* external, uint32_t eid) const; + void CheckExternalDLTensor(const DLTensor* external, uint32_t eid) const; /*! * \brief Create an execution function given input. * \param attrs The node attributes. 
From bf6ed0800c8b5f62a857460e521837d3aaecd39c Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Tue, 20 Jul 2021 16:18:33 +0800 Subject: [PATCH 20/60] Update graph_executor.cc --- src/runtime/graph_executor/graph_executor.cc | 56 ++++++++++++-------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc index 2d9fb5a4f238..4fae0a199483 100644 --- a/src/runtime/graph_executor/graph_executor.cc +++ b/src/runtime/graph_executor/graph_executor.cc @@ -121,24 +121,6 @@ int GraphExecutor::GetOutputIndex(const std::string& name) { } return -1; } -/*! - * \brief Check the legality of DLTensor* of external DLTensor*. - * \param exeternal The exeternal DLTensor* - * \param eid The data_enrty_ index - */ -void GraphExecutor::CheckExeternalDltensor(const DLTensor* exeternal, uint32_t eid) const { - const DLTensor* internal = data_entry_[eid].operator->(); - - // check the consistency of input - ICHECK_EQ(data_alignment_[eid], details::GetDataAlignment(*exeternal)); - ICHECK_EQ(reinterpret_cast(exeternal->data) % kAllocAlignment, 0); - ICHECK_EQ(internal->ndim, static_cast(exeternal->ndim)); - ICHECK_EQ(internal->device.device_type, exeternal->device.device_type); - ICHECK_EQ(internal->device.device_id, exeternal->device.device_id); - for (auto i = 0; i < exeternal->ndim; ++i) { - ICHECK_EQ(internal->shape[i], exeternal->shape[i]); - } -} /*! * \brief set index-th input to the graph. * \param index The input index. @@ -149,6 +131,23 @@ void GraphExecutor::SetInput(int index, DLTensor* data_in) { uint32_t eid = this->entry_id(input_nodes_[index], 0); data_entry_[eid].CopyFrom(data_in); } +/*! + * \brief Check the legality of external DLTensor*. + * \param external The external DLTensor*. + * \param eid The data_enrty_ index. 
+ */ +void GraphExecutor::CheckExternalDLTensor(const DLTensor* external, uint32_t eid) const { + const DLTensor* internal = data_entry_[eid].operator->(); + + ICHECK_EQ(data_alignment_[eid], details::GetDataAlignment(*external)); + ICHECK_EQ(reinterpret_cast(external->data) % kAllocAlignment, 0); + ICHECK_EQ(internal->ndim, static_cast(external->ndim)); + ICHECK_EQ(internal->device.device_type, external->device.device_type); + ICHECK_EQ(internal->device.device_id, external->device.device_id); + for (auto i = 0; i < external->ndim; ++i) { + ICHECK_EQ(internal->shape[i], external->shape[i]); + } +} /*! * \brief set index-th input to the graph without copying the data. * \param index The input index. @@ -157,10 +156,8 @@ void GraphExecutor::SetInput(int index, DLTensor* data_in) { void GraphExecutor::SetInputZeroCopy(int index, DLTensor* data_ref) { ICHECK_LT(static_cast(index), input_nodes_.size()); uint32_t eid = this->entry_id(input_nodes_[index], 0); - // check the consistency of input - CheckExeternalDltensor(data_ref, eid); - + CheckExternalDLTensor(data_ref, eid); // Update the data pointer for each argument of each op for (DLTensor* t : input_dltensors_[eid]) { t->data = data_ref->data; @@ -174,13 +171,26 @@ void GraphExecutor::SetInputZeroCopy(int index, DLTensor* data_ref) { void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) { ICHECK_LT(static_cast(index), outputs_.size()); ICHECK_LT(static_cast(index), output_dltensors_.size()); - uint32_t eid = this->entry_id(outputs_[index]); + const NodeEntry& output_node = outputs_[index]; + uint32_t eid = this->entry_id(output_node); // check the consistency of output - CheckExeternalDltensor(data_ref, eid); + CheckExternalDLTensor(data_ref, eid); // Update the data pointer for output op output_dltensors_[index]->data = data_ref->data; + // Update the input of the op connected to the output + for (auto node : nodes_) { + auto it = std::find(node.inputs.begin(), node.inputs.end(), output_node); + if 
(it != node.inputs.end()) { + int input_nid = GetInputIndex(node.name); + int index = it - node.inputs.begin(); + uint32_t eid = this->entry_id(input_nodes_[input_nid], index); + for (DLTensor* t : input_dltensors_[eid]) { + t->data = data_ref->data; + } + } + } } /*! * \brief Get the number of outputs From 80fc91f6ce855d5eb6f80505399913727c31fe27 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Tue, 20 Jul 2021 17:20:54 +0800 Subject: [PATCH 21/60] add output_ sort --- src/runtime/graph_executor/graph_executor.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h index 8dd47091f493..36b6a32d188f 100644 --- a/src/runtime/graph_executor/graph_executor.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -373,6 +373,9 @@ class TVM_DLL GraphExecutor : public ModuleNode { bitmask |= 4; } else if (key == "heads") { reader->Read(&outputs_); + std::sort( + outputs_.begin(), outputs_.end(), + [&](const NodeEntry& a, const NodeEntry& b) -> bool { return a.node_id < b.node_id; }); bitmask |= 8; } else if (key == "attrs") { reader->Read(&attrs_); From ab5f957da0ff0ecb9f2e26cd93ef9ec7e7560824 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Tue, 20 Jul 2021 17:48:28 +0800 Subject: [PATCH 22/60] Update graph_executor.cc --- src/runtime/graph_executor/graph_executor.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc index 4fae0a199483..c28d6208756a 100644 --- a/src/runtime/graph_executor/graph_executor.cc +++ b/src/runtime/graph_executor/graph_executor.cc @@ -446,6 +446,7 @@ void GraphExecutor::SetupOpExecs() { input_dltensors_[eid].push_back(static_cast(op_args->arg_values[i].v_handle)); } } + // check if op output is model output if (output_node_id.count(nid) > 0) { for (uint32_t index = inode.inputs.size(); From 07e80ad75ff30ec68253379456926fbf7b3b83f6 Mon Sep 17 00:00:00 2001 From: 
"Swift.Sun" Date: Tue, 20 Jul 2021 17:58:51 +0800 Subject: [PATCH 23/60] add a.nodeid == b.nodeid --- src/runtime/graph_executor/graph_executor.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h index 36b6a32d188f..3be364b6b8c8 100644 --- a/src/runtime/graph_executor/graph_executor.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -373,9 +373,10 @@ class TVM_DLL GraphExecutor : public ModuleNode { bitmask |= 4; } else if (key == "heads") { reader->Read(&outputs_); - std::sort( - outputs_.begin(), outputs_.end(), - [&](const NodeEntry& a, const NodeEntry& b) -> bool { return a.node_id < b.node_id; }); + std::sort(outputs_.begin(), outputs_.end(), + [&](const NodeEntry& a, const NodeEntry& b) -> bool { + return a.node_id == b.node_id ? a.index < b.index : a.node_id < b.node_id; + }); bitmask |= 8; } else if (key == "attrs") { reader->Read(&attrs_); From e67b8391c30cef8627bd98e1a94dabb6c5ebb249 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Wed, 21 Jul 2021 16:02:56 +0800 Subject: [PATCH 24/60] add unit test for set output zero --- tests/cpp/relay_build_module_test.cc | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/cpp/relay_build_module_test.cc b/tests/cpp/relay_build_module_test.cc index 37e9e6f9c42c..6a74e63a8582 100644 --- a/tests/cpp/relay_build_module_test.cc +++ b/tests/cpp/relay_build_module_test.cc @@ -73,6 +73,7 @@ TVM_REGISTER_GLOBAL("relay.backend.lower_call") return (*f)(outs, impl); }); + TEST(Relay, BuildModule) { auto tensor_type = relay::TensorType({2, 3}, DataType::Float(32)); auto a = relay::Var("a", tensor_type); @@ -129,13 +130,15 @@ TEST(Relay, BuildModule) { ICHECK(mod.defined()) << "Module must be defined"; tvm::runtime::Module run_mod = (*pfr)(json, mod, (int)dev.device_type, (int)dev.device_id); auto set_input_f = run_mod.GetFunction("set_input_zero_copy", false); + auto set_output_f 
= run_mod.GetFunction("set_output_zero_copy", false); auto run_f = run_mod.GetFunction("run", false); - auto get_output_f = run_mod.GetFunction("get_output", false); set_input_f("a", const_cast(A.operator->())); set_input_f("b", const_cast(B.operator->())); set_input_f("c", const_cast(C.operator->())); + auto Y = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0}); + set_output_f(0, Y); run_f(); - tvm::runtime::NDArray Y = get_output_f(0); + // tvm::runtime::NDArray Y = get_output_f(0); auto pY = (float*)Y->data; for (int i = 0; i < 6; ++i) { ICHECK_LT(fabs(pY[i] - (i + (i + 1) + (i + 2))), 1e-4); @@ -145,10 +148,8 @@ TEST(Relay, BuildModule) { pB[i] = i + 3; } run_f(); - tvm::runtime::NDArray Y2 = get_output_f(0); - auto pY2 = (float*)Y2->data; for (int i = 0; i < 6; ++i) { - ICHECK_LT(fabs(pY2[i] - (i + (i + 3) + (i + 2))), 1e-4); + ICHECK_LT(fabs(pY[i] - (i + (i + 3) + (i + 2))), 1e-4); } // attach a different input and run it again auto C2 = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0}); @@ -158,10 +159,8 @@ TEST(Relay, BuildModule) { } set_input_f("c", const_cast(C2.operator->())); run_f(); - tvm::runtime::NDArray Y3 = get_output_f(0); - auto pY3 = (float*)Y3->data; for (int i = 0; i < 6; ++i) { - ICHECK_LT(fabs(pY3[i] - (i + (i + 3) + (i + 4))), 1e-4); + ICHECK_LT(fabs(pY[i] - (i + (i + 3) + (i + 4))), 1e-4); } } From 052fa56ed2743417675d16d2ba73f6938722621c Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Thu, 22 Jul 2021 14:38:00 +0800 Subject: [PATCH 25/60] add include --- src/runtime/graph_executor/graph_executor.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h index 3be364b6b8c8..0dd0e4d3be9c 100644 --- a/src/runtime/graph_executor/graph_executor.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -31,6 +31,7 @@ #include #include +#include #include #include #include From 847634ef5eb8cfec94752a791fbaf15fc1dbb1c2 Mon Sep 
17 00:00:00 2001 From: "Swift.Sun" Date: Thu, 22 Jul 2021 16:48:52 +0800 Subject: [PATCH 26/60] modify Setoutput zero copy --- src/runtime/graph_executor/graph_executor.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc index c28d6208756a..8ffebf42fc7c 100644 --- a/src/runtime/graph_executor/graph_executor.cc +++ b/src/runtime/graph_executor/graph_executor.cc @@ -181,13 +181,12 @@ void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) { output_dltensors_[index]->data = data_ref->data; // Update the input of the op connected to the output for (auto node : nodes_) { - auto it = std::find(node.inputs.begin(), node.inputs.end(), output_node); - if (it != node.inputs.end()) { + if (std::find(node.inputs.begin(), node.inputs.end(), output_node) != node.inputs.end()) { int input_nid = GetInputIndex(node.name); - int index = it - node.inputs.begin(); - uint32_t eid = this->entry_id(input_nodes_[input_nid], index); - for (DLTensor* t : input_dltensors_[eid]) { - t->data = data_ref->data; + auto it = std::find(input_nodes_.begin(), input_nodes_.end(), input_nid); + if (it != input_nodes_.end()) { + int input_index = it - input_nodes_.begin(); + SetInputZeroCopy(input_index, data_ref); } } } From b2d947172192fabc4e41280c4a821ac962a40f0e Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Thu, 22 Jul 2021 17:03:40 +0800 Subject: [PATCH 27/60] modify by clang-format --- tests/cpp/relay_build_module_test.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/cpp/relay_build_module_test.cc b/tests/cpp/relay_build_module_test.cc index 6a74e63a8582..359939dac56a 100644 --- a/tests/cpp/relay_build_module_test.cc +++ b/tests/cpp/relay_build_module_test.cc @@ -73,7 +73,6 @@ TVM_REGISTER_GLOBAL("relay.backend.lower_call") return (*f)(outs, impl); }); - TEST(Relay, BuildModule) { auto tensor_type = relay::TensorType({2, 3}, DataType::Float(32)); auto a 
= relay::Var("a", tensor_type); From 5d0461a3be3e04cd786809b01281e018852359ad Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Thu, 22 Jul 2021 21:16:28 +0800 Subject: [PATCH 28/60] add unit test for set output zero --- src/runtime/graph_executor/graph_executor.cc | 34 ++--- src/runtime/graph_executor/graph_executor.h | 3 +- tests/cpp/build_module_test.cc | 128 +++++++++++++++++++ 3 files changed, 149 insertions(+), 16 deletions(-) diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc index 8ffebf42fc7c..85b99b0188fd 100644 --- a/src/runtime/graph_executor/graph_executor.cc +++ b/src/runtime/graph_executor/graph_executor.cc @@ -172,21 +172,26 @@ void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) { ICHECK_LT(static_cast(index), outputs_.size()); ICHECK_LT(static_cast(index), output_dltensors_.size()); const NodeEntry& output_node = outputs_[index]; - uint32_t eid = this->entry_id(output_node); + uint32_t output_node_eid = this->entry_id(output_node); // check the consistency of output - CheckExternalDLTensor(data_ref, eid); + CheckExternalDLTensor(data_ref, output_node_eid); // Update the data pointer for output op - output_dltensors_[index]->data = data_ref->data; + for (DLTensor* t : output_dltensors_[output_node.node_id]) { + t->data = data_ref->data; + } + // Update the input of the op connected to the output - for (auto node : nodes_) { - if (std::find(node.inputs.begin(), node.inputs.end(), output_node) != node.inputs.end()) { - int input_nid = GetInputIndex(node.name); - auto it = std::find(input_nodes_.begin(), input_nodes_.end(), input_nid); - if (it != input_nodes_.end()) { - int input_index = it - input_nodes_.begin(); - SetInputZeroCopy(input_index, data_ref); + for (size_t input_nid = 0; input_nid < nodes_.size(); input_nid++) { + if (std::find(nodes_[input_nid].inputs.begin(), nodes_[input_nid].inputs.end(), output_node) != + nodes_[input_nid].inputs.end()) { + uint32_t input_eid = 
this->entry_id(input_nid, 0); + // check the consistency of input + CheckExternalDLTensor(data_ref, input_eid); + // Update the data pointer for each argument of each op + for (DLTensor* t : input_dltensors_[input_eid]) { + t->data = data_ref->data; } } } @@ -410,6 +415,7 @@ void GraphExecutor::SetupStorage() { void GraphExecutor::SetupOpExecs() { op_execs_.resize(this->GetNumOfNodes()); input_dltensors_.resize(num_node_entries()); + output_dltensors_.resize(num_node_entries()); std::unordered_set input_node_eids; for (size_t i = 0; i < input_nodes_.size(); i++) { uint32_t nid = input_nodes_[i]; @@ -440,17 +446,15 @@ void GraphExecutor::SetupOpExecs() { for (size_t i = 0; i < inode.inputs.size(); i++) { uint32_t eid = this->entry_id(inode.inputs[i]); - // check if op input is model input - if (input_node_eids.count(eid) > 0) { - input_dltensors_[eid].push_back(static_cast(op_args->arg_values[i].v_handle)); - } + input_dltensors_[eid].push_back(static_cast(op_args->arg_values[i].v_handle)); } // check if op output is model output if (output_node_id.count(nid) > 0) { for (uint32_t index = inode.inputs.size(); index < inode.param.num_outputs + inode.param.num_inputs; ++index) { - output_dltensors_.push_back(static_cast(op_args->arg_values[index].v_handle)); + output_dltensors_[nid].push_back( + static_cast(op_args->arg_values[index].v_handle)); } } } diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h index 0dd0e4d3be9c..d13dffa530fc 100644 --- a/src/runtime/graph_executor/graph_executor.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -368,6 +368,7 @@ class TVM_DLL GraphExecutor : public ModuleNode { bitmask |= 1; } else if (key == "arg_nodes") { reader->Read(&input_nodes_); + std::sort(input_nodes_.begin(), input_nodes_.end()); bitmask |= 2; } else if (key == "node_row_ptr") { reader->Read(&node_row_ptr_); @@ -430,7 +431,7 @@ class TVM_DLL GraphExecutor : public ModuleNode { /*! 
\brief Used for quick node input DLTensor* lookup given an input eid. */ std::vector> input_dltensors_; /*! \brief Used for quick node output DLTensor* lookup given an input eid. */ - std::vector output_dltensors_; + std::vector> output_dltensors_; /*! \brief Used for quick entry indexing. */ std::vector node_row_ptr_; /*! \brief Output entries. */ diff --git a/tests/cpp/build_module_test.cc b/tests/cpp/build_module_test.cc index 204a824f9248..8a7ef2d4bb6a 100644 --- a/tests/cpp/build_module_test.cc +++ b/tests/cpp/build_module_test.cc @@ -200,6 +200,134 @@ TEST(BuildModule, Heterogeneous) { } } +TEST(BuildModule, ZeroCopy) { + /* + * + * A B + * \ / + * elemwise_add(out0) + * \ + * C copy + * \ / + * elemwise_sub(out1) + */ + + using namespace tvm; + using namespace tvm::te; + + auto target_llvm = Target("llvm"); + + // The shape of input tensors. + const int n = 4; + Array shape{n}; + + auto A = placeholder(shape, DataType::Float(32), "A"); + auto B = placeholder(shape, DataType::Float(32), "B"); + auto C = placeholder(shape, DataType::Float(32), "C"); + + auto elemwise_add = compute( + A->shape, [&A, &B](PrimExpr i) { return A[i] + B[i]; }, "elemwise_add"); + + auto copy = placeholder(shape, DataType::Float(32), "__copy"); + auto elemwise_sub = compute( + C->shape, [©, &C](PrimExpr i) { return copy[i] - C[i]; }, "elemwise_sub"); + + With llvm_scope(target_llvm); + auto s1 = create_schedule({elemwise_add->op}); + auto s2 = create_schedule({elemwise_sub->op}); + + auto args1 = Array({A, B, elemwise_add}); + auto args2 = Array({copy, C, elemwise_sub}); + + std::unordered_map binds; + auto lowered_s1 = LowerSchedule(s1, args1, "elemwise_add", binds); + auto lowered_s2 = LowerSchedule(s2, args2, "elemwise_sub", binds); + Map inputs = {{target_llvm, lowered_s1}, {target_llvm, lowered_s2}}; + auto module = build(inputs, Target()); + + // Execute the graph and check the correctness. + // Setup graph json. 
+ std::string json = + "{\"nodes\": [{\"op\": \"null\", \"name\": \"A\", \"inputs\": []}, " + "{\"op\": \"null\", \"name\": \"B\", \"inputs\": []}, {\"op\": " + "\"tvm_op\", \"name\": \"elemwise_add\", \"attrs\": {\"flatten_data\": " + "\"1\", \"func_name\": \"elemwise_add\", \"num_inputs\": \"2\", " + "\"num_outputs\": \"1\"}, \"inputs\": [[0, 0, 0], [1, 0, 0]]}, {\"op\": " + "\"tvm_op\", \"name\": \"__copy_add_to_sub\", \"attrs\": " + "{\"flatten_data\": \"0\", \"func_name\": \"__copy\", \"num_inputs\": " + "\"1\", \"num_outputs\": \"1\"}, \"inputs\": [[2, 0, 0]]}, {\"op\": " + "\"null\", \"name\": \"C\", \"inputs\": []}, {\"op\": \"tvm_op\", " + "\"name\": \"elemwise_sub\", \"attrs\": {\"flatten_data\": \"0\", " + "\"func_name\": \"elemwise_sub\", \"num_inputs\": \"2\", " + "\"num_outputs\": \"1\"}, \"inputs\": [[3, 0, 0], [4, 0, 0]]}], " + "\"arg_nodes\": [0, 1, 4], \"node_row_ptr\": [0, 1, 2, 3, 4, 5, 6], " + "\"heads\": [[2, 0, 0], [5, 0, 0]], \"attrs\": {\"storage_id\": [\"list_int\", [3, " + "4, 0, 1, 5, 2]], \"shape\": [\"list_shape\", [[4], [4], [4], [4], [4], " + "[4]]], \"device_index\": [\"list_int\", [2, 2, 2, 1, 1, 1]], \"dtype\": " + "[\"list_int\", [0, 0, 0, 0, 0, 0]], \"dltype\": [\"list_str\", " + "[\"float32\", \"float32\", \"float32\", \"float32\", \"float32\", " + "\"float32\"]]}}"; + // Setup inputs. + auto a_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); + auto b_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); + auto c_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); + + auto pa = (float*)(a_val->data); + auto pb = (float*)(b_val->data); + auto pc = (float*)(c_val->data); + + // Assign values. + for (int i = 0; i < n; i++) { + pa[i] = i; + pb[i] = i + 1.0; + pc[i] = i - 1.0; + } + + // // Initialize graph executor. 
+ int cpu_dev_ty = static_cast(kDLCPU); + int cpu_dev_id = 0; + + const runtime::PackedFunc* graph_executor = + tvm::runtime::Registry::Get("tvm.graph_executor.create"); + runtime::Module mod = (*graph_executor)(json, module, cpu_dev_ty, cpu_dev_id); + + // test FFI for module. + auto test_ffi = PackedFunc([](TVMArgs args, TVMRetValue* rv) { + int tcode = args[1]; + ICHECK_EQ(args[0].type_code(), tcode); + }); + + test_ffi(runtime::Module(mod), static_cast(kTVMModuleHandle)); + test_ffi(Optional(mod), static_cast(kTVMModuleHandle)); + + PackedFunc set_input = mod.GetFunction("set_input", false); + PackedFunc run = mod.GetFunction("run", false); + PackedFunc get_output = mod.GetFunction("get_output", false); + PackedFunc set_output_zero_copy = mod.GetFunction("set_output_zero_copy", false); + set_input("A", a_val); + set_input("B", b_val); + set_input("C", c_val); + + tvm::runtime::NDArray out0 = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); + tvm::runtime::NDArray out1 = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); + set_output_zero_copy(0, out0); + set_output_zero_copy(1, out1); + + run(); + // print_data_entry(); + float* p_out0 = (float*)out0->data; + float* p_out1 = (float*)out1->data; + + // Check correctness. 
+ for (int i = 0; i < n; ++i) { + ICHECK_LT(std::fabs(p_out0[i] - (i + (i + 1.0))), 1e-5); + } + + for (int i = 0; i < n; ++i) { + ICHECK_LT(std::fabs(p_out1[i] - (i + (i + 1.0) - (i - 1.0))), 1e-5); + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); testing::FLAGS_gtest_death_test_style = "threadsafe"; From 4ebf2bdf8cd1bf66031db6d8be6bab51c0ac6907 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Thu, 22 Jul 2021 21:19:41 +0800 Subject: [PATCH 29/60] rrealy ut go back --- tests/cpp/relay_build_module_test.cc | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/cpp/relay_build_module_test.cc b/tests/cpp/relay_build_module_test.cc index 359939dac56a..b6f1f668812e 100644 --- a/tests/cpp/relay_build_module_test.cc +++ b/tests/cpp/relay_build_module_test.cc @@ -129,15 +129,13 @@ TEST(Relay, BuildModule) { ICHECK(mod.defined()) << "Module must be defined"; tvm::runtime::Module run_mod = (*pfr)(json, mod, (int)dev.device_type, (int)dev.device_id); auto set_input_f = run_mod.GetFunction("set_input_zero_copy", false); - auto set_output_f = run_mod.GetFunction("set_output_zero_copy", false); auto run_f = run_mod.GetFunction("run", false); + auto get_output_f = run_mod.GetFunction("get_output", false); set_input_f("a", const_cast(A.operator->())); set_input_f("b", const_cast(B.operator->())); set_input_f("c", const_cast(C.operator->())); - auto Y = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - set_output_f(0, Y); run_f(); - // tvm::runtime::NDArray Y = get_output_f(0); + tvm::runtime::NDArray Y = get_output_f(0); auto pY = (float*)Y->data; for (int i = 0; i < 6; ++i) { ICHECK_LT(fabs(pY[i] - (i + (i + 1) + (i + 2))), 1e-4); @@ -147,8 +145,10 @@ TEST(Relay, BuildModule) { pB[i] = i + 3; } run_f(); + tvm::runtime::NDArray Y2 = get_output_f(0); + auto pY2 = (float*)Y2->data; for (int i = 0; i < 6; ++i) { - ICHECK_LT(fabs(pY[i] - (i + (i + 3) + (i + 2))), 1e-4); + ICHECK_LT(fabs(pY2[i] - 
(i + (i + 3) + (i + 2))), 1e-4); } // attach a different input and run it again auto C2 = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0}); @@ -158,8 +158,10 @@ TEST(Relay, BuildModule) { } set_input_f("c", const_cast(C2.operator->())); run_f(); + tvm::runtime::NDArray Y3 = get_output_f(0); + auto pY3 = (float*)Y3->data; for (int i = 0; i < 6; ++i) { - ICHECK_LT(fabs(pY[i] - (i + (i + 3) + (i + 4))), 1e-4); + ICHECK_LT(fabs(pY3[i] - (i + (i + 3) + (i + 4))), 1e-4); } } @@ -184,4 +186,4 @@ int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); testing::FLAGS_gtest_death_test_style = "threadsafe"; return RUN_ALL_TESTS(); -} +} \ No newline at end of file From c221b51c0a1afc5c633eda59c41228a453c236b1 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Thu, 22 Jul 2021 21:20:57 +0800 Subject: [PATCH 30/60] rrealy ut go back --- tests/cpp/relay_build_module_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cpp/relay_build_module_test.cc b/tests/cpp/relay_build_module_test.cc index b6f1f668812e..37e9e6f9c42c 100644 --- a/tests/cpp/relay_build_module_test.cc +++ b/tests/cpp/relay_build_module_test.cc @@ -186,4 +186,4 @@ int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); testing::FLAGS_gtest_death_test_style = "threadsafe"; return RUN_ALL_TESTS(); -} \ No newline at end of file +} From 92294d3c3bd2480b79d0f15da31c2ab6a50c9810 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Thu, 22 Jul 2021 21:30:16 +0800 Subject: [PATCH 31/60] modify input->output --- src/runtime/graph_executor/graph_executor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h index d13dffa530fc..755a996bd425 100644 --- a/src/runtime/graph_executor/graph_executor.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -430,7 +430,7 @@ class TVM_DLL GraphExecutor : public ModuleNode { std::unordered_map output_map_; /*! 
\brief Used for quick node input DLTensor* lookup given an input eid. */ std::vector> input_dltensors_; - /*! \brief Used for quick node output DLTensor* lookup given an input eid. */ + /*! \brief Used for quick node output DLTensor* lookup given an output eid. */ std::vector> output_dltensors_; /*! \brief Used for quick entry indexing. */ std::vector node_row_ptr_; From dd54915874af515c64348468ae450a71d8f4f718 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Fri, 23 Jul 2021 09:40:53 +0800 Subject: [PATCH 32/60] delete sort output input --- src/runtime/graph_executor/graph_executor.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h index 755a996bd425..8bc5fe7b9732 100644 --- a/src/runtime/graph_executor/graph_executor.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -31,7 +31,6 @@ #include #include -#include #include #include #include @@ -368,17 +367,12 @@ class TVM_DLL GraphExecutor : public ModuleNode { bitmask |= 1; } else if (key == "arg_nodes") { reader->Read(&input_nodes_); - std::sort(input_nodes_.begin(), input_nodes_.end()); bitmask |= 2; } else if (key == "node_row_ptr") { reader->Read(&node_row_ptr_); bitmask |= 4; } else if (key == "heads") { reader->Read(&outputs_); - std::sort(outputs_.begin(), outputs_.end(), - [&](const NodeEntry& a, const NodeEntry& b) -> bool { - return a.node_id == b.node_id ? 
a.index < b.index : a.node_id < b.node_id; - }); bitmask |= 8; } else if (key == "attrs") { reader->Read(&attrs_); From 66ef5fe7068e0d1acf7244145d53921ba98137ea Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Fri, 23 Jul 2021 11:46:26 +0800 Subject: [PATCH 33/60] modify build_module_test.cc --- tests/cpp/build_module_test.cc | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/cpp/build_module_test.cc b/tests/cpp/build_module_test.cc index 8a7ef2d4bb6a..7d70179a3e74 100644 --- a/tests/cpp/build_module_test.cc +++ b/tests/cpp/build_module_test.cc @@ -284,12 +284,12 @@ TEST(BuildModule, ZeroCopy) { } // // Initialize graph executor. - int cpu_dev_ty = static_cast(kDLCPU); - int cpu_dev_id = 0; + int device_type = static_cast(kDLCPU); + int device_id = 0; const runtime::PackedFunc* graph_executor = tvm::runtime::Registry::Get("tvm.graph_executor.create"); - runtime::Module mod = (*graph_executor)(json, module, cpu_dev_ty, cpu_dev_id); + runtime::Module mod = (*graph_executor)(json, module, device_type, device_id); // test FFI for module. 
auto test_ffi = PackedFunc([](TVMArgs args, TVMRetValue* rv) { @@ -302,7 +302,6 @@ TEST(BuildModule, ZeroCopy) { PackedFunc set_input = mod.GetFunction("set_input", false); PackedFunc run = mod.GetFunction("run", false); - PackedFunc get_output = mod.GetFunction("get_output", false); PackedFunc set_output_zero_copy = mod.GetFunction("set_output_zero_copy", false); set_input("A", a_val); set_input("B", b_val); @@ -314,7 +313,6 @@ TEST(BuildModule, ZeroCopy) { set_output_zero_copy(1, out1); run(); - // print_data_entry(); float* p_out0 = (float*)out0->data; float* p_out1 = (float*)out1->data; From 7918c7b1aa6d925e817c57b896315832d0f5ee7f Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sat, 24 Jul 2021 09:29:35 +0800 Subject: [PATCH 34/60] re-pr From c7e00cb12f526437da731f631a3dc8252bd51afc Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sat, 24 Jul 2021 21:11:24 +0800 Subject: [PATCH 35/60] empty commit From 2558aee0a70cbcc629ce485aa662f0dbdfe567e5 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sun, 25 Jul 2021 10:57:14 +0800 Subject: [PATCH 36/60] empty commit From bf85d3e5184c06de0398a0ee1a8ebf2f28e01bde Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sun, 25 Jul 2021 22:32:07 +0800 Subject: [PATCH 37/60] empty commit From df24fc39626a36ba2eb88496b8e94fa31bfd2ac2 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Wed, 28 Jul 2021 20:19:06 +0800 Subject: [PATCH 38/60] modify input to ouput --- src/runtime/graph_executor/graph_executor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h index 8bc5fe7b9732..79da3064de05 100644 --- a/src/runtime/graph_executor/graph_executor.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -420,7 +420,7 @@ class TVM_DLL GraphExecutor : public ModuleNode { std::vector input_nodes_; /*! \brief Map of input names to input indices. */ std::unordered_map input_map_; - /*! \brief Map of input names to output indices. */ + /*! 
\brief Map of output names to output indices. */ std::unordered_map output_map_; /*! \brief Used for quick node input DLTensor* lookup given an input eid. */ std::vector> input_dltensors_; From c1bf14cb28f5682367a2693666af32b73c29f6c4 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Thu, 29 Jul 2021 10:10:57 +0800 Subject: [PATCH 39/60] modify zero ouput copy disorder issus --- src/runtime/graph_executor/graph_executor.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc index 85b99b0188fd..e6d9f45d8a92 100644 --- a/src/runtime/graph_executor/graph_executor.cc +++ b/src/runtime/graph_executor/graph_executor.cc @@ -445,16 +445,18 @@ void GraphExecutor::SetupOpExecs() { std::tie(op_execs_[nid], op_args) = CreateTVMOp(inode.param, args, inode.inputs.size()); for (size_t i = 0; i < inode.inputs.size(); i++) { - uint32_t eid = this->entry_id(inode.inputs[i]); - input_dltensors_[eid].push_back(static_cast(op_args->arg_values[i].v_handle)); + uint32_t input_eid = this->entry_id(inode.inputs[i]); + input_dltensors_[input_eid].push_back( + static_cast(op_args->arg_values[i].v_handle)); } // check if op output is model output if (output_node_id.count(nid) > 0) { - for (uint32_t index = inode.inputs.size(); - index < inode.param.num_outputs + inode.param.num_inputs; ++index) { - output_dltensors_[nid].push_back( - static_cast(op_args->arg_values[index].v_handle)); + for (uint32_t i = inode.inputs.size(); i < inode.inputs.size() + inode.param.num_outputs; + ++i) { + uint32_t output_eid = this->entry_id(nid, i - inode.inputs.size()); + output_dltensors_[output_eid].push_back( + static_cast(op_args->arg_values[i].v_handle)); } } } From 81143b92f016ac36a0dafee9471b5db051c45998 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Tue, 3 Aug 2021 16:02:25 +0800 Subject: [PATCH 40/60] modify nid->eid to record output, add var to record the dltensor both model output 
and op input --- src/runtime/graph_executor/graph_executor.cc | 41 ++++++++++---------- src/runtime/graph_executor/graph_executor.h | 2 + 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc index 8fafb670e2eb..8898ade985c9 100644 --- a/src/runtime/graph_executor/graph_executor.cc +++ b/src/runtime/graph_executor/graph_executor.cc @@ -178,22 +178,13 @@ void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) { CheckExternalDLTensor(data_ref, output_node_eid); // Update the data pointer for output op - for (DLTensor* t : output_dltensors_[output_node.node_id]) { + for (DLTensor* t : output_dltensors_[output_node_eid]) { t->data = data_ref->data; } // Update the input of the op connected to the output - for (size_t input_nid = 0; input_nid < nodes_.size(); input_nid++) { - if (std::find(nodes_[input_nid].inputs.begin(), nodes_[input_nid].inputs.end(), output_node) != - nodes_[input_nid].inputs.end()) { - uint32_t input_eid = this->entry_id(input_nid, 0); - // check the consistency of input - CheckExternalDLTensor(data_ref, input_eid); - // Update the data pointer for each argument of each op - for (DLTensor* t : input_dltensors_[input_eid]) { - t->data = data_ref->data; - } - } + for (DLTensor* t : both_output_opinput_dltensors_[output_node_eid]) { + t->data = data_ref->data; } } /*! 
@@ -416,14 +407,15 @@ void GraphExecutor::SetupOpExecs() { op_execs_.resize(this->GetNumOfNodes()); input_dltensors_.resize(num_node_entries()); output_dltensors_.resize(num_node_entries()); + both_output_opinput_dltensors_.resize(num_node_entries()); std::unordered_set input_node_eids; for (size_t i = 0; i < input_nodes_.size(); i++) { uint32_t nid = input_nodes_[i]; input_node_eids.insert(entry_id(nid, 0)); } - std::unordered_set output_node_id; + std::unordered_set output_node_eids; for (size_t i = 0; i < outputs_.size(); i++) { - output_node_id.insert(outputs_[i].node_id); + output_node_eids.insert(entry_id(outputs_[i])); } // setup the array and requirements. @@ -446,15 +438,22 @@ void GraphExecutor::SetupOpExecs() { for (size_t i = 0; i < inode.inputs.size(); i++) { uint32_t input_eid = this->entry_id(inode.inputs[i]); - input_dltensors_[input_eid].push_back( - static_cast(op_args->arg_values[i].v_handle)); + // check if op input is model input + if (input_node_eids.count(input_eid) > 0) { + input_dltensors_[input_eid].push_back( + static_cast(op_args->arg_values[i].v_handle)); + } + // check if any model output is the input of the op + if (output_node_eids.count(input_eid) > 0) { + both_output_opinput_dltensors_[input_eid].push_back( + static_cast(op_args->arg_values[i].v_handle)); + } } - // check if op output is model output - if (output_node_id.count(nid) > 0) { - for (uint32_t i = inode.inputs.size(); i < inode.inputs.size() + inode.param.num_outputs; - ++i) { - uint32_t output_eid = this->entry_id(nid, i - inode.inputs.size()); + for (uint32_t i = inode.inputs.size(); i < inode.inputs.size() + inode.param.num_outputs; ++i) { + uint32_t output_eid = this->entry_id(nid, i - inode.inputs.size()); + // check if op output is model output + if (output_node_eids.count(output_eid) > 0) { output_dltensors_[output_eid].push_back( static_cast(op_args->arg_values[i].v_handle)); } diff --git a/src/runtime/graph_executor/graph_executor.h 
b/src/runtime/graph_executor/graph_executor.h index d0b742f73816..8daf7a2c5d37 100644 --- a/src/runtime/graph_executor/graph_executor.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -425,6 +425,8 @@ class TVM_DLL GraphExecutor : public ModuleNode { std::vector> input_dltensors_; /*! \brief Used for quick node output DLTensor* lookup given an output eid. */ std::vector> output_dltensors_; + /*! \brief Used for quick node(both model output and op input) DLTensor* lookup given an output eid. */ + std::vector> both_output_opinput_dltensors_; /*! \brief Used for quick entry indexing. */ std::vector node_row_ptr_; /*! \brief Output entries. */ From 6f7b06882052fcce7b3de053b0c7b7475a4b0b81 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Tue, 3 Aug 2021 16:39:30 +0800 Subject: [PATCH 41/60] character too long >= 100 --- src/runtime/graph_executor/graph_executor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h index 8daf7a2c5d37..87e8aa3cee34 100644 --- a/src/runtime/graph_executor/graph_executor.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -425,7 +425,7 @@ class TVM_DLL GraphExecutor : public ModuleNode { std::vector> input_dltensors_; /*! \brief Used for quick node output DLTensor* lookup given an output eid. */ std::vector> output_dltensors_; - /*! \brief Used for quick node(both model output and op input) DLTensor* lookup given an output eid. */ + /*! \brief Used for quick node(both model output and op input) DLTensor* lookup given an eid. */ std::vector> both_output_opinput_dltensors_; /*! \brief Used for quick entry indexing. 
*/ std::vector node_row_ptr_; From 0d2567439a73c1ff6f1b453618e8955321759cad Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Tue, 3 Aug 2021 16:46:56 +0800 Subject: [PATCH 42/60] modify zero copy UT add set input zero copy --- tests/cpp/build_module_test.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/cpp/build_module_test.cc b/tests/cpp/build_module_test.cc index 7d70179a3e74..8d8da9607c14 100644 --- a/tests/cpp/build_module_test.cc +++ b/tests/cpp/build_module_test.cc @@ -300,12 +300,12 @@ TEST(BuildModule, ZeroCopy) { test_ffi(runtime::Module(mod), static_cast(kTVMModuleHandle)); test_ffi(Optional(mod), static_cast(kTVMModuleHandle)); - PackedFunc set_input = mod.GetFunction("set_input", false); - PackedFunc run = mod.GetFunction("run", false); + PackedFunc set_input_zero_copy = mod.GetFunction("set_input_zero_copy", false); PackedFunc set_output_zero_copy = mod.GetFunction("set_output_zero_copy", false); - set_input("A", a_val); - set_input("B", b_val); - set_input("C", c_val); + PackedFunc run = mod.GetFunction("run", false); + set_input_zero_copy("A", a_val); + set_input_zero_copy("B", b_val); + set_input_zero_copy("C", c_val); tvm::runtime::NDArray out0 = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); tvm::runtime::NDArray out1 = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); From 6fc50477cc440c1bff69be876e5c4a65ea620add Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Tue, 3 Aug 2021 16:48:47 +0800 Subject: [PATCH 43/60] modify zero copy UT add set input zero copy --- tests/cpp/build_module_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cpp/build_module_test.cc b/tests/cpp/build_module_test.cc index 8d8da9607c14..152b06d27a1b 100644 --- a/tests/cpp/build_module_test.cc +++ b/tests/cpp/build_module_test.cc @@ -309,8 +309,8 @@ TEST(BuildModule, ZeroCopy) { tvm::runtime::NDArray out0 = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); 
tvm::runtime::NDArray out1 = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - set_output_zero_copy(0, out0); - set_output_zero_copy(1, out1); + set_output_zero_copy("elemwise_add", out0); + set_output_zero_copy("elemwise_sub", out1); run(); float* p_out0 = (float*)out0->data; From 969c80f61b85c068edf8815c01f11cbc8b44dcad Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Tue, 3 Aug 2021 16:55:56 +0800 Subject: [PATCH 44/60] modify zero copy UT add set input zero copy --- tests/cpp/build_module_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cpp/build_module_test.cc b/tests/cpp/build_module_test.cc index 8d8da9607c14..94703cfbae3f 100644 --- a/tests/cpp/build_module_test.cc +++ b/tests/cpp/build_module_test.cc @@ -261,8 +261,8 @@ TEST(BuildModule, ZeroCopy) { "\"func_name\": \"elemwise_sub\", \"num_inputs\": \"2\", " "\"num_outputs\": \"1\"}, \"inputs\": [[3, 0, 0], [4, 0, 0]]}], " "\"arg_nodes\": [0, 1, 4], \"node_row_ptr\": [0, 1, 2, 3, 4, 5, 6], " - "\"heads\": [[2, 0, 0], [5, 0, 0]], \"attrs\": {\"storage_id\": [\"list_int\", [3, " - "4, 0, 1, 5, 2]], \"shape\": [\"list_shape\", [[4], [4], [4], [4], [4], " + "\"heads\": [[2, 0, 0], [5, 0, 0]], \"attrs\": {\"storage_id\": [\"list_int\", " + "[3, 4, 0, 1, 5, 2]], \"shape\": [\"list_shape\", [[4], [4], [4], [4], [4], " "[4]]], \"device_index\": [\"list_int\", [2, 2, 2, 1, 1, 1]], \"dtype\": " "[\"list_int\", [0, 0, 0, 0, 0, 0]], \"dltype\": [\"list_str\", " "[\"float32\", \"float32\", \"float32\", \"float32\", \"float32\", " From 5f858ccbadb759ec53530e3f8e33a8c1feef10a6 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Tue, 3 Aug 2021 20:33:10 +0800 Subject: [PATCH 45/60] empty commit From 1762cb5a522e2e15b4a4312f90f4a4071850eaa8 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Wed, 4 Aug 2021 11:22:28 +0800 Subject: [PATCH 46/60] trigger CI From 2640e764a53b34da3d752e6f2e7cc8fce1c5b046 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Wed, 4 Aug 2021 20:02:27 +0800 
Subject: [PATCH 47/60] trigger CI From 07128aab8c10794aa9f6087f8047e88cbae8d61e Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sat, 21 Aug 2021 00:08:00 +0800 Subject: [PATCH 48/60] empty commit From c0e89f5391ac3fc72658e2866bee9a9844588a70 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sat, 21 Aug 2021 07:58:24 +0800 Subject: [PATCH 49/60] empty commit From 3e46c0ec32619c02aca96d88a785fd1a204d4887 Mon Sep 17 00:00:00 2001 From: hwstaff Date: Sat, 21 Aug 2021 14:17:58 +0800 Subject: [PATCH 50/60] trigger CI From 6b3a1266309797d22c097655d092bc062cdd092e Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sat, 21 Aug 2021 14:18:18 +0800 Subject: [PATCH 51/60] trigger CI From 37b69b1bda4431686be1d106045554e42c2752f4 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sat, 21 Aug 2021 16:48:59 +0800 Subject: [PATCH 52/60] trigger CI From e622619d9d5b0d15b648b3116f1b218c700e1643 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sat, 21 Aug 2021 17:00:46 +0800 Subject: [PATCH 53/60] trigger CI From 8f9287f60d3da69d4014343749d51175efbf7948 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Sat, 21 Aug 2021 23:42:24 +0800 Subject: [PATCH 54/60] trigger CI From 13a1355e3267604d50ab7b290447637feef8b432 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Wed, 25 Aug 2021 14:58:40 +0800 Subject: [PATCH 55/60] modify C style --- tests/cpp/build_module_test.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/cpp/build_module_test.cc b/tests/cpp/build_module_test.cc index fe7293a9ccb7..a6e89849a39c 100644 --- a/tests/cpp/build_module_test.cc +++ b/tests/cpp/build_module_test.cc @@ -272,9 +272,9 @@ TEST(BuildModule, ZeroCopy) { auto b_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); auto c_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - auto pa = (float*)(a_val->data); - auto pb = (float*)(b_val->data); - auto pc = (float*)(c_val->data); + auto pa = static_cast(a_val->data); + auto pb = static_cast(b_val->data); + 
auto pc = static_cast(c_val->data); // Assign values. for (int i = 0; i < n; i++) { @@ -313,8 +313,9 @@ TEST(BuildModule, ZeroCopy) { set_output_zero_copy("elemwise_sub", out1); run(); - float* p_out0 = (float*)out0->data; - float* p_out1 = (float*)out1->data; + auto p_out0 = static_cast(out0->data); + auto p_out1 = static_cast(out1->data); + // Check correctness. for (int i = 0; i < n; ++i) { From cb09eab39d8756382719053e0e958a35598627e0 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Wed, 25 Aug 2021 15:00:31 +0800 Subject: [PATCH 56/60] add runtime test --- tests/cpp/runtime_test.cc | 154 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 tests/cpp/runtime_test.cc diff --git a/tests/cpp/runtime_test.cc b/tests/cpp/runtime_test.cc new file mode 100644 index 000000000000..9c4791d59995 --- /dev/null +++ b/tests/cpp/runtime_test.cc @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +TEST(Runtime, ZeroCopy) { + /* + * + * A B + * \ / + * elemwise_add(out0) + * \ + * C copy + * \ / + * elemwise_sub(out1) + */ + + using namespace tvm; + using namespace tvm::te; + + auto target_llvm = Target("llvm"); + + // The shape of input tensors. + const int n = 4; + Array shape{n}; + + auto A = placeholder(shape, DataType::Float(32), "A"); + auto B = placeholder(shape, DataType::Float(32), "B"); + auto C = placeholder(shape, DataType::Float(32), "C"); + + auto elemwise_add = compute( + A->shape, [&A, &B](PrimExpr i) { return A[i] + B[i]; }, "elemwise_add"); + + auto copy = placeholder(shape, DataType::Float(32), "__copy"); + auto elemwise_sub = compute( + C->shape, [©, &C](PrimExpr i) { return copy[i] - C[i]; }, "elemwise_sub"); + + With llvm_scope(target_llvm); + auto s1 = create_schedule({elemwise_add->op}); + auto s2 = create_schedule({elemwise_sub->op}); + + auto args1 = Array({A, B, elemwise_add}); + auto args2 = Array({copy, C, elemwise_sub}); + + std::unordered_map binds; + auto lowered_s1 = LowerSchedule(s1, args1, "elemwise_add", binds); + auto lowered_s2 = LowerSchedule(s2, args2, "elemwise_sub", binds); + Map inputs = {{target_llvm, lowered_s1}, {target_llvm, lowered_s2}}; + auto module = build(inputs, Target()); + + // Execute the graph and check the correctness. + // Setup graph json. 
+ std::string json = + "{\"nodes\": [{\"op\": \"null\", \"name\": \"A\", \"inputs\": []}, " + "{\"op\": \"null\", \"name\": \"B\", \"inputs\": []}, {\"op\": " + "\"tvm_op\", \"name\": \"elemwise_add\", \"attrs\": {\"flatten_data\": " + "\"1\", \"func_name\": \"elemwise_add\", \"num_inputs\": \"2\", " + "\"num_outputs\": \"1\"}, \"inputs\": [[0, 0, 0], [1, 0, 0]]}, {\"op\": " + "\"tvm_op\", \"name\": \"__copy_add_to_sub\", \"attrs\": " + "{\"flatten_data\": \"0\", \"func_name\": \"__copy\", \"num_inputs\": " + "\"1\", \"num_outputs\": \"1\"}, \"inputs\": [[2, 0, 0]]}, {\"op\": " + "\"null\", \"name\": \"C\", \"inputs\": []}, {\"op\": \"tvm_op\", " + "\"name\": \"elemwise_sub\", \"attrs\": {\"flatten_data\": \"0\", " + "\"func_name\": \"elemwise_sub\", \"num_inputs\": \"2\", " + "\"num_outputs\": \"1\"}, \"inputs\": [[3, 0, 0], [4, 0, 0]]}], " + "\"arg_nodes\": [0, 1, 4], \"node_row_ptr\": [0, 1, 2, 3, 4, 5, 6], " + "\"heads\": [[2, 0, 0], [5, 0, 0]], \"attrs\": {\"storage_id\": [\"list_int\", " + "[3, 4, 0, 1, 5, 2]], \"shape\": [\"list_shape\", [[4], [4], [4], [4], [4], " + "[4]]], \"device_index\": [\"list_int\", [2, 2, 2, 1, 1, 1]], \"dtype\": " + "[\"list_int\", [0, 0, 0, 0, 0, 0]], \"dltype\": [\"list_str\", " + "[\"float32\", \"float32\", \"float32\", \"float32\", \"float32\", " + "\"float32\"]]}}"; + // Setup inputs. + auto a_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); + auto b_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); + auto c_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); + + auto pa = static_cast(a_val->data); + auto pb = static_cast(b_val->data); + auto pc = static_cast(c_val->data); + + // Assign values. + for (int i = 0; i < n; i++) { + pa[i] = i; + pb[i] = i + 1.0; + pc[i] = i - 1.0; + } + + // // Initialize graph executor. 
+ int device_type = static_cast(kDLCPU); + int device_id = 0; + + const runtime::PackedFunc* graph_executor = + tvm::runtime::Registry::Get("tvm.graph_executor.create"); + runtime::Module mod = (*graph_executor)(json, module, device_type, device_id); + + // test FFI for module. + auto test_ffi = PackedFunc([](TVMArgs args, TVMRetValue* rv) { + int tcode = args[1]; + ICHECK_EQ(args[0].type_code(), tcode); + }); + + test_ffi(runtime::Module(mod), static_cast(kTVMModuleHandle)); + test_ffi(Optional(mod), static_cast(kTVMModuleHandle)); + + PackedFunc set_input_zero_copy = mod.GetFunction("set_input_zero_copy", false); + PackedFunc set_output_zero_copy = mod.GetFunction("set_output_zero_copy", false); + PackedFunc run = mod.GetFunction("run", false); + set_input_zero_copy("A", a_val); + set_input_zero_copy("B", b_val); + set_input_zero_copy("C", c_val); + + tvm::runtime::NDArray out0 = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); + tvm::runtime::NDArray out1 = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); + set_output_zero_copy("elemwise_add", out0); + set_output_zero_copy("elemwise_sub", out1); + + run(); + auto p_out0 = static_cast(out0->data); + auto p_out1 = static_cast(out1->data); + + // Check correctness. 
+ for (int i = 0; i < n; ++i) { + ICHECK_LT(std::fabs(p_out0[i] - (i + (i + 1.0))), 1e-5); + } + + for (int i = 0; i < n; ++i) { + ICHECK_LT(std::fabs(p_out1[i] - (i + (i + 1.0) - (i - 1.0))), 1e-5); + } +} From 32055901546145256df40cc78327a42257d80717 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Wed, 25 Aug 2021 15:01:12 +0800 Subject: [PATCH 57/60] add runtime test --- tests/cpp/runtime_test.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/cpp/runtime_test.cc b/tests/cpp/runtime_test.cc index 9c4791d59995..2b8c6e923d77 100644 --- a/tests/cpp/runtime_test.cc +++ b/tests/cpp/runtime_test.cc @@ -20,9 +20,7 @@ #include #include #include -#include #include -#include #include #include From aab0ef7928276c6d0a77acb875aa8b793f1553c8 Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Wed, 25 Aug 2021 15:16:25 +0800 Subject: [PATCH 58/60] add runtime test --- tests/cpp/build_module_test.cc | 127 --------------------------------- tests/cpp/runtime_test.cc | 5 -- 2 files changed, 132 deletions(-) diff --git a/tests/cpp/build_module_test.cc b/tests/cpp/build_module_test.cc index a6e89849a39c..2295c3dafe46 100644 --- a/tests/cpp/build_module_test.cc +++ b/tests/cpp/build_module_test.cc @@ -199,130 +199,3 @@ TEST(BuildModule, Heterogeneous) { ICHECK_LT(std::fabs(p_out[i] - (i + (i + 1.0) - (i - 1.0))), 1e-5); } } - -TEST(BuildModule, ZeroCopy) { - /* - * - * A B - * \ / - * elemwise_add(out0) - * \ - * C copy - * \ / - * elemwise_sub(out1) - */ - - using namespace tvm; - using namespace tvm::te; - - auto target_llvm = Target("llvm"); - - // The shape of input tensors. 
- const int n = 4; - Array shape{n}; - - auto A = placeholder(shape, DataType::Float(32), "A"); - auto B = placeholder(shape, DataType::Float(32), "B"); - auto C = placeholder(shape, DataType::Float(32), "C"); - - auto elemwise_add = compute( - A->shape, [&A, &B](PrimExpr i) { return A[i] + B[i]; }, "elemwise_add"); - - auto copy = placeholder(shape, DataType::Float(32), "__copy"); - auto elemwise_sub = compute( - C->shape, [©, &C](PrimExpr i) { return copy[i] - C[i]; }, "elemwise_sub"); - - With llvm_scope(target_llvm); - auto s1 = create_schedule({elemwise_add->op}); - auto s2 = create_schedule({elemwise_sub->op}); - - auto args1 = Array({A, B, elemwise_add}); - auto args2 = Array({copy, C, elemwise_sub}); - - std::unordered_map binds; - auto lowered_s1 = LowerSchedule(s1, args1, "elemwise_add", binds); - auto lowered_s2 = LowerSchedule(s2, args2, "elemwise_sub", binds); - Map inputs = {{target_llvm, lowered_s1}, {target_llvm, lowered_s2}}; - auto module = build(inputs, Target()); - - // Execute the graph and check the correctness. - // Setup graph json. 
- std::string json = - "{\"nodes\": [{\"op\": \"null\", \"name\": \"A\", \"inputs\": []}, " - "{\"op\": \"null\", \"name\": \"B\", \"inputs\": []}, {\"op\": " - "\"tvm_op\", \"name\": \"elemwise_add\", \"attrs\": {\"flatten_data\": " - "\"1\", \"func_name\": \"elemwise_add\", \"num_inputs\": \"2\", " - "\"num_outputs\": \"1\"}, \"inputs\": [[0, 0, 0], [1, 0, 0]]}, {\"op\": " - "\"tvm_op\", \"name\": \"__copy_add_to_sub\", \"attrs\": " - "{\"flatten_data\": \"0\", \"func_name\": \"__copy\", \"num_inputs\": " - "\"1\", \"num_outputs\": \"1\"}, \"inputs\": [[2, 0, 0]]}, {\"op\": " - "\"null\", \"name\": \"C\", \"inputs\": []}, {\"op\": \"tvm_op\", " - "\"name\": \"elemwise_sub\", \"attrs\": {\"flatten_data\": \"0\", " - "\"func_name\": \"elemwise_sub\", \"num_inputs\": \"2\", " - "\"num_outputs\": \"1\"}, \"inputs\": [[3, 0, 0], [4, 0, 0]]}], " - "\"arg_nodes\": [0, 1, 4], \"node_row_ptr\": [0, 1, 2, 3, 4, 5, 6], " - "\"heads\": [[2, 0, 0], [5, 0, 0]], \"attrs\": {\"storage_id\": [\"list_int\", " - "[3, 4, 0, 1, 5, 2]], \"shape\": [\"list_shape\", [[4], [4], [4], [4], [4], " - "[4]]], \"device_index\": [\"list_int\", [2, 2, 2, 1, 1, 1]], \"dtype\": " - "[\"list_int\", [0, 0, 0, 0, 0, 0]], \"dltype\": [\"list_str\", " - "[\"float32\", \"float32\", \"float32\", \"float32\", \"float32\", " - "\"float32\"]]}}"; - // Setup inputs. - auto a_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - auto b_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - auto c_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - - auto pa = static_cast(a_val->data); - auto pb = static_cast(b_val->data); - auto pc = static_cast(c_val->data); - - // Assign values. - for (int i = 0; i < n; i++) { - pa[i] = i; - pb[i] = i + 1.0; - pc[i] = i - 1.0; - } - - // // Initialize graph executor. 
- int device_type = static_cast(kDLCPU); - int device_id = 0; - - const runtime::PackedFunc* graph_executor = - tvm::runtime::Registry::Get("tvm.graph_executor.create"); - runtime::Module mod = (*graph_executor)(json, module, device_type, device_id); - - // test FFI for module. - auto test_ffi = PackedFunc([](TVMArgs args, TVMRetValue* rv) { - int tcode = args[1]; - ICHECK_EQ(args[0].type_code(), tcode); - }); - - test_ffi(runtime::Module(mod), static_cast(kTVMModuleHandle)); - test_ffi(Optional(mod), static_cast(kTVMModuleHandle)); - - PackedFunc set_input_zero_copy = mod.GetFunction("set_input_zero_copy", false); - PackedFunc set_output_zero_copy = mod.GetFunction("set_output_zero_copy", false); - PackedFunc run = mod.GetFunction("run", false); - set_input_zero_copy("A", a_val); - set_input_zero_copy("B", b_val); - set_input_zero_copy("C", c_val); - - tvm::runtime::NDArray out0 = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - tvm::runtime::NDArray out1 = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - set_output_zero_copy("elemwise_add", out0); - set_output_zero_copy("elemwise_sub", out1); - - run(); - auto p_out0 = static_cast(out0->data); - auto p_out1 = static_cast(out1->data); - - - // Check correctness. - for (int i = 0; i < n; ++i) { - ICHECK_LT(std::fabs(p_out0[i] - (i + (i + 1.0))), 1e-5); - } - - for (int i = 0; i < n; ++i) { - ICHECK_LT(std::fabs(p_out1[i] - (i + (i + 1.0) - (i - 1.0))), 1e-5); - } -} diff --git a/tests/cpp/runtime_test.cc b/tests/cpp/runtime_test.cc index 2b8c6e923d77..aeb7bfa6fdd5 100644 --- a/tests/cpp/runtime_test.cc +++ b/tests/cpp/runtime_test.cc @@ -17,14 +17,10 @@ * under the License. 
*/ -#include #include #include #include -#include -#include - TEST(Runtime, ZeroCopy) { /* * @@ -36,7 +32,6 @@ TEST(Runtime, ZeroCopy) { * \ / * elemwise_sub(out1) */ - using namespace tvm; using namespace tvm::te; From 8c0dfb6e95992105c2535a534c06d7c7d2412c3d Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Thu, 26 Aug 2021 14:44:04 +0800 Subject: [PATCH 59/60] realy build generatr the json --- tests/cpp/runtime_test.cc | 247 ++++++++++++++++++++------------------ 1 file changed, 127 insertions(+), 120 deletions(-) diff --git a/tests/cpp/runtime_test.cc b/tests/cpp/runtime_test.cc index aeb7bfa6fdd5..a9f93dd0efb8 100644 --- a/tests/cpp/runtime_test.cc +++ b/tests/cpp/runtime_test.cc @@ -19,129 +19,136 @@ #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include + +using namespace tvm; +using namespace tvm::relay; + +TVM_REGISTER_GLOBAL("runtime_test.strategy") + .set_body_typed([](const Attrs& attrs, const Array& inputs, const Type& out_type, + const Target& target) { + FTVMCompute fcompute = [](const Attrs& attrs, const Array& inputs, + const Type& out_type) -> Array { + ICHECK_EQ(inputs.size(), 2U); + return {topi::add(inputs[0], inputs[1])}; + }; + FTVMSchedule fschedule = [](const Attrs& attrs, const Array& outs, + const Target& target) { + With target_scope(target); + return topi::generic::schedule_injective(target, outs); + }; + + auto n = make_object(); + auto strategy = tvm::relay::OpStrategy(std::move(n)); + strategy.AddImplementation(fcompute, fschedule, "runtime_test.strategy", 10); + return strategy; + }); TEST(Runtime, ZeroCopy) { - /* - * - * A B - * \ / - * elemwise_add(out0) - * \ - * C copy - * \ / - * elemwise_sub(out1) - */ - using namespace tvm; - using namespace tvm::te; - - auto target_llvm = Target("llvm"); - - // The shape of input tensors. 
- const int n = 4; - Array shape{n}; - - auto A = placeholder(shape, DataType::Float(32), "A"); - auto B = placeholder(shape, DataType::Float(32), "B"); - auto C = placeholder(shape, DataType::Float(32), "C"); - - auto elemwise_add = compute( - A->shape, [&A, &B](PrimExpr i) { return A[i] + B[i]; }, "elemwise_add"); - - auto copy = placeholder(shape, DataType::Float(32), "__copy"); - auto elemwise_sub = compute( - C->shape, [©, &C](PrimExpr i) { return copy[i] - C[i]; }, "elemwise_sub"); - - With llvm_scope(target_llvm); - auto s1 = create_schedule({elemwise_add->op}); - auto s2 = create_schedule({elemwise_sub->op}); - - auto args1 = Array({A, B, elemwise_add}); - auto args2 = Array({copy, C, elemwise_sub}); - - std::unordered_map binds; - auto lowered_s1 = LowerSchedule(s1, args1, "elemwise_add", binds); - auto lowered_s2 = LowerSchedule(s2, args2, "elemwise_sub", binds); - Map inputs = {{target_llvm, lowered_s1}, {target_llvm, lowered_s2}}; - auto module = build(inputs, Target()); - - // Execute the graph and check the correctness. - // Setup graph json. 
- std::string json = - "{\"nodes\": [{\"op\": \"null\", \"name\": \"A\", \"inputs\": []}, " - "{\"op\": \"null\", \"name\": \"B\", \"inputs\": []}, {\"op\": " - "\"tvm_op\", \"name\": \"elemwise_add\", \"attrs\": {\"flatten_data\": " - "\"1\", \"func_name\": \"elemwise_add\", \"num_inputs\": \"2\", " - "\"num_outputs\": \"1\"}, \"inputs\": [[0, 0, 0], [1, 0, 0]]}, {\"op\": " - "\"tvm_op\", \"name\": \"__copy_add_to_sub\", \"attrs\": " - "{\"flatten_data\": \"0\", \"func_name\": \"__copy\", \"num_inputs\": " - "\"1\", \"num_outputs\": \"1\"}, \"inputs\": [[2, 0, 0]]}, {\"op\": " - "\"null\", \"name\": \"C\", \"inputs\": []}, {\"op\": \"tvm_op\", " - "\"name\": \"elemwise_sub\", \"attrs\": {\"flatten_data\": \"0\", " - "\"func_name\": \"elemwise_sub\", \"num_inputs\": \"2\", " - "\"num_outputs\": \"1\"}, \"inputs\": [[3, 0, 0], [4, 0, 0]]}], " - "\"arg_nodes\": [0, 1, 4], \"node_row_ptr\": [0, 1, 2, 3, 4, 5, 6], " - "\"heads\": [[2, 0, 0], [5, 0, 0]], \"attrs\": {\"storage_id\": [\"list_int\", " - "[3, 4, 0, 1, 5, 2]], \"shape\": [\"list_shape\", [[4], [4], [4], [4], [4], " - "[4]]], \"device_index\": [\"list_int\", [2, 2, 2, 1, 1, 1]], \"dtype\": " - "[\"list_int\", [0, 0, 0, 0, 0, 0]], \"dltype\": [\"list_str\", " - "[\"float32\", \"float32\", \"float32\", \"float32\", \"float32\", " - "\"float32\"]]}}"; - // Setup inputs. - auto a_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - auto b_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - auto c_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - - auto pa = static_cast(a_val->data); - auto pb = static_cast(b_val->data); - auto pc = static_cast(c_val->data); - - // Assign values. 
- for (int i = 0; i < n; i++) { - pa[i] = i; - pb[i] = i + 1.0; - pc[i] = i - 1.0; + auto tensor_type = relay::TensorType({2, 3}, DataType::Float(32)); + auto a = relay::Var("a", tensor_type); + auto b = relay::Var("b", tensor_type); + auto add_op = relay::Op::Get("add"); + auto x = relay::Call(add_op, {a, b}, tvm::Attrs(), {}); + auto c = relay::Var("c", tensor_type); + auto y = relay::Call(add_op, {x, c}, tvm::Attrs(), {}); + auto func = relay::Function(relay::FreeVars(y), y, relay::Type(), {}); + auto A = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0}); + auto B = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0}); + auto C = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0}); + auto Y = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0}); + + auto pA = static_cast(A->data); + auto pB = static_cast(B->data); + auto pC = static_cast(C->data); + auto pY = static_cast(Y->data); + + for (int i = 0; i < 6; ++i) { + pA[i] = i; + pB[i] = i + 1; + pC[i] = i + 2; } - - // // Initialize graph executor. - int device_type = static_cast(kDLCPU); - int device_id = 0; - - const runtime::PackedFunc* graph_executor = - tvm::runtime::Registry::Get("tvm.graph_executor.create"); - runtime::Module mod = (*graph_executor)(json, module, device_type, device_id); - - // test FFI for module. 
- auto test_ffi = PackedFunc([](TVMArgs args, TVMRetValue* rv) { - int tcode = args[1]; - ICHECK_EQ(args[0].type_code(), tcode); - }); - - test_ffi(runtime::Module(mod), static_cast(kTVMModuleHandle)); - test_ffi(Optional(mod), static_cast(kTVMModuleHandle)); - - PackedFunc set_input_zero_copy = mod.GetFunction("set_input_zero_copy", false); - PackedFunc set_output_zero_copy = mod.GetFunction("set_output_zero_copy", false); - PackedFunc run = mod.GetFunction("run", false); - set_input_zero_copy("A", a_val); - set_input_zero_copy("B", b_val); - set_input_zero_copy("C", c_val); - - tvm::runtime::NDArray out0 = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - tvm::runtime::NDArray out1 = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - set_output_zero_copy("elemwise_add", out0); - set_output_zero_copy("elemwise_sub", out1); - - run(); - auto p_out0 = static_cast(out0->data); - auto p_out1 = static_cast(out1->data); - - // Check correctness. - for (int i = 0; i < n; ++i) { - ICHECK_LT(std::fabs(p_out0[i] - (i + (i + 1.0))), 1e-5); + // get schedule + auto reg = tvm::runtime::Registry::Get("ir.RegisterOpAttr"); + if (!reg) { + LOG(FATAL) << "no _Register"; } - - for (int i = 0; i < n; ++i) { - ICHECK_LT(std::fabs(p_out1[i] - (i + (i + 1.0) - (i - 1.0))), 1e-5); + auto fs = tvm::runtime::Registry::Get("runtime_test.strategy"); + if (!fs) { + LOG(FATAL) << "No test_strategy registered."; + } + auto fgeneric = GenericFunc::Get("runtime_test.strategy_generic").set_default(*fs); + (*reg)("add", "FTVMStrategy", fgeneric, 10); + Array dep; + dep.push_back(0); + (*reg)("add", "TShapeDataDependent", dep, 10); + // build + auto pfb = tvm::runtime::Registry::Get("relay.build_module._BuildModule"); + tvm::runtime::Module build_mod = (*pfb)(); + auto build_f = build_mod.GetFunction("build", false); + auto json_f = build_mod.GetFunction("get_graph_json", false); + auto mod_f = build_mod.GetFunction("get_module", false); + Map targets; + Target llvm_tgt = 
Target("llvm"); + targets.Set(0, llvm_tgt); + auto relay_mod = tvm::IRModule::FromExpr(func); + ICHECK(relay_mod.defined()) << "Module must be defined"; + build_f(relay_mod, targets, llvm_tgt, runtime::kTvmExecutorGraph, ""); + // create graph executor + std::string json = json_f(); + tvm::runtime::Module mod = mod_f(); + auto dev = A->device; + auto pfr = tvm::runtime::Registry::Get("tvm.graph_executor.create"); + ICHECK(mod.defined()) << "Module must be defined"; + tvm::runtime::Module run_mod = + (*pfr)(json, mod, static_cast(dev.device_type), dev.device_id); + // get function + auto set_input_f = run_mod.GetFunction("set_input_zero_copy", false); + auto set_output_f = run_mod.GetFunction("setput_zero_copy", false); + auto run_f = run_mod.GetFunction("run", false); + // set input zero copy + set_input_f("a", const_cast(A.operator->())); + set_input_f("b", const_cast(B.operator->())); + set_input_f("c", const_cast(C.operator->())); + // set output zero copy + set_output_f("y", const_cast(Y.operator->())); + run_f(); + // check correctness + for (int i = 0; i < 6; ++i) { + ICHECK_LT(fabs(pY[i] - (i + (i + 1) + (i + 2))), 1e-4); + } + // mutate the input a bit and run it again + for (int i = 0; i < 6; ++i) { + pB[i] = i + 3; + } + run_f(); + // check correctness + for (int i = 0; i < 6; ++i) { + ICHECK_LT(fabs(pY[i] - (i + (i + 3) + (i + 2))), 1e-4); + } + // attach a different input and run it again + auto C2 = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0}); + auto pC2 = static_cast(C2->data); + for (int i = 0; i < 6; ++i) { + pC2[i] = i + 4; + } + set_input_f("c", const_cast(C2.operator->())); + run_f(); + // check correctness + for (int i = 0; i < 6; ++i) { + ICHECK_LT(fabs(pY[i] - (i + (i + 3) + (i + 4))), 1e-4); } -} +} \ No newline at end of file From 2603263512acbd63e9726dcf83982d9b936e331d Mon Sep 17 00:00:00 2001 From: "Swift.Sun" Date: Thu, 26 Aug 2021 14:44:50 +0800 Subject: [PATCH 60/60] realy build generatr the json --- 
tests/cpp/runtime_test.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/cpp/runtime_test.cc b/tests/cpp/runtime_test.cc index a9f93dd0efb8..6dbcd61b8c37 100644 --- a/tests/cpp/runtime_test.cc +++ b/tests/cpp/runtime_test.cc @@ -117,14 +117,14 @@ TEST(Runtime, ZeroCopy) { (*pfr)(json, mod, static_cast(dev.device_type), dev.device_id); // get function auto set_input_f = run_mod.GetFunction("set_input_zero_copy", false); - auto set_output_f = run_mod.GetFunction("setput_zero_copy", false); + auto set_output_f = run_mod.GetFunction("set_output_zero_copy", false); auto run_f = run_mod.GetFunction("run", false); // set input zero copy set_input_f("a", const_cast(A.operator->())); set_input_f("b", const_cast(B.operator->())); set_input_f("c", const_cast(C.operator->())); // set output zero copy - set_output_f("y", const_cast(Y.operator->())); + set_output_f(0, const_cast(Y.operator->())); run_f(); // check correctness for (int i = 0; i < 6; ++i) { @@ -151,4 +151,4 @@ TEST(Runtime, ZeroCopy) { for (int i = 0; i < 6; ++i) { ICHECK_LT(fabs(pY[i] - (i + (i + 3) + (i + 4))), 1e-4); } -} \ No newline at end of file +}