From 317186f98ddecadb67650293a5c4269cbf7e76e6 Mon Sep 17 00:00:00 2001
From: Jon
Date: Fri, 20 Sep 2019 09:41:01 -0700
Subject: [PATCH 01/18] Fix extern schedule for x86

---
 topi/include/topi/x86/extern.h     | 89 ++++++++++++++++++++++++++++++
 topi/python/topi/cuda/__init__.py  |  1 -
 topi/python/topi/cuda/extern.py    | 48 ----------------
 topi/python/topi/generic/extern.py | 12 ++--
 topi/python/topi/x86/dense.py      |  4 ++
 5 files changed, 101 insertions(+), 53 deletions(-)
 create mode 100644 topi/include/topi/x86/extern.h
 delete mode 100644 topi/python/topi/cuda/extern.py

diff --git a/topi/include/topi/x86/extern.h b/topi/include/topi/x86/extern.h
new file mode 100644
index 000000000000..f925c72dd428
--- /dev/null
+++ b/topi/include/topi/x86/extern.h
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file x86/extern.h
+ * \brief x86 schedule for extern followed by injective operations
+ */
+#ifndef TOPI_X86_EXTERN_H_
+#define TOPI_X86_EXTERN_H_
+
+#include "topi/tags.h"
+#include "topi/detail/fuse.h"
+#include "tvm/operation.h"
+#include "tvm/build_module.h"
+
+namespace topi {
+using namespace tvm;
+
+namespace x86 {
+/*!
+ * \brief Schedule a given operation representing one of the outputs of an
+ * external function which is followed by injective operations.
+ *
+ * \param target The target to generate a schedule for.
+ * \param op The operation representing the output followed by injective operations.
+ * \param sch The schedule to apply this scheduling to
+ *
+ * \return The schedule given by sch
+ */
+inline Schedule ScheduleOutputForExtern(Target target, Operation op, Schedule sch) {
+  auto x = op.output(0);
+  auto axis = sch[x]->op.as<ComputeOpNode>()->axis;
+  if (axis.size() == 4) {
+    auto n = axis[0];
+    auto c = axis[1];
+    auto fused = detail::Fuse(sch[x], { n, c });  // for nhwc layout, fuse n and h
+    sch[x].parallel(fused);
+  } else {
+    sch[x].parallel(axis[0]);
+  }
+  return sch;
+}
+
+/*!
+* \brief Schedule an extern op followed by injective operations.
+* For example, cudnn kernel + bias add + relu
+*
+* \param target The target to generate a schedule for.
+* \param outs The output tensors.
+*
+* \return A schedule for the op.
+*/
+inline Schedule schedule_extern(const Target& target, Array<Tensor> outs) {
+  Array<Operation> out_ops;
+  for (auto t : outs) {
+    out_ops.push_back(t->op);
+  }
+  auto s = create_schedule(out_ops);
+
+  tvm::schedule::AutoInlineInjective(s);
+  for (auto out : outs) {
+    if (out->op->derived_from<tvm::ExternOpNode>()) {
+      continue;
+    }
+    ScheduleOutputForExtern(target, out->op, s);
+  }
+
+  return s;
+}
+
+}  // namespace x86
+}  // namespace topi
+#endif  // TOPI_X86_EXTERN_H_
diff --git a/topi/python/topi/cuda/__init__.py b/topi/python/topi/cuda/__init__.py
index 403f67b972f7..beab0afb0e3f 100644
--- a/topi/python/topi/cuda/__init__.py
+++ b/topi/python/topi/cuda/__init__.py
@@ -13,7 +13,6 @@
 from .injective import schedule_injective, schedule_elemwise, schedule_broadcast
 from .dense import schedule_dense
 from .pooling import schedule_pool, schedule_adaptive_pool
-from .extern import schedule_extern
 from .nn import schedule_lrn, schedule_l2_normalize
 from .batch_matmul import schedule_batch_matmul
 from .vision import *
diff --git a/topi/python/topi/cuda/extern.py b/topi/python/topi/cuda/extern.py
deleted file mode 100644
index 74b88ec4ccd7..000000000000
--- a/topi/python/topi/cuda/extern.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name, unused-variable,
-"""Schedule for cudnn and miopen extern op"""
-import tvm
-from .. import generic
-from .injective import _schedule_injective
-
-
-@generic.schedule_extern.register(["cuda", "gpu"])
-def schedule_extern(outs):
-    """Schedule for an extern op followed by injective operations.
-    For example, cudnn kernel + bias add + relu.
-
-    Parameters
-    ----------
-    outs: Array of Tensor
-        The computation graph description of extern plus injective ops in the format
-        of an array of tensors.
-
-    Returns
-    -------
-    sch: Schedule
-        The computation schedule for the op.
-    """
-    outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
-    s = tvm.create_schedule([x.op for x in outs])
-
-    tvm.schedule.AutoInlineInjective(s)
-    for out in outs:
-        if isinstance(out.op, tvm.tensor.ExternOp):
-            continue
-        _schedule_injective(out.op, s)
-    return s
diff --git a/topi/python/topi/generic/extern.py b/topi/python/topi/generic/extern.py
index 3728222fa622..6cddc4c7230d 100644
--- a/topi/python/topi/generic/extern.py
+++ b/topi/python/topi/generic/extern.py
@@ -35,8 +35,12 @@ def schedule_extern(outs):
     sch: Schedule
         The computation schedule for the op.
""" - target = tvm.target.current_target(allow_none=False) - if target.target_name != "llvm": - raise RuntimeError("schedule_extern not registered for '%s'" % target) outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - return tvm.create_schedule([x.op for x in outs]) + s = tvm.create_schedule([x.op for x in outs]) + + tvm.schedule.AutoInlineInjective(s) + for out in outs: + if isinstance(out.op, tvm.tensor.ExternOp): + continue + _schedule_injective(out.op, s) + return s diff --git a/topi/python/topi/x86/dense.py b/topi/python/topi/x86/dense.py index 27cadfa04cce..9b7624eedd06 100644 --- a/topi/python/topi/x86/dense.py +++ b/topi/python/topi/x86/dense.py @@ -111,6 +111,10 @@ def _declaration_dense_nopack(cfg, data, weight, bias=None, out_dtype=None): @autotvm.register_topi_schedule(generic.schedule_dense, "cpu", "direct") def _schedule_dense(cfg, outs): + target = tvm.target.current_target() + if "cblas" in target.libs: + return generic.schedule_extern(outs) + s = tvm.create_schedule([x.op for x in outs]) def _callback(op): From 299a80bc60409af074853bff387f075cf6264289 Mon Sep 17 00:00:00 2001 From: Jon Date: Fri, 20 Sep 2019 14:34:30 -0700 Subject: [PATCH 02/18] Register x86::schedule_extern --- topi/python/topi/generic/extern.py | 1 + topi/src/topi.cc | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/topi/python/topi/generic/extern.py b/topi/python/topi/generic/extern.py index 6cddc4c7230d..8273d9263ef1 100644 --- a/topi/python/topi/generic/extern.py +++ b/topi/python/topi/generic/extern.py @@ -19,6 +19,7 @@ from __future__ import absolute_import as _abs import tvm +from .injective import _schedule_injective @tvm.target.generic_func def schedule_extern(outs): diff --git a/topi/src/topi.cc b/topi/src/topi.cc index b0709399660c..91d18d212428 100644 --- a/topi/src/topi.cc +++ b/topi/src/topi.cc @@ -65,6 +65,7 @@ #include #include +#include #include #include @@ -606,6 +607,11 @@ TVM_REGISTER_GLOBAL("topi.x86.default_schedule") } }); +TVM_REGISTER_GLOBAL("topi.x86.schedule_extern") +.set_body([](TVMArgs args, TVMRetValue *rv) { + *rv = topi::x86::schedule_extern(args[0], args[1]); + }); + TVM_REGISTER_GLOBAL("topi.x86.schedule_injective") .set_body([](TVMArgs args, TVMRetValue *rv) { *rv = topi::x86::schedule_injective(args[0], args[1]); From d7b21f888729be55b7b0d74dda0fcf3a858602ca Mon Sep 17 00:00:00 2001 From: Jon Date: Fri, 20 Sep 2019 15:20:41 -0700 Subject: [PATCH 03/18] Fix --- topi/python/topi/generic/extern.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/topi/python/topi/generic/extern.py b/topi/python/topi/generic/extern.py index 8273d9263ef1..78d38de26512 100644 --- a/topi/python/topi/generic/extern.py +++ b/topi/python/topi/generic/extern.py @@ -19,7 +19,7 @@ from __future__ import absolute_import as _abs import tvm -from .injective import _schedule_injective +from .injective import schedule_injective @tvm.target.generic_func def schedule_extern(outs): @@ -43,5 +43,5 @@ def schedule_extern(outs): for out in outs: if isinstance(out.op, tvm.tensor.ExternOp): continue - _schedule_injective(out.op, s) + schedule_injective(out.op, s) return s From 261639395e3ed74fbb3b26318bd12bc72d953dee Mon Sep 17 00:00:00 2001 From: Jon Date: Fri, 20 Sep 2019 15:33:45 -0700 Subject: [PATCH 04/18] Fix --- topi/python/topi/generic/extern.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/topi/python/topi/generic/extern.py b/topi/python/topi/generic/extern.py index 78d38de26512..5983aeccfb21 100644 --- a/topi/python/topi/generic/extern.py 
From 261639395e3ed74fbb3b26318bd12bc72d953dee Mon Sep 17 00:00:00 2001
From: Jon
Date: Fri, 20 Sep 2019 15:33:45 -0700
Subject: [PATCH 04/18] Fix

---
 topi/python/topi/generic/extern.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/topi/python/topi/generic/extern.py b/topi/python/topi/generic/extern.py
index 78d38de26512..5983aeccfb21 100644
--- a/topi/python/topi/generic/extern.py
+++ b/topi/python/topi/generic/extern.py
@@ -43,5 +43,5 @@ def schedule_extern(outs):
     for out in outs:
         if isinstance(out.op, tvm.tensor.ExternOp):
             continue
-        schedule_injective(out.op, s)
+        schedule_injective(out)
     return s

From 4fb7063282b6c5ab69356204a68545144059d4e8 Mon Sep 17 00:00:00 2001
From: Jon Soifer
Date: Mon, 23 Sep 2019 11:22:27 -0700
Subject: [PATCH 05/18] Replace extern.py with extern.h

---
 topi/include/topi/cuda/extern.h    | 86 -----------------------------
 topi/include/topi/generic/extern.h | 11 ++++
 topi/include/topi/x86/extern.h     | 89 ------------------------------
 topi/python/topi/generic/extern.py | 13 +----
 topi/src/topi.cc                   | 12 ----
 5 files changed, 14 insertions(+), 197 deletions(-)
 delete mode 100644 topi/include/topi/cuda/extern.h
 delete mode 100644 topi/include/topi/x86/extern.h

diff --git a/topi/include/topi/cuda/extern.h b/topi/include/topi/cuda/extern.h
deleted file mode 100644
index 7800986ead02..000000000000
--- a/topi/include/topi/cuda/extern.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file cuda/extern.h
- * \brief CUDA schedule for extern followed by injective operations
- */
-#ifndef TOPI_CUDA_EXTERN_H_
-#define TOPI_CUDA_EXTERN_H_
-
-#include "topi/tags.h"
-#include "topi/detail/fuse.h"
-#include "tvm/operation.h"
-#include "tvm/build_module.h"
-
-namespace topi {
-using namespace tvm;
-
-namespace cuda {
-/*!
- * \brief Schedule a given operation representing one of the outputs of an
- * external function which is followed by injective operations.
- *
- * \param target The target to generate a schedule for.
- * \param op The operation representing the output followed by injective operations.
- * \param sch The schedule to apply this scheduling to
- *
- * \return The schedule given by sch
- */
-inline Schedule ScheduleOutputForExtern(Target target, Operation op, Schedule sch) {
-  auto x = op.output(0);
-  auto fused = detail::Fuse(sch[x], sch[x]->op.as<ComputeOpNode>()->axis);
-  auto num_thread = target->max_num_threads;
-  IterVar bx, tx;
-  sch[x].split(fused, num_thread, &bx, &tx);
-  sch[x].bind(bx, tvm::thread_axis(Range(), "blockIdx.x"));
-  sch[x].bind(tx, tvm::thread_axis(Range(), "threadIdx.x"));
-  return sch;
-}
-
-/*!
-* \brief Schedule an extern op followed by injective operations.
-* For example, cudnn kernel + bias add + relu
-*
-* \param target The target to generate a schedule for.
-* \param outs The output tensors.
-*
-* \return A schedule for the op.
-*/
-inline Schedule schedule_extern(const Target& target, Array<Tensor> outs) {
-  Array<Operation> out_ops;
-  for (auto t : outs) {
-    out_ops.push_back(t->op);
-  }
-  auto s = create_schedule(out_ops);
-
-  tvm::schedule::AutoInlineInjective(s);
-  for (auto out : outs) {
-    if (out->op->derived_from<tvm::ExternOpNode>()) {
-      continue;
-    }
-    ScheduleOutputForExtern(target, out->op, s);
-  }
-
-  return s;
-}
-
-}  // namespace cuda
-}  // namespace topi
-#endif  // TOPI_CUDA_EXTERN_H_
diff --git a/topi/include/topi/generic/extern.h b/topi/include/topi/generic/extern.h
index 6c51d7c7b2d5..57daedd5ed85 100644
--- a/topi/include/topi/generic/extern.h
+++ b/topi/include/topi/generic/extern.h
@@ -28,6 +28,7 @@
 #include "topi/detail/fuse.h"
 #include "tvm/operation.h"
 #include "tvm/build_module.h"
+#include "injective.h"
 
 namespace topi {
 using namespace tvm;
@@ -47,6 +48,16 @@ inline Schedule schedule_extern(const Target& target, Array<Tensor> outs) {
     out_ops.push_back(t->op);
   }
   auto s = create_schedule(out_ops);
+
+  tvm::schedule::AutoInlineInjective(s);
+  for (auto out : outs) {
+    if (out->op->derived_from<tvm::ExternOpNode>()) {
+      continue;
+    }
+    Array<Tensor> new_outs = { out };
+    tvm::GenericFunc::Get("schedule_injective")(new_outs);
+  }
+
   return s;
 }
 
diff --git a/topi/include/topi/x86/extern.h b/topi/include/topi/x86/extern.h
deleted file mode 100644
index f925c72dd428..000000000000
--- a/topi/include/topi/x86/extern.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file x86/extern.h
- * \brief x86 schedule for extern followed by injective operations
- */
-#ifndef TOPI_X86_EXTERN_H_
-#define TOPI_X86_EXTERN_H_
-
-#include "topi/tags.h"
-#include "topi/detail/fuse.h"
-#include "tvm/operation.h"
-#include "tvm/build_module.h"
-
-namespace topi {
-using namespace tvm;
-
-namespace x86 {
-/*!
- * \brief Schedule a given operation representing one of the outputs of an
- * external function which is followed by injective operations.
- *
- * \param target The target to generate a schedule for.
- * \param op The operation representing the output followed by injective operations.
- * \param sch The schedule to apply this scheduling to
- *
- * \return The schedule given by sch
- */
-inline Schedule ScheduleOutputForExtern(Target target, Operation op, Schedule sch) {
-  auto x = op.output(0);
-  auto axis = sch[x]->op.as<ComputeOpNode>()->axis;
-  if (axis.size() == 4) {
-    auto n = axis[0];
-    auto c = axis[1];
-    auto fused = detail::Fuse(sch[x], { n, c });  // for nhwc layout, fuse n and h
-    sch[x].parallel(fused);
-  } else {
-    sch[x].parallel(axis[0]);
-  }
-  return sch;
-}
-
-/*!
-* \brief Schedule an extern op followed by injective operations.
-* For example, cudnn kernel + bias add + relu
-*
-* \param target The target to generate a schedule for.
-* \param outs The output tensors.
-*
-* \return A schedule for the op.
-*/
-inline Schedule schedule_extern(const Target& target, Array<Tensor> outs) {
-  Array<Operation> out_ops;
-  for (auto t : outs) {
-    out_ops.push_back(t->op);
-  }
-  auto s = create_schedule(out_ops);
-
-  tvm::schedule::AutoInlineInjective(s);
-  for (auto out : outs) {
-    if (out->op->derived_from<tvm::ExternOpNode>()) {
-      continue;
-    }
-    ScheduleOutputForExtern(target, out->op, s);
-  }
-
-  return s;
-}
-
-}  // namespace x86
-}  // namespace topi
-#endif  // TOPI_X86_EXTERN_H_
diff --git a/topi/python/topi/generic/extern.py b/topi/python/topi/generic/extern.py
index 5983aeccfb21..a0601147d5e8 100644
--- a/topi/python/topi/generic/extern.py
+++ b/topi/python/topi/generic/extern.py
@@ -19,7 +19,7 @@
 from __future__ import absolute_import as _abs
 
 import tvm
-from .injective import schedule_injective
+from .. import cpp
 
 @tvm.target.generic_func
 def schedule_extern(outs):
@@ -36,12 +36,5 @@ def schedule_extern(outs):
     sch: Schedule
         The computation schedule for the op.
     """
-    outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
-    s = tvm.create_schedule([x.op for x in outs])
-
-    tvm.schedule.AutoInlineInjective(s)
-    for out in outs:
-        if isinstance(out.op, tvm.tensor.ExternOp):
-            continue
-        schedule_injective(out)
-    return s
+    target = tvm.target.current_target()
+    return cpp.generic.schedule_extern(target, outs)
diff --git a/topi/src/topi.cc b/topi/src/topi.cc
index 91d18d212428..c8171e04cdc8 100644
--- a/topi/src/topi.cc
+++ b/topi/src/topi.cc
@@ -56,7 +56,6 @@
 #include <topi/generic/injective.h>
 
 #include <topi/cuda/dense.h>
-#include <topi/cuda/extern.h>
 #include <topi/cuda/injective.h>
 #include <topi/cuda/normalization.h>
 #include <topi/cuda/pooling.h>
@@ -65,7 +64,6 @@
 #include <topi/x86/bnn.h>
 #include <topi/x86/default.h>
-#include <topi/x86/extern.h>
 #include <topi/x86/injective.h>
 #include <topi/x86/pooling.h>
 
@@ -607,11 +605,6 @@ TVM_REGISTER_GLOBAL("topi.x86.default_schedule")
     }
   });
 
-TVM_REGISTER_GLOBAL("topi.x86.schedule_extern")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
-  *rv = topi::x86::schedule_extern(args[0], args[1]);
-  });
-
 TVM_REGISTER_GLOBAL("topi.x86.schedule_injective")
 .set_body([](TVMArgs args, TVMRetValue *rv) {
   *rv = topi::x86::schedule_injective(args[0], args[1]);
@@ -649,11 +642,6 @@ TVM_REGISTER_GLOBAL("topi.cuda.schedule_dense")
   *rv = topi::cuda::schedule_dense(args[0], args[1]);
   });
 
-TVM_REGISTER_GLOBAL("topi.cuda.schedule_extern")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
-  *rv = topi::cuda::schedule_extern(args[0], args[1]);
-  });
-
 TVM_REGISTER_GLOBAL("topi.cuda.schedule_injective")
 .set_body([](TVMArgs args, TVMRetValue *rv) {
   *rv = topi::cuda::schedule_injective(args[0], args[1]);
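
Patch 05 moves the whole extern schedule into C++: the generic implementation now inlines injective producers and dispatches each non-extern output through the "schedule_injective" GenericFunc. A sketch of the call path from Python, with a toy extern op standing in for a real cudnn/cblas kernel; the packed-function name 'tvm.contrib.my_copy' is made up, and the API is the 0.6-era one:

    import tvm
    from topi import cpp

    A = tvm.placeholder((128, 128), name='A')
    # Toy extern op; 'tvm.contrib.my_copy' is a hypothetical packed func.
    C = tvm.extern((128, 128), [A],
                   lambda ins, outs: tvm.call_packed(
                       'tvm.contrib.my_copy', ins[0], outs[0]),
                   name='C')
    # Injective tail after the extern op.
    D = tvm.compute((128, 128), lambda i, j: C[i, j] + 1, name='D')

    target = tvm.target.create('llvm')
    with target:
        # topi::generic::schedule_extern skips C (an ExternOpNode) and
        # schedules D via the "schedule_injective" GenericFunc.
        s = cpp.generic.schedule_extern(target, [D])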
From 9df90101cbd11b59c6101f6e053118121ca5898c Mon Sep 17 00:00:00 2001
From: Jon Soifer
Date: Mon, 23 Sep 2019 13:56:27 -0700
Subject: [PATCH 06/18] Introduce new generic function schedule_injective_from_existing

---
 topi/include/topi/cuda/injective.h         | 29 ++++++++-------
 topi/include/topi/generic/extern.h         |  3 +-
 topi/include/topi/generic/injective.h      | 15 +++++++-
 topi/include/topi/x86/injective.h          | 32 ++++++++++++-----
 topi/python/topi/arm_cpu/injective.py      | 35 +++++++++++++-----
 topi/python/topi/cuda/conv2d_int8.py       |  6 ++--
 topi/python/topi/cuda/group_conv2d_nchw.py |  6 ++--
 topi/python/topi/cuda/injective.py         | 41 ++++++++++++++--------
 topi/python/topi/cuda/reduction.py         |  6 ++--
 topi/python/topi/cuda/softmax.py           |  4 +--
 topi/python/topi/cuda/sort.py              |  4 +--
 topi/python/topi/cuda/vision.py            |  4 +--
 topi/python/topi/generic/injective.py      | 21 ++++++++++-
 topi/python/topi/hls/injective.py          | 25 +++++++++++--
 topi/python/topi/opengl/injective.py       | 23 +++++++++---
 topi/python/topi/x86/injective.py          | 35 +++++++++++++-----
 topi/src/topi.cc                           | 39 ++++++++++++++++++++
 17 files changed, 249 insertions(+), 79 deletions(-)

diff --git a/topi/include/topi/cuda/injective.h b/topi/include/topi/cuda/injective.h
index e629ae1c3503..663bc1fcbeb2 100644
--- a/topi/include/topi/cuda/injective.h
+++ b/topi/include/topi/cuda/injective.h
@@ -33,21 +33,24 @@ namespace topi {
 using namespace tvm;
 
 namespace cuda {
+
 /*!
-* \brief Schedule a given injective operation.
-*
-* \param target The target to generate a schedule for.
-* \param op The operation representing the injective operation.
-* \param s The schedule to apply this scheduling to
-*/
-inline void ScheduleInjectiveOp(const Target &target, Operation op, Schedule s) {
-  auto x = op.output(0);
-  auto fused = detail::Fuse(s[x], s[x]->op.as<ComputeOpNode>()->axis);
+ * \brief Updates an existing schedule for the given injective ops.
+ *
+ * \param sch The schedule to update.
+ * \param out The tensor representing the injective op.
+ *
+ * \return The updated schedule.
+ */
+inline Schedule schedule_injective_from_existing(Schedule sch, const Tensor& out) {
+  auto fused = detail::Fuse(sch[out], sch[out]->op.as<ComputeOpNode>()->axis);
+  auto target = Target::Current(false);
   auto num_thread = target->max_num_threads;
   IterVar bx, tx;
-  s[x].split(fused, num_thread, &bx, &tx);
-  s[x].bind(bx, thread_axis(Range(), "blockIdx.x"));
-  s[x].bind(tx, thread_axis(Range(), "threadIdx.x"));
+  sch[out].split(fused, num_thread, &bx, &tx);
+  sch[out].bind(bx, thread_axis(Range(), "blockIdx.x"));
+  sch[out].bind(tx, thread_axis(Range(), "threadIdx.x"));
+  return sch;
 }
 
 /*!
@@ -66,7 +69,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& outs) {
   auto s = create_schedule(out_ops);
   tvm::schedule::AutoInlineInjective(s);
   for (auto out : outs) {
-    ScheduleInjectiveOp(target, out->op, s);
+    schedule_injective_from_existing(s, out);
   }
   return s;
 }
diff --git a/topi/include/topi/generic/extern.h b/topi/include/topi/generic/extern.h
index 57daedd5ed85..5c0c392f768d 100644
--- a/topi/include/topi/generic/extern.h
+++ b/topi/include/topi/generic/extern.h
@@ -54,8 +54,7 @@ inline Schedule schedule_extern(const Target& target, Array<Tensor> outs) {
     if (out->op->derived_from<tvm::ExternOpNode>()) {
       continue;
     }
-    Array<Tensor> new_outs = { out };
-    tvm::GenericFunc::Get("schedule_injective")(new_outs);
+    tvm::GenericFunc::Get("schedule_injective_from_existing")(s, out);
   }
 
   return s;
diff --git a/topi/include/topi/generic/injective.h b/topi/include/topi/generic/injective.h
index f26651c2ddb7..fa7df4ccfd46 100644
--- a/topi/include/topi/generic/injective.h
+++ b/topi/include/topi/generic/injective.h
@@ -34,6 +34,19 @@ using namespace tvm;
 
 namespace generic {
 
+/*!
+ * \brief Updates an existing schedule for the given injective ops.
+ *
+ * \param sch The schedule to update.
+ * \param out The tensor representing the injective op.
+ *
+ * \return The updated schedule.
+ */
+inline Schedule schedule_injective_from_existing(Schedule sch, const Tensor& out) {
+  detail::Fuse(sch[out], sch[out]->op.as<ComputeOpNode>()->axis);
+  return sch;
+}
+
 /*!
  * \brief Create a generic schedule for the given injective ops.
 *
@@ -50,7 +63,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& outs) {
   auto s = create_schedule(out_ops);
   tvm::schedule::AutoInlineInjective(s);
   auto x = outs[0];
-  detail::Fuse(s[x], s[x]->op.as<ComputeOpNode>()->axis);
+  schedule_injective_from_existing(s, x);
   return s;
 }
 
diff --git a/topi/include/topi/x86/injective.h b/topi/include/topi/x86/injective.h
index adbe382495b8..7cb79ae322c2 100644
--- a/topi/include/topi/x86/injective.h
+++ b/topi/include/topi/x86/injective.h
@@ -33,6 +33,28 @@ namespace topi {
 using namespace tvm;
 
 namespace x86 {
+
+/*!
+ * \brief Updates an existing schedule for the given injective ops.
+ *
+ * \param sch The schedule to update.
+ * \param out The tensor representing the injective op.
+ *
+ * \return The updated schedule.
+ */
+inline Schedule schedule_injective_from_existing(Schedule sch, const Tensor& out) {
+  auto axis = sch[out]->op.as<ComputeOpNode>()->axis;
+  if (axis.size() == 4) {
+    auto n = axis[0];
+    auto c = axis[1];
+    auto fused = detail::Fuse(sch[out], { n, c });  // for nhwc layout, fuse n and h
+    sch[out].parallel(fused);
+  } else {
+    sch[out].parallel(axis[0]);
+  }
+  return sch;
+}
+
 /*!
  * \brief Create an x86 schedule for the given injective ops.
 *
@@ -50,15 +72,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& outs) {
 
   tvm::schedule::AutoInlineInjective(s);
   auto x = outs[0];
-  auto axis = s[x]->op.as<ComputeOpNode>()->axis;
-  if (axis.size() == 4) {
-    auto n = axis[0];
-    auto c = axis[1];
-    auto fused = detail::Fuse(s[x], { n, c });  // for nhwc layout, fuse n and h
-    s[x].parallel(fused);
-  } else {
-    s[x].parallel(axis[0]);
-  }
+  schedule_injective_from_existing(s, x);
   return s;
 }
 
diff --git a/topi/python/topi/arm_cpu/injective.py b/topi/python/topi/arm_cpu/injective.py
index 028558f69e91..cd14e3a0f058 100644
--- a/topi/python/topi/arm_cpu/injective.py
+++ b/topi/python/topi/arm_cpu/injective.py
@@ -19,6 +19,32 @@
 import tvm
 from .. import generic
 
+@generic.schedule_injective_from_existing.register(["arm_cpu"])
+def schedule_injective_from_existing(sch, out):
+    """Schedule for injective op from existing schedule.
+
+    Parameters
+    ----------
+    sch: Schedule
+        The schedule to update.
+    out: Tensor
+        The tensor representing the injective op.
+
+    Returns
+    -------
+    sch: Schedule
+        The updated schedule.
+    """
+    if len(sch[out].op.axis) >= 4:
+        fused = sch[out].fuse(sch[out].op.axis[0], sch[out].op.axis[1], sch[out].op.axis[2])
+        sch[out].parallel(fused)
+    elif len(sch[out].op.axis) >= 3:
+        fused = sch[out].fuse(sch[out].op.axis[0], sch[out].op.axis[1])
+        sch[out].parallel(fused)
+    elif len(sch[x].op.axis) >= 2:
+        sch[out].parallel(sch[out].op.axis[0])
+    return sch
+
 @generic.schedule_injective.register(["arm_cpu"])
 def schedule_injective(outs):
     """ARM CPU schedule for injective op.
@@ -42,14 +68,7 @@ def schedule_injective(outs):
         (io, ii) = s[x].split(list(s[x].op.axis)[-1], 8)
         s[x].vectorize(ii)
     tvm.schedule.AutoInlineInjective(s)
-    if len(s[x].op.axis) >= 4:
-        fused = s[x].fuse(s[x].op.axis[0], s[x].op.axis[1], s[x].op.axis[2])
-        s[x].parallel(fused)
-    elif len(s[x].op.axis) >= 3:
-        fused = s[x].fuse(s[x].op.axis[0], s[x].op.axis[1])
-        s[x].parallel(fused)
-    elif len(s[x].op.axis) >= 2:
-        s[x].parallel(s[x].op.axis[0])
+    schedule_injective_from_existing(s, x)
     return s
 
 @generic.schedule_concatenate.register(["arm_cpu"])
diff --git a/topi/python/topi/cuda/conv2d_int8.py b/topi/python/topi/cuda/conv2d_int8.py
index 25d49451810d..580cf96b53e8 100644
--- a/topi/python/topi/cuda/conv2d_int8.py
+++ b/topi/python/topi/cuda/conv2d_int8.py
@@ -19,7 +19,7 @@
 
 import tvm
 from tvm import autotvm
-from .injective import _schedule_injective
+from .injective import schedule_injective_from_existing
 from .tensor_intrin import dp4a
 from ..nn.pad import pad
 from ..nn.util import get_pad_tuple
@@ -172,8 +172,8 @@ def schedule_conv2d_NCHWc_int8(cfg, s, output):
     if isinstance(packed_kernel.op, tvm.tensor.ComputeOp) and\
             packed_kernel.name == 'packed_kernel':
         # data and kernel are not pre-computed, schedule layout transform here
-        _schedule_injective(packed_data.op, s)
-        _schedule_injective(packed_kernel.op, s)
+        schedule_injective_from_existing(s, packed_data)
+        schedule_injective_from_existing(s, packed_kernel)
 
     if pad_data != packed_data:
         s[pad_data].compute_inline()
diff --git a/topi/python/topi/cuda/group_conv2d_nchw.py b/topi/python/topi/cuda/group_conv2d_nchw.py
index cd8c823ae8ae..f4bb73470651 100644
--- a/topi/python/topi/cuda/group_conv2d_nchw.py
+++ b/topi/python/topi/cuda/group_conv2d_nchw.py
@@ -19,7 +19,7 @@
 
 import tvm
 from tvm import autotvm
-from .injective import _schedule_injective
+from .injective import schedule_injective_from_existing
 from .tensor_intrin import dp4a
 from ..nn.pad import pad
 from ..nn.util import get_pad_tuple
@@ -201,8 +201,8 @@ def schedule_group_conv2d_NCHWc_int8(cfg, s, output):
     if isinstance(packed_kernel.op, tvm.tensor.ComputeOp) and\
             packed_kernel.name == 'packed_kernel':
         # data and kernel are not pre-computed, schedule layout transform here
-        _schedule_injective(packed_data.op, s)
-        _schedule_injective(packed_kernel.op, s)
+        schedule_injective_from_existing(s, packed_data)
+        schedule_injective_from_existing(s, packed_kernel)
 
     if pad_data != packed_data:
         s[pad_data].compute_inline()
diff --git a/topi/python/topi/cuda/injective.py b/topi/python/topi/cuda/injective.py
index 8a16c5ca2dd4..a6ec85377a13 100644
--- a/topi/python/topi/cuda/injective.py
+++ b/topi/python/topi/cuda/injective.py
@@ -19,33 +19,46 @@
 import tvm
 from .. import generic, util
 
-def _schedule_injective(op, sch):
-    x = op.output(0)
-    fused = sch[x].fuse(*sch[x].op.axis)
+@generic.schedule_injective_from_existing.register(["cuda", "gpu"])
+def schedule_injective_from_existing(sch, out):
+    """Schedule for injective op from existing schedule.
+
+    Parameters
+    ----------
+    sch: Schedule
+        The schedule to update.
+    out: Tensor
+        The tensor representing the injective op.
+
+    Returns
+    -------
+    sch: Schedule
+        The updated schedule.
+ """ + fused = sch[out].fuse(*sch[out].op.axis) num_thread = tvm.target.current_target(allow_none=False).max_num_threads max_block = 256 try: - const_size = util.get_const_int(util.prod(x.shape)) + const_size = util.get_const_int(util.prod(out.shape)) max_block = 256 need_block_split = const_size > max_block * num_thread except ValueError: need_block_split = False if need_block_split: - xo, xi = sch[x].split(fused, factor=num_thread * max_block) - bx, tx = sch[x].split(xi, factor=num_thread) - sch[x].reorder(bx, tx, xo) - sch[x].bind(bx, tvm.thread_axis("blockIdx.x")) - sch[x].bind(tx, tvm.thread_axis("threadIdx.x")) + xo, xi = sch[out].split(fused, factor=num_thread * max_block) + bx, tx = sch[out].split(xi, factor=num_thread) + sch[out].reorder(bx, tx, xo) + sch[out].bind(bx, tvm.thread_axis("blockIdx.x")) + sch[out].bind(tx, tvm.thread_axis("threadIdx.x")) else: - bx, tx = sch[x].split(fused, factor=num_thread) - sch[x].bind(tx, tvm.thread_axis("threadIdx.x")) - sch[x].bind(bx, tvm.thread_axis("blockIdx.x")) + bx, tx = sch[out].split(fused, factor=num_thread) + sch[out].bind(tx, tvm.thread_axis("threadIdx.x")) + sch[out].bind(bx, tvm.thread_axis("blockIdx.x")) return sch - @generic.schedule_injective.register(["cuda", "gpu"]) def schedule_injective(outs): """Schedule for injective op. @@ -66,7 +79,7 @@ def schedule_injective(outs): tvm.schedule.AutoInlineInjective(s) for out in outs: - _schedule_injective(out.op, s) + schedule_injective_from_existing(s, out) return s schedule_elemwise = schedule_injective diff --git a/topi/python/topi/cuda/reduction.py b/topi/python/topi/cuda/reduction.py index 25885315179c..a7a56b90fb1f 100644 --- a/topi/python/topi/cuda/reduction.py +++ b/topi/python/topi/cuda/reduction.py @@ -20,7 +20,7 @@ import tvm from .. import tag from .. import generic -from .injective import _schedule_injective +from .injective import schedule_injective_from_existing def _schedule_reduce(op, sch, is_idx_reduce=False): if is_idx_reduce: @@ -30,7 +30,7 @@ def _schedule_reduce(op, sch, is_idx_reduce=False): data_out = op.output(0) if not sch[data_out].op.reduce_axis: - return _schedule_injective(op, sch) + return schedule_injective_from_existing(sch, op.output(0)) if len(sch[data_out].op.axis) > 0: all_reduce = False @@ -126,7 +126,7 @@ def traverse_after_reduce(operator): """Internal travserse function""" if tag.is_broadcast(operator.tag): if operator not in scheduled_ops: - _schedule_injective(operator, sch) + schedule_injective_from_existing(sch, operator.output(0)) for tensor in operator.input_tensors: traverse_after_reduce(tensor.op) elif operator.tag == 'comm_reduce': diff --git a/topi/python/topi/cuda/softmax.py b/topi/python/topi/cuda/softmax.py index 09b6ef8030ec..9fdac50a79d1 100644 --- a/topi/python/topi/cuda/softmax.py +++ b/topi/python/topi/cuda/softmax.py @@ -18,7 +18,7 @@ """Schedule for softmax operator""" import tvm from .. 
-from .injective import _schedule_injective
+from .injective import schedule_injective_from_existing
 
 @generic.schedule_softmax.register(["cuda", "gpu"])
 def schedule_softmax(outs):
@@ -58,7 +58,7 @@ def schedule_softmax(outs):
             ops.append(exp.op)
 
         for op in ops:
-            s = _schedule_injective(op, s)
+            s = schedule_injective_from_existing(s, op.output(0))
     else:
         num_thread = 64
         block_x = tvm.thread_axis("blockIdx.x")
diff --git a/topi/python/topi/cuda/sort.py b/topi/python/topi/cuda/sort.py
index 1d9148f46278..c45465e31624 100644
--- a/topi/python/topi/cuda/sort.py
+++ b/topi/python/topi/cuda/sort.py
@@ -42,10 +42,10 @@ def _schedule_sort(outs):
     outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
     s = tvm.create_schedule([x.op for x in outs])
     scheduled_ops = []
-    from .injective import _schedule_injective
+    from .injective import schedule_injective_from_existing
     def traverse(op):
         if tag.is_injective(op.tag):
-            _schedule_injective(op, s)
+            schedule_injective_from_existing(s, op.output(0))
         for tensor in op.input_tensors:
             if tensor.op.input_tensors and tensor.op not in scheduled_ops:
                 traverse(tensor.op)
diff --git a/topi/python/topi/cuda/vision.py b/topi/python/topi/cuda/vision.py
index 968e554ac81d..3a90402707ad 100644
--- a/topi/python/topi/cuda/vision.py
+++ b/topi/python/topi/cuda/vision.py
@@ -28,10 +28,10 @@ def _default_schedule(outs):
     outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
     s = tvm.create_schedule([x.op for x in outs])
     scheduled_ops = []
-    from .injective import _schedule_injective
+    from .injective import schedule_injective_from_existing
    def traverse(op):
         if tag.is_broadcast(op.tag) or op.tag in ['bbox_score', 'sorted_bbox']:
-            _schedule_injective(op, s)
+            schedule_injective_from_existing(s, op.output(0))
         for tensor in op.input_tensors:
             if tensor.op.input_tensors and tensor.op not in scheduled_ops:
                 traverse(tensor.op)
diff --git a/topi/python/topi/generic/injective.py b/topi/python/topi/generic/injective.py
index ec1732f394f2..178363dc0d4f 100644
--- a/topi/python/topi/generic/injective.py
+++ b/topi/python/topi/generic/injective.py
@@ -20,6 +20,25 @@
 
 import tvm
 
+@tvm.target.override_native_generic_func("schedule_injective_from_existing")
+def schedule_injective_from_existing(sch, out):
+    """Schedule for injective op from existing schedule.
+
+    Parameters
+    ----------
+    sch: Schedule
+        The schedule to update.
+    out: Tensor
+        The tensor representing the injective op.
+
+    Returns
+    -------
+    sch: Schedule
+        The updated schedule.
+    """
+    sch[out].fuse(s[out].op.axis)
+    return sch
+
 @tvm.target.override_native_generic_func("schedule_injective")
 def schedule_injective(outs):
     """Schedule for injective op.
@@ -42,7 +61,7 @@ def schedule_injective(outs):
     x = outs[0]
     s = tvm.create_schedule([x.op for x in outs])
     tvm.schedule.AutoInlineInjective(s)
-    s[x].fuse(s[x].op.axis)
+    schedule_injective_from_existing(s, x)
     return s
 
 @tvm.target.generic_func
diff --git a/topi/python/topi/hls/injective.py b/topi/python/topi/hls/injective.py
index 5a43306f361b..de584287a90e 100644
--- a/topi/python/topi/hls/injective.py
+++ b/topi/python/topi/hls/injective.py
@@ -19,6 +19,27 @@
 import tvm
 from .. import generic
 
+@generic.schedule_injective_from_existing.register(["hls"])
+def schedule_injective_from_existing(sch, out):
+    """Schedule for injective op from existing schedule.
+
+    Parameters
+    ----------
+    sch: Schedule
+        The schedule to update.
+    out: Tensor
+        The tensor representing the injective op.
+
+    Returns
+    -------
+    sch: Schedule
+        The updated schedule.
+ """ + fused = sch[out].fuse(*sch[out].op.axis) + px, x = sch[out].split(fused, nparts=1) + sch[out].bind(px, tvm.thread_axis("pipeline")) + return sch + @generic.schedule_injective.register(["hls"]) def schedule_injective(outs): """Schedule for injective op. @@ -38,9 +59,7 @@ def schedule_injective(outs): s = tvm.create_schedule([x.op for x in outs]) tvm.schedule.AutoInlineInjective(s) for out in outs: - fused = s[out].fuse(*s[out].op.axis) - px, x = s[out].split(fused, nparts=1) - s[out].bind(px, tvm.thread_axis("pipeline")) + schedule_injective_from_existing(s, out) return s schedule_elemwise = schedule_injective diff --git a/topi/python/topi/opengl/injective.py b/topi/python/topi/opengl/injective.py index 1f3f4942ac95..d3ebc943b962 100644 --- a/topi/python/topi/opengl/injective.py +++ b/topi/python/topi/opengl/injective.py @@ -19,11 +19,24 @@ import tvm from .. import generic -def _schedule_injective(op, sch): - x = op.output(0) - sch[x].opengl() - return sch +@generic.schedule_injective_from_existing.register(["opengl"]) +def schedule_injective_from_existing(sch, out): + """Schedule for injective op from existing schedule. + + Parameters + ---------- + sch: Schedule + The schedule to update. + out: Tensor + The tensor representing the injective op. + Returns + ------- + sch: Schedule + The updated schedule. + """ + sch[out].opengl() + return sch @generic.schedule_injective.register(["opengl"]) def schedule_injective(outs): @@ -45,7 +58,7 @@ def schedule_injective(outs): tvm.schedule.AutoInlineInjective(s) for out in outs: - _schedule_injective(out.op, s) + schedule_injective_from_existing(s, out) return s schedule_elemwise = schedule_injective diff --git a/topi/python/topi/x86/injective.py b/topi/python/topi/x86/injective.py index 37080e03d8e4..5bcb17921b79 100644 --- a/topi/python/topi/x86/injective.py +++ b/topi/python/topi/x86/injective.py @@ -20,6 +20,32 @@ import tvm from .. import generic +@generic.schedule_injective_from_existing.register(["cpu"]) +def schedule_injective_from_existing(sch, out): + """Schedule for injective op from existing schedule. + + Parameters + ---------- + sch: Schedule + The schedule to update. + out: Tensor + The tensor representing the injective op. + + Returns + ------- + sch: Schedule + The updated schedule. + """ + if len(sch[out].op.axis) >= 5: + fused = sch[out].fuse(sch[out].op.axis[0], sch[out].op.axis[1], sch[out].op.axis[2]) + sch[out].parallel(fused) + elif len(sch[out].op.axis) >= 3: + fused = sch[out].fuse(sch[out].op.axis[0], sch[out].op.axis[1]) + sch[out].parallel(fused) + elif len(sch[out].op.axis) >= 1: + sch[out].parallel(sch[out].op.axis[0]) + return sch + @generic.schedule_injective.register(["cpu"]) def schedule_injective(outs): """X86 schedule for injective op. 
@@ -39,14 +65,7 @@ def schedule_injective(outs):
     x = outs[0]
     s = tvm.create_schedule([x.op for x in outs])
     tvm.schedule.AutoInlineInjective(s)
-    if len(s[x].op.axis) >= 5:
-        fused = s[x].fuse(s[x].op.axis[0], s[x].op.axis[1], s[x].op.axis[2])
-        s[x].parallel(fused)
-    elif len(s[x].op.axis) >= 3:
-        fused = s[x].fuse(s[x].op.axis[0], s[x].op.axis[1])
-        s[x].parallel(fused)
-    elif len(s[x].op.axis) >= 1:
-        s[x].parallel(s[x].op.axis[0])
+    schedule_injective_from_existing(s, x)
     return s
 
 @generic.schedule_concatenate.register(["cpu"])
diff --git a/topi/src/topi.cc b/topi/src/topi.cc
index c8171e04cdc8..7114f4d878a8 100644
--- a/topi/src/topi.cc
+++ b/topi/src/topi.cc
@@ -585,6 +585,11 @@ TVM_REGISTER_GLOBAL("topi.generic.schedule_injective")
   *rv = topi::generic::schedule_injective(args[0], args[1]);
   });
 
+TVM_REGISTER_GLOBAL("topi.generic.schedule_injective_from_existing")
+.set_body([](TVMArgs args, TVMRetValue *rv) {
+  *rv = topi::generic::schedule_injective_from_existing(args[0], args[1]);
+  });
+
 /* x86 schedules */
 TVM_REGISTER_GLOBAL("topi.x86.schedule_binarize_pack")
 .set_body([](TVMArgs args, TVMRetValue *rv) {
@@ -610,6 +615,11 @@ TVM_REGISTER_GLOBAL("topi.x86.schedule_injective")
   *rv = topi::x86::schedule_injective(args[0], args[1]);
   });
 
+TVM_REGISTER_GLOBAL("topi.x86.schedule_injective_from_existing")
+.set_body([](TVMArgs args, TVMRetValue *rv) {
+  *rv = topi::x86::schedule_injective_from_existing(args[0], args[1]);
+  });
+
 /* ROCm schedules */
 TVM_REGISTER_GLOBAL("topi.rocm.dense_cuda")
 .set_body([](TVMArgs args, TVMRetValue *rv) {
@@ -647,6 +657,11 @@ TVM_REGISTER_GLOBAL("topi.cuda.schedule_injective")
   *rv = topi::cuda::schedule_injective(args[0], args[1]);
   });
 
+TVM_REGISTER_GLOBAL("topi.cuda.schedule_injective_from_existing")
+.set_body([](TVMArgs args, TVMRetValue *rv) {
+  *rv = topi::cuda::schedule_injective_from_existing(args[0], args[1]);
+  });
+
 TVM_REGISTER_GLOBAL("topi.cuda.schedule_pool")
 .set_body([](TVMArgs args, TVMRetValue *rv) {
   *rv = topi::cuda::schedule_pool(args[0], args[1]);
@@ -746,6 +761,30 @@ TVM_REGISTER_GENERIC_FUNC(schedule_binary_dense)
 .set_default(WrapSchedule(topi::generic::default_schedule))
 .register_func({ "cpu" }, WrapSchedule(topi::x86::schedule_binary_dense));
 
+/*! \brief Builder function for instantiating schedules from existing schedules. */
+using FTVMScheduleFromExistingBuilder = std::function<
+  tvm::Schedule(tvm::Schedule sch, const tvm::Tensor& out)>;
+
+/*!
+ * \brief Helper function for registering generic functions matching the
+ * FTVMScheduleFromExistingBuilder signature. The schedule builder function is wrapped
+ * with a PackedFunc suitable for passing to a tvm::GenericFunc.
+ *
+ * \param builder The schedule builder to wrap.
+ *
+ * \return The wrapped schedule builder
+ */
+inline PackedFunc WrapScheduleFromExisting(FTVMScheduleFromExistingBuilder builder) {
+  return PackedFunc([builder](TVMArgs args, TVMRetValue* ret) {
+    *ret = builder(args[0], args[1]);
+  });
+}
+
+TVM_REGISTER_GENERIC_FUNC(schedule_injective_from_existing)
+.set_default(WrapScheduleFromExisting(topi::generic::schedule_injective_from_existing))
+.register_func({ "cpu" }, WrapScheduleFromExisting(topi::x86::schedule_injective_from_existing))
+.register_func({ "cuda", "gpu" }, WrapScheduleFromExisting(topi::cuda::schedule_injective_from_existing));
+
 /*! \brief Builder function for instantiating dense ops. */
 using FTVMDenseOpBuilder = std::function<tvm::Tensor(const Target& target,
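
The new hook introduced in patch 06 is a plain generic function, so a backend can override it exactly the way the in-tree targets do above. A minimal sketch; the "mytarget" key is illustrative, not a real registered target:

    import tvm
    from topi import generic

    @generic.schedule_injective_from_existing.register(["mytarget"])
    def schedule_injective_from_existing(sch, out):
        """Update the existing schedule sch for the injective output out."""
        fused = sch[out].fuse(*sch[out].op.axis)
        sch[out].parallel(fused)
        return sch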
From: Jon Soifer
Date: Mon, 23 Sep 2019 15:47:18 -0700
Subject: [PATCH 07/18] Fix

---
 topi/include/topi/cuda/injective.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/topi/include/topi/cuda/injective.h b/topi/include/topi/cuda/injective.h
index 663bc1fcbeb2..5f75bd71e581 100644
--- a/topi/include/topi/cuda/injective.h
+++ b/topi/include/topi/cuda/injective.h
@@ -44,7 +44,7 @@ namespace cuda {
  */
 inline Schedule schedule_injective_from_existing(Schedule sch, const Tensor& out) {
   auto fused = detail::Fuse(sch[out], sch[out]->op.as<ComputeOpNode>()->axis);
-  auto target = Target::Current(false);
+  auto target = Target::Current();
   auto num_thread = target->max_num_threads;
   IterVar bx, tx;
   sch[out].split(fused, num_thread, &bx, &tx);

From 09d5f0c306f699945727021e02a3687970096eb7 Mon Sep 17 00:00:00 2001
From: Jon Soifer
Date: Mon, 23 Sep 2019 16:53:28 -0700
Subject: [PATCH 08/18] Fix

---
 topi/include/topi/cuda/injective.h | 21 +--------------------
 topi/include/topi/x86/injective.h  | 23 +----------------------
 topi/src/topi.cc                   | 14 +-------------
 3 files changed, 3 insertions(+), 55 deletions(-)

diff --git a/topi/include/topi/cuda/injective.h b/topi/include/topi/cuda/injective.h
index 5f75bd71e581..584779525e26 100644
--- a/topi/include/topi/cuda/injective.h
+++ b/topi/include/topi/cuda/injective.h
@@ -34,25 +34,6 @@ using namespace tvm;
 
 namespace cuda {
 
-/*!
- * \brief Updates an existing schedule for the given injective ops.
- *
- * \param sch The schedule to update.
- * \param out The tensor representing the injective op.
- *
- * \return The updated schedule.
- */
-inline Schedule schedule_injective_from_existing(Schedule sch, const Tensor& out) {
-  auto fused = detail::Fuse(sch[out], sch[out]->op.as<ComputeOpNode>()->axis);
-  auto target = Target::Current();
-  auto num_thread = target->max_num_threads;
-  IterVar bx, tx;
-  sch[out].split(fused, num_thread, &bx, &tx);
-  sch[out].bind(bx, thread_axis(Range(), "blockIdx.x"));
-  sch[out].bind(tx, thread_axis(Range(), "threadIdx.x"));
-  return sch;
-}
-
 /*!
  * \brief Create a CUDA schedule for the given output tensors.
  *
@@ -69,7 +50,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& outs) {
   auto s = create_schedule(out_ops);
   tvm::schedule::AutoInlineInjective(s);
   for (auto out : outs) {
-    schedule_injective_from_existing(s, out);
+    tvm::GenericFunc::Get("schedule_injective_from_existing")(s, out);
   }
   return s;
 }
diff --git a/topi/include/topi/x86/injective.h b/topi/include/topi/x86/injective.h
index 7cb79ae322c2..a762714966cc 100644
--- a/topi/include/topi/x86/injective.h
+++ b/topi/include/topi/x86/injective.h
@@ -34,27 +34,6 @@ using namespace tvm;
 
 namespace x86 {
 
-/*!
- * \brief Updates an existing schedule for the given injective ops.
- *
- * \param sch The schedule to update.
- * \param out The tensor representing the injective op.
- *
- * \return The updated schedule.
- */
-inline Schedule schedule_injective_from_existing(Schedule sch, const Tensor& out) {
-  auto axis = sch[out]->op.as<ComputeOpNode>()->axis;
-  if (axis.size() == 4) {
-    auto n = axis[0];
-    auto c = axis[1];
-    auto fused = detail::Fuse(sch[out], { n, c });  // for nhwc layout, fuse n and h
-    sch[out].parallel(fused);
-  } else {
-    sch[out].parallel(axis[0]);
-  }
-  return sch;
-}
-
 /*!
  * \brief Create an x86 schedule for the given injective ops.
 *
@@ -72,7 +51,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& outs) {
 
   tvm::schedule::AutoInlineInjective(s);
   auto x = outs[0];
-  schedule_injective_from_existing(s, x);
+  tvm::GenericFunc::Get("schedule_injective_from_existing")(s, x);
   return s;
 }
diff --git a/topi/src/topi.cc b/topi/src/topi.cc
index 7114f4d878a8..d5fb969ad780 100644
--- a/topi/src/topi.cc
+++ b/topi/src/topi.cc
@@ -615,11 +615,6 @@ TVM_REGISTER_GLOBAL("topi.x86.schedule_injective")
   *rv = topi::x86::schedule_injective(args[0], args[1]);
   });
 
-TVM_REGISTER_GLOBAL("topi.x86.schedule_injective_from_existing")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
-  *rv = topi::x86::schedule_injective_from_existing(args[0], args[1]);
-  });
-
 /* ROCm schedules */
 TVM_REGISTER_GLOBAL("topi.rocm.dense_cuda")
 .set_body([](TVMArgs args, TVMRetValue *rv) {
@@ -657,11 +652,6 @@ TVM_REGISTER_GLOBAL("topi.cuda.schedule_injective")
   *rv = topi::cuda::schedule_injective(args[0], args[1]);
   });
 
-TVM_REGISTER_GLOBAL("topi.cuda.schedule_injective_from_existing")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
-  *rv = topi::cuda::schedule_injective_from_existing(args[0], args[1]);
-  });
-
 TVM_REGISTER_GLOBAL("topi.cuda.schedule_pool")
 .set_body([](TVMArgs args, TVMRetValue *rv) {
   *rv = topi::cuda::schedule_pool(args[0], args[1]);
@@ -781,9 +771,7 @@ inline PackedFunc WrapScheduleFromExisting(FTVMScheduleFromExistingBuilder build
 }
 
 TVM_REGISTER_GENERIC_FUNC(schedule_injective_from_existing)
-.set_default(WrapScheduleFromExisting(topi::generic::schedule_injective_from_existing))
-.register_func({ "cpu" }, WrapScheduleFromExisting(topi::x86::schedule_injective_from_existing))
-.register_func({ "cuda", "gpu" }, WrapScheduleFromExisting(topi::cuda::schedule_injective_from_existing));
+.set_default(WrapScheduleFromExisting(topi::generic::schedule_injective_from_existing));
 
 /*! \brief Builder function for instantiating dense ops. */
 using FTVMDenseOpBuilder = std::function<tvm::Tensor(const Target& target,

From: Jon Soifer
Date: Mon, 23 Sep 2019 17:19:39 -0700
Subject: [PATCH 09/18] Add back to C++

---
 topi/include/topi/cuda/injective.h    | 20 +++++++++++++++++++-
 topi/include/topi/generic/injective.h |  5 +++--
 topi/include/topi/x86/injective.h     | 23 ++++++++++++++++++++++-
 topi/src/topi.cc                      | 20 ++++++++++++++++----
 4 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/topi/include/topi/cuda/injective.h b/topi/include/topi/cuda/injective.h
index 584779525e26..885acdef230d 100644
--- a/topi/include/topi/cuda/injective.h
+++ b/topi/include/topi/cuda/injective.h
@@ -34,6 +34,24 @@ using namespace tvm;
 
 namespace cuda {
 
+/*!
+ * \brief Updates an existing schedule for the given injective ops.
+ *
+ * \param sch The schedule to update.
+ * \param out The tensor representing the injective op.
+ *
+ * \return The updated schedule.
+ */
+inline Schedule schedule_injective_from_existing(const Target& target, Schedule sch, const Tensor& out) {
+  auto fused = detail::Fuse(sch[out], sch[out]->op.as<ComputeOpNode>()->axis);
+  auto num_thread = target->max_num_threads;
+  IterVar bx, tx;
+  sch[out].split(fused, num_thread, &bx, &tx);
+  sch[out].bind(bx, thread_axis(Range(), "blockIdx.x"));
+  sch[out].bind(tx, thread_axis(Range(), "threadIdx.x"));
+  return sch;
+}
+
 /*!
  * \brief Create a CUDA schedule for the given output tensors.
 *
@@ -50,7 +68,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& outs) {
   auto s = create_schedule(out_ops);
   tvm::schedule::AutoInlineInjective(s);
   for (auto out : outs) {
-    tvm::GenericFunc::Get("schedule_injective_from_existing")(s, out);
+    tvm::GenericFunc::Get("schedule_injective_from_existing")(target, s, out);
   }
   return s;
 }
diff --git a/topi/include/topi/generic/injective.h b/topi/include/topi/generic/injective.h
index fa7df4ccfd46..2cf48b54c294 100644
--- a/topi/include/topi/generic/injective.h
+++ b/topi/include/topi/generic/injective.h
@@ -37,12 +37,13 @@ namespace generic {
 /*!
  * \brief Updates an existing schedule for the given injective ops.
  *
+ * \param target The target to update the schedule for.
  * \param sch The schedule to update.
  * \param out The tensor representing the injective op.
  *
  * \return The updated schedule.
  */
-inline Schedule schedule_injective_from_existing(Schedule sch, const Tensor& out) {
+inline Schedule schedule_injective_from_existing(const Target& target, Schedule sch, const Tensor& out) {
   detail::Fuse(sch[out], sch[out]->op.as<ComputeOpNode>()->axis);
   return sch;
 }
@@ -63,7 +64,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& outs) {
   auto s = create_schedule(out_ops);
   tvm::schedule::AutoInlineInjective(s);
   auto x = outs[0];
-  schedule_injective_from_existing(s, x);
+  tvm::GenericFunc::Get("schedule_injective_from_existing")(target, s, x);
   return s;
 }
diff --git a/topi/include/topi/x86/injective.h b/topi/include/topi/x86/injective.h
index a762714966cc..614ca8b9ed29 100644
--- a/topi/include/topi/x86/injective.h
+++ b/topi/include/topi/x86/injective.h
@@ -34,6 +34,27 @@ using namespace tvm;
 
 namespace x86 {
 
+/*!
+ * \brief Updates an existing schedule for the given injective ops.
+ *
+ * \param sch The schedule to update.
+ * \param out The tensor representing the injective op.
+ *
+ * \return The updated schedule.
+ */
+inline Schedule schedule_injective_from_existing(const Target& target, Schedule sch, const Tensor& out) {
+  auto axis = sch[out]->op.as<ComputeOpNode>()->axis;
+  if (axis.size() == 4) {
+    auto n = axis[0];
+    auto c = axis[1];
+    auto fused = detail::Fuse(sch[out], { n, c });  // for nhwc layout, fuse n and h
+    sch[out].parallel(fused);
+  } else {
+    sch[out].parallel(axis[0]);
+  }
+  return sch;
+}
+
 /*!
  * \brief Create an x86 schedule for the given injective ops.
 *
@@ -51,7 +72,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& outs) {
 
   tvm::schedule::AutoInlineInjective(s);
   auto x = outs[0];
-  tvm::GenericFunc::Get("schedule_injective_from_existing")(s, x);
+  tvm::GenericFunc::Get("schedule_injective_from_existing")(target, s, x);
   return s;
 }
diff --git a/topi/src/topi.cc b/topi/src/topi.cc
index d5fb969ad780..b7bea8dd1717 100644
--- a/topi/src/topi.cc
+++ b/topi/src/topi.cc
@@ -587,7 +587,7 @@ TVM_REGISTER_GLOBAL("topi.generic.schedule_injective")
 
 TVM_REGISTER_GLOBAL("topi.generic.schedule_injective_from_existing")
 .set_body([](TVMArgs args, TVMRetValue *rv) {
-  *rv = topi::generic::schedule_injective_from_existing(args[0], args[1]);
+  *rv = topi::generic::schedule_injective_from_existing(args[0], args[1], args[2]);
   });
 
 /* x86 schedules */
@@ -615,6 +615,11 @@ TVM_REGISTER_GLOBAL("topi.x86.schedule_injective")
   *rv = topi::x86::schedule_injective(args[0], args[1]);
   });
 
+TVM_REGISTER_GLOBAL("topi.x86.schedule_injective_from_existing")
+.set_body([](TVMArgs args, TVMRetValue *rv) {
+  *rv = topi::x86::schedule_injective_from_existing(args[0], args[1], args[2]);
+  });
+
 /* ROCm schedules */
 TVM_REGISTER_GLOBAL("topi.rocm.dense_cuda")
 .set_body([](TVMArgs args, TVMRetValue *rv) {
@@ -652,6 +657,11 @@ TVM_REGISTER_GLOBAL("topi.cuda.schedule_injective")
   *rv = topi::cuda::schedule_injective(args[0], args[1]);
   });
 
+TVM_REGISTER_GLOBAL("topi.cuda.schedule_injective_from_existing")
+.set_body([](TVMArgs args, TVMRetValue *rv) {
+  *rv = topi::cuda::schedule_injective_from_existing(args[0], args[1], args[2]);
+  });
+
 TVM_REGISTER_GLOBAL("topi.cuda.schedule_pool")
 .set_body([](TVMArgs args, TVMRetValue *rv) {
   *rv = topi::cuda::schedule_pool(args[0], args[1]);
@@ -763,7 +763,7 @@ TVM_REGISTER_GENERIC_FUNC(schedule_binary_dense)
 
 /*! \brief Builder function for instantiating schedules from existing schedules. */
 using FTVMScheduleFromExistingBuilder = std::function<
-  tvm::Schedule(tvm::Schedule sch, const tvm::Tensor& out)>;
+  tvm::Schedule(const Target& target, tvm::Schedule sch, const tvm::Tensor& out)>;
 
 /*!
@@ -766,12 +776,14 @@ using FTVMScheduleFromExistingBuilder = std::function<
  */
 inline PackedFunc WrapScheduleFromExisting(FTVMScheduleFromExistingBuilder builder) {
   return PackedFunc([builder](TVMArgs args, TVMRetValue* ret) {
-    *ret = builder(args[0], args[1]);
+    *ret = builder(args[0], args[1], args[2]);
   });
 }
 
 TVM_REGISTER_GENERIC_FUNC(schedule_injective_from_existing)
-.set_default(WrapScheduleFromExisting(topi::generic::schedule_injective_from_existing));
+.set_default(WrapScheduleFromExisting(topi::generic::schedule_injective_from_existing))
+.register_func({ "cpu" }, WrapScheduleFromExisting(topi::x86::schedule_injective_from_existing))
+.register_func({ "cuda", "gpu" }, WrapScheduleFromExisting(topi::cuda::schedule_injective_from_existing));
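
After patch 09 the C++ implementations take the target explicitly and are each exposed over the FFI, so the three-argument form can be driven straight from Python. A sketch against the registrations above (patch 13 later drops the target argument again):

    import tvm
    from topi import cpp

    A = tvm.placeholder((64, 64), name='A')
    B = tvm.compute((64, 64), lambda i, j: A[i, j] + 1, name='B')

    target = tvm.target.create('llvm')
    s = tvm.create_schedule([B.op])
    # topi.x86.schedule_injective_from_existing(target, sch, out)
    s = cpp.x86.schedule_injective_from_existing(target, s, B)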
From: Jon Soifer
Date: Mon, 23 Sep 2019 17:22:31 -0700
Subject: [PATCH 10/18] Fix style

---
 topi/include/topi/cuda/injective.h    | 5 ++++-
 topi/include/topi/generic/injective.h | 4 +++-
 topi/include/topi/x86/injective.h     | 5 ++++-
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/topi/include/topi/cuda/injective.h b/topi/include/topi/cuda/injective.h
index 885acdef230d..5c697692fca3 100644
--- a/topi/include/topi/cuda/injective.h
+++ b/topi/include/topi/cuda/injective.h
@@ -37,12 +37,15 @@ namespace cuda {
 /*!
  * \brief Updates an existing schedule for the given injective ops.
  *
+ * \param target The target to update the schedule for.
  * \param sch The schedule to update.
  * \param out The tensor representing the injective op.
  *
  * \return The updated schedule.
  */
-inline Schedule schedule_injective_from_existing(const Target& target, Schedule sch, const Tensor& out) {
+inline Schedule schedule_injective_from_existing(const Target& target,
+                                                 Schedule sch,
+                                                 const Tensor& out) {
   auto fused = detail::Fuse(sch[out], sch[out]->op.as<ComputeOpNode>()->axis);
   auto num_thread = target->max_num_threads;
   IterVar bx, tx;
diff --git a/topi/include/topi/generic/injective.h b/topi/include/topi/generic/injective.h
index 2cf48b54c294..ccb714f512c3 100644
--- a/topi/include/topi/generic/injective.h
+++ b/topi/include/topi/generic/injective.h
@@ -43,7 +43,9 @@ namespace generic {
  *
  * \return The updated schedule.
  */
-inline Schedule schedule_injective_from_existing(const Target& target, Schedule sch, const Tensor& out) {
+inline Schedule schedule_injective_from_existing(const Target& target,
+                                                 Schedule sch,
+                                                 const Tensor& out) {
   detail::Fuse(sch[out], sch[out]->op.as<ComputeOpNode>()->axis);
   return sch;
 }
diff --git a/topi/include/topi/x86/injective.h b/topi/include/topi/x86/injective.h
index 614ca8b9ed29..98acb7f75c2a 100644
--- a/topi/include/topi/x86/injective.h
+++ b/topi/include/topi/x86/injective.h
@@ -37,12 +37,15 @@ namespace x86 {
 /*!
  * \brief Updates an existing schedule for the given injective ops.
  *
+ * \param target The target to update the schedule for.
  * \param sch The schedule to update.
  * \param out The tensor representing the injective op.
  *
  * \return The updated schedule.
  */
-inline Schedule schedule_injective_from_existing(const Target& target, Schedule sch, const Tensor& out) {
+inline Schedule schedule_injective_from_existing(const Target& target,
+                                                 Schedule sch,
+                                                 const Tensor& out) {
   auto axis = sch[out]->op.as<ComputeOpNode>()->axis;
   if (axis.size() == 4) {
     auto n = axis[0];

From 7da8ea25e2158a424b6fc91833385f3b90897a8b Mon Sep 17 00:00:00 2001
From: Jon
Date: Tue, 24 Sep 2019 09:46:14 -0700
Subject: [PATCH 11/18] Injective schedule calls local schedule_injective_from_existing

---
 topi/include/topi/cuda/injective.h    | 2 +-
 topi/include/topi/generic/injective.h | 2 +-
 topi/include/topi/x86/injective.h     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/topi/include/topi/cuda/injective.h b/topi/include/topi/cuda/injective.h
index 5c697692fca3..491bbf4b1c40 100644
--- a/topi/include/topi/cuda/injective.h
+++ b/topi/include/topi/cuda/injective.h
@@ -71,7 +71,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& outs) {
   auto s = create_schedule(out_ops);
   tvm::schedule::AutoInlineInjective(s);
   for (auto out : outs) {
-    tvm::GenericFunc::Get("schedule_injective_from_existing")(target, s, out);
+    schedule_injective_from_existing(target, s, out);
   }
   return s;
 }
diff --git a/topi/include/topi/generic/injective.h b/topi/include/topi/generic/injective.h
index ccb714f512c3..6fe46b215c26 100644
--- a/topi/include/topi/generic/injective.h
+++ b/topi/include/topi/generic/injective.h
@@ -66,7 +66,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& outs) {
   auto s = create_schedule(out_ops);
   tvm::schedule::AutoInlineInjective(s);
   auto x = outs[0];
-  tvm::GenericFunc::Get("schedule_injective_from_existing")(target, s, x);
+  schedule_injective_from_existing(target, s, x);
   return s;
 }
diff --git a/topi/include/topi/x86/injective.h b/topi/include/topi/x86/injective.h
index 98acb7f75c2a..8efe7ff7d86e 100644
From 7da8ea25e2158a424b6fc91833385f3b90897a8b Mon Sep 17 00:00:00 2001
From: Jon
Date: Tue, 24 Sep 2019 09:46:14 -0700
Subject: [PATCH 11/18] Injective schedule calls local schedule_injective_from_existing

---
 topi/include/topi/cuda/injective.h    | 2 +-
 topi/include/topi/generic/injective.h | 2 +-
 topi/include/topi/x86/injective.h     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/topi/include/topi/cuda/injective.h b/topi/include/topi/cuda/injective.h
index 5c697692fca3..491bbf4b1c40 100644
--- a/topi/include/topi/cuda/injective.h
+++ b/topi/include/topi/cuda/injective.h
@@ -71,7 +71,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& ou
   auto s = create_schedule(out_ops);
   tvm::schedule::AutoInlineInjective(s);
   for (auto out : outs) {
-    tvm::GenericFunc::Get("schedule_injective_from_existing")(target, s, out);
+    schedule_injective_from_existing(target, s, out);
   }
   return s;
 }
diff --git a/topi/include/topi/generic/injective.h b/topi/include/topi/generic/injective.h
index ccb714f512c3..6fe46b215c26 100644
--- a/topi/include/topi/generic/injective.h
+++ b/topi/include/topi/generic/injective.h
@@ -66,7 +66,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& ou
   auto s = create_schedule(out_ops);
   tvm::schedule::AutoInlineInjective(s);
   auto x = outs[0];
-  tvm::GenericFunc::Get("schedule_injective_from_existing")(target, s, x);
+  schedule_injective_from_existing(target, s, x);
   return s;
 }
diff --git a/topi/include/topi/x86/injective.h b/topi/include/topi/x86/injective.h
index 98acb7f75c2a..8efe7ff7d86e 100644
--- a/topi/include/topi/x86/injective.h
+++ b/topi/include/topi/x86/injective.h
@@ -75,7 +75,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& ou
   tvm::schedule::AutoInlineInjective(s);
   auto x = outs[0];
-  tvm::GenericFunc::Get("schedule_injective_from_existing")(target, s, x);
+  schedule_injective_from_existing(target, s, x);
   return s;
 }
From decbb5e960488599fd4ed54efef0181fb8f9b677 Mon Sep 17 00:00:00 2001
From: Jon Soifer
Date: Tue, 24 Sep 2019 13:08:19 -0700
Subject: [PATCH 12/18] Fix

---
 topi/python/topi/arm_cpu/injective.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/topi/python/topi/arm_cpu/injective.py b/topi/python/topi/arm_cpu/injective.py
index cd14e3a0f058..3727754988f3 100644
--- a/topi/python/topi/arm_cpu/injective.py
+++ b/topi/python/topi/arm_cpu/injective.py
@@ -41,7 +41,7 @@ def schedule_injective_from_existing(sch, out):
     elif len(sch[out].op.axis) >= 3:
         fused = sch[out].fuse(sch[out].op.axis[0], sch[out].op.axis[1])
         sch[out].parallel(fused)
-    elif len(sch[x].op.axis) >= 2:
+    elif len(sch[out].op.axis) >= 2:
         sch[out].parallel(sch[out].op.axis[0])
     return sch
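
The arm_cpu fix above corrects a stale variable name (sch[x] instead of sch[out]) that would raise a NameError whenever a 2-D injective op reached that branch. A small sketch that exercises the repaired path; the shapes are illustrative assumptions:

    import tvm
    from topi.arm_cpu.injective import schedule_injective_from_existing

    A = tvm.placeholder((64, 64), name="A")
    B = tvm.compute((64, 64), lambda i, j: A[i, j] * 2.0, name="B")
    s = tvm.create_schedule(B.op)

    # Two axes, so the previously broken `>= 2` branch runs and
    # parallelizes the outer axis.
    s = schedule_injective_from_existing(s, B)
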
From 847bb5de1f98079b63bdebb2619d286f2178ed08 Mon Sep 17 00:00:00 2001
From: Jon Soifer
Date: Wed, 25 Sep 2019 09:07:33 -0700
Subject: [PATCH 13/18] Remove target arg from schedule_injective_from_existing

---
 topi/include/topi/cuda/injective.h    |  7 +++----
 topi/include/topi/generic/injective.h |  6 ++----
 topi/include/topi/x86/injective.h     |  6 ++----
 topi/src/topi.cc                      | 10 +++++-----
 4 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/topi/include/topi/cuda/injective.h b/topi/include/topi/cuda/injective.h
index 491bbf4b1c40..888b41b37be4 100644
--- a/topi/include/topi/cuda/injective.h
+++ b/topi/include/topi/cuda/injective.h
@@ -43,10 +43,9 @@ namespace cuda {
  *
  * \return The updated schedule.
  */
-inline Schedule schedule_injective_from_existing(const Target& target,
-                                                 Schedule sch,
-                                                 const Tensor& out) {
+inline Schedule schedule_injective_from_existing(Schedule sch, const Tensor& out) {
   auto fused = detail::Fuse(sch[out], sch[out]->op.as<ComputeOpNode>()->axis);
+  auto target = Target::Current(false);
   auto num_thread = target->max_num_threads;
   IterVar bx, tx;
   sch[out].split(fused, num_thread, &bx, &tx);
@@ -71,7 +70,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& ou
   auto s = create_schedule(out_ops);
   tvm::schedule::AutoInlineInjective(s);
   for (auto out : outs) {
-    schedule_injective_from_existing(target, s, out);
+    schedule_injective_from_existing(s, out);
   }
   return s;
 }
diff --git a/topi/include/topi/generic/injective.h b/topi/include/topi/generic/injective.h
index 6fe46b215c26..24810edee07c 100644
--- a/topi/include/topi/generic/injective.h
+++ b/topi/include/topi/generic/injective.h
@@ -43,9 +43,7 @@ namespace generic {
  *
  * \return The updated schedule.
  */
-inline Schedule schedule_injective_from_existing(const Target& target,
-                                                 Schedule sch,
-                                                 const Tensor& out) {
+inline Schedule schedule_injective_from_existing(Schedule sch, const Tensor& out) {
   detail::Fuse(sch[out], sch[out]->op.as<ComputeOpNode>()->axis);
   return sch;
 }
@@ -66,7 +64,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& ou
   auto s = create_schedule(out_ops);
   tvm::schedule::AutoInlineInjective(s);
   auto x = outs[0];
-  schedule_injective_from_existing(target, s, x);
+  schedule_injective_from_existing(s, x);
   return s;
 }
diff --git a/topi/include/topi/x86/injective.h b/topi/include/topi/x86/injective.h
index 8efe7ff7d86e..85b18aaed195 100644
--- a/topi/include/topi/x86/injective.h
+++ b/topi/include/topi/x86/injective.h
@@ -43,9 +43,7 @@ namespace x86 {
  *
  * \return The updated schedule.
  */
-inline Schedule schedule_injective_from_existing(const Target& target,
-                                                 Schedule sch,
-                                                 const Tensor& out) {
+inline Schedule schedule_injective_from_existing(Schedule sch, const Tensor& out) {
   auto axis = sch[out]->op.as<ComputeOpNode>()->axis;
   if (axis.size() == 4) {
     auto n = axis[0];
@@ -75,7 +73,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& ou
   tvm::schedule::AutoInlineInjective(s);
   auto x = outs[0];
-  schedule_injective_from_existing(target, s, x);
+  schedule_injective_from_existing(s, x);
   return s;
 }
diff --git a/topi/src/topi.cc b/topi/src/topi.cc
index b7bea8dd1717..71cc6f8858db 100644
--- a/topi/src/topi.cc
+++ b/topi/src/topi.cc
@@ -587,7 +587,7 @@ TVM_REGISTER_GLOBAL("topi.generic.schedule_injective")
 TVM_REGISTER_GLOBAL("topi.generic.schedule_injective_from_existing")
 .set_body([](TVMArgs args, TVMRetValue *rv) {
-  *rv = topi::generic::schedule_injective_from_existing(args[0], args[1], args[2]);
+  *rv = topi::generic::schedule_injective_from_existing(args[0], args[1]);
   });

 /* x86 schedules */
@@ -617,7 +617,7 @@ TVM_REGISTER_GLOBAL("topi.x86.schedule_injective")
 TVM_REGISTER_GLOBAL("topi.x86.schedule_injective_from_existing")
 .set_body([](TVMArgs args, TVMRetValue *rv) {
-  *rv = topi::x86::schedule_injective_from_existing(args[0], args[1], args[2]);
+  *rv = topi::x86::schedule_injective_from_existing(args[0], args[1]);
   });

 /* ROCm schedules */
@@ -659,7 +659,7 @@ TVM_REGISTER_GLOBAL("topi.cuda.schedule_injective")
 TVM_REGISTER_GLOBAL("topi.cuda.schedule_injective_from_existing")
 .set_body([](TVMArgs args, TVMRetValue *rv) {
-  *rv = topi::cuda::schedule_injective_from_existing(args[0], args[1], args[2]);
+  *rv = topi::cuda::schedule_injective_from_existing(args[0], args[1]);
   });

 TVM_REGISTER_GLOBAL("topi.cuda.schedule_pool")
@@ -763,7 +763,7 @@ TVM_REGISTER_GENERIC_FUNC(schedule_binary_dense)
 /*! \brief Builder function for instantiating schedules from existing schedules. */
 using FTVMScheduleFromExistingBuilder = std::function<
-  tvm::Schedule(const Target& target, tvm::Schedule sch, const tvm::Tensor& out)>;
+  tvm::Schedule(tvm::Schedule sch, const tvm::Tensor& out)>;

 /*!
  * \brief Helper function for registering generic functions matching the
@@ -776,7 +776,7 @@ using FTVMScheduleFromExistingBuilder = std::function<
  */
 inline PackedFunc WrapScheduleFromExisting(FTVMScheduleFromExistingBuilder builder) {
   return PackedFunc([builder](TVMArgs args, TVMRetValue* ret) {
-    *ret = builder(args[0], args[1], args[2]);
+    *ret = builder(args[1], args[2]);
   });
 }
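
With the target argument gone, the CUDA variant now reads the target from the ambient scope via Target::Current(false), which fails when no scope is active, so callers need an enclosing target context. From Python that looks roughly like this; the shape is an assumption for illustration:

    import tvm
    import topi

    A = tvm.placeholder((1 << 20,), name="A")
    B = topi.add(A, 1.0)

    # A target scope must be active so the schedule can read
    # max_num_threads from the current target.
    with tvm.target.create("cuda"):
        s = topi.cuda.schedule_injective([B])
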
From eab0b01c1301dce306f74a652465dceeb74493a5 Mon Sep 17 00:00:00 2001
From: Jon Soifer
Date: Wed, 25 Sep 2019 09:12:23 -0700
Subject: [PATCH 14/18] Fix docs

---
 topi/include/topi/cuda/injective.h    | 1 -
 topi/include/topi/generic/injective.h | 1 -
 topi/include/topi/x86/injective.h     | 1 -
 3 files changed, 3 deletions(-)

diff --git a/topi/include/topi/cuda/injective.h b/topi/include/topi/cuda/injective.h
index 888b41b37be4..663bc1fcbeb2 100644
--- a/topi/include/topi/cuda/injective.h
+++ b/topi/include/topi/cuda/injective.h
@@ -37,7 +37,6 @@ namespace cuda {
 /*!
  * \brief Updates an existing schedule for the given injective ops.
  *
- * \param target The target to update the schedule for.
  * \param sch The schedule to update.
  * \param out The tensor representing the injective op.
  *
diff --git a/topi/include/topi/generic/injective.h b/topi/include/topi/generic/injective.h
index 24810edee07c..fa7df4ccfd46 100644
--- a/topi/include/topi/generic/injective.h
+++ b/topi/include/topi/generic/injective.h
@@ -37,7 +37,6 @@ namespace generic {
 /*!
  * \brief Updates an existing schedule for the given injective ops.
  *
- * \param target The target to update the schedule for.
  * \param sch The schedule to update.
  * \param out The tensor representing the injective op.
  *
diff --git a/topi/include/topi/x86/injective.h b/topi/include/topi/x86/injective.h
index 85b18aaed195..7cb79ae322c2 100644
--- a/topi/include/topi/x86/injective.h
+++ b/topi/include/topi/x86/injective.h
@@ -37,7 +37,6 @@ namespace x86 {
 /*!
  * \brief Updates an existing schedule for the given injective ops.
  *
- * \param target The target to update the schedule for.
  * \param sch The schedule to update.
  * \param out The tensor representing the injective op.
  *
From 2496cd77771ec805bcd658f907bb387a48a93ebc Mon Sep 17 00:00:00 2001
From: Jon Soifer
Date: Wed, 25 Sep 2019 09:52:55 -0700
Subject: [PATCH 15/18] Try to fix unit test

---
 tests/cpp/build_module_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/cpp/build_module_test.cc b/tests/cpp/build_module_test.cc
index 1a7f791f65a0..697c07154ab9 100644
--- a/tests/cpp/build_module_test.cc
+++ b/tests/cpp/build_module_test.cc
@@ -114,7 +114,7 @@ TEST(BuildModule, Heterogeneous) {
   auto lowered_s2 = lower(s2, args2, "elemwise_sub", binds, config);
   Map<Target, Array<LoweredFunc>> inputs = {{target_cuda, lowered_s1},
                                             {target_llvm, lowered_s2}};
-  auto module = build(inputs, Target(), config);
+  auto module = build(inputs, target_llvm, config);

   // Assertion for build.
   CHECK_EQ(module->imports().size(), 1);
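
The change above passes target_llvm as an explicit host target instead of a null Target. The closest Python-side analogue is tvm.build's target_host parameter; a rough single-device sketch under assumed shapes:

    import tvm
    import topi

    A = tvm.placeholder((1024,), name="A")
    B = topi.add(A, 1.0)
    with tvm.target.cuda():
        s = topi.cuda.schedule_injective([B])

    # Supply the host target explicitly, mirroring
    # build(inputs, target_llvm, config) in the C++ test.
    mod = tvm.build(s, [A, B], target="cuda", target_host="llvm")
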
From 419f8464b2741d83a33adc3550afdc314b3c3496 Mon Sep 17 00:00:00 2001
From: Jon Soifer
Date: Wed, 25 Sep 2019 14:49:56 -0700
Subject: [PATCH 16/18] Fix test

---
 tests/cpp/build_module_test.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/cpp/build_module_test.cc b/tests/cpp/build_module_test.cc
index 697c07154ab9..a7237db482ac 100644
--- a/tests/cpp/build_module_test.cc
+++ b/tests/cpp/build_module_test.cc
@@ -102,7 +102,11 @@ TEST(BuildModule, Heterogeneous) {
     return copy[i] - C[i];
   }, "elemwise_sub");

+  const runtime::PackedFunc* enter_target_scope_func = runtime::Registry::Get("_EnterTargetScope");
+  (*enter_target_scope_func)(target_cuda);
   auto s1 = topi::cuda::schedule_injective(target_cuda, {elemwise_add});
+
+  (*enter_target_scope_func)(target_llvm);
   auto s2 = create_schedule({elemwise_sub->op});

   auto config = BuildConfig::Create();
@@ -114,7 +118,7 @@ TEST(BuildModule, Heterogeneous) {
   auto lowered_s2 = lower(s2, args2, "elemwise_sub", binds, config);
   Map<Target, Array<LoweredFunc>> inputs = {{target_cuda, lowered_s1},
                                             {target_llvm, lowered_s2}};
-  auto module = build(inputs, target_llvm, config);
+  auto module = build(inputs, Target(), config);

   // Assertion for build.
   CHECK_EQ(module->imports().size(), 1);
From 7aeac5ec1269f06ba13206bfb81cc3afe637dce1 Mon Sep 17 00:00:00 2001
From: Jon Soifer
Date: Wed, 25 Sep 2019 16:09:28 -0700
Subject: [PATCH 17/18] Fix other tests

---
 tests/python/unittest/test_runtime_heterogeneous.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/python/unittest/test_runtime_heterogeneous.py b/tests/python/unittest/test_runtime_heterogeneous.py
index e0ae2e18a3d6..eb51faef9090 100644
--- a/tests/python/unittest/test_runtime_heterogeneous.py
+++ b/tests/python/unittest/test_runtime_heterogeneous.py
@@ -174,7 +174,8 @@ def check_device(device, target_device):

     dev_tar = {"cuda": "cuda", "opencl": "opencl"}
     for device, target in dev_tar.items():
-        check_device(device, target)
+        with tvm.target.create(device):
+            check_device(device, target)


 def get_duplex_graph(host_dev_type, device_dev_type):
@@ -394,7 +395,8 @@ def check_load_module():

     dev_tar = {"cuda": "cuda", "opencl": "opencl"}
     for device, target in dev_tar.items():
-        check_device(device, target)
+        with tvm.target.create(device):
+            check_device(device, target)

 if __name__ == "__main__":
     test_simplex_data_transferring()
From b698a15f68d01233eda79fc2092761c052c07880 Mon Sep 17 00:00:00 2001
From: Unknown
Date: Wed, 25 Sep 2019 20:09:57 -0700
Subject: [PATCH 18/18] Fix bug

---
 topi/src/topi.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/topi/src/topi.cc b/topi/src/topi.cc
index 71cc6f8858db..7114f4d878a8 100644
--- a/topi/src/topi.cc
+++ b/topi/src/topi.cc
@@ -776,7 +776,7 @@ using FTVMScheduleFromExistingBuilder = std::function<
  */
 inline PackedFunc WrapScheduleFromExisting(FTVMScheduleFromExistingBuilder builder) {
   return PackedFunc([builder](TVMArgs args, TVMRetValue* ret) {
-    *ret = builder(args[1], args[2]);
+    *ret = builder(args[0], args[1]);
   });
 }
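
Taken together, the series makes injective scheduling target-scope aware end to end. A short smoke test on the llvm backend that exercises the target-scoped dispatch from Python; the array size is arbitrary:

    import numpy as np
    import tvm
    import topi

    A = tvm.placeholder((64,), name="A")
    B = topi.add(A, 1.0)

    # The llvm scope selects the cpu schedule variant.
    with tvm.target.create("llvm"):
        s = topi.generic.schedule_injective([B])

    f = tvm.build(s, [A, B], "llvm")
    ctx = tvm.cpu(0)
    a = tvm.nd.array(np.arange(64, dtype=A.dtype), ctx)
    b = tvm.nd.array(np.zeros(64, dtype=A.dtype), ctx)
    f(a, b)
    np.testing.assert_allclose(b.asnumpy(), a.asnumpy() + 1.0)
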