diff --git a/topi/python/topi/arm_cpu/bitserial_conv2d.py b/topi/python/topi/arm_cpu/bitserial_conv2d.py index 470aea0b4523..cd6810af8177 100644 --- a/topi/python/topi/arm_cpu/bitserial_conv2d.py +++ b/topi/python/topi/arm_cpu/bitserial_conv2d.py @@ -327,6 +327,8 @@ def _schedule_spatial_conv2d_nhwc(s, data, data_q, data_pad, data_vec, def schedule_bitserial_conv2d_nhwc(outs): """Raspverry pi schedule for bitserial conv2d""" s = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] + def traverse(op): """Traverse operators from computation graph""" # inline all one-to-one-mapping operators except the last stage (output) @@ -334,7 +336,7 @@ def traverse(op): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) if 'spatial_bitserial_conv_nhwc' in op.tag: @@ -360,6 +362,7 @@ def traverse(op): _schedule_spatial_conv2d_nhwc(s, data, data_q, data_pad, data_vec, kernel, kernel_q, kernel_vec, conv_out, output, outs[0]) + scheduled_ops.append(op) traverse(outs[0].op) return s diff --git a/topi/python/topi/arm_cpu/conv2d.py b/topi/python/topi/arm_cpu/conv2d.py index f5dbec8e552b..e28c08cb93ee 100644 --- a/topi/python/topi/arm_cpu/conv2d.py +++ b/topi/python/topi/arm_cpu/conv2d.py @@ -39,10 +39,11 @@ def decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype): def schedule_conv2d_nchw_arm_cpu(cfg, outs): """TOPI schedule callback""" s = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] def _callback(op): # schedule conv2d - if 'spatial_conv_output' in op.tag: + if 'spatial_conv_output' in op.tag and op not in scheduled_ops: output = op.output(0) conv = op.input_tensors[0] @@ -64,6 +65,8 @@ def _callback(op): output = op.output(0) _schedule_winograd(cfg, s, output, outs[0]) + scheduled_ops.append(op) + traverse_inline(s, outs[0].op, _callback) return s diff --git 
a/topi/python/topi/arm_cpu/depthwise_conv2d.py b/topi/python/topi/arm_cpu/depthwise_conv2d.py index 65fabddb34df..c4dbeae38eeb 100644 --- a/topi/python/topi/arm_cpu/depthwise_conv2d.py +++ b/topi/python/topi/arm_cpu/depthwise_conv2d.py @@ -79,8 +79,10 @@ def _schedule(cfg, s, data, data_pad, kernel, output): return s + scheduled_ops = [] + def _callback(op): - if op.tag == 'depthwise_conv2d_nchw': + if op.tag == 'depthwise_conv2d_nchw' and op not in scheduled_ops: output = op.output(0) kernel = op.input_tensors[1] data = op.input_tensors[0] @@ -90,5 +92,7 @@ def _callback(op): data = data_pad.op.input_tensors[0] _schedule(cfg, s, data, data_pad, kernel, output) + scheduled_ops.append(op) + traverse_inline(s, outs[0].op, _callback) return s diff --git a/topi/python/topi/cuda/conv2d_hwcn.py b/topi/python/topi/cuda/conv2d_hwcn.py index 082966a3c05a..bbf89d22fca5 100644 --- a/topi/python/topi/cuda/conv2d_hwcn.py +++ b/topi/python/topi/cuda/conv2d_hwcn.py @@ -99,13 +99,15 @@ def schedule(Apad, W, B): sch[WW].bind(tx, thread_x) sch[WW].vectorize(fi) + scheduled_ops = [] + def traverse(operator): """Traverse operators from computation graph""" if tag.is_broadcast(operator.tag): if operator not in sch.outputs: sch[operator].compute_inline() for tensor in operator.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) elif operator.tag == 'conv2d_hwcn': Apad = operator.input_tensors[0] @@ -117,5 +119,7 @@ def traverse(operator): else: raise RuntimeError("Unsupported operator: %s" % operator.tag) + scheduled_ops.append(operator) + traverse(outs[0].op) return sch diff --git a/topi/python/topi/cuda/conv2d_nchw.py b/topi/python/topi/cuda/conv2d_nchw.py index 844d4a04758c..4f7539d224eb 100644 --- a/topi/python/topi/cuda/conv2d_nchw.py +++ b/topi/python/topi/cuda/conv2d_nchw.py @@ -492,6 +492,8 @@ def schedule(temp, Filter, Output): else: conv2d_56_64_64(s, Filter, temp_S, Filter_S, Out, Out_L) + 
scheduled_ops = [] + def traverse(OP): """Traverse operators from computation graph""" # inline all one-to-one-mapping operators except the last stage (output) @@ -499,7 +501,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule conv2d if 'conv2d_nchw' in OP.tag: @@ -510,6 +512,8 @@ def traverse(OP): Output = OP.output(0) schedule(temp, Filter, Output) + scheduled_ops.append(OP) + traverse(outs[0].op) return s diff --git a/topi/python/topi/cuda/conv2d_transpose_nchw.py b/topi/python/topi/cuda/conv2d_transpose_nchw.py index 0d439bfdfdea..4454bc54d3eb 100644 --- a/topi/python/topi/cuda/conv2d_transpose_nchw.py +++ b/topi/python/topi/cuda/conv2d_transpose_nchw.py @@ -73,6 +73,8 @@ def schedule(temp, Filter, Output): else: conv2d_56_64_64(s, Filter, temp_S, Filter_S, Out, Out_L) + scheduled_ops = [] + def traverse(OP): """Internal travserse function""" # inline all one-to-one-mapping operators except the last stage (output) @@ -80,7 +82,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule conv2d_transpose_nchw if 'conv2d_transpose_nchw' in OP.tag: @@ -91,6 +93,8 @@ def traverse(OP): Output = OP.output(0) schedule(temp, Filter, Output) + scheduled_ops.append(OP) + traverse(outs[0].op) return s diff --git a/topi/python/topi/cuda/dense.py b/topi/python/topi/cuda/dense.py index 7c62fab743f5..6981d88e4e3d 100644 --- a/topi/python/topi/cuda/dense.py +++ b/topi/python/topi/cuda/dense.py @@ -86,6 +86,8 @@ def _schedule(Dense): s[Dense].set_store_predicate(thread_x.var.equal(0)) s[Out].set_store_predicate(thread_x.var.equal(0)) + scheduled_ops = [] + def traverse(OP): """Internal travserse function""" # inline all one-to-one-mapping 
operators except the last stage (output) @@ -93,7 +95,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule dense elif OP.tag == 'dense': @@ -102,5 +104,7 @@ def traverse(OP): else: raise RuntimeError("Unsupported operator: %s" % OP.tag) + scheduled_ops.append(OP) + traverse(outs[0].op) return s diff --git a/topi/python/topi/cuda/depthwise_conv2d.py b/topi/python/topi/cuda/depthwise_conv2d.py index c7ab547c88ec..94fa5c7e79ca 100644 --- a/topi/python/topi/cuda/depthwise_conv2d.py +++ b/topi/python/topi/cuda/depthwise_conv2d.py @@ -101,6 +101,8 @@ def _schedule(PaddedInput, Filter, DepthwiseConv2d): s[FS].bind(ty, thread_y) s[FS].bind(tx, thread_x) + scheduled_ops = [] + def traverse(OP): """Internal travserse function""" # inline all one-to-one-mapping operators except the last stage (output) @@ -108,7 +110,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule depthwise_conv2d if OP.tag == 'depthwise_conv2d_nchw': @@ -119,6 +121,8 @@ def traverse(OP): DepthwiseConv2d = OP.output(0) _schedule(PaddedInput, Filter, DepthwiseConv2d) + scheduled_ops.append(OP) + traverse(outs[0].op) return s @@ -180,6 +184,8 @@ def _schedule(temp, Filter, DepthwiseConv2d): fused = s[FS].fuse(fi, ci) s[FS].bind(fused, thread_x) + scheduled_ops = [] + def traverse(OP): """Internal travserse function""" # inline all one-to-one-mapping operators except the last stage (output) @@ -187,7 +193,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule depthwise_conv2d if OP.tag == 
'depthwise_conv2d_nhwc': @@ -198,6 +204,8 @@ def traverse(OP): DepthwiseConv2d = OP.output(0) _schedule(PaddedInput, Filter, DepthwiseConv2d) + scheduled_ops.append(OP) + traverse(outs[0].op) return s diff --git a/topi/python/topi/cuda/pooling.py b/topi/python/topi/cuda/pooling.py index ed4419491428..637f664fbd36 100644 --- a/topi/python/topi/cuda/pooling.py +++ b/topi/python/topi/cuda/pooling.py @@ -45,6 +45,8 @@ def _schedule(Pool): else: s[Pool].compute_at(s[Out], tx) + scheduled_ops = [] + def traverse(OP): """Internal travserse function""" # inline all one-to-one-mapping operators except the last stage (output) @@ -52,7 +54,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule global_pool elif OP.tag.startswith('global_pool'): @@ -61,6 +63,8 @@ def traverse(OP): else: raise RuntimeError("Unsupported operator: %s" % OP.tag) + scheduled_ops.append(OP) + traverse(outs[0].op) return s @@ -101,6 +105,8 @@ def _schedule(PaddedInput, Pool): else: s[Pool].compute_at(s[Out], tx) + scheduled_ops = [] + def traverse(OP): """Internal travserse function""" # inline all one-to-one-mapping operators except the last stage (output) @@ -108,7 +114,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule pool elif OP.tag.startswith('pool'): @@ -118,5 +124,7 @@ def traverse(OP): else: raise RuntimeError("Unsupported operator: %s" % OP.tag) + scheduled_ops.append(OP) + traverse(outs[0].op) return s diff --git a/topi/python/topi/cuda/reduction.py b/topi/python/topi/cuda/reduction.py index c1f7d19d82fb..52bacd3d1ae3 100644 --- a/topi/python/topi/cuda/reduction.py +++ b/topi/python/topi/cuda/reduction.py @@ -88,6 +88,7 @@ def 
schedule_reduce(outs): """ outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs sch = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] def traverse_before_reduce(operator): """Internal travserse function""" @@ -96,10 +97,13 @@ def traverse_before_reduce(operator): elif tag.is_injective(operator.tag): sch[operator].compute_inline() for tensor in operator.input_tensors: - traverse_before_reduce(tensor.op) + if tensor.op not in scheduled_ops: + traverse_before_reduce(tensor.op) else: raise RuntimeError("Unsupported operator: %s" % operator.tag) + scheduled_ops.append(operator) + def traverse_after_reduce(operator): """Internal travserse function""" if tag.is_broadcast(operator.tag): @@ -107,13 +111,18 @@ def traverse_after_reduce(operator): elif operator.tag == 'comm_reduce': _schedule_reduce(operator, sch, is_idx_reduce=False) for tensor in operator.input_tensors: - traverse_before_reduce(tensor.op) + if tensor.op not in scheduled_ops: + traverse_before_reduce(tensor.op) elif operator.tag == 'comm_reduce_idx': _schedule_reduce(operator, sch, is_idx_reduce=True) - for tensor in operator.input_tensors[0].op.input_tensors: - traverse_before_reduce(tensor.op) + input_tensors = operator.input_tensors[0].op.input_tensors + for tensor in input_tensors: + if tensor.op not in scheduled_ops: + traverse_before_reduce(tensor.op) else: raise RuntimeError("Unsupported operator: %s" % operator.tag) + scheduled_ops.append(operator) + traverse_after_reduce(outs[0].op) return sch diff --git a/topi/python/topi/cuda/vision.py b/topi/python/topi/cuda/vision.py index c5d94b5ab4de..db97e6ffb45e 100644 --- a/topi/python/topi/cuda/vision.py +++ b/topi/python/topi/cuda/vision.py @@ -11,6 +11,8 @@ def _default_schedule(outs): target = tvm.target.current_target() outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs s = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] + def traverse(op): """inline all one-to-one-mapping operators except the last 
stage (output)""" if "nms" in op.tag: @@ -32,9 +34,11 @@ def traverse(op): s[x].bind(bx, tvm.thread_axis("blockIdx.x")) s[x].bind(tx, tvm.thread_axis("threadIdx.x")) for tensor in op.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) + scheduled_ops.append(op) + traverse(outs[0].op) return s diff --git a/topi/python/topi/intel_graphics/conv2d.py b/topi/python/topi/intel_graphics/conv2d.py index 29b61dd02ee3..4275bd963d10 100644 --- a/topi/python/topi/intel_graphics/conv2d.py +++ b/topi/python/topi/intel_graphics/conv2d.py @@ -113,6 +113,7 @@ def schedule_conv2d_NCHWc(num_filter, kernel_size, stride, padding, layout, out_ """ outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs s = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] def traverse(op): """inline all one-to-one-mapping operators except the last stage (output)""" @@ -120,12 +121,14 @@ def traverse(op): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) if "4_5" in op.tag or "4_4" in op.tag or "2_7" in op.tag or "2_14" in op.tag \ or "1_16" in op.tag: _schedule_cl_spatialpack_NCHWc(s, op) + scheduled_ops.append(op) + traverse(outs[0].op) return s @@ -360,6 +363,7 @@ def schedule_conv2d_nchw(outs): """ outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs s = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] def traverse(op): """inline all one-to-one-mapping operators except the last stage (output)""" @@ -367,12 +371,14 @@ def traverse(op): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) if "4_5" in op.tag or "4_4" in op.tag or "2_7" in op.tag or "2_14" in op.tag \ or "1_16" in op.tag: 
_schedule_cl_spatialpack(s, op) + scheduled_ops.append(op) + traverse(outs[0].op) return s diff --git a/topi/python/topi/mali/conv2d.py b/topi/python/topi/mali/conv2d.py index ad1dfbe61740..fc6309a7ebf4 100644 --- a/topi/python/topi/mali/conv2d.py +++ b/topi/python/topi/mali/conv2d.py @@ -144,6 +144,7 @@ def schedule_conv2d_nchw(outs): """ outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs s = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] def traverse(op): """inline all one-to-one-mapping operators except the last stage (output)""" @@ -151,7 +152,7 @@ def traverse(op): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) if 'im2col_conv_output' in op.tag: @@ -163,6 +164,8 @@ def traverse(op): if 'winograd_conv_output' in op.tag: _schedule_winograd(s, op) + scheduled_ops.append(op) + traverse(outs[0].op) return s diff --git a/topi/python/topi/mali/dense.py b/topi/python/topi/mali/dense.py index ff88ce51866f..165d80a5ceef 100644 --- a/topi/python/topi/mali/dense.py +++ b/topi/python/topi/mali/dense.py @@ -81,6 +81,8 @@ def fuse_and_bind(s, tensor, axis=None, num_thread=None): # bias = s[outs[0]].op.input_tensors[1] # print(tvm.lower(s, [data, weight, bias, outs[0]], simple_mode=True)) + scheduled_ops = [] + def traverse(OP): """Internal travserse function""" # inline all one-to-one-mapping operators except the last stage (output) @@ -88,7 +90,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule dense elif OP.tag == 'dense': @@ -97,5 +99,7 @@ def traverse(OP): else: raise RuntimeError("Unsupported operator: %s" % OP.tag) + scheduled_ops.append(OP) + traverse(outs[0].op) return s diff --git 
a/topi/python/topi/mali/depthwise_conv2d.py b/topi/python/topi/mali/depthwise_conv2d.py index 61ec6334e0d2..cad0733a153f 100644 --- a/topi/python/topi/mali/depthwise_conv2d.py +++ b/topi/python/topi/mali/depthwise_conv2d.py @@ -86,6 +86,8 @@ def tile_and_bind3d(tensor, z, y, x, z_factor=2, y_factor=None, x_factor=None): s[conv].vectorize(xi) s[conv].compute_at(s[output], ji) + scheduled_ops = [] + def traverse(op): """Internal travserse function""" # inline all one-to-one-mapping operators except the last stage (output) @@ -93,7 +95,7 @@ def traverse(op): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule depthwise_conv2d @@ -105,5 +107,7 @@ def traverse(op): conv = op.output(0) _schedule(pad_data, kernel, conv) + scheduled_ops.append(op) + traverse(outs[0].op) return s diff --git a/topi/python/topi/opengl/conv2d_nchw.py b/topi/python/topi/opengl/conv2d_nchw.py index 573270c375a0..a1e089e37aa2 100644 --- a/topi/python/topi/opengl/conv2d_nchw.py +++ b/topi/python/topi/opengl/conv2d_nchw.py @@ -21,6 +21,8 @@ def schedule_conv2d_nchw(outs): """ outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs s = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] + def _schedule(conv2d, data): if conv2d.op in s.outputs: Out = conv2d @@ -37,7 +39,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].opengl() for tensor in OP.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule conv2d_nchw elif OP.tag.startswith('conv2d_nchw'): @@ -50,5 +52,7 @@ def traverse(OP): else: raise RuntimeError("Unsupported operator: %s" % OP.tag) + scheduled_ops.append(OP) + traverse(outs[0].op) return s diff --git a/topi/python/topi/opengl/dense.py b/topi/python/topi/opengl/dense.py index e4d327afa4d6..b98769b534ef 100644 --- 
a/topi/python/topi/opengl/dense.py +++ b/topi/python/topi/opengl/dense.py @@ -22,6 +22,8 @@ def schedule_dense(outs): """ outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs s = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] + def _schedule(Dense): if Dense.op in s.outputs: Out = Dense @@ -37,7 +39,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule dense elif OP.tag == 'dense': @@ -46,5 +48,7 @@ def traverse(OP): else: raise RuntimeError("Unsupported operator: %s" % OP.tag) + scheduled_ops.append(OP) + traverse(outs[0].op) return s diff --git a/topi/python/topi/opengl/pooling.py b/topi/python/topi/opengl/pooling.py index 0bc17885548a..8195ea91d8a6 100644 --- a/topi/python/topi/opengl/pooling.py +++ b/topi/python/topi/opengl/pooling.py @@ -21,6 +21,8 @@ def schedule_global_pool(outs): """ outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs s = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] + def _schedule(Pool): if Pool.op in s.outputs: Out = Pool @@ -36,7 +38,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].opengl() for tensor in OP.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule global_pool elif OP.tag.startswith('global_pool'): @@ -45,6 +47,8 @@ def traverse(OP): else: raise RuntimeError("Unsupported operator: %s" % OP.tag) + scheduled_ops.append(OP) + traverse(outs[0].op) return s @@ -66,6 +70,8 @@ def schedule_pool(outs): """ outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs s = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] + def _schedule(PaddedInput, Pool): if isinstance(PaddedInput.op, tvm.tensor.ComputeOp): s[PaddedInput].opengl() @@ -82,7 +88,7 @@ def traverse(OP): if tag.is_broadcast(OP.tag): if OP not 
in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule pool @@ -93,5 +99,7 @@ def traverse(OP): else: raise RuntimeError("Unsupported operator: %s" % OP.tag) + scheduled_ops.append(OP) + traverse(outs[0].op) return s diff --git a/topi/python/topi/x86/binary_dense.py b/topi/python/topi/x86/binary_dense.py index 8b28dd728842..4a3da5541efd 100644 --- a/topi/python/topi/x86/binary_dense.py +++ b/topi/python/topi/x86/binary_dense.py @@ -23,6 +23,7 @@ def schedule_binary_dense(outs): """ outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs s = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] def _schedule(A, B, C): s[C].split(s[C].op.reduce_axis[0], factor=8) @@ -41,7 +42,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule binary_dense elif OP.tag == 'binary_dense': @@ -52,5 +53,7 @@ def traverse(OP): else: raise RuntimeError("Unsupported operator: %s" % OP.tag) + scheduled_ops.append(OP) + traverse(outs[0].op) return s diff --git a/topi/python/topi/x86/bitserial_conv2d.py b/topi/python/topi/x86/bitserial_conv2d.py index 1c01b96f9c30..0b864c383ca4 100644 --- a/topi/python/topi/x86/bitserial_conv2d.py +++ b/topi/python/topi/x86/bitserial_conv2d.py @@ -71,6 +71,7 @@ def _declaration_bitserial_conv2d(data, kernel, stride, padding, activation_bits def schedule_bitserial_conv2d(outs): """CPU schedule for bitserial convolutions NCHW and NHWC""" s = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] def traverse(op): """Traverse operators from computation graph""" @@ -79,7 +80,7 @@ def traverse(op): if tag.is_broadcast(op.tag) or 'elemwise' in op.tag: if op not in s.outputs: s[op].compute_inline() for tensor in
op.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) @@ -111,6 +112,7 @@ def traverse(op): _schedule_spatial_conv2d_nhwc(s, data, data_q, data_pad, data_vec, kernel, kernel_q, kernel_vec, conv_out, output, outs[0]) + scheduled_ops.append(op) traverse(outs[0].op) return s diff --git a/topi/python/topi/x86/conv2d.py b/topi/python/topi/x86/conv2d.py index ae4a567f1dfa..721c7c169d99 100644 --- a/topi/python/topi/x86/conv2d.py +++ b/topi/python/topi/x86/conv2d.py @@ -188,6 +188,7 @@ def schedule_conv2d(outs): } s = tvm.create_schedule([x.op for x in outs]) target = tvm.target.current_target(allow_none=False) + scheduled_ops = [] def traverse(op): """Traverse operators from computation graph""" @@ -196,7 +197,7 @@ def traverse(op): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) if 'conv2d_nchw' in op.tag: @@ -223,6 +224,8 @@ def traverse(op): _AVX_SCH_TO_SCH_FUNC[type(sch)](s, data, data_pad, data_vec, kernel, kernel_vec, conv_out, output, outs[0]) + scheduled_ops.append(op) + traverse(outs[0].op) return s @@ -232,6 +235,7 @@ def schedule_conv2d_nhwc(outs): """Create schedule for tensors""" s = tvm.create_schedule([x.op for x in outs]) output_op = outs[0].op + scheduled_ops = [] def traverse(op): """Traverse operators from computation graph""" @@ -246,7 +250,7 @@ def traverse(op): s[op].parallel(fused) s[op].vectorize(c) for tensor in op.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) if 'conv2d_nhwc' in op.tag: @@ -275,6 +279,8 @@ def traverse(op): fused = s[C].fuse(n, h, w) s[C].parallel(fused) + scheduled_ops.append(op) + traverse(output_op) return s @@ -288,6 +294,7 @@ def schedule_conv2d_NCHWc(num_filter, kernel_size, stride, padding, AVXConv1x1Fwd:
conv2d_avx_1x1._schedule_conv_NCHWc } s = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] def traverse(op): """Traverse operators from computation graph""" @@ -296,7 +303,7 @@ def traverse(op): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) if 'conv2d_NCHWc' in op.tag: @@ -322,5 +329,7 @@ def traverse(op): _AVX_SCH_TO_SCH_FUNC[type(sch)](s, wkl, sch, data_vec, kernel, conv_out, outs[0]) + scheduled_ops.append(op) + traverse(outs[0].op) return s diff --git a/topi/python/topi/x86/nn.py b/topi/python/topi/x86/nn.py index ccb9f01471a1..03e07222c420 100644 --- a/topi/python/topi/x86/nn.py +++ b/topi/python/topi/x86/nn.py @@ -53,6 +53,7 @@ def schedule_dense(outs): outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs s = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] def traverse(op): """Traverse operators from computation graph""" @@ -61,7 +62,7 @@ def traverse(op): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) if 'dense' in op.tag: @@ -89,5 +90,7 @@ def traverse(op): # Parallelization s[C].parallel(yo) + scheduled_ops.append(op) + traverse(outs[0].op) return s diff --git a/topi/python/topi/x86/pooling.py b/topi/python/topi/x86/pooling.py index 946ecd2c7697..998edf7a0e16 100644 --- a/topi/python/topi/x86/pooling.py +++ b/topi/python/topi/x86/pooling.py @@ -32,6 +32,7 @@ def schedule_pool(outs): """ outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs s = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] def _schedule(PaddedInput, Pool): if isinstance(PaddedInput.op, tvm.tensor.ComputeOp): @@ -45,7 +46,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if 
tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule pool elif OP.tag.startswith('pool'): @@ -54,6 +55,9 @@ def traverse(OP): _schedule(PaddedInput, Pool) else: raise RuntimeError("Unsupported operator: %s" % OP.tag) + + scheduled_ops.append(OP) + traverse(outs[0].op) return s @@ -75,6 +79,8 @@ def schedule_global_pool(outs): """ outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs s = tvm.create_schedule([x.op for x in outs]) + scheduled_ops = [] + def traverse(OP): """Internal travserse function""" # inline all one-to-one-mapping operators except the last stage (output) @@ -82,7 +88,7 @@ def traverse(OP): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if tensor.op.input_tensors: + if tensor.op.input_tensors and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule pool elif OP.tag.startswith('global_pool'): @@ -90,5 +96,8 @@ def traverse(OP): _parallel_sch(s[Pool]) else: raise RuntimeError("Unsupported operator: %s" % OP.tag) + + scheduled_ops.append(OP) + traverse(outs[0].op) return s