From 9a7fae8d23b3d020687c55dc457b52b575bcbefa Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Wed, 7 Oct 2020 22:09:36 +0000 Subject: [PATCH 01/17] Onnx loop almost working, checkpointing for safety. --- python/tvm/relay/frontend/onnx.py | 168 ++++++++++++- tests/python/frontend/onnx/body.py | 113 +++++++++ tests/python/frontend/onnx/test_forward.py | 269 +++++++++++++++------ 3 files changed, 471 insertions(+), 79 deletions(-) create mode 100644 tests/python/frontend/onnx/body.py diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 9fae94b5a8a1..1460018ade61 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -17,8 +17,10 @@ # pylint: disable=invalid-name, import-self, len-as-condition, unused-argument, too-many-lines # pylint: disable=import-outside-toplevel """ONNX: Open Neural Network Exchange frontend for Relay.""" +from collections import OrderedDict import numpy as np import tvm +import onnx from tvm.ir import IRModule from ... import nd as _nd @@ -27,6 +29,8 @@ from .. import function as _function from .. import op as _op from .. import vision as _vision +from .. import loops as _loops +from .. import ty as _ty from .common import AttrCvt, Renamer from .common import get_relay_op, new_var, infer_shape, infer_channels @@ -95,6 +99,25 @@ def get_numpy(tensor_proto): return to_array(tensor_proto) +def get_type(elem_type): + """Converts onnx integer datatype to numpy datatype""" + return onnx.TensorProto.DataType.Name(elem_type).lower() + + +def get_info(info_proto): + """Extract the shape from a ValueInfoProto.""" + shape = [] + for dim in info_proto.type.tensor_type.shape.dim: + value = dim.dim_value + if value is None: + value = _ty.Any + shape.append(value) + + name = info_proto.name + dtype = get_type(info_proto.type.tensor_type.elem_type) + return name, {name: shape}, {name: dtype} + + def dimension_picker(prefix, suffix=""): """Check that dimensions are supported.""" @@ -1995,6 +2018,122 @@ def _impl_v11(cls, inputs, attr, params): return result +class Loop(OnnxOpConverter): + """Operator converter for Loop + """ + @classmethod + def _impl_v11(cls, inputs, attr, params): + max_loop_count = inputs[0] + cond = inputs[1] + loop_vars = inputs[2:] + num_vars = len(loop_vars) + body = attr['body'] + + # Determine what condition mode we're in. + assert cond is not None or max_loop_count is not None + is_for_loop = max_loop_count is not None and cond is None + is_while_loop = cond is not None and max_loop_count is None + is_condition_for_loop = cond is not None and max_loop_count is not None + + # Loop inputs will be packed as + # [iter_count, condition, loop_vars, scan_outputs] + def cond_fn(loop_inputs): + i = loop_inputs[0] + w = loop_inputs[1] + + if cond is not None: + out_while = _op.equal(w, _expr.const(True, 'bool')) + if max_loop_count is not None: + out_loop = _op.less(i, max_loop_count) + + if is_condition_for_loop: + return _op.logical_or(out_while, out_loop) + elif is_for_loop: + return out_loop + return out_while + + # Create new graphproto converter for our body. + graph_scope = GraphProto.current + g = GraphProto(graph_scope._shape, graph_scope._dtype) + # Load nodes from outer graph into inner graph. + g._nodes = graph_scope._nodes.copy() + # Create a list of the names of values that will update + # each iteration. + loop_iter_vars = [body.input[0].name, body.input[1].name] + [v.name_hint for v in loop_vars] + # Add initial loop and condition values to graph nodes. 
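For reference while reading this converter: the ONNX Loop contract it has to reproduce can be modeled in plain NumPy as below. This is an illustrative sketch of the spec (one carried dependency, one scan output, made-up names), not code from this patch.

    import numpy as np

    def loop_reference(max_trip_count, cond, y, body):
        # Per the ONNX spec: body(iteration_num, cond, y) -> (cond, y, scan_row).
        scan_rows = []
        i = 0
        while (max_trip_count is None or i < max_trip_count) and (cond is None or cond):
            cond, y, row = body(np.int64(i), cond, y)
            scan_rows.append(row)
            i += 1
        # Scan outputs stack the per-iteration rows along a new leading axis.
        return y, np.stack(scan_rows)

    # For-loop mode: trip count set, condition held True by the body.
    final_y, scans = loop_reference(
        5, True, np.array([-2.0], dtype="float32"),
        lambda i, c, y: (c, y + np.float32(i), y + np.float32(i)))
    print(final_y, scans.shape)  # [8.] (5, 1)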
+ g._nodes[body.input[0].name] = max_loop_count + g._nodes[body.input[1].name] = cond + # Add initial loop_var values to our node tracking. + for i, v in enumerate(loop_vars): + g._nodes[body.input[i + 2].name] = v + + # Now we can remove loop iter variables from our inner loop's inputs. + # This is kind of a hack since we have graph inputs that we don't + # want to treat as actual inputs. + for i in range(len(loop_iter_vars)): + body.input.pop(0) + + body_mod, body_params = g.from_onnx(body, 11, freeze_params=True) + + # Now we have to find the inputs to our body. Unfortunately, these + # may be nodes that are not visible to this operation. + # Inputs that do not change in the loop can come from anywhere + # in the graph, and can have an arbitrary position in the body's + # argument list. We need to make a dictionary that maps the proper + # input index of these arguments to their value. + # After we compute the changing loop values, we'll insert the static + # ones into their proper index. We use an OrderedDict to make sure + # these values are inserted in the proper order. + body_vars = analysis.all_vars(body_mod['main']) + outer_scope_inputs = OrderedDict() + for i, var in enumerate(body_vars): + if var.name_hint in graph_scope._nodes.keys(): + outer_scope_inputs[i] = graph_scope._nodes[var.name_hint] + + # Body inputs a/re the combination of outer_scope_inputs and loop_vars. + # The body outputs are always in the form [condition, loop_vars, scan_outputs]. + # We'll use the length of the initial loop_vars to break apart these outputs + # and prepare them for the next iteration. + # To keep track of the iteration count, we'll use a new variable that will + # be kept at the first index of the loop inputs and outputs. + # loop input / outputs will be packed as [iter_count, cond, loop_vars, scan_outputs] + + # New strategy: populate g._nodes with the values needed + # by the next iteration. Then run node conversion on the updated + # inputs to build a progressively larger expression as needed. + iter_dtype = infer_type(max_loop_count).checked_type.dtype + iter_var = _expr.var('iter_var', shape=(), dtype=iter_dtype) + def body_fn(*loop_inputs): + # Unpack inputs + i = loop_inputs[0] + current_vars = list(loop_inputs[2:(2 + num_vars)]) + scan_outputs = loop_inputs[-1] + + # Prepare body inputs by adding global inputs. + for index in outer_scope_inputs.keys(): + loop_vars.insert(index, outer_scope_inputs[index]) + + # Run the body graph for one iteration. + body_outputs = body_mod['main'](*loop_vars) + # Unpack the body outputs and prepare variables for next iteration. + w = body_outputs[0] + loop_vars_new = body_outputs[1:(1 + num_vars)] + scan_outputs_new = body_outputs[(1 + num_vars):] + + # Increment counter. + if max_loop_count is not None: + incr = _expr.const(1, dtype='int32') + i = i + incr + + # Pack loop outputs + return [i, loop_inputs[1]] + loop_vars_new + [scan_outputs_new] + + # Prepare first input to loop. + loop_init = [iter_var, cond] + loop_vars + [[]] + out_graph = _loops.while_loop(cond, loop_init, body_fn) + return out_graph + + # compatible operators that do NOT require any conversion. 
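The `_loops.while_loop` helper this converter builds on can be exercised standalone. A minimal sketch of a counted loop (assuming a TVM checkout of roughly this vintage; `asnumpy()` is the era-appropriate accessor):

    import tvm
    from tvm import relay
    from tvm.relay.loops import while_loop

    i = relay.var("i", shape=(), dtype="int32")
    acc = relay.var("acc", shape=(), dtype="float32")

    def cond_fn(i, acc):
        return relay.less(i, relay.const(5, "int32"))

    def body_fn(i, acc):
        # Return the updated loop variables, mirroring ONNX Loop's carried deps.
        return [i + relay.const(1, "int32"), acc + relay.cast(i, "float32")]

    loop = while_loop(cond_fn, [i, acc], body_fn)
    mod = tvm.IRModule.from_expr(
        relay.Function([], loop(relay.const(0, "int32"), relay.const(0.0))))
    res = relay.create_executor("vm", mod=mod).evaluate()()
    print(res[1].asnumpy())  # 10.0 == 0 + 1 + 2 + 3 + 4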
_identity_list = [] @@ -2150,6 +2289,8 @@ def _get_convert_map(opset): "Resize": Resize.get_converter(opset), "NonZero": NonZero.get_converter(opset), "Range": Range.get_converter(opset), + # defs/control_flow + "Loop": Loop.get_converter(opset), } @@ -2165,6 +2306,7 @@ class GraphProto: dtype : str or dict of str to str The input types to the graph """ + current = None def __init__(self, shape, dtype): self._nodes = {} @@ -2176,6 +2318,14 @@ def __init__(self, shape, dtype): self._shape = shape if shape else {} self._dtype = dtype + def __enter__(self): + self._old_manager = GraphProto.current + GraphProto.current = self + return self + + def __exit__(self, ptype, value, trace): + GraphProto.current = self._old_manager + def freeze(self, func, params): bind_map = {} for name in params.keys(): @@ -2317,6 +2467,13 @@ def from_onnx(self, graph, opset, freeze_params=False): for i_name in self._params: if i_name in free_vars and i_name not in self._inputs: self._inputs[i_name] = self._nodes[i_name] + # For subgraphs, we may have free variables that come from an outer + # scope and should be converted to inputs. + if GraphProto.current is not self: + for i_name in free_vars: + if i_name not in self._inputs: + self._inputs[i_name] = self._nodes[i_name] + # Create a function from our output expression and all input variables. func = _function.Function([v for k, v in self._inputs.items()], outputs) if freeze_params: func, params = self.freeze(func, self._params) @@ -2348,7 +2505,7 @@ def _parse_attr(self, attr_proto): """Convert a list of AttributeProto to a dict, with names as keys.""" attrs = {} for a in attr_proto: - for f in ["f", "i", "s"]: + for f in ["f", "i", "s", "g"]: if a.HasField(f): attrs[a.name] = getattr(a, f) for f in ["floats", "ints", "strings"]: @@ -2362,12 +2519,9 @@ def _parse_attr(self, attr_proto): if list(getattr(a, f)): assert a.name not in attrs, "Only one type of attr is allowed" attrs[a.name] = tuple(getattr(a, f)) - for f in ["g"]: - if a.HasField(f): - raise NotImplementedError("Filed {} is not supported in relay.".format(f)) for f in ["graphs"]: if list(getattr(a, f)): - raise NotImplementedError("Filed {} is not supported in relay.".format(f)) + raise NotImplementedError("Field {} is not supported in relay.".format(f)) if a.name not in attrs: raise ValueError("Cannot parse attribute: \n{}\n.".format(a)) return attrs @@ -2482,5 +2636,7 @@ def from_onnx(model, shape=None, dtype="float32", opset=None, freeze_params=Fals opset = model.opset_import[0].version if model.opset_import else 1 except AttributeError: opset = 1 - mod, params = g.from_onnx(graph, opset, freeze_params) + # Use the graph proto as a scope so that ops can access other nodes if needed. 
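The `GraphProto.current` machinery added here is a class-level scope stack, the usual way to let a nested conversion (such as a Loop body) see names from the enclosing graph. Distilled to its core (illustrative names, not the converter's API):

    class Scope:
        current = None

        def __init__(self):
            self.nodes = {}

        def __enter__(self):
            self._outer = Scope.current
            Scope.current = self
            return self

        def __exit__(self, ptype, value, trace):
            Scope.current = self._outer

    with Scope() as outer:
        outer.nodes["x"] = 1
        inner = Scope()
        inner.nodes = outer.nodes.copy()  # subgraph starts from the outer scope's names
        with inner:
            assert Scope.current is inner  # lookups resolve against the innermost scope
        assert Scope.current is outer
    assert Scope.current is None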
+ with g: + mod, params = g.from_onnx(graph, opset, freeze_params) return mod, params diff --git a/tests/python/frontend/onnx/body.py b/tests/python/frontend/onnx/body.py new file mode 100644 index 000000000000..2d4118919c51 --- /dev/null +++ b/tests/python/frontend/onnx/body.py @@ -0,0 +1,113 @@ +import onnx +import numpy as np + +y_in = onnx.helper.make_tensor_value_info('y_in', onnx.TensorProto.FLOAT, [1]) +y_out = onnx.helper.make_tensor_value_info('y_out', onnx.TensorProto.FLOAT, [1]) +scan_out = onnx.helper.make_tensor_value_info('scan_out', onnx.TensorProto.FLOAT, [1]) +cond_in = onnx.helper.make_tensor_value_info('cond_in', onnx.TensorProto.BOOL, []) +cond_out = onnx.helper.make_tensor_value_info('cond_out', onnx.TensorProto.BOOL, []) +iter_count = onnx.helper.make_tensor_value_info('iter_count', onnx.TensorProto.INT64, []) + +x = np.array([1, 2, 3, 4, 5]).astype(np.float32) +y = np.array([-2]).astype(np.float32) + +x_const_node = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['x'], + value=onnx.helper.make_tensor( + name='const_tensor_x', + data_type=onnx.TensorProto.FLOAT, + dims=x.shape, + vals=x.flatten().astype(float), + ) +) + +one_const_node = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['one'], + value=onnx.helper.make_tensor( + name='const_tensor_one', + data_type=onnx.TensorProto.INT64, + dims=(), + vals=[1] + ) +) + +i_add_node = onnx.helper.make_node( + 'Add', + inputs=['iter_count', 'one'], + outputs=['end'] +) + +start_unsqueeze_node = onnx.helper.make_node( + 'Unsqueeze', + inputs=['iter_count'], + outputs=['slice_start'], + axes=[0] +) + +end_unsqueeze_node = onnx.helper.make_node( + 'Unsqueeze', + inputs=['end'], + outputs=['slice_end'], + axes=[0] +) + +slice_node = onnx.helper.make_node( + 'Slice', + inputs=['x', 'slice_start', 'slice_end'], + outputs=['slice_out'] +) + +y_add_node = onnx.helper.make_node( + 'Add', + inputs=['y_in', 'slice_out'], + outputs=['y_out'] +) + +identity_node = onnx.helper.make_node( + 'Identity', + inputs=['cond_in'], + outputs=['cond_out'] +) + +scan_identity_node = onnx.helper.make_node( + 'Identity', + inputs=['y_out'], + outputs=['scan_out'] +) + +loop_body = onnx.helper.make_graph( + [identity_node, x_const_node, one_const_node, i_add_node, + start_unsqueeze_node, end_unsqueeze_node, slice_node, y_add_node, + scan_identity_node], + 'loop_body', + [iter_count, cond_in, y_in], + [cond_out, y_out, scan_out] +) +body_model = onnx.helper.make_model(loop_body) +onnx.save(body_model, "body.onnx") + +node = onnx.helper.make_node( + 'Loop', + inputs=['trip_count', 'cond', 'y'], + outputs=['res_y', 'res_scan'], + body=loop_body +) + +trip_count = np.array(5).astype(np.int64) +res_y = np.array([13]).astype(np.float32) +cond = np.array(1).astype(np.bool) +node_graph = onnx.helper.make_graph( + [node], + "loop_outer", + inputs=[onnx.helper.make_tensor_value_info('trip_count', onnx.TensorProto.INT64, [5]), + onnx.helper.make_tensor_value_info('cond', onnx.TensorProto.FLOAT, [1]), + onnx.helper.make_tensor_value_info('y', onnx.TensorProto.BOOL, [1])], + outputs=[onnx.helper.make_tensor_value_info('res_y', onnx.TensorProto.FLOAT, [13]), + onnx.helper.make_tensor_value_info('res_scan', onnx.TensorProto.FLOAT, [5, 1])] +) +node_model = onnx.helper.make_model(node_graph) +onnx.save(node_model, "outer.onnx") \ No newline at end of file diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index 07e6dc465268..4fd774206d0c 100644 --- 
a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -3660,77 +3660,200 @@ def verify_roi_align( verify_roi_align((1, 4, 16, 16), 32, 7, 7, sampling_ratio=2, spatial_scale=1.0) +def verify_loop(): + y_in = helper.make_tensor_value_info('y_in', TensorProto.FLOAT, [1]) + y_out = helper.make_tensor_value_info('y_out', TensorProto.FLOAT, [1]) + scan_out = helper.make_tensor_value_info('scan_out', TensorProto.FLOAT, [1]) + cond_in = helper.make_tensor_value_info('cond_in', TensorProto.BOOL, []) + cond_out = helper.make_tensor_value_info('cond_out', TensorProto.BOOL, []) + iter_count = helper.make_tensor_value_info('iter_count', TensorProto.INT64, []) + + x = np.array([1, 2, 3, 4, 5]).astype(np.float32) + y = np.array([-2]).astype(np.float32) + + x_const_node = helper.make_node( + 'Constant', + inputs=[], + outputs=['x'], + value=helper.make_tensor( + name='const_tensor_x', + data_type=TensorProto.FLOAT, + dims=x.shape, + vals=x.flatten().astype(float), + ) + ) + + one_const_node = helper.make_node( + 'Constant', + inputs=[], + outputs=['one'], + value=helper.make_tensor( + name='const_tensor_one', + data_type=TensorProto.INT64, + dims=(), + vals=[1] + ) + ) + + i_add_node = helper.make_node( + 'Add', + inputs=['iter_count', 'one'], + outputs=['end'] + ) + + start_unsqueeze_node = helper.make_node( + 'Unsqueeze', + inputs=['iter_count'], + outputs=['slice_start'], + axes=[0] + ) + + end_unsqueeze_node = helper.make_node( + 'Unsqueeze', + inputs=['end'], + outputs=['slice_end'], + axes=[0] + ) + + slice_node = helper.make_node( + 'Slice', + inputs=['x', 'slice_start', 'slice_end'], + outputs=['slice_out'] + ) + + y_add_node = helper.make_node( + 'Add', + inputs=['y_in', 'slice_out'], + outputs=['y_out'] + ) + + identity_node = helper.make_node( + 'Identity', + inputs=['cond_in'], + outputs=['cond_out'] + ) + + scan_identity_node = helper.make_node( + 'Identity', + inputs=['y_out'], + outputs=['scan_out'] + ) + + loop_body = helper.make_graph( + [identity_node, x_const_node, one_const_node, i_add_node, + start_unsqueeze_node, end_unsqueeze_node, slice_node, y_add_node, + scan_identity_node], + 'loop_body', + [iter_count, cond_in, y_in], + [cond_out, y_out, scan_out] + ) + + loop_node = helper.make_node( + 'Loop', + inputs=['trip_count', 'cond', 'y'], + outputs=['res_y', 'res_scan'], + body=loop_body + ) + + trip_count = np.array(5).astype(np.int64) + res_y = np.array([13]).astype(np.float32) + cond = np.array(1).astype(np.bool) + loop_graph = onnx.helper.make_graph( + [loop_node], + "loop_outer", + inputs=[onnx.helper.make_tensor_value_info('trip_count', onnx.TensorProto.INT64, []), + onnx.helper.make_tensor_value_info('cond', onnx.TensorProto.BOOL, []), + onnx.helper.make_tensor_value_info('y', onnx.TensorProto.FLOAT, [1])], + outputs=[onnx.helper.make_tensor_value_info('res_y', onnx.TensorProto.FLOAT, [1]), + onnx.helper.make_tensor_value_info('res_scan', onnx.TensorProto.FLOAT, [5, 1])] + ) + loop_model = onnx.helper.make_model(loop_graph) + + trip_count = np.array(5).astype(np.int64) + cond = np.array(1).astype(np.bool) + input_vals = [trip_count, cond, y] + onnx_out = get_onnxruntime_output(loop_model, input_vals) + + for target, ctx in [('llvm', tvm.cpu())]: + tvm_out = get_tvm_output(loop_model, input_vals, target, ctx, output_dtype='float32') + tvm.testing.assert_allclose( + onnx_out[0], tvm_out, rtol=1e-05, atol=1e-05) + + if __name__ == "__main__": - test_flatten() - test_reshape() - test_shape() - test_expand() - test_power() - test_squeeze() 
- test_unsqueeze() - test_slice() - test_floor() - test_ceil() - test_round() - test_isinf() - test_isnan() - test_clip() - test_clip_min_max_as_inputs() - test_onehot() - test_matmul() - test_gather() - test_gatherelements() - test_gather_nd() - test_scatter() - test_lrn() - test_instance_norm() - test_upsample() - test_forward_min() - test_forward_max() - test_forward_mean() - test_forward_hardsigmoid() - test_forward_arg_min_max() - test_softmax() - test_constantofshape() - test_all_reduce_funcs() - test_pad() - test_split() - test_binary_ops() - test_single_ops() - test_leaky_relu() - test_elu() - test_selu() - test_prelu() - test_ThresholdedRelu() - test_ScaledTanh() - test_ParametricSoftplus() - test_Scale() - test_LogSoftmax() - test_resnet() - test_inception() - test_densenet() - test_sign() - test_not() - test_and() - test_tile() - test_erf() - test_where() - test_or() - test_depth_to_space() - test_space_to_depth() - test_batch_norm() - test_batch_norm_dynamic_subgraph() - test_conv() - test_convtranspose() - test_unsqueeze_constant() - test_pooling() - test_lppool() - test_lstm() - test_gru() - test_resize() - test_nonzero() - test_topk() - test_mod() - test_xor() - test_max_roi_pool() - test_roi_align() + verify_loop() + #test_flatten() + #test_reshape() + #test_shape() + #test_expand() + #test_power() + #test_squeeze() + #test_unsqueeze() + #test_slice() + #test_floor() + #test_ceil() + #test_round() + #test_isinf() + #test_isnan() + #test_clip() + #test_clip_min_max_as_inputs() + #test_onehot() + #test_matmul() + #test_batch_matmul() + #test_gather() + #test_gatherelements() + #test_gather_nd() + #test_scatter() + #test_lrn() + #test_instance_norm() + #test_upsample() + #test_forward_min() + #test_forward_max() + #test_forward_mean() + #test_forward_hardsigmoid() + #test_forward_arg_min_max() + #test_softmax() + #test_constantofshape() + #test_all_reduce_funcs() + #test_pad() + #test_split() + #test_binary_ops() + #test_single_ops() + #test_leaky_relu() + #test_elu() + #test_selu() + #test_prelu() + #test_ThresholdedRelu() + #test_ScaledTanh() + #test_ParametricSoftplus() + #test_Scale() + #test_LogSoftmax() + #test_resnet() + #test_inception() + #test_densenet() + #test_sign() + #test_not() + #test_and() + #test_tile() + #test_erf() + #test_where() + #test_or() + #test_depth_to_space() + #test_space_to_depth() + #test_batch_norm() + #test_batch_norm_dynamic_subgraph() + #test_conv() + #test_convtranspose() + #test_unsqueeze_constant() + #test_pooling() + #test_lppool() + #test_lstm() + #test_gru() + #test_resize() + #test_nonzero() + #test_topk() + #test_mod() + #test_xor() + #test_max_roi_pool() + #test_roi_align() + #test_range() From dc81295022e821bcc7794da8e1cafa3dd4211578 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Thu, 8 Oct 2020 19:49:32 +0000 Subject: [PATCH 02/17] Very close to working. --- python/tvm/relay/frontend/onnx.py | 144 ++++++++++++++++-------------- 1 file changed, 75 insertions(+), 69 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 1460018ade61..63aeffc0a6aa 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -22,6 +22,7 @@ import tvm import onnx from tvm.ir import IRModule +from tvm.topi.util import get_const_tuple from ... import nd as _nd from .. 
import analysis @@ -2025,9 +2026,10 @@ class Loop(OnnxOpConverter): def _impl_v11(cls, inputs, attr, params): max_loop_count = inputs[0] cond = inputs[1] - loop_vars = inputs[2:] - num_vars = len(loop_vars) + loop_deps = inputs[2:] + num_deps = len(loop_deps) body = attr['body'] + iter_dtype = infer_type(max_loop_count).checked_type.dtype # Determine what condition mode we're in. assert cond is not None or max_loop_count is not None @@ -2036,8 +2038,8 @@ def _impl_v11(cls, inputs, attr, params): is_condition_for_loop = cond is not None and max_loop_count is not None # Loop inputs will be packed as - # [iter_count, condition, loop_vars, scan_outputs] - def cond_fn(loop_inputs): + # [iter_count, condition, loop_deps, scan_outputs] + def cond_fn(*loop_inputs): i = loop_inputs[0] w = loop_inputs[1] @@ -2057,81 +2059,83 @@ def cond_fn(loop_inputs): g = GraphProto(graph_scope._shape, graph_scope._dtype) # Load nodes from outer graph into inner graph. g._nodes = graph_scope._nodes.copy() - # Create a list of the names of values that will update - # each iteration. - loop_iter_vars = [body.input[0].name, body.input[1].name] + [v.name_hint for v in loop_vars] - # Add initial loop and condition values to graph nodes. - g._nodes[body.input[0].name] = max_loop_count - g._nodes[body.input[1].name] = cond - # Add initial loop_var values to our node tracking. - for i, v in enumerate(loop_vars): - g._nodes[body.input[i + 2].name] = v + + # Create a list of variables for each value updated in the loop. + def get_var(name, val): + if val: + checked_type = infer_type(val) + if hasattr(checked_type, "type_annotation"): + checked_type = checked_type.type_annotation + if hasattr(checked_type, "shape"): + shape = get_const_tuple(checked_type.shape) + actual_shape = [] + for dim in shape: + if isinstance(dim, int) and dim == 0: + actual_shape.append(Any()) + else: + actual_shape.append(dim) + return _expr.var(name, shape=actual_shape, dtype=checked_type.dtype) + else: + return _expr.var(name, type_annotation=checked_type) + return _expr.var(name) + loop_vars = [ + _expr.var(body.input[0].name, shape=(), dtype=iter_dtype), # iteration count + get_var(body.input[1].name, cond), # exit condition + ] + loop_vars += [get_var(body.input[i + 2].name, v) for i, v in enumerate(loop_deps)] + loop_var_names = [v.name_hint for v in loop_vars] + + # Next we need to figure out which of our outputs should be scanned. + num_scan_outputs = len(body.output) - (1 + num_deps) + scan_output_vars = [get_var(body.input[i + 2].name + "_scan", loop_deps[i]) for i in range(num_scan_outputs)] # Now we can remove loop iter variables from our inner loop's inputs. # This is kind of a hack since we have graph inputs that we don't # want to treat as actual inputs. - for i in range(len(loop_iter_vars)): + while len(body.input) != 0: body.input.pop(0) - body_mod, body_params = g.from_onnx(body, 11, freeze_params=True) - - # Now we have to find the inputs to our body. Unfortunately, these - # may be nodes that are not visible to this operation. - # Inputs that do not change in the loop can come from anywhere - # in the graph, and can have an arbitrary position in the body's - # argument list. We need to make a dictionary that maps the proper - # input index of these arguments to their value. - # After we compute the changing loop values, we'll insert the static - # ones into their proper index. We use an OrderedDict to make sure - # these values are inserted in the proper order. 
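The three execution modes distinguished above follow the ONNX Loop operating table. As a plain-Python sketch, the termination predicate being assembled in `cond_fn` is:

    def should_continue(i, max_count, cond):
        if cond is None:                  # trip count only: a for loop
            return i < max_count
        if max_count is None:             # condition only: a while loop
            return cond
        return cond and i < max_count     # both: while cond holds, at most max_count times

Note the combined mode needs both conditions to hold; the `logical_or` still present at this point is tightened to `logical_and` by a later commit in this series.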
- body_vars = analysis.all_vars(body_mod['main']) - outer_scope_inputs = OrderedDict() - for i, var in enumerate(body_vars): - if var.name_hint in graph_scope._nodes.keys(): - outer_scope_inputs[i] = graph_scope._nodes[var.name_hint] - - # Body inputs a/re the combination of outer_scope_inputs and loop_vars. - # The body outputs are always in the form [condition, loop_vars, scan_outputs]. - # We'll use the length of the initial loop_vars to break apart these outputs - # and prepare them for the next iteration. - # To keep track of the iteration count, we'll use a new variable that will - # be kept at the first index of the loop inputs and outputs. - # loop input / outputs will be packed as [iter_count, cond, loop_vars, scan_outputs] - # New strategy: populate g._nodes with the values needed # by the next iteration. Then run node conversion on the updated # inputs to build a progressively larger expression as needed. - iter_dtype = infer_type(max_loop_count).checked_type.dtype - iter_var = _expr.var('iter_var', shape=(), dtype=iter_dtype) def body_fn(*loop_inputs): # Unpack inputs - i = loop_inputs[0] - current_vars = list(loop_inputs[2:(2 + num_vars)]) - scan_outputs = loop_inputs[-1] - - # Prepare body inputs by adding global inputs. - for index in outer_scope_inputs.keys(): - loop_vars.insert(index, outer_scope_inputs[index]) - - # Run the body graph for one iteration. - body_outputs = body_mod['main'](*loop_vars) + loop_count = loop_inputs[0] + cond = loop_inputs[1] + current_vars = list(loop_inputs[2:(2 + num_deps)]) + scan_outputs = loop_inputs[(2 + num_deps):] + + # Prepare body inputs by adding them to node dictionary. + new_inputs = [loop_count, cond] + current_vars + for i, inp in enumerate(new_inputs): + g._nodes[loop_var_names[i]] = inp + + # Get the output of the current loop using the updated inputs. + loop_outputs = g.from_onnx(body, 11, get_output_expr=True) # Unpack the body outputs and prepare variables for next iteration. - w = body_outputs[0] - loop_vars_new = body_outputs[1:(1 + num_vars)] - scan_outputs_new = body_outputs[(1 + num_vars):] + new_cond = loop_outputs[0] + new_loop_vars = [loop_outputs[i] for i in range(1, 1 + num_deps)] + new_scan_outputs = [loop_outputs[i] for i in range(1 + num_deps, len(loop_outputs))] # Increment counter. if max_loop_count is not None: - incr = _expr.const(1, dtype='int32') - i = i + incr + incr = _expr.const(1, dtype=iter_dtype) + loop_count = loop_count + incr - # Pack loop outputs - return [i, loop_inputs[1]] + loop_vars_new + [scan_outputs_new] + # Add new scan outputs to tracking + combined_scan_outputs = [] + for i, scan in enumerate(scan_outputs): + new_scan = _op.expand_dims(new_scan_outputs[i], axis=0) + combined_scan = _op.concatenate([scan, new_scan], axis=0) + combined_scan_outputs.append(combined_scan) - # Prepare first input to loop. - loop_init = [iter_var, cond] + loop_vars + [[]] - out_graph = _loops.while_loop(cond, loop_init, body_fn) - return out_graph + # Pack loop outputs for next iteration + # [iter_count, cond, loop_deps, loop_scans] + return [loop_count, new_cond] + new_loop_vars + combined_scan_outputs + + loop = _loops.while_loop(cond_fn, loop_vars + scan_output_vars, body_fn) + # Now need to run initial values through the graph. + return loop # compatible operators that do NOT require any conversion. 
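The scan-output bookkeeping in `body_fn` (expand_dims on the new row, then concatenate along axis 0 with the running buffer) behaves like this NumPy sketch:

    import numpy as np

    scan = np.empty((0, 1), dtype="float32")  # running buffer, extent 0 on axis 0
    for step in range(5):
        row = np.array([float(step)], dtype="float32")
        scan = np.concatenate([scan, row[np.newaxis, :]], axis=0)  # expand_dims + concat
    print(scan.shape)  # (5, 1)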
@@ -2334,7 +2338,7 @@ def freeze(self, func, params): fn = _function.Function(analysis.free_vars(body), body) return fn, {} - def from_onnx(self, graph, opset, freeze_params=False): + def from_onnx(self, graph, opset, freeze_params=False, get_output_expr=False): """Construct Relay expression from ONNX graph. Onnx graph is a python protobuf object. @@ -2358,6 +2362,11 @@ def from_onnx(self, graph, opset, freeze_params=False): at compile time and helps in making models static if certain inputs represent attributes relay would traditionally consider compile-time constants. + get_output_expr: bool + If set to true, this conversion will return each output expression rather + than a packaged module. This can be useful when converting subgraphs to + relay. + Returns ------- mod : tvm.IRModule @@ -2459,6 +2468,9 @@ def from_onnx(self, graph, opset, freeze_params=False): # now return the outputs outputs = [self._nodes[self._parse_value_proto(i)] for i in graph.output] outputs = outputs[0] if len(outputs) == 1 else _expr.Tuple(outputs) + # If requested, directly return the converted expressions. + if get_output_expr: + return outputs ## Maintain the order of inputs and parameters from the ONNX graph, but only include ## those parameters that are needed to execute the relay graph free_vars = analysis.free_vars(outputs) @@ -2467,12 +2479,6 @@ def from_onnx(self, graph, opset, freeze_params=False): for i_name in self._params: if i_name in free_vars and i_name not in self._inputs: self._inputs[i_name] = self._nodes[i_name] - # For subgraphs, we may have free variables that come from an outer - # scope and should be converted to inputs. - if GraphProto.current is not self: - for i_name in free_vars: - if i_name not in self._inputs: - self._inputs[i_name] = self._nodes[i_name] # Create a function from our output expression and all input variables. func = _function.Function([v for k, v in self._inputs.items()], outputs) if freeze_params: From b1aa9139a9963407a7cb5611faa7c0c7e5c85f3c Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Fri, 9 Oct 2020 16:59:00 +0000 Subject: [PATCH 03/17] Last piece is fixing scan initialization. --- python/tvm/relay/frontend/onnx.py | 68 ++++++++++++++++--------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 63aeffc0a6aa..ddb80590d96f 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -2038,15 +2038,16 @@ def _impl_v11(cls, inputs, attr, params): is_condition_for_loop = cond is not None and max_loop_count is not None # Loop inputs will be packed as - # [iter_count, condition, loop_deps, scan_outputs] + # [iter_count, max_count, condition, loop_deps, scan_outputs] def cond_fn(*loop_inputs): i = loop_inputs[0] - w = loop_inputs[1] + max_count = loop_inputs[1] + w = loop_inputs[2] if cond is not None: out_while = _op.equal(w, _expr.const(True, 'bool')) if max_loop_count is not None: - out_loop = _op.less(i, max_loop_count) + out_loop = _op.less(i, max_count) if is_condition_for_loop: return _op.logical_or(out_while, out_loop) @@ -2054,40 +2055,36 @@ def cond_fn(*loop_inputs): return out_loop return out_while - # Create new graphproto converter for our body. + # Get the current graph proto graph_scope = GraphProto.current - g = GraphProto(graph_scope._shape, graph_scope._dtype) - # Load nodes from outer graph into inner graph. - g._nodes = graph_scope._nodes.copy() # Create a list of variables for each value updated in the loop. 
- def get_var(name, val): - if val: - checked_type = infer_type(val) - if hasattr(checked_type, "type_annotation"): - checked_type = checked_type.type_annotation - if hasattr(checked_type, "shape"): - shape = get_const_tuple(checked_type.shape) - actual_shape = [] - for dim in shape: - if isinstance(dim, int) and dim == 0: - actual_shape.append(Any()) - else: - actual_shape.append(dim) - return _expr.var(name, shape=actual_shape, dtype=checked_type.dtype) + def get_var(name, val, scan=False): + checked_type = infer_type(val) + if hasattr(checked_type, "type_annotation"): + checked_type = checked_type.type_annotation + shape = get_const_tuple(checked_type.shape) + actual_shape = [] + for dim in shape: + if isinstance(dim, int) and dim == 0: + actual_shape.append(_ty.Any()) else: - return _expr.var(name, type_annotation=checked_type) - return _expr.var(name) + actual_shape.append(dim) + if scan: + return _expr.var(name, shape=[_ty.Any()] + actual_shape, dtype=checked_type.dtype) + else: + return _expr.var(name, shape=actual_shape, dtype=checked_type.dtype) loop_vars = [ _expr.var(body.input[0].name, shape=(), dtype=iter_dtype), # iteration count + _expr.var("max_count", shape=(), dtype=iter_dtype), # iteration count get_var(body.input[1].name, cond), # exit condition ] loop_vars += [get_var(body.input[i + 2].name, v) for i, v in enumerate(loop_deps)] loop_var_names = [v.name_hint for v in loop_vars] - # Next we need to figure out which of our outputs should be scanned. num_scan_outputs = len(body.output) - (1 + num_deps) - scan_output_vars = [get_var(body.input[i + 2].name + "_scan", loop_deps[i]) for i in range(num_scan_outputs)] + scan_output_vars = [get_var(body.input[i + 2].name + "_scan", loop_deps[i], scan=True) for i in range(num_scan_outputs)] + #scan_output_vars = [get_var(body.input[i + 2].name + "_scan", loop_deps[i]) for i in range(num_scan_outputs)] # Now we can remove loop iter variables from our inner loop's inputs. # This is kind of a hack since we have graph inputs that we don't @@ -2101,17 +2098,18 @@ def get_var(name, val): def body_fn(*loop_inputs): # Unpack inputs loop_count = loop_inputs[0] - cond = loop_inputs[1] - current_vars = list(loop_inputs[2:(2 + num_deps)]) - scan_outputs = loop_inputs[(2 + num_deps):] + max_count = loop_inputs[1] + cond = loop_inputs[2] + current_vars = list(loop_inputs[3:(3 + num_deps)]) + scan_outputs = loop_inputs[(3 + num_deps):] # Prepare body inputs by adding them to node dictionary. - new_inputs = [loop_count, cond] + current_vars + new_inputs = [loop_count, max_count, cond] + current_vars for i, inp in enumerate(new_inputs): - g._nodes[loop_var_names[i]] = inp + graph_scope._nodes[loop_var_names[i]] = inp # Get the output of the current loop using the updated inputs. - loop_outputs = g.from_onnx(body, 11, get_output_expr=True) + loop_outputs = graph_scope.from_onnx(body, 11, get_output_expr=True) # Unpack the body outputs and prepare variables for next iteration. new_cond = loop_outputs[0] new_loop_vars = [loop_outputs[i] for i in range(1, 1 + num_deps)] @@ -2131,11 +2129,15 @@ def body_fn(*loop_inputs): # Pack loop outputs for next iteration # [iter_count, cond, loop_deps, loop_scans] - return [loop_count, new_cond] + new_loop_vars + combined_scan_outputs + return [loop_count, max_count, new_cond] + new_loop_vars + combined_scan_outputs loop = _loops.while_loop(cond_fn, loop_vars + scan_output_vars, body_fn) # Now need to run initial values through the graph. - return loop + # make empty constant with zero rank. 
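`get_var` maps unknown (zero-valued) dimensions to `Any()`, Relay's dynamic dimension, and scan variables additionally get an `Any()` leading axis because they grow every iteration. A tiny sketch of what that produces (assumes `tvm.relay`):

    from tvm import relay

    # A scan output's loop variable is typed with a dynamic first dimension.
    scan_var = relay.var("scan", shape=[relay.Any(), 1], dtype="float32")
    print(scan_var.type_annotation)  # Tensor[(?, 1), float32]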
+ init_count = _expr.const(0, dtype=iter_dtype) + loop_vals = loop(init_count, max_loop_count, cond, *loop_deps, _op.reshape(_expr.const([]), [0, 1])) + outputs = _expr.TupleWrapper(_expr.Tuple([_expr.TupleGetItem(loop_vals, i + 3) for i in range(num_deps + num_scan_outputs)]), num_deps + num_scan_outputs) + return outputs # compatible operators that do NOT require any conversion. From 1b60cf0a3975b5529262bd63e2fadcb642680af9 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Fri, 9 Oct 2020 17:17:04 +0000 Subject: [PATCH 04/17] snapshotting for debug. --- python/tvm/relay/frontend/onnx.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index ddb80590d96f..7d47aebc2c7a 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -2084,7 +2084,6 @@ def get_var(name, val, scan=False): num_scan_outputs = len(body.output) - (1 + num_deps) scan_output_vars = [get_var(body.input[i + 2].name + "_scan", loop_deps[i], scan=True) for i in range(num_scan_outputs)] - #scan_output_vars = [get_var(body.input[i + 2].name + "_scan", loop_deps[i]) for i in range(num_scan_outputs)] # Now we can remove loop iter variables from our inner loop's inputs. # This is kind of a hack since we have graph inputs that we don't From 46ff32dccabe90ea2c908aaf6c148caa087903ae Mon Sep 17 00:00:00 2001 From: Jared Roesch Date: Fri, 9 Oct 2020 15:38:26 -0700 Subject: [PATCH 05/17] Fix Josh's issue --- python/tvm/relay/frontend/onnx.py | 21 ++++++++++++--------- tests/python/frontend/onnx/test_forward.py | 2 +- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 7d47aebc2c7a..9c00263fe498 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -111,9 +111,9 @@ def get_info(info_proto): for dim in info_proto.type.tensor_type.shape.dim: value = dim.dim_value if value is None: - value = _ty.Any + value = _ty.Any shape.append(value) - + name = info_proto.name dtype = get_type(info_proto.type.tensor_type.elem_type) return name, {name: shape}, {name: dtype} @@ -2046,11 +2046,11 @@ def cond_fn(*loop_inputs): if cond is not None: out_while = _op.equal(w, _expr.const(True, 'bool')) - if max_loop_count is not None: + if max_loop_count is not None: out_loop = _op.less(i, max_count) if is_condition_for_loop: - return _op.logical_or(out_while, out_loop) + return _op.logical_or(out_while, out_loop) elif is_for_loop: return out_loop return out_while @@ -2081,7 +2081,7 @@ def get_var(name, val, scan=False): ] loop_vars += [get_var(body.input[i + 2].name, v) for i, v in enumerate(loop_deps)] loop_var_names = [v.name_hint for v in loop_vars] - + num_scan_outputs = len(body.output) - (1 + num_deps) scan_output_vars = [get_var(body.input[i + 2].name + "_scan", loop_deps[i], scan=True) for i in range(num_scan_outputs)] @@ -2134,11 +2134,14 @@ def body_fn(*loop_inputs): # Now need to run initial values through the graph. # make empty constant with zero rank. 
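The `_op.reshape(_expr.const([]), [0, 1])` initializer used here is the standard empty-tensor trick: give the scan buffer extent 0 on the concatenation axis so the first iteration's concatenate is well defined. In NumPy terms:

    import numpy as np

    init = np.array([], dtype="float32").reshape(0, 1)  # shape (0, 1)
    first = np.array([[13.0]], dtype="float32")
    print(np.concatenate([init, first], axis=0).shape)  # (1, 1)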
init_count = _expr.const(0, dtype=iter_dtype) - loop_vals = loop(init_count, max_loop_count, cond, *loop_deps, _op.reshape(_expr.const([]), [0, 1])) - outputs = _expr.TupleWrapper(_expr.Tuple([_expr.TupleGetItem(loop_vals, i + 3) for i in range(num_deps + num_scan_outputs)]), num_deps + num_scan_outputs) + let_var = _expr.var("var_cast_shape", shape=(tvm.tir.Any(), 1)) + loop_vals = loop(init_count, max_loop_count, cond, *loop_deps, let_var) + tupled_result = _expr.Tuple([_expr.TupleGetItem(loop_vals, i + 3) for i in range(num_deps + num_scan_outputs)]) + empty_tensor = _op.reshape(_expr.const([]), [0, 1]) + tupled_result = _expr.Let(let_var, empty_tensor, tupled_result) + outputs = _expr.TupleWrapper(tupled_result, num_deps + num_scan_outputs) return outputs - # compatible operators that do NOT require any conversion. _identity_list = [] @@ -2365,7 +2368,7 @@ def from_onnx(self, graph, opset, freeze_params=False, get_output_expr=False): get_output_expr: bool If set to true, this conversion will return each output expression rather - than a packaged module. This can be useful when converting subgraphs to + than a packaged module. This can be useful when converting subgraphs to relay. Returns diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index 4fd774206d0c..396afbdd9f98 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -3775,7 +3775,7 @@ def verify_loop(): onnx_out = get_onnxruntime_output(loop_model, input_vals) for target, ctx in [('llvm', tvm.cpu())]: - tvm_out = get_tvm_output(loop_model, input_vals, target, ctx, output_dtype='float32') + tvm_out = get_tvm_output_with_vm(loop_model, input_vals, target, ctx, freeze_params=True) tvm.testing.assert_allclose( onnx_out[0], tvm_out, rtol=1e-05, atol=1e-05) From ce31ccbc86640327ca23a328d0ace766b9f04cf5 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Mon, 12 Oct 2020 15:14:47 +0000 Subject: [PATCH 06/17] Use subgraph proto class. --- python/tvm/relay/frontend/onnx.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 9c00263fe498..2bdcc8856b17 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -2055,8 +2055,11 @@ def cond_fn(*loop_inputs): return out_loop return out_while - # Get the current graph proto + # Get the current graph proto and create a clone for the subgraph graph_scope = GraphProto.current + subgraph_scope = GraphProto(graph_scope._shape, graph_scope._dtype) + # Load nodes from outer graph into inner graph. + subgraph_scope._nodes = graph_scope._nodes.copy() # Create a list of variables for each value updated in the loop. def get_var(name, val, scan=False): @@ -2105,10 +2108,11 @@ def body_fn(*loop_inputs): # Prepare body inputs by adding them to node dictionary. new_inputs = [loop_count, max_count, cond] + current_vars for i, inp in enumerate(new_inputs): - graph_scope._nodes[loop_var_names[i]] = inp + subgraph_scope._nodes[loop_var_names[i]] = inp # Get the output of the current loop using the updated inputs. - loop_outputs = graph_scope.from_onnx(body, 11, get_output_expr=True) + with subgraph_scope: + loop_outputs = subgraph_scope.from_onnx(body, 11, get_output_expr=True) # Unpack the body outputs and prepare variables for next iteration. 
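The `Let` dance above is a typing workaround rather than a semantic change: the statically shaped (0, 1) empty tensor is routed through a fresh variable so that type inference can unify it with the loop's `Any`-shaped scan argument. A rough standalone sketch of the idea (illustrative; exact unification behavior depends on the Relay version):

    import numpy as np
    from tvm import relay

    scan_arg = relay.var("scan_arg", shape=(relay.Any(), 1), dtype="float32")
    empty = relay.reshape(relay.const(np.array([], dtype="float32")), [0, 1])
    # Bind the concrete (0, 1) tensor to a name the type inferencer can
    # generalize, instead of passing the fixed-shape literal directly.
    bound = relay.Let(scan_arg, empty, scan_arg)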
new_cond = loop_outputs[0] new_loop_vars = [loop_outputs[i] for i in range(1, 1 + num_deps)] @@ -2134,12 +2138,14 @@ def body_fn(*loop_inputs): # Now need to run initial values through the graph. # make empty constant with zero rank. init_count = _expr.const(0, dtype=iter_dtype) - let_var = _expr.var("var_cast_shape", shape=(tvm.tir.Any(), 1)) - loop_vals = loop(init_count, max_loop_count, cond, *loop_deps, let_var) - tupled_result = _expr.Tuple([_expr.TupleGetItem(loop_vals, i + 3) for i in range(num_deps + num_scan_outputs)]) - empty_tensor = _op.reshape(_expr.const([]), [0, 1]) - tupled_result = _expr.Let(let_var, empty_tensor, tupled_result) - outputs = _expr.TupleWrapper(tupled_result, num_deps + num_scan_outputs) + loop_vals = loop(init_count, max_loop_count, cond, *loop_deps, _op.reshape(_expr.const(0, dtype='float32'), [1, 1])) + outputs = _expr.TupleWrapper(_expr.Tuple([_expr.TupleGetItem(loop_vals, i + 3) for i in range(num_deps + num_scan_outputs)]), num_deps + num_scan_outputs) + + # Update outer graph with constants found in the subgraph. + free_vars = analysis.free_vars(loop) + graph_scope._params.update(subgraph_scope._params) + for var in free_vars: + graph_scope._nodes.update({var.name_hint: var}) return outputs # compatible operators that do NOT require any conversion. From 344fc96c991183834a3801a43f1910c968eda112 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Thu, 15 Oct 2020 16:31:29 +0000 Subject: [PATCH 07/17] Loop with scan. --- python/tvm/relay/frontend/onnx.py | 2 +- python/tvm/relay/op/tensor.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 2bdcc8856b17..25ae77f48e69 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -2138,7 +2138,7 @@ def body_fn(*loop_inputs): # Now need to run initial values through the graph. # make empty constant with zero rank. init_count = _expr.const(0, dtype=iter_dtype) - loop_vals = loop(init_count, max_loop_count, cond, *loop_deps, _op.reshape(_expr.const(0, dtype='float32'), [1, 1])) + loop_vals = loop(init_count, max_loop_count, cond, *loop_deps, _op.reshape(_expr.const([], dtype='float32'), [0, 1])) outputs = _expr.TupleWrapper(_expr.Tuple([_expr.TupleGetItem(loop_vals, i + 3) for i in range(num_deps + num_scan_outputs)]), num_deps + num_scan_outputs) # Update outer graph with constants found in the subgraph. diff --git a/python/tvm/relay/op/tensor.py b/python/tvm/relay/op/tensor.py index 832372a6ed0d..4be2b734cea7 100644 --- a/python/tvm/relay/op/tensor.py +++ b/python/tvm/relay/op/tensor.py @@ -18,10 +18,12 @@ # pylint: disable=redefined-builtin from tvm.runtime import ndarray as _nd from tvm.runtime import TVMContext as _TVMContext +from tvm.te.hybrid import script from . import _make from .dyn import _make as _dyn_make from ..expr import Tuple, Expr +from . import op as reg # We create a wrapper function for each operator in the @@ -1138,6 +1140,19 @@ def copy(data): return _make.copy(data) +@script +def _copy_shape_func(data_shape): + return data_shape + + +@reg.register_shape_func("copy", False) +def copy_shape_func(attrs, inputs, _): + """ + Shape function for copy op. + """ + return [_copy_shape_func(inputs[0])] + + def device_copy(data, src_dev, dst_dev): """Copy data from the source device to the destination device. 
This operator helps data transferring between difference contexts for From 5f5e2b1264fded074bba0fb4d01fa4b80b581b30 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Fri, 16 Oct 2020 17:25:57 +0000 Subject: [PATCH 08/17] Simple loop test now working. --- python/tvm/relay/frontend/onnx.py | 43 +++++++------ tests/python/frontend/onnx/test_forward.py | 70 +++------------------- 2 files changed, 33 insertions(+), 80 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 25ae77f48e69..bc7e4173d56c 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -18,6 +18,7 @@ # pylint: disable=import-outside-toplevel """ONNX: Open Neural Network Exchange frontend for Relay.""" from collections import OrderedDict +import warnings import numpy as np import tvm import onnx @@ -2050,7 +2051,7 @@ def cond_fn(*loop_inputs): out_loop = _op.less(i, max_count) if is_condition_for_loop: - return _op.logical_or(out_while, out_loop) + return _op.logical_and(out_while, out_loop) elif is_for_loop: return out_loop return out_while @@ -2086,7 +2087,8 @@ def get_var(name, val, scan=False): loop_var_names = [v.name_hint for v in loop_vars] num_scan_outputs = len(body.output) - (1 + num_deps) - scan_output_vars = [get_var(body.input[i + 2].name + "_scan", loop_deps[i], scan=True) for i in range(num_scan_outputs)] + if num_scan_outputs != 0: + warnings.warn("Loop conversion does not currently support scan outputs. They will be ignored.") # Now we can remove loop iter variables from our inner loop's inputs. # This is kind of a hack since we have graph inputs that we don't @@ -2103,7 +2105,6 @@ def body_fn(*loop_inputs): max_count = loop_inputs[1] cond = loop_inputs[2] current_vars = list(loop_inputs[3:(3 + num_deps)]) - scan_outputs = loop_inputs[(3 + num_deps):] # Prepare body inputs by adding them to node dictionary. new_inputs = [loop_count, max_count, cond] + current_vars @@ -2116,30 +2117,35 @@ def body_fn(*loop_inputs): # Unpack the body outputs and prepare variables for next iteration. new_cond = loop_outputs[0] new_loop_vars = [loop_outputs[i] for i in range(1, 1 + num_deps)] - new_scan_outputs = [loop_outputs[i] for i in range(1 + num_deps, len(loop_outputs))] # Increment counter. if max_loop_count is not None: incr = _expr.const(1, dtype=iter_dtype) loop_count = loop_count + incr - # Add new scan outputs to tracking - combined_scan_outputs = [] - for i, scan in enumerate(scan_outputs): - new_scan = _op.expand_dims(new_scan_outputs[i], axis=0) - combined_scan = _op.concatenate([scan, new_scan], axis=0) - combined_scan_outputs.append(combined_scan) - # Pack loop outputs for next iteration # [iter_count, cond, loop_deps, loop_scans] - return [loop_count, max_count, new_cond] + new_loop_vars + combined_scan_outputs + return [loop_count, max_count, new_cond] + new_loop_vars + + # Create the loop function. + loop = _loops.while_loop(cond_fn, loop_vars, body_fn) - loop = _loops.while_loop(cond_fn, loop_vars + scan_output_vars, body_fn) # Now need to run initial values through the graph. - # make empty constant with zero rank. 
init_count = _expr.const(0, dtype=iter_dtype) - loop_vals = loop(init_count, max_loop_count, cond, *loop_deps, _op.reshape(_expr.const([], dtype='float32'), [0, 1])) - outputs = _expr.TupleWrapper(_expr.Tuple([_expr.TupleGetItem(loop_vals, i + 3) for i in range(num_deps + num_scan_outputs)]), num_deps + num_scan_outputs) + loop_vals = loop(init_count, max_loop_count, cond, *loop_deps) + + # Extract final iteration outputs. + if num_deps == 1: + outputs = _expr.TupleGetItem(loop_vals, 3) + else: + outputs = [_expr.TupleGetItem(loop_vals, i + 3) for i in range(num_deps)] + + # Wrap outputs in a tuple if needed and add 0s for each scan output. + # TODO (jwfromm) Add support for scan outputs once type unification is fixed. + if num_scan_outputs != 0 or isinstance(outputs, list): + if not isinstance(outputs, list): + outputs = [outputs] + outputs = _expr.TupleWrapper(_expr.Tuple(outputs + [_expr.const(0) for i in range(num_scan_outputs)]), num_deps + num_scan_outputs) # Update outer graph with constants found in the subgraph. free_vars = analysis.free_vars(loop) @@ -2343,7 +2349,8 @@ def __exit__(self, ptype, value, trace): def freeze(self, func, params): bind_map = {} for name in params.keys(): - bind_map[self._nodes[name]] = _expr.const(params[name]) + if name in self._nodes.keys(): + bind_map[self._nodes[name]] = _expr.const(params[name]) body = _expr.bind(func.body, bind_map) fn = _function.Function(analysis.free_vars(body), body) return fn, {} @@ -2639,8 +2646,6 @@ def from_onnx(model, shape=None, dtype="float32", opset=None, freeze_params=Fals try: onnx.checker.check_model(model) except onnx.onnx_cpp2py_export.checker.ValidationError as e: - import warnings - # the checker is a bit violent about errors, so simply print warnings here warnings.warn(str(e)) except ImportError: diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index 396afbdd9f98..6505c4bf1b08 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -3671,59 +3671,16 @@ def verify_loop(): x = np.array([1, 2, 3, 4, 5]).astype(np.float32) y = np.array([-2]).astype(np.float32) - x_const_node = helper.make_node( - 'Constant', - inputs=[], - outputs=['x'], - value=helper.make_tensor( - name='const_tensor_x', - data_type=TensorProto.FLOAT, - dims=x.shape, - vals=x.flatten().astype(float), - ) - ) - - one_const_node = helper.make_node( - 'Constant', - inputs=[], - outputs=['one'], - value=helper.make_tensor( - name='const_tensor_one', - data_type=TensorProto.INT64, - dims=(), - vals=[1] - ) - ) - - i_add_node = helper.make_node( - 'Add', - inputs=['iter_count', 'one'], - outputs=['end'] - ) - - start_unsqueeze_node = helper.make_node( - 'Unsqueeze', + iter_cast_node = helper.make_node( + 'Cast', inputs=['iter_count'], - outputs=['slice_start'], - axes=[0] - ) - - end_unsqueeze_node = helper.make_node( - 'Unsqueeze', - inputs=['end'], - outputs=['slice_end'], - axes=[0] - ) - - slice_node = helper.make_node( - 'Slice', - inputs=['x', 'slice_start', 'slice_end'], - outputs=['slice_out'] + outputs=['iter_cast'], + to=onnx.TensorProto.FLOAT ) y_add_node = helper.make_node( 'Add', - inputs=['y_in', 'slice_out'], + inputs=['y_in', 'iter_cast'], outputs=['y_out'] ) @@ -3733,25 +3690,17 @@ def verify_loop(): outputs=['cond_out'] ) - scan_identity_node = helper.make_node( - 'Identity', - inputs=['y_out'], - outputs=['scan_out'] - ) - loop_body = helper.make_graph( - [identity_node, x_const_node, one_const_node, i_add_node, - 
start_unsqueeze_node, end_unsqueeze_node, slice_node, y_add_node, - scan_identity_node], + [identity_node, iter_cast_node, y_add_node], 'loop_body', [iter_count, cond_in, y_in], - [cond_out, y_out, scan_out] + [cond_out, y_out] ) loop_node = helper.make_node( 'Loop', inputs=['trip_count', 'cond', 'y'], - outputs=['res_y', 'res_scan'], + outputs=['res_y'], body=loop_body ) @@ -3764,8 +3713,7 @@ def verify_loop(): inputs=[onnx.helper.make_tensor_value_info('trip_count', onnx.TensorProto.INT64, []), onnx.helper.make_tensor_value_info('cond', onnx.TensorProto.BOOL, []), onnx.helper.make_tensor_value_info('y', onnx.TensorProto.FLOAT, [1])], - outputs=[onnx.helper.make_tensor_value_info('res_y', onnx.TensorProto.FLOAT, [1]), - onnx.helper.make_tensor_value_info('res_scan', onnx.TensorProto.FLOAT, [5, 1])] + outputs=[onnx.helper.make_tensor_value_info('res_y', onnx.TensorProto.FLOAT, [1])] ) loop_model = onnx.helper.make_model(loop_graph) From 10962645762f10c14842c709ad33d180d5054d23 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Fri, 16 Oct 2020 19:00:12 +0000 Subject: [PATCH 09/17] Scan outputs now working. --- python/tvm/relay/frontend/onnx.py | 34 +++++++++++----------- tests/python/frontend/onnx/test_forward.py | 21 ++++++++----- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index bc7e4173d56c..523863a09451 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -2087,8 +2087,7 @@ def get_var(name, val, scan=False): loop_var_names = [v.name_hint for v in loop_vars] num_scan_outputs = len(body.output) - (1 + num_deps) - if num_scan_outputs != 0: - warnings.warn("Loop conversion does not currently support scan outputs. They will be ignored.") + scan_output_vars = [get_var(body.input[i + 2].name + "_scan", loop_deps[i], scan=True) for i in range(num_scan_outputs)] # Now we can remove loop iter variables from our inner loop's inputs. # This is kind of a hack since we have graph inputs that we don't @@ -2096,15 +2095,15 @@ def get_var(name, val, scan=False): while len(body.input) != 0: body.input.pop(0) - # New strategy: populate g._nodes with the values needed - # by the next iteration. Then run node conversion on the updated - # inputs to build a progressively larger expression as needed. + # Define the loop body, in this function we need to unpack loop inputs, + # convert the loop subgraph, and pack outputs for the next iteration. def body_fn(*loop_inputs): # Unpack inputs loop_count = loop_inputs[0] max_count = loop_inputs[1] cond = loop_inputs[2] current_vars = list(loop_inputs[3:(3 + num_deps)]) + scan_outputs = loop_inputs[(3 + num_deps):] # Prepare body inputs by adding them to node dictionary. new_inputs = [loop_count, max_count, cond] + current_vars @@ -2117,35 +2116,36 @@ def body_fn(*loop_inputs): # Unpack the body outputs and prepare variables for next iteration. new_cond = loop_outputs[0] new_loop_vars = [loop_outputs[i] for i in range(1, 1 + num_deps)] + new_scan_outputs = [loop_outputs[i] for i in range(1 + num_deps, len(loop_outputs))] # Increment counter. 
if max_loop_count is not None: incr = _expr.const(1, dtype=iter_dtype) loop_count = loop_count + incr + # Add new scan outputs to tracking + combined_scan_outputs = [] + for i, scan in enumerate(scan_outputs): + new_scan = _op.expand_dims(new_scan_outputs[i], axis=0) + combined_scan = _op.concatenate([scan, new_scan], axis=0) + combined_scan_outputs.append(combined_scan) + # Pack loop outputs for next iteration # [iter_count, cond, loop_deps, loop_scans] - return [loop_count, max_count, new_cond] + new_loop_vars + return [loop_count, max_count, new_cond] + new_loop_vars + combined_scan_outputs # Create the loop function. - loop = _loops.while_loop(cond_fn, loop_vars, body_fn) + loop = _loops.while_loop(cond_fn, loop_vars + scan_output_vars, body_fn) # Now need to run initial values through the graph. init_count = _expr.const(0, dtype=iter_dtype) - loop_vals = loop(init_count, max_loop_count, cond, *loop_deps) + loop_vals = loop(init_count, max_loop_count, cond, *(loop_deps + [_op.reshape(_expr.const([], dtype='float32'), [0, 1]) for i in range(num_scan_outputs)])) # Extract final iteration outputs. - if num_deps == 1: + if num_deps + num_scan_outputs == 1: outputs = _expr.TupleGetItem(loop_vals, 3) else: - outputs = [_expr.TupleGetItem(loop_vals, i + 3) for i in range(num_deps)] - - # Wrap outputs in a tuple if needed and add 0s for each scan output. - # TODO (jwfromm) Add support for scan outputs once type unification is fixed. - if num_scan_outputs != 0 or isinstance(outputs, list): - if not isinstance(outputs, list): - outputs = [outputs] - outputs = _expr.TupleWrapper(_expr.Tuple(outputs + [_expr.const(0) for i in range(num_scan_outputs)]), num_deps + num_scan_outputs) + outputs = _expr.TupleWrapper(_expr.Tuple([_expr.TupleGetItem(loop_vals, i + 3) for i in range(num_deps + num_scan_outputs)]), num_deps + num_scan_outputs) # Update outer graph with constants found in the subgraph. 
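    # NOTE (intent of the lines below): analysis.free_vars(loop) collects any
    # values the loop body captured from the enclosing graph; re-registering
    # them on graph_scope turns them into ordinary inputs/params of the outer
    # conversion instead of leaving dangling Relay variables.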
free_vars = analysis.free_vars(loop) diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index 6505c4bf1b08..235fd9d1e0dc 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -3668,7 +3668,6 @@ def verify_loop(): cond_out = helper.make_tensor_value_info('cond_out', TensorProto.BOOL, []) iter_count = helper.make_tensor_value_info('iter_count', TensorProto.INT64, []) - x = np.array([1, 2, 3, 4, 5]).astype(np.float32) y = np.array([-2]).astype(np.float32) iter_cast_node = helper.make_node( @@ -3690,17 +3689,23 @@ def verify_loop(): outputs=['cond_out'] ) + scan_identity_node = helper.make_node( + 'Identity', + inputs=['y_out'], + outputs=['scan_out'] + ) + loop_body = helper.make_graph( - [identity_node, iter_cast_node, y_add_node], + [identity_node, iter_cast_node, y_add_node, scan_identity_node], 'loop_body', [iter_count, cond_in, y_in], - [cond_out, y_out] + [cond_out, y_out, scan_out] ) loop_node = helper.make_node( 'Loop', inputs=['trip_count', 'cond', 'y'], - outputs=['res_y'], + outputs=['res_y', 'res_scan'], body=loop_body ) @@ -3713,7 +3718,8 @@ def verify_loop(): inputs=[onnx.helper.make_tensor_value_info('trip_count', onnx.TensorProto.INT64, []), onnx.helper.make_tensor_value_info('cond', onnx.TensorProto.BOOL, []), onnx.helper.make_tensor_value_info('y', onnx.TensorProto.FLOAT, [1])], - outputs=[onnx.helper.make_tensor_value_info('res_y', onnx.TensorProto.FLOAT, [1])] + outputs=[onnx.helper.make_tensor_value_info('res_y', onnx.TensorProto.FLOAT, [1]), + onnx.helper.make_tensor_value_info('res_scan', onnx.TensorProto.FLOAT, [5, 1])] ) loop_model = onnx.helper.make_model(loop_graph) @@ -3724,8 +3730,9 @@ def verify_loop(): for target, ctx in [('llvm', tvm.cpu())]: tvm_out = get_tvm_output_with_vm(loop_model, input_vals, target, ctx, freeze_params=True) - tvm.testing.assert_allclose( - onnx_out[0], tvm_out, rtol=1e-05, atol=1e-05) + for i in range(len(tvm_out)): + tvm.testing.assert_allclose( + onnx_out[i], tvm_out[i], rtol=1e-05, atol=1e-05) if __name__ == "__main__": From f93a9a85815dad2b25328b7bcdf3aed06bc9b04e Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Fri, 16 Oct 2020 22:21:08 +0000 Subject: [PATCH 10/17] Added second loop test. --- python/tvm/relay/frontend/onnx.py | 50 +++- tests/python/frontend/onnx/body.py | 113 -------- tests/python/frontend/onnx/test_forward.py | 302 +++++++++++++-------- 3 files changed, 226 insertions(+), 239 deletions(-) delete mode 100644 tests/python/frontend/onnx/body.py diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 523863a09451..fd909754da21 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -112,7 +112,7 @@ def get_info(info_proto): for dim in info_proto.type.tensor_type.shape.dim: value = dim.dim_value if value is None: - value = _ty.Any + value = _ty.Any shape.append(value) name = info_proto.name @@ -2021,15 +2021,15 @@ def _impl_v11(cls, inputs, attr, params): class Loop(OnnxOpConverter): - """Operator converter for Loop - """ + """Operator converter for Loop""" + @classmethod def _impl_v11(cls, inputs, attr, params): max_loop_count = inputs[0] cond = inputs[1] loop_deps = inputs[2:] num_deps = len(loop_deps) - body = attr['body'] + body = attr["body"] iter_dtype = infer_type(max_loop_count).checked_type.dtype # Determine what condition mode we're in. 
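Sanity arithmetic for the rewritten test: the body now adds `Cast(iter_count)` to `y` each step and scans every intermediate `y`. A NumPy check of the values the assertions compare against onnxruntime:

    import numpy as np

    y = np.array([-2.0], dtype="float32")
    scan = []
    for i in range(5):          # trip_count = 5, cond stays True
        y = y + np.float32(i)   # the iter_cast + y_add nodes
        scan.append(y)
    print(y, np.stack(scan).shape)  # [8.] (5, 1)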
@@ -2046,7 +2046,7 @@ def cond_fn(*loop_inputs): w = loop_inputs[2] if cond is not None: - out_while = _op.equal(w, _expr.const(True, 'bool')) + out_while = _op.equal(w, _expr.const(True, "bool")) if max_loop_count is not None: out_loop = _op.less(i, max_count) @@ -2078,16 +2078,30 @@ def get_var(name, val, scan=False): return _expr.var(name, shape=[_ty.Any()] + actual_shape, dtype=checked_type.dtype) else: return _expr.var(name, shape=actual_shape, dtype=checked_type.dtype) + loop_vars = [ - _expr.var(body.input[0].name, shape=(), dtype=iter_dtype), # iteration count - _expr.var("max_count", shape=(), dtype=iter_dtype), # iteration count - get_var(body.input[1].name, cond), # exit condition + _expr.var(body.input[0].name, shape=(), dtype=iter_dtype), # iteration count + _expr.var("max_count", shape=(), dtype=iter_dtype), # iteration count + get_var(body.input[1].name, cond), # exit condition ] loop_vars += [get_var(body.input[i + 2].name, v) for i, v in enumerate(loop_deps)] loop_var_names = [v.name_hint for v in loop_vars] num_scan_outputs = len(body.output) - (1 + num_deps) - scan_output_vars = [get_var(body.input[i + 2].name + "_scan", loop_deps[i], scan=True) for i in range(num_scan_outputs)] + # TODO (jwfromm) Test with strided slice once type unifier for this case is fixed. + if num_scan_outputs != 0 and "Slice" in [n.op_type for n in body.node]: + warnings.warn( + "Using scan outputs in a loop with strided slice currently may cause errors during compilation." + ) + scan_output_vars = [ + get_var(body.input[i + 2].name + "_scan", loop_deps[i], scan=True) + for i in range(num_scan_outputs) + ] + # Create initial empty output scan tensors. + loop_scans = [ + _op.reshape(_expr.const([]), [0] + list(loop_vars[i + 3].type_annotation.shape)) + for i in range(num_scan_outputs) + ] # Now we can remove loop iter variables from our inner loop's inputs. # This is kind of a hack since we have graph inputs that we don't @@ -2102,8 +2116,8 @@ def body_fn(*loop_inputs): loop_count = loop_inputs[0] max_count = loop_inputs[1] cond = loop_inputs[2] - current_vars = list(loop_inputs[3:(3 + num_deps)]) - scan_outputs = loop_inputs[(3 + num_deps):] + current_vars = list(loop_inputs[3 : (3 + num_deps)]) + scan_outputs = loop_inputs[(3 + num_deps) :] # Prepare body inputs by adding them to node dictionary. new_inputs = [loop_count, max_count, cond] + current_vars @@ -2139,13 +2153,21 @@ def body_fn(*loop_inputs): # Now need to run initial values through the graph. init_count = _expr.const(0, dtype=iter_dtype) - loop_vals = loop(init_count, max_loop_count, cond, *(loop_deps + [_op.reshape(_expr.const([], dtype='float32'), [0, 1]) for i in range(num_scan_outputs)])) + loop_vals = loop(init_count, max_loop_count, cond, *loop_deps, *loop_scans) # Extract final iteration outputs. if num_deps + num_scan_outputs == 1: outputs = _expr.TupleGetItem(loop_vals, 3) else: - outputs = _expr.TupleWrapper(_expr.Tuple([_expr.TupleGetItem(loop_vals, i + 3) for i in range(num_deps + num_scan_outputs)]), num_deps + num_scan_outputs) + outputs = _expr.TupleWrapper( + _expr.Tuple( + [ + _expr.TupleGetItem(loop_vals, i + 3) + for i in range(num_deps + num_scan_outputs) + ] + ), + num_deps + num_scan_outputs, + ) # Update outer graph with constants found in the subgraph. free_vars = analysis.free_vars(loop) @@ -2154,6 +2176,7 @@ def body_fn(*loop_inputs): graph_scope._nodes.update({var.name_hint: var}) return outputs + # compatible operators that do NOT require any conversion. 
_identity_list = [] @@ -2326,6 +2349,7 @@ class GraphProto: dtype : str or dict of str to str The input types to the graph """ + current = None def __init__(self, shape, dtype): diff --git a/tests/python/frontend/onnx/body.py b/tests/python/frontend/onnx/body.py deleted file mode 100644 index 2d4118919c51..000000000000 --- a/tests/python/frontend/onnx/body.py +++ /dev/null @@ -1,113 +0,0 @@ -import onnx -import numpy as np - -y_in = onnx.helper.make_tensor_value_info('y_in', onnx.TensorProto.FLOAT, [1]) -y_out = onnx.helper.make_tensor_value_info('y_out', onnx.TensorProto.FLOAT, [1]) -scan_out = onnx.helper.make_tensor_value_info('scan_out', onnx.TensorProto.FLOAT, [1]) -cond_in = onnx.helper.make_tensor_value_info('cond_in', onnx.TensorProto.BOOL, []) -cond_out = onnx.helper.make_tensor_value_info('cond_out', onnx.TensorProto.BOOL, []) -iter_count = onnx.helper.make_tensor_value_info('iter_count', onnx.TensorProto.INT64, []) - -x = np.array([1, 2, 3, 4, 5]).astype(np.float32) -y = np.array([-2]).astype(np.float32) - -x_const_node = onnx.helper.make_node( - 'Constant', - inputs=[], - outputs=['x'], - value=onnx.helper.make_tensor( - name='const_tensor_x', - data_type=onnx.TensorProto.FLOAT, - dims=x.shape, - vals=x.flatten().astype(float), - ) -) - -one_const_node = onnx.helper.make_node( - 'Constant', - inputs=[], - outputs=['one'], - value=onnx.helper.make_tensor( - name='const_tensor_one', - data_type=onnx.TensorProto.INT64, - dims=(), - vals=[1] - ) -) - -i_add_node = onnx.helper.make_node( - 'Add', - inputs=['iter_count', 'one'], - outputs=['end'] -) - -start_unsqueeze_node = onnx.helper.make_node( - 'Unsqueeze', - inputs=['iter_count'], - outputs=['slice_start'], - axes=[0] -) - -end_unsqueeze_node = onnx.helper.make_node( - 'Unsqueeze', - inputs=['end'], - outputs=['slice_end'], - axes=[0] -) - -slice_node = onnx.helper.make_node( - 'Slice', - inputs=['x', 'slice_start', 'slice_end'], - outputs=['slice_out'] -) - -y_add_node = onnx.helper.make_node( - 'Add', - inputs=['y_in', 'slice_out'], - outputs=['y_out'] -) - -identity_node = onnx.helper.make_node( - 'Identity', - inputs=['cond_in'], - outputs=['cond_out'] -) - -scan_identity_node = onnx.helper.make_node( - 'Identity', - inputs=['y_out'], - outputs=['scan_out'] -) - -loop_body = onnx.helper.make_graph( - [identity_node, x_const_node, one_const_node, i_add_node, - start_unsqueeze_node, end_unsqueeze_node, slice_node, y_add_node, - scan_identity_node], - 'loop_body', - [iter_count, cond_in, y_in], - [cond_out, y_out, scan_out] -) -body_model = onnx.helper.make_model(loop_body) -onnx.save(body_model, "body.onnx") - -node = onnx.helper.make_node( - 'Loop', - inputs=['trip_count', 'cond', 'y'], - outputs=['res_y', 'res_scan'], - body=loop_body -) - -trip_count = np.array(5).astype(np.int64) -res_y = np.array([13]).astype(np.float32) -cond = np.array(1).astype(np.bool) -node_graph = onnx.helper.make_graph( - [node], - "loop_outer", - inputs=[onnx.helper.make_tensor_value_info('trip_count', onnx.TensorProto.INT64, [5]), - onnx.helper.make_tensor_value_info('cond', onnx.TensorProto.FLOAT, [1]), - onnx.helper.make_tensor_value_info('y', onnx.TensorProto.BOOL, [1])], - outputs=[onnx.helper.make_tensor_value_info('res_y', onnx.TensorProto.FLOAT, [13]), - onnx.helper.make_tensor_value_info('res_scan', onnx.TensorProto.FLOAT, [5, 1])] -) -node_model = onnx.helper.make_model(node_graph) -onnx.save(node_model, "outer.onnx") \ No newline at end of file diff --git a/tests/python/frontend/onnx/test_forward.py 
b/tests/python/frontend/onnx/test_forward.py index 235fd9d1e0dc..1f7b2172ae57 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -3660,53 +3660,119 @@ def verify_roi_align( verify_roi_align((1, 4, 16, 16), 32, 7, 7, sampling_ratio=2, spatial_scale=1.0) -def verify_loop(): - y_in = helper.make_tensor_value_info('y_in', TensorProto.FLOAT, [1]) - y_out = helper.make_tensor_value_info('y_out', TensorProto.FLOAT, [1]) - scan_out = helper.make_tensor_value_info('scan_out', TensorProto.FLOAT, [1]) - cond_in = helper.make_tensor_value_info('cond_in', TensorProto.BOOL, []) - cond_out = helper.make_tensor_value_info('cond_out', TensorProto.BOOL, []) - iter_count = helper.make_tensor_value_info('iter_count', TensorProto.INT64, []) +def verify_cond_loop(): + y_in = helper.make_tensor_value_info("y_in", TensorProto.FLOAT, [1]) + y_out = helper.make_tensor_value_info("y_out", TensorProto.FLOAT, [1]) + scan_out = helper.make_tensor_value_info("scan_out", TensorProto.FLOAT, [1]) + cond_in = helper.make_tensor_value_info("cond_in", TensorProto.BOOL, []) + cond_out = helper.make_tensor_value_info("cond_out", TensorProto.BOOL, []) + iter_count = helper.make_tensor_value_info("iter_count", TensorProto.INT64, []) y = np.array([-2]).astype(np.float32) + five_const_node = helper.make_node( + "Constant", + inputs=[], + outputs=["five"], + value=helper.make_tensor( + name="const_tensor_five", data_type=TensorProto.FLOAT, dims=(), vals=[5] + ), + ) + iter_cast_node = helper.make_node( - 'Cast', - inputs=['iter_count'], - outputs=['iter_cast'], - to=onnx.TensorProto.FLOAT + "Cast", inputs=["iter_count"], outputs=["iter_cast"], to=onnx.TensorProto.FLOAT ) - y_add_node = helper.make_node( - 'Add', - inputs=['y_in', 'iter_cast'], - outputs=['y_out'] + y_add_node = helper.make_node("Add", inputs=["y_in", "iter_cast"], outputs=["y_out"]) + + less_node = helper.make_node("Less", inputs=["y_out", "five"], outputs=["cond_less"]) + + squeeze_node = helper.make_node("Squeeze", inputs=["cond_less"], outputs=["cond_squeeze"]) + + cond_cast_node = helper.make_node( + "Cast", inputs=["cond_squeeze"], outputs=["cond_out"], to=onnx.TensorProto.BOOL ) - identity_node = helper.make_node( - 'Identity', - inputs=['cond_in'], - outputs=['cond_out'] + scan_identity_node = helper.make_node("Identity", inputs=["y_out"], outputs=["scan_out"]) + + loop_body = helper.make_graph( + [ + five_const_node, + iter_cast_node, + y_add_node, + less_node, + squeeze_node, + cond_cast_node, + scan_identity_node, + ], + "loop_body", + [iter_count, cond_in, y_in], + [cond_out, y_out, scan_out], + ) + + loop_node = helper.make_node( + "Loop", inputs=["trip_count", "cond", "y"], outputs=["res_y", "res_scan"], body=loop_body + ) + + trip_count = np.array(5).astype(np.int64) + res_y = np.array([13]).astype(np.float32) + cond = np.array(1).astype(np.bool) + loop_graph = onnx.helper.make_graph( + [loop_node], + "loop_outer", + inputs=[ + onnx.helper.make_tensor_value_info("trip_count", onnx.TensorProto.INT64, []), + onnx.helper.make_tensor_value_info("cond", onnx.TensorProto.BOOL, []), + onnx.helper.make_tensor_value_info("y", onnx.TensorProto.FLOAT, [1]), + ], + outputs=[ + onnx.helper.make_tensor_value_info("res_y", onnx.TensorProto.FLOAT, [1]), + onnx.helper.make_tensor_value_info("res_scan", onnx.TensorProto.FLOAT, [5, 1]), + ], ) + loop_model = onnx.helper.make_model(loop_graph) + + # Set a high trip count so that condition trips first. 
+ trip_count = np.array(40).astype(np.int64) + cond = np.array(1).astype(np.bool) + input_vals = [trip_count, cond, y] + onnx_out = get_onnxruntime_output(loop_model, input_vals) + + for target, ctx in [("llvm", tvm.cpu())]: + tvm_out = get_tvm_output_with_vm(loop_model, input_vals, target, ctx, freeze_params=True) + for i in range(len(tvm_out)): + tvm.testing.assert_allclose(onnx_out[i], tvm_out[i], rtol=1e-05, atol=1e-05) + + +def verify_count_loop(): + y_in = helper.make_tensor_value_info("y_in", TensorProto.FLOAT, [1]) + y_out = helper.make_tensor_value_info("y_out", TensorProto.FLOAT, [1]) + scan_out = helper.make_tensor_value_info("scan_out", TensorProto.FLOAT, [1]) + cond_in = helper.make_tensor_value_info("cond_in", TensorProto.BOOL, []) + cond_out = helper.make_tensor_value_info("cond_out", TensorProto.BOOL, []) + iter_count = helper.make_tensor_value_info("iter_count", TensorProto.INT64, []) - scan_identity_node = helper.make_node( - 'Identity', - inputs=['y_out'], - outputs=['scan_out'] + y = np.array([-2]).astype(np.float32) + + iter_cast_node = helper.make_node( + "Cast", inputs=["iter_count"], outputs=["iter_cast"], to=onnx.TensorProto.FLOAT ) + y_add_node = helper.make_node("Add", inputs=["y_in", "iter_cast"], outputs=["y_out"]) + + identity_node = helper.make_node("Identity", inputs=["cond_in"], outputs=["cond_out"]) + + scan_identity_node = helper.make_node("Identity", inputs=["y_out"], outputs=["scan_out"]) + loop_body = helper.make_graph( [identity_node, iter_cast_node, y_add_node, scan_identity_node], - 'loop_body', + "loop_body", [iter_count, cond_in, y_in], - [cond_out, y_out, scan_out] + [cond_out, y_out, scan_out], ) loop_node = helper.make_node( - 'Loop', - inputs=['trip_count', 'cond', 'y'], - outputs=['res_y', 'res_scan'], - body=loop_body + "Loop", inputs=["trip_count", "cond", "y"], outputs=["res_y", "res_scan"], body=loop_body ) trip_count = np.array(5).astype(np.int64) @@ -3715,11 +3781,15 @@ def verify_loop(): loop_graph = onnx.helper.make_graph( [loop_node], "loop_outer", - inputs=[onnx.helper.make_tensor_value_info('trip_count', onnx.TensorProto.INT64, []), - onnx.helper.make_tensor_value_info('cond', onnx.TensorProto.BOOL, []), - onnx.helper.make_tensor_value_info('y', onnx.TensorProto.FLOAT, [1])], - outputs=[onnx.helper.make_tensor_value_info('res_y', onnx.TensorProto.FLOAT, [1]), - onnx.helper.make_tensor_value_info('res_scan', onnx.TensorProto.FLOAT, [5, 1])] + inputs=[ + onnx.helper.make_tensor_value_info("trip_count", onnx.TensorProto.INT64, []), + onnx.helper.make_tensor_value_info("cond", onnx.TensorProto.BOOL, []), + onnx.helper.make_tensor_value_info("y", onnx.TensorProto.FLOAT, [1]), + ], + outputs=[ + onnx.helper.make_tensor_value_info("res_y", onnx.TensorProto.FLOAT, [1]), + onnx.helper.make_tensor_value_info("res_scan", onnx.TensorProto.FLOAT, [5, 1]), + ], ) loop_model = onnx.helper.make_model(loop_graph) @@ -3728,87 +3798,93 @@ def verify_loop(): input_vals = [trip_count, cond, y] onnx_out = get_onnxruntime_output(loop_model, input_vals) - for target, ctx in [('llvm', tvm.cpu())]: + for target, ctx in [("llvm", tvm.cpu())]: tvm_out = get_tvm_output_with_vm(loop_model, input_vals, target, ctx, freeze_params=True) for i in range(len(tvm_out)): - tvm.testing.assert_allclose( - onnx_out[i], tvm_out[i], rtol=1e-05, atol=1e-05) + tvm.testing.assert_allclose(onnx_out[i], tvm_out[i], rtol=1e-05, atol=1e-05) + + +def test_loop(): + # Test a loop that exits once a condition is met. 
+ verify_cond_loop() + # Test a loop that exits after a fixed number of iterations. + verify_count_loop() if __name__ == "__main__": - verify_loop() - #test_flatten() - #test_reshape() - #test_shape() - #test_expand() - #test_power() - #test_squeeze() - #test_unsqueeze() - #test_slice() - #test_floor() - #test_ceil() - #test_round() - #test_isinf() - #test_isnan() - #test_clip() - #test_clip_min_max_as_inputs() - #test_onehot() - #test_matmul() - #test_batch_matmul() - #test_gather() - #test_gatherelements() - #test_gather_nd() - #test_scatter() - #test_lrn() - #test_instance_norm() - #test_upsample() - #test_forward_min() - #test_forward_max() - #test_forward_mean() - #test_forward_hardsigmoid() - #test_forward_arg_min_max() - #test_softmax() - #test_constantofshape() - #test_all_reduce_funcs() - #test_pad() - #test_split() - #test_binary_ops() - #test_single_ops() - #test_leaky_relu() - #test_elu() - #test_selu() - #test_prelu() - #test_ThresholdedRelu() - #test_ScaledTanh() - #test_ParametricSoftplus() - #test_Scale() - #test_LogSoftmax() - #test_resnet() - #test_inception() - #test_densenet() - #test_sign() - #test_not() - #test_and() - #test_tile() - #test_erf() - #test_where() - #test_or() - #test_depth_to_space() - #test_space_to_depth() - #test_batch_norm() - #test_batch_norm_dynamic_subgraph() - #test_conv() - #test_convtranspose() - #test_unsqueeze_constant() - #test_pooling() - #test_lppool() - #test_lstm() - #test_gru() - #test_resize() - #test_nonzero() - #test_topk() - #test_mod() - #test_xor() - #test_max_roi_pool() - #test_roi_align() - #test_range() + test_flatten() + test_reshape() + test_shape() + test_expand() + test_power() + test_squeeze() + test_unsqueeze() + test_slice() + test_floor() + test_ceil() + test_round() + test_isinf() + test_isnan() + test_clip() + test_clip_min_max_as_inputs() + test_onehot() + test_matmul() + test_batch_matmul() + test_gather() + test_gatherelements() + test_gather_nd() + test_scatter() + test_lrn() + test_instance_norm() + test_upsample() + test_forward_min() + test_forward_max() + test_forward_mean() + test_forward_hardsigmoid() + test_forward_arg_min_max() + test_softmax() + test_constantofshape() + test_all_reduce_funcs() + test_pad() + test_split() + test_binary_ops() + test_single_ops() + test_leaky_relu() + test_elu() + test_selu() + test_prelu() + test_ThresholdedRelu() + test_ScaledTanh() + test_ParametricSoftplus() + test_Scale() + test_LogSoftmax() + test_resnet() + test_inception() + test_densenet() + test_sign() + test_not() + test_and() + test_tile() + test_erf() + test_where() + test_or() + test_depth_to_space() + test_space_to_depth() + test_batch_norm() + test_batch_norm_dynamic_subgraph() + test_conv() + test_convtranspose() + test_unsqueeze_constant() + test_pooling() + test_lppool() + test_lstm() + test_gru() + test_resize() + test_nonzero() + test_topk() + test_mod() + test_xor() + test_max_roi_pool() + test_roi_align() + test_range() + test_loop() From 38cdbf5a33818755be50cc52768ad0f58654595b Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Fri, 16 Oct 2020 22:23:01 +0000 Subject: [PATCH 11/17] Removed unneeded helper functions. 
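As a sanity check on the count-loop test above: y starts at -2.0, the
condition is passed through unchanged, and each of the five iterations adds
the cast iteration counter, so the expected outputs can be worked out by hand
(plain Python mirroring the Cast/Add/Identity nodes in the body graph):

    y = -2.0
    scan = []
    for i in range(5):       # trip_count = 5, cond stays true
        y = y + float(i)     # Cast + Add
        scan.append([y])     # Identity feeding scan_out
    print(y, scan)           # 8.0 [[-2.0], [-1.0], [1.0], [4.0], [8.0]]

Both onnxruntime and the TVM VM should therefore return res_y = [8.0] and a
(5, 1) res_scan; the unused res_y = 13 constant in the test is a leftover from
the upstream ONNX example (with the Slice-based body) that the test was
adapted from.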
--- python/tvm/relay/frontend/onnx.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index fd909754da21..8332c9d0cacf 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -101,25 +101,6 @@ def get_numpy(tensor_proto): return to_array(tensor_proto) -def get_type(elem_type): - """Converts onnx integer datatype to numpy datatype""" - return onnx.TensorProto.DataType.Name(elem_type).lower() - - -def get_info(info_proto): - """Extract the shape from a ValueInfoProto.""" - shape = [] - for dim in info_proto.type.tensor_type.shape.dim: - value = dim.dim_value - if value is None: - value = _ty.Any - shape.append(value) - - name = info_proto.name - dtype = get_type(info_proto.type.tensor_type.elem_type) - return name, {name: shape}, {name: dtype} - - def dimension_picker(prefix, suffix=""): """Check that dimensions are supported.""" From 121a479de7613e709b375b75a8c890bdc8757a8f Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Fri, 16 Oct 2020 22:26:06 +0000 Subject: [PATCH 12/17] Remove bad merge artifact. --- tests/python/frontend/onnx/test_forward.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index 1f7b2172ae57..81b5186d0e26 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -3829,7 +3829,6 @@ def test_loop(): test_clip_min_max_as_inputs() test_onehot() test_matmul() - test_batch_matmul() test_gather() test_gatherelements() test_gather_nd() From 7cc53b24fb55f065977ec99a1b7dd765981ad871 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Fri, 16 Oct 2020 22:46:43 +0000 Subject: [PATCH 13/17] Cleaned up scan output creation. --- python/tvm/relay/frontend/onnx.py | 38 +++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 8332c9d0cacf..039c8e658b45 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -101,6 +101,25 @@ def get_numpy(tensor_proto): return to_array(tensor_proto) +def get_type(elem_type): + """Converts onnx integer datatype to numpy datatype""" + return onnx.TensorProto.DataType.Name(elem_type).lower() + + +def get_info(info_proto): + """Extract the shape from a ValueInfoProto.""" + shape = [] + for dim in info_proto.type.tensor_type.shape.dim: + value = dim.dim_value + if value is None: + value = _ty.Any + shape.append(value) + + name = info_proto.name + dtype = get_type(info_proto.type.tensor_type.elem_type) + return name, shape, dtype + + def dimension_picker(prefix, suffix=""): """Check that dimensions are supported.""" @@ -2074,15 +2093,14 @@ def get_var(name, val, scan=False): warnings.warn( "Using scan outputs in a loop with strided slice currently may cause errors during compilation." ) - scan_output_vars = [ - get_var(body.input[i + 2].name + "_scan", loop_deps[i], scan=True) - for i in range(num_scan_outputs) - ] - # Create initial empty output scan tensors. - loop_scans = [ - _op.reshape(_expr.const([]), [0] + list(loop_vars[i + 3].type_annotation.shape)) - for i in range(num_scan_outputs) - ] + + # Construct variables and intial empty tensors for any scan outputs. 
+ scan_output_vars = [] + scan_output_init = [] + for i in range(num_scan_outputs): + name, shape, dtype = get_info(body.output[i + 1 + num_deps]) + scan_output_vars.append(_expr.var(name, shape=([_ty.Any()] + shape), dtype=dtype)) + scan_output_init.append(_op.reshape(_expr.const([]), [0] + shape)) # Now we can remove loop iter variables from our inner loop's inputs. # This is kind of a hack since we have graph inputs that we don't @@ -2134,7 +2152,7 @@ def body_fn(*loop_inputs): # Now need to run initial values through the graph. init_count = _expr.const(0, dtype=iter_dtype) - loop_vals = loop(init_count, max_loop_count, cond, *loop_deps, *loop_scans) + loop_vals = loop(init_count, max_loop_count, cond, *loop_deps, *scan_output_init) # Extract final iteration outputs. if num_deps + num_scan_outputs == 1: From ee93cdb5d7e68734af46d81f6d1496f615276466 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Fri, 16 Oct 2020 23:00:12 +0000 Subject: [PATCH 14/17] Cleaned up some style mistakes. --- python/tvm/relay/frontend/onnx.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 039c8e658b45..133610a6086c 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -17,11 +17,10 @@ # pylint: disable=invalid-name, import-self, len-as-condition, unused-argument, too-many-lines # pylint: disable=import-outside-toplevel """ONNX: Open Neural Network Exchange frontend for Relay.""" -from collections import OrderedDict import warnings import numpy as np -import tvm import onnx +import tvm from tvm.ir import IRModule from tvm.topi.util import get_const_tuple @@ -2035,7 +2034,6 @@ def _impl_v11(cls, inputs, attr, params): # Determine what condition mode we're in. assert cond is not None or max_loop_count is not None is_for_loop = max_loop_count is not None and cond is None - is_while_loop = cond is not None and max_loop_count is None is_condition_for_loop = cond is not None and max_loop_count is not None # Loop inputs will be packed as @@ -2052,7 +2050,7 @@ def cond_fn(*loop_inputs): if is_condition_for_loop: return _op.logical_and(out_while, out_loop) - elif is_for_loop: + if is_for_loop: return out_loop return out_while @@ -2076,8 +2074,8 @@ def get_var(name, val, scan=False): actual_shape.append(dim) if scan: return _expr.var(name, shape=[_ty.Any()] + actual_shape, dtype=checked_type.dtype) - else: - return _expr.var(name, shape=actual_shape, dtype=checked_type.dtype) + + return _expr.var(name, shape=actual_shape, dtype=checked_type.dtype) loop_vars = [ _expr.var(body.input[0].name, shape=(), dtype=iter_dtype), # iteration count @@ -2091,7 +2089,10 @@ def get_var(name, val, scan=False): # TODO (jwfromm) Test with strided slice once type unifier for this case is fixed. if num_scan_outputs != 0 and "Slice" in [n.op_type for n in body.node]: warnings.warn( - "Using scan outputs in a loop with strided slice currently may cause errors during compilation." + """ + Using scan outputs in a loop with strided slice + currently may cause errors during compilation. + """ ) # Construct variables and intial empty tensors for any scan outputs. 
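Since get_info now does the heavy lifting for scan outputs, it is worth
pinning down what it extracts. A minimal standalone illustration using only
public onnx helpers (the printed triple is what the converter consumes):

    import onnx
    from onnx import helper, TensorProto

    vi = helper.make_tensor_value_info("scan_out", TensorProto.FLOAT, [1])
    name = vi.name
    shape = [d.dim_value for d in vi.type.tensor_type.shape.dim]
    dtype = TensorProto.DataType.Name(vi.type.tensor_type.elem_type).lower()
    print(name, shape, dtype)  # scan_out [1] float

The converter then prepends _ty.Any() to that shape for the growing iteration
axis and seeds the accumulator with a matching empty tensor.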
@@ -2662,8 +2663,6 @@ def from_onnx(model, shape=None, dtype="float32", opset=None, freeze_params=Fals The parameter dict to be used by relay """ try: - import onnx - if hasattr(onnx.checker, "check_model"): # try use onnx's own model checker before converting any model try: From be81d0fbccdbc831d33b6c3d25348cc53bc46d3b Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Fri, 16 Oct 2020 23:01:53 +0000 Subject: [PATCH 15/17] Add pylint skip for unused-argument. --- python/tvm/relay/op/tensor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/relay/op/tensor.py b/python/tvm/relay/op/tensor.py index 4be2b734cea7..453a9b7a7759 100644 --- a/python/tvm/relay/op/tensor.py +++ b/python/tvm/relay/op/tensor.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. """Basic tensor operations.""" -# pylint: disable=redefined-builtin +# pylint: disable=redefined-builtin, unused-argument from tvm.runtime import ndarray as _nd from tvm.runtime import TVMContext as _TVMContext from tvm.te.hybrid import script From 24b350ea2cb9d6c4bdbb27e52b90570b17dbfb10 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Sat, 17 Oct 2020 00:50:26 +0000 Subject: [PATCH 16/17] Remove onnx dependency. --- python/tvm/relay/frontend/onnx.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 133610a6086c..e2c6b9abc449 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -19,7 +19,6 @@ """ONNX: Open Neural Network Exchange frontend for Relay.""" import warnings import numpy as np -import onnx import tvm from tvm.ir import IRModule from tvm.topi.util import get_const_tuple @@ -102,7 +101,11 @@ def get_numpy(tensor_proto): def get_type(elem_type): """Converts onnx integer datatype to numpy datatype""" - return onnx.TensorProto.DataType.Name(elem_type).lower() + try: + from onnx import TensorProto + except ImportError as e: + raise ImportError("Unable to import onnx which is required {}".format(e)) + return TensorProto.DataType.Name(elem_type).lower() def get_info(info_proto): @@ -2663,6 +2666,8 @@ def from_onnx(model, shape=None, dtype="float32", opset=None, freeze_params=Fals The parameter dict to be used by relay """ try: + import onnx + if hasattr(onnx.checker, "check_model"): # try use onnx's own model checker before converting any model try: From 08068c507872076bf415210bf6cb7efe4aa57080 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Mon, 19 Oct 2020 20:06:55 +0000 Subject: [PATCH 17/17] Remove now obsolete checks for 0 shaped tensors. 
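The empty scan accumulators introduced above are exactly the tensors these
checks rejected: constants with a zero-length leading dimension, built from
[0] + shape initializers in the Loop converter. A minimal Relay sketch of such
a constant (illustrative and standalone, not code from this series):

    import numpy as np
    import tvm
    from tvm import relay

    # A scan output starts with zero entries along the iteration axis.
    init = relay.const(np.zeros((0, 1), dtype="float32"))
    step = relay.var("step", shape=(1,), dtype="float32")
    # One iteration appends a value: add a leading axis, then concatenate.
    grown = relay.concatenate([init, relay.expand_dims(step, axis=0)], axis=0)
    mod = tvm.IRModule.from_expr(relay.Function([step], grown))

Constant-folding or VM-compiling such a module previously tripped the
CHECK_GT guards removed below, even though the (0, 1) constant has a
perfectly legitimate zero-sized dimension.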
---
 src/relay/backend/vm/compiler.cc      | 7 -------
 src/relay/transforms/fold_constant.cc | 3 ---
 2 files changed, 10 deletions(-)

diff --git a/src/relay/backend/vm/compiler.cc b/src/relay/backend/vm/compiler.cc
index c7ceca3604c8..c3bf80571638 100644
--- a/src/relay/backend/vm/compiler.cc
+++ b/src/relay/backend/vm/compiler.cc
@@ -343,13 +343,6 @@ class VMFunctionCompiler : ExprFunctor<void(const Expr& expr)> {
   void VisitExpr_(const ConstantNode* const_node) {
     // Check the shape is valid
     NDArray data = const_node->data;
-    const DLTensor* tensor = data.operator->();
-    if (tensor->ndim > 0) {
-      int64_t* shapes = reinterpret_cast<int64_t*>(tensor->shape);
-      for (auto i = 0; i < tensor->ndim; i++) {
-        CHECK_GT(shapes[i], 0U);
-      }
-    }
     size_t konst_idx = context_->constants.size();
     if (expr_device_map_.empty()) {
       context_->const_device_type.push_back(targets_.begin()->first);
diff --git a/src/relay/transforms/fold_constant.cc b/src/relay/transforms/fold_constant.cc
index 8d2cba05be49..1de690d91036 100644
--- a/src/relay/transforms/fold_constant.cc
+++ b/src/relay/transforms/fold_constant.cc
@@ -199,9 +199,6 @@ class ConstantFolder : public MixedModeMutator {
   Expr ObjectToExpr(const ObjectRef& value) {
     if (value->IsInstance<runtime::NDArray::ContainerType>()) {
       auto nd_array = Downcast<runtime::NDArray>(value);
-      for (auto dim : nd_array.Shape()) {
-        CHECK_GT(dim, 0) << "invalid dimension after constant eval";
-      }
       return Constant(nd_array);
     } else if (const auto* val = value.as<runtime::ADTObj>()) {
       runtime::ADT adt = GetRef<runtime::ADT>(val);