diff --git a/conftest.py b/conftest.py
index 861abc14b843..88e21f494113 100644
--- a/conftest.py
+++ b/conftest.py
@@ -31,23 +31,17 @@
 # taken from the 20 (arbitrary number) of tests as from
 # https://ci.tlcpack.ai/job/tvm/job/main/2907/testReport
 _slowest_tests = [
-    "tests/python/frontend/tensorflow/test_forward.py::test_forward_broadcast_args",
-    "tests/python/frontend/tensorflow/test_forward.py::test_forward_broadcast_to",
     "tests/python/topi/python/test_topi_conv2d_int8.py::test_conv2d_nchw[int8]",
     "tests/python/topi/python/test_topi_conv2d_int8.py::test_conv2d_nchw[uint8]",
     "tests/python/topi/python/test_topi_upsampling.py::test_upsampling3d",
     "tests/python/topi/python/test_topi_upsampling.py::test_upsampling3d",
     "tests/python/topi/python/test_topi_conv2d_int8.py::test_conv2d_nchw[int8]",
-    "tests/python/frontend/tflite/test_forward.py::test_all_elemwise",
-    "tests/python/frontend/pytorch/test_object_detection.py::test_detection_models",
     "tests/python/topi/python/test_topi_conv2d_int8.py::test_conv2d_nchw[uint8]",
     "tests/python/topi/python/test_topi_conv2d_NCHWc.py::test_conv2d_NCHWc",
     "tests/python/topi/python/test_topi_conv2d_hwnc_tensorcore.py::test_conv2d_hwnc_tensorcore",
     "tests/python/contrib/test_tensorrt.py::test_binary[compile]",
-    "tests/python/frontend/pytorch/test_forward.py::test_segmentation_models",
     "tests/python/topi/python/test_topi_conv2d_NCHWc.py::test_conv2d_NCHWc",
     "tests/python/relay/test_py_converter.py::test_global_recursion",
-    "tests/python/frontend/tensorflow/test_forward.py::test_forward_ptb",
     "tests/python/relay/test_op_level6.py::test_topk",
     "tests/python/topi/python/test_topi_conv2d_winograd.py::test_conv2d_nchw",
     "tests/python/relay/test_py_converter.py::test_global_recursion",
diff --git a/tests/lint/pylint.sh b/tests/lint/pylint.sh
index 90e50dfa9433..4d10b01485a0 100755
--- a/tests/lint/pylint.sh
+++ b/tests/lint/pylint.sh
@@ -41,17 +41,5 @@ python3 -m pylint tests/python/contrib/test_hexagon/conv2d/*.py --rcfile="$(dirn
 python3 -m pylint tests/python/contrib/test_hexagon/topi/*.py --rcfile="$(dirname "$0")"/pylintrc
 python3 -m pylint tests/python/contrib/test_hexagon/metaschedule_e2e/*.py --rcfile="$(dirname "$0")"/pylintrc
 
-# tests/python/frontend tests
-python3 -m pylint tests/python/frontend/caffe/test_forward.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/caffe2/*.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/darknet/test_forward.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/coreml/*.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/keras/test_forward.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/darknet/test_forward.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/oneflow/*.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/tensorflow/test_forward.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/pytorch/test_forward.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/tflite/test_forward.py --rcfile="$(dirname "$0")"/pylintrc
-
 # tests/python/contrib/test_msc tests
 python3 -m pylint tests/python/contrib/test_msc/*.py --rcfile="$(dirname "$0")"/pylintrc
diff --git a/tests/python/frontend/caffe/test_forward.py b/tests/python/frontend/caffe/test_forward.py
deleted file mode 100644
index d0ba1dfac40b..000000000000
--- a/tests/python/frontend/caffe/test_forward.py
+++ /dev/null
@@ -1,1166 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument, unspecified-encoding
-"""
-Caffe testcases
-====================
-This article is a test script to test Caffe operator with Relay.
-"""
-import os
-import logging
-import numpy as np
-import pytest
-
-from google.protobuf import text_format
-import caffe
-from caffe import layers as L, params as P
-from caffe.proto import caffe_pb2 as pb
-
-import tvm
-import tvm.testing
-from tvm import relay
-from tvm.contrib import graph_executor
-from tvm.contrib.download import download_testdata
-
-os.environ["GLOG_minloglevel"] = "2"
-
-logging.basicConfig(level=logging.ERROR)
-
-CURRENT_DIR = os.path.join(os.path.expanduser("~"), ".tvm_test_data", "caffe_test")
-
-#######################################################################
-# Generic functions for TVM & Caffe
-# ------------------------------------------
-
-
-def _create_dir(d_path):
-    """If the directory is not existed, create it"""
-    if not (os.path.exists(d_path) and os.path.isdir(d_path)):
-        os.makedirs(d_path)
-
-
-def _list_to_str(ll):
-    """Convert list or tuple to str, separated by underline."""
-    if isinstance(ll, (tuple, list)):
-        tmp = [str(i) for i in ll]
-        res = "_".join(tmp)
-    return res
-
-
-def _gen_filename_str(op_name, data_shape, *args, **kwargs):
-    """Combining the filename according to the op_name, shape and other args."""
-    file_dir = os.path.join(CURRENT_DIR, op_name)
-    _create_dir(file_dir)
-    res = op_name + "_"
-    shape_str = _list_to_str(list(data_shape))
-    res += shape_str
-    for arg in args:
-        if isinstance(arg, (tuple, list)):
-            res += "_" + _list_to_str(arg)
-        elif isinstance(arg, (int, float, str)):
-            res += "_" + str(arg)
-    for _, v in kwargs.items():
-        if isinstance(v, (tuple, list)):
-            res += "_" + _list_to_str(v)
-        elif isinstance(v, (int, float, str)):
-            res += "_" + str(v)
-    res = res.replace(".", "_")
-    res = res.replace("-", "_")
-    proto_file = os.path.join(file_dir, res + ".prototxt")
-    blob_file = os.path.join(file_dir, res + ".caffemodel")
-    solver_file = os.path.join(file_dir, res + "_solver.prototxt")
-
-    return (proto_file, blob_file, solver_file)
-
-
-def _save_prototxt(n_netspec, f_path):
-    """Generate .prototxt file according to caffe.NetSpec"""
-    s = n_netspec.to_proto()
-    with open(f_path, "w") as f:
-        f.write(str(s))
-
-
-def _save_solver(solver_file, proto_file, blob_file):
-    """Define a solver proto, you can change the configs."""
-    blob_file_prefix = blob_file.split(".caffemodel")[0]
-    s = pb.SolverParameter()
-    s.train_net = proto_file
-    s.base_lr = 0.01
-    s.momentum = 0.9
-    s.weight_decay = 0.0005
-    s.lr_policy = "inv"
-    s.gamma = 0.0001
-    s.power = 0.75
-    s.display = 1
-    s.max_iter = 100000
-    s.snapshot = 100000
-    s.snapshot_prefix = blob_file_prefix
-
-    with open(solver_file, "w") as f:
-        f.write(str(s))
-
-
-def _save_caffemodel(solver_file, blob_file):
-    """Generate .caffemodel file."""
-    solver = caffe.SGDSolver(solver_file)
-    solver.net.save(blob_file)
-
-
-def _gen_model_files(n_netspec, proto_file, blob_file, solver_file):
-    _save_prototxt(n_netspec, proto_file)
-    _save_solver(solver_file, proto_file, blob_file)
-    _save_caffemodel(solver_file, blob_file)
-
-
-def _siso_op(data, func, *args, **kwargs):
-    """Create single input and single output Caffe op"""
-    n = caffe.NetSpec()
-    n.data = L.Input(input_param={"shape": {"dim": list(data.shape)}})
-    n.output = func(n.data, *args, **kwargs)
-    return n
-
-
-def _miso_op(data_list, func, *args, **kwargs):
-    """Create multi input and single output Caffe op"""
-    n = caffe.NetSpec()
-    if not isinstance(data_list, (tuple, list)):
-        raise TypeError(f"Need tuple or list but get {type(data_list)}")
-    input_list = []
-    for idx, data in enumerate(data_list):
-        n["data" + str(idx)] = L.Input(input_param={"shape": {"dim": list(data.shape)}})
-        input_list.append(n["data" + str(idx)])
-    n.output = func(*input_list, *args, **kwargs)
-    return n
-
-
-def _simo_op(data, func, *args, **kwargs):
-    """Create single input and multi output Caffe op"""
-    n = caffe.NetSpec()
-    n.data = L.Input(input_param={"shape": {"dim": list(data.shape)}})
-    output_list = func(n.data, *args, **kwargs)
-    for idx, out in enumerate(output_list):
-        n["output" + str(idx)] = out
-    return n
-
-
-def _run_caffe(data, proto_file, blob_file):
-    """Run caffe model by Caffe according to .caffemodel and .prototxt"""
-    net = caffe.Net(proto_file, blob_file, caffe.TEST)
-    if isinstance(data, (list, tuple)):
-        for idx, d in enumerate(data):
-            net.blobs["data" + str(idx)].data[...] = d
-    else:
-        net.blobs["data"].data[...] = data
-    out = net.forward()
-
-    caffe_output = []
-    for i in range(len(out.keys())):
-        if "output" + str(i) not in out.keys():
-            caffe_output.clear()
-            return list(out.values())
-        caffe_output.append(out["output" + str(i)])
-    return caffe_output
-
-
-def _run_tvm(data, proto_file, blob_file):
-    """Run caffe model by TVM according to .caffemodel and .prototxt"""
-    init_net = pb.NetParameter()
-    predict_net = pb.NetParameter()
-
-    # load model
-    with open(proto_file, "r") as f:
-        text_format.Merge(f.read(), predict_net)
-    # load blob
-    with open(blob_file, "rb") as f:
-        init_net.ParseFromString(f.read())
-
-    shape_dict = {}
-    dtype_dict = {}
-    if isinstance(data, (tuple, list)):
-        for idx, d in enumerate(data):
-            shape_dict["data" + str(idx)] = d.shape
-            dtype_dict["data" + str(idx)] = "float32"
-    else:
-        shape_dict = {"data": data.shape}
-        dtype_dict = {"data": "float32"}
-
-    mod, params = relay.frontend.from_caffe(init_net, predict_net, shape_dict, dtype_dict)
-
-    target = "llvm"
-
-    dev = tvm.cpu(0)
-    with tvm.transform.PassContext(opt_level=3):
-        lib = relay.build(mod, target=target, params=params)
-    dtype = "float32"
-    m = graph_executor.GraphModule(lib["default"](dev))
-    if isinstance(data, (tuple, list)):
-        for idx, d in enumerate(data):
-            m.set_input("data" + str(idx), tvm.nd.array(d.astype(dtype)))
-    else:
-        m.set_input("data", tvm.nd.array(data.astype(dtype)))
-    # execute
-    m.run()
-    tvm_output = []
-    # get outputs
-    for i in range(m.get_num_outputs()):
-        tvm_output.append(m.get_output(i).numpy())
-    return tvm_output
-
-
-def _compare_caffe_tvm(caffe_out, tvm_out, is_network=False):
-    for i, _ in enumerate(caffe_out):
-        if is_network:
-            caffe_out[i] = caffe_out[i][:1]
-        tvm.testing.assert_allclose(caffe_out[i], tvm_out[i], rtol=1e-5, atol=1e-5)
-
-
-def _test_op(data, func_op, op_name, **kwargs):
-    """Single op testing pipline."""
-    shape_list = []
-    if isinstance(data, (list, tuple)):
-        n = _miso_op(data, func_op, **kwargs)
-        for d in data:
-            shape_list.extend(list(d.shape))
-    else:
-        output_num = 1
-        if "ntop" in kwargs:
-            output_num = kwargs["ntop"]
-        if output_num == 1:
-            n = _siso_op(data, func_op, **kwargs)
-        else:
-            n = _simo_op(data, func_op, **kwargs)
-        shape_list = list(data.shape)
-
-    # obtain the .caffemodel file and .prototxt file
-    (proto_file, blob_file, solver_file) = _gen_filename_str(op_name, shape_list, **kwargs)
-    _gen_model_files(n, proto_file, blob_file, solver_file)
-    # run model in Caffe
-    caffe_out = _run_caffe(data, proto_file, blob_file)
-    # run model in TVM
-    tvm_out = _run_tvm(data, proto_file, blob_file)
-    _compare_caffe_tvm(caffe_out, tvm_out)
-
-
-def _test_network(data, proto_file, blob_file):
-    # run model in Caffe
-    caffe_out = _run_caffe(data, proto_file, blob_file)
-    # run model in TVM
-    tvm_out = _run_tvm(data, proto_file, blob_file)
-    _compare_caffe_tvm(caffe_out, tvm_out, is_network=True)
-
-
-#######################################################################
-# BatchNorm
-# -----------
-
-
-def _test_batchnorm(data, moving_average_fraction=0.999, eps=1e-5):
-    """One iteration of BatchNorm"""
-    _test_op(
-        data, L.BatchNorm, "BatchNorm", moving_average_fraction=moving_average_fraction, eps=eps
-    )
-
-
-def test_forward_BatchNorm():
-    """BatchNorm"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_batchnorm(data)
-    _test_batchnorm(data, moving_average_fraction=0.88, eps=1e-4)
-
-
-#######################################################################
-# Concat
-# -----------
-
-
-def _test_concat(data_list, axis=1):
-    """One iteration of Concat"""
-    _test_op(data_list, L.Concat, "Concat", axis=axis)
-
-
-def test_forward_Concat():
-    """Concat"""
-    _test_concat([np.random.rand(1, 3, 10, 10), np.random.rand(1, 2, 10, 10)], axis=1)
-    _test_concat([np.random.rand(3, 10, 10), np.random.rand(2, 10, 10)], axis=0)
-    _test_concat([np.random.rand(3, 10), np.random.rand(2, 10)], axis=0)
-
-
-#######################################################################
-# Convolution
-# -----------
-
-
-def _test_convolution(data, **kwargs):
-    """One iteration of Convolution"""
-    _test_op(data, L.Convolution, "Convolution", **kwargs)
-
-
-def test_forward_Convolution():
-    """Convolution"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_convolution(
-        data,
-        num_output=20,
-        bias_term=True,
-        pad=0,
-        kernel_size=3,
-        stride=2,
-        dilation=1,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    _test_convolution(
-        data,
-        num_output=20,
-        bias_term=False,
-        pad=[1, 2],
-        kernel_size=3,
-        stride=2,
-        dilation=1,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    _test_convolution(
-        data,
-        num_output=20,
-        bias_term=True,
-        pad=[1, 2],
-        kernel_size=[3, 5],
-        stride=[2, 1],
-        dilation=[1, 2],
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    _test_convolution(
-        np.random.rand(1, 2, 10, 10).astype(np.float32),
-        num_output=20,
-        bias_term=True,
-        pad=[1, 2],
-        kernel_size=[3, 5],
-        stride=[2, 1],
-        dilation=[1, 2],
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-        group=2,
-    )
-    _test_convolution(
-        data,
-        num_output=20,
-        bias_term=True,
-        pad_h=1,
-        pad_w=2,
-        kernel_h=3,
-        kernel_w=5,
-        stride_h=2,
-        stride_w=1,
-        dilation=[1, 2],
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-
-
-#######################################################################
-# Crop
-# -----------
-
-
-def _test_crop(data, **kwargs):
-    """One iteration of Crop"""
-    _test_op(data, L.Crop, "Crop", **kwargs)
-
-
-def test_forward_Crop():
-    """Crop"""
-    _test_crop([np.random.rand(10, 10, 120, 120), np.random.rand(10, 5, 50, 60)])
-    _test_crop([np.random.rand(10, 10, 120, 120), np.random.rand(10, 5, 50, 60)], axis=1)
-    _test_crop([np.random.rand(10, 10, 120, 120), np.random.rand(10, 5, 50, 60)], axis=1, offset=2)
-    _test_crop(
-        [np.random.rand(10, 10, 120, 120), np.random.rand(10, 5, 50, 60)], axis=1, offset=[1, 2, 4]
-    )
-    _test_crop(
-        [np.random.rand(10, 10, 120, 120), np.random.rand(10, 5, 50, 60)], axis=2, offset=[2, 4]
-    )
-    _test_crop([np.random.rand(10, 120, 120), np.random.rand(5, 50, 60)], axis=1, offset=[2, 4])
-    _test_crop([np.random.rand(120, 120), np.random.rand(50, 60)], axis=0, offset=[2, 4])
-
-
-#######################################################################
-# Deconvolution
-# -----------
-
-
-def _test_deconvolution(data, **kwargs):
-    """One iteration of Deconvolution"""
-    _test_op(data, L.Deconvolution, "Deconvolution", **kwargs)
-
-
-def test_forward_Deconvolution():
-    """Deconvolution"""
-    data = np.random.rand(1, 16, 32, 32).astype(np.float32)
-    _test_deconvolution(
-        data,
-        convolution_param=dict(
-            num_output=20,
-            bias_term=True,
-            pad=0,
-            kernel_size=3,
-            stride=2,
-            dilation=1,
-            weight_filler=dict(type="xavier"),
-            bias_filler=dict(type="xavier"),
-        ),
-    )
-    _test_deconvolution(
-        data,
-        convolution_param=dict(
-            num_output=20,
-            bias_term=False,
-            pad=[1, 2],
-            kernel_size=3,
-            stride=2,
-            dilation=1,
-            weight_filler=dict(type="xavier"),
-            bias_filler=dict(type="xavier"),
-        ),
-    )
-    _test_deconvolution(
-        data,
-        convolution_param=dict(
-            num_output=20,
-            bias_term=True,
-            pad_h=1,
-            pad_w=2,
-            kernel_h=3,
-            kernel_w=5,
-            stride_h=2,
-            stride_w=1,
-            dilation=1,
-            weight_filler=dict(type="xavier"),
-            bias_filler=dict(type="xavier"),
-        ),
-    )
-    _test_deconvolution(
-        data,
-        convolution_param=dict(
-            num_output=16,
-            bias_term=False,
-            pad=0,
-            kernel_size=2,
-            stride=2,
-            dilation=1,
-            group=16,
-            weight_filler=dict(type="xavier"),
-            bias_filler=dict(type="xavier"),
-        ),
-    )
-    data = np.random.rand(1, 100, 32, 32).astype(np.float32)
-    _test_deconvolution(
-        data,
-        convolution_param=dict(
-            num_output=100,
-            bias_term=False,
-            pad=0,
-            kernel_size=2,
-            stride=2,
-            dilation=1,
-            group=100,
-            weight_filler=dict(type="xavier"),
-            bias_filler=dict(type="xavier"),
-        ),
-    )
-
-
-#######################################################################
-# Dropout
-# -----------
-
-
-def _test_dropout(data, **kwargs):
-    """One iteration of Dropout"""
-    _test_op(data, L.Dropout, "Dropout", **kwargs)
-
-
-def test_forward_Dropout():
-    """Dropout"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_dropout(data)
-    _test_dropout(data, dropout_ratio=0.7)
-
-
-#######################################################################
-# Eltwise
-# -----------
-
-
-def _test_eltwise(data_list, **kwargs):
-    """One iteration of Eltwise"""
-    _test_op(data_list, L.Eltwise, "Eltwise", **kwargs)
-
-
-def test_forward_Eltwise():
-    """Eltwise"""
-    _test_eltwise(
-        [
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-        ],
-        operation=0,
-    )
-    _test_eltwise(
-        [
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-        ],
-        operation=1,
-    )
-    _test_eltwise(
-        [
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-        ],
-        operation=2,
-    )
-    _test_eltwise(
-        [
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-        ],
-        operation=1,
-        coeff=[0.5, 1],
-    )
-    _test_eltwise(
-        [
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-        ],
-        operation=0,
-    )
-    _test_eltwise(
-        [
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-        ],
-        operation=1,
-    )
-    _test_eltwise(
-        [
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-        ],
-        operation=2,
-    )
-    _test_eltwise(
-        [
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-        ],
-        operation=1,
-        coeff=[0.5, 1, 0.2, 1.8, 3.1, 0.1],
-    )
-
-
-#######################################################################
-# Flatten
-# -----------
-
-
-def _test_flatten(data, axis=1):
-    """One iteration of Flatten"""
-    _test_op(data, L.Flatten, "Flatten", axis=axis)
-
-
-def test_forward_Flatten():
-    """Flatten"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_flatten(data)
-    _test_flatten(data, axis=1)
-
-
-#######################################################################
-# Flatten
-# -----------
-
-
-def _test_inner_product(data, **kwargs):
-    """One iteration of InnerProduct"""
-    _test_op(data, L.InnerProduct, "InnerProduct", **kwargs)
-
-
-def test_forward_InnerProduct():
-    """InnerProduct"""
-    data = np.random.rand(1, 3, 10, 10)
-    _test_inner_product(data, num_output=20, bias_term=False, weight_filler=dict(type="xavier"))
-    _test_inner_product(
-        data,
-        num_output=20,
-        bias_term=True,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    _test_inner_product(
-        np.random.rand(20, 10).astype(np.float32),
-        num_output=30,
-        bias_term=True,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-
-
-#######################################################################
-# LRN
-# -----------
-
-
-def _test_lrn(data, local_size=5, alpha=1.0, beta=0.75, k=1.0):
-    """One iteration of LRN"""
-    _test_op(data, L.LRN, "LRN", local_size=local_size, alpha=alpha, beta=beta, k=k)
-
-
-def test_forward_LRN():
-    """LRN"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_lrn(data)
-    _test_lrn(data, local_size=3)
-    _test_lrn(data, local_size=3, alpha=2.0)
-    _test_lrn(
-        data,
-        local_size=3,
-        alpha=2.0,
-        beta=0.5,
-    )
-    _test_lrn(data, local_size=3, alpha=2.0, beta=0.5, k=2.0)
-
-
-#######################################################################
-# Permute
-# -------
-
-
-def _test_permute(data, **kwargs):
-    """One iteration of Permute."""
-    _test_op(data, L.Permute, "Permute", **kwargs)
-
-
-def test_forward_Permute():
-    """Permute"""
-    data = np.random.rand(2, 3, 4).astype(np.float32)
-    _test_permute(data, permute_param={"order": [0, 1, 2]})
-    _test_permute(data, permute_param={"order": [0, 2, 1]})
-    _test_permute(data, permute_param={"order": [1, 0, 2]})
-    _test_permute(data, permute_param={"order": [1, 2, 0]})
-    _test_permute(data, permute_param={"order": [2, 0, 1]})
-    _test_permute(data, permute_param={"order": [2, 1, 0]})
-
-
-#######################################################################
-# Pooling
-# -----------
-
-
-def _test_pooling(data, **kwargs):
-    """One iteration of Pooling."""
-    _test_op(data, L.Pooling, "Pooling", **kwargs)
-
-
-def test_forward_Pooling():
-    """Pooing"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    # MAX Pooling
-    _test_pooling(data, kernel_size=2, stride=2, pad=0, pool=P.Pooling.MAX)
-    _test_pooling(
-        data, kernel_h=2, kernel_w=3, stride_h=2, stride_w=1, pad_h=1, pad_w=2, pool=P.Pooling.MAX
-    )
-    _test_pooling(data, pool=P.Pooling.MAX, global_pooling=True)
-
-    # AVE Pooing
-    _test_pooling(data, kernel_size=2, stride=2, pad=0, pool=P.Pooling.AVE)
-    _test_pooling(
-        data, kernel_h=2, kernel_w=3, stride_h=2, stride_w=1, pad_h=1, pad_w=2, pool=P.Pooling.AVE
-    )
-    _test_pooling(data, pool=P.Pooling.AVE, global_pooling=True)
-
-
-#######################################################################
-# Power
-# -----
-def _test_power(data, **kwargs):
-    """One iteration of Power."""
-    _test_op(data, L.Power, "Power", **kwargs)
-
-
-def test_forward_Power():
-    """Power"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_power(data, power_param={"power": 0.37, "scale": 0.83, "shift": -2.4})
-    _test_power(data, power_param={"power": 0.37, "scale": 0.83, "shift": 0.0})
-    _test_power(data, power_param={"power": 0.0, "scale": 0.83, "shift": -2.4})
-    _test_power(data, power_param={"power": 1.0, "scale": 0.83, "shift": -2.4})
-    _test_power(data, power_param={"power": 2.0, "scale": 0.34, "shift": -2.4})
-    _test_power(data, power_param={"power": 1.0, "scale": 1.0, "shift": 0.0})
-
-
-#######################################################################
-# PReLU
-# -----------
-
-
-def _test_prelu(data, **kwargs):
-    """One iteration of PReLU."""
-    _test_op(data, L.PReLU, "PReLU", **kwargs)
-
-
-def test_forward_PReLU():
-    """PReLU"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_prelu(data, filler=dict(type="constant", value=0.5))
-    _test_prelu(data)
-    _test_prelu(np.random.rand(10, 20).astype(np.float32))
-
-
-#######################################################################
-# ReLU
-# -----------
-
-
-def _test_relu(data, **kwargs):
-    """One iteration of ReLU."""
-    _test_op(data, L.ReLU, "ReLU", **kwargs)
-
-
-def test_forward_ReLU():
-    """ReLU"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_relu(data)
-    _test_relu(np.random.rand(10, 20).astype(np.float32))
-
-
-#######################################################################
-# Reshape
-# -----------
-
-
-def _test_reshape(data, **kwargs):
-    """One iteration of Reshape."""
-    _test_op(data, L.Reshape, "Reshape", **kwargs)
-
-
-def test_forward_Reshape():
-    """Reshape"""
-    data = np.random.rand(1, 8, 6).astype(np.float32)
-    _test_reshape(data, reshape_param={"shape": {"dim": [4, 3, 4]}})
-    _test_reshape(data, reshape_param={"shape": {"dim": [2, 0, 3]}})
-    _test_reshape(data, reshape_param={"shape": {"dim": [2, 0, -1]}})
-    _test_reshape(data, reshape_param={"shape": {"dim": [0, -1]}})
-
-    _test_reshape(data, reshape_param={"shape": {"dim": [2, 3]}, "axis": 2})
-    _test_reshape(data, reshape_param={"shape": {"dim": [4, 3, 4]}, "axis": 1})
-    _test_reshape(data, reshape_param={"shape": {"dim": [4, 3, 4]}, "axis": -3})
-
-    _test_reshape(data, reshape_param={"shape": {"dim": [2, 4]}, "axis": 1, "num_axes": 1})
-    _test_reshape(data, reshape_param={"shape": {"dim": [3, 16]}, "axis": 1, "num_axes": 2})
-
-
-#######################################################################
-# Scale
-# -----------
-
-
-def _test_scale(data, **kwargs):
-    """One iteration of Scale."""
-    _test_op(data, L.Scale, "Scale", **kwargs)
-
-
-def test_forward_Scale():
-    """Scale"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_scale(data, filler=dict(type="xavier"))
-    _test_scale(data, filler=dict(type="xavier"), bias_term=True, bias_filler=dict(type="xavier"))
-
-
-#######################################################################
-# Sigmoid
-# -----------
-
-
-def _test_sigmoid(data, **kwargs):
-    """One iteration of Sigmoid."""
-    _test_op(data, L.Sigmoid, "Sigmoid", **kwargs)
-
-
-def test_forward_Sigmoid():
-    """Sigmoid"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_sigmoid(data)
-
-
-#######################################################################
-# Slice
-# -----------
-
-
-def _test_slice(data, **kwargs):
-    """One iteration of Slice"""
-    _test_op(data, L.Slice, "Slice", **kwargs)
-
-
-def test_forward_Slice():
-    """Slice"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_slice(data, ntop=2, slice_param=dict(axis=1, slice_point=[1]))
-    _test_slice(data, ntop=2, slice_param=dict(axis=-1, slice_point=[1]))
-    _test_slice(data, ntop=3, slice_param=dict(axis=2, slice_point=[1, 6]))
-    _test_slice(data, ntop=3)
-
-
-#######################################################################
-# Softmax
-# -----------
-
-
-def _test_softmax(data, **kwargs):
-    """One iteration of Softmax"""
-    _test_op(data, L.Softmax, "Softmax", **kwargs)
-
-
-def test_forward_Softmax():
-    """Softmax"""
-    _test_softmax(np.random.rand(1, 3, 10, 10).astype(np.float32))
-    _test_softmax(np.random.rand(1, 3, 10, 10).astype(np.float32), axis=2)
-    _test_softmax(np.random.rand(10, 10).astype(np.float32), axis=0)
-    _test_softmax(np.random.rand(2, 10, 10).astype(np.float32), axis=1)
-
-
-#######################################################################
-# TanH
-# -----------
-
-
-def _test_tanh(data, **kwargs):
-    """One iteration of TanH"""
-    _test_op(data, L.TanH, "TanH", **kwargs)
-
-
-def test_forward_TanH():
-    """TanH"""
-    _test_tanh(np.random.rand(1, 3, 10, 10).astype(np.float32))
-    _test_tanh(np.random.rand(3, 10, 10).astype(np.float32))
-    _test_tanh(np.random.rand(10, 10).astype(np.float32))
-    _test_tanh(np.random.rand(10).astype(np.float32))
-
-
-#######################################################################
-# Reduction
-# -----------
-
-
-def _test_reduction(data, **kwargs):
-    """One iteration of Reduction"""
-    _test_op(data, L.Reduction, "Reduction", **kwargs)
-
-
-def test_forward_Reduction():
-    """Reduction"""
-    reduction_op = {"SUM": 1, "ASUM": 2, "SUMSQ": 3, "MEAN": 4}
-    _test_reduction(np.random.rand(10).astype(np.float32), operation=reduction_op["SUM"], axis=0)
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40).astype(np.float32), operation=reduction_op["SUM"], axis=3
-    )
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40).astype(np.float32), operation=reduction_op["SUM"], axis=1
-    )
-    _test_reduction(
-        np.random.rand(10).astype(np.float32), operation=reduction_op["SUM"], axis=0, coeff=0.5
-    )
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40).astype(np.float32),
-        operation=reduction_op["SUM"],
-        axis=3,
-        coeff=5.0,
-    )
-    _test_reduction(np.random.rand(10).astype(np.float32), operation=reduction_op["ASUM"])
-    _test_reduction(
-        np.random.rand(10, 20).astype(np.float32), operation=reduction_op["ASUM"], axis=1
-    )
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40).astype(np.float32), operation=reduction_op["ASUM"], axis=3
-    )
-    _test_reduction(
-        np.random.rand(10).astype(np.float32), operation=reduction_op["ASUM"], axis=0, coeff=0.0
-    )
-    _test_reduction(
-        np.random.rand(10, 20, 30).astype(np.float32),
-        operation=reduction_op["ASUM"],
-        axis=2,
-        coeff=7.0,
-    )
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40, 10).astype(np.float32),
-        operation=reduction_op["ASUM"],
-        axis=3,
-        coeff=1.0,
-    )
-    _test_reduction(np.random.rand(10).astype(np.float32), operation=reduction_op["SUMSQ"], axis=0)
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40).astype(np.float32), operation=reduction_op["SUMSQ"], axis=3
-    )
-    _test_reduction(
-        np.random.rand(10).astype(np.float32), operation=reduction_op["SUMSQ"], axis=0, coeff=0.0
-    )
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40, 50).astype(np.float32),
-        operation=reduction_op["SUMSQ"],
-        axis=4,
-        coeff=2.0,
-    )
-    _test_reduction(np.random.rand(10).astype(np.float32), operation=reduction_op["MEAN"], axis=0)
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40).astype(np.float32), operation=reduction_op["MEAN"], axis=3
-    )
-    _test_reduction(
-        np.random.rand(10).astype(np.float32), operation=reduction_op["MEAN"], axis=0, coeff=0.0
-    )
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40).astype(np.float32),
-        operation=reduction_op["MEAN"],
-        axis=3,
-        coeff=2.0,
-    )
-
-
-#######################################################################
-# Embed
-# -----------
-
-
-def _test_embed(data, **kwargs):
-    """One iteration of Embed"""
-    _test_op(data, L.Embed, "Embed", **kwargs)
-
-
-def test_forward_Embed():
-    """Embed"""
-    k = 20
-    data = list(i for i in range(k))
-    np.random.shuffle(data)
-    # dimension is 1
-    data = np.asarray(data)
-    _test_embed(
-        data,
-        num_output=30,
-        input_dim=k,
-        bias_term=True,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    _test_embed(
-        data,
-        num_output=30,
-        input_dim=k,
-        bias_term=False,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    # dimension is 2
-    data = np.reshape(data, [4, 5])
-    _test_embed(
-        data,
-        num_output=30,
-        input_dim=k,
-        bias_term=True,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    _test_embed(
-        data,
-        num_output=30,
-        input_dim=k,
-        bias_term=False,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    # dimension is 3
-    data = np.reshape(data, [2, 2, 5])
-    _test_embed(
-        data,
-        num_output=30,
-        input_dim=k,
-        bias_term=True,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    _test_embed(
-        data,
-        num_output=30,
-        input_dim=k,
-        bias_term=False,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    # dimension is 4
-    data = np.reshape(data, [2, 2, 5, 1])
-    _test_embed(
-        data,
-        num_output=30,
-        input_dim=k,
-        bias_term=True,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    _test_embed(
-        data,
-        num_output=30,
-        input_dim=k,
-        bias_term=False,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-
-
-#######################################################################
-# Mobilenetv2
-# -----------
-
-
-def _test_mobilenetv2(data):
-    """One iteration of Mobilenetv2"""
-    mean_val = np.array([103.939, 116.779, 123.68], dtype=np.float32)
-    mean_val = np.reshape(mean_val, (1, 3, 1, 1))
-    mean_val = np.tile(mean_val, (1, 1, 224, 224))
-    data_process = data - mean_val
-    data_process = data_process / 58.8
-    data_process = data_process.astype(np.float32)
-
-    proto_file_url = (
-        "https://github.com/shicai/MobileNet-Caffe/raw/master/mobilenet_v2_deploy.prototxt"
-    )
-    blob_file_url = (
-        "https://github.com/shicai/MobileNet-Caffe/blob/master/mobilenet_v2.caffemodel?raw=true"
-    )
-    proto_file = download_testdata(proto_file_url, "mobilenetv2.prototxt", module="model")
-    blob_file = download_testdata(blob_file_url, "mobilenetv2.caffemodel", module="model")
-    _test_network(data_process, proto_file, blob_file)
-
-
-def test_forward_Mobilenetv2():
-    """Mobilenetv2"""
-    data = np.random.randint(0, 256, size=(1, 3, 224, 224)).astype(np.float32)
-    _test_mobilenetv2(data)
-
-
-#######################################################################
-# Alexnet
-# -----------
-
-
-def _test_alexnet(data):
-    """One iteration of Alexnet"""
-    mean_val = np.array([103.939, 116.779, 123.68], dtype=np.float32)
-    mean_val = np.reshape(mean_val, (1, 3, 1, 1))
-    mean_val = np.tile(mean_val, (1, 1, 227, 227))
-    data_process = data - mean_val
-    data_process = data_process.astype(np.float32)
-
-    proto_file_url = (
-        "https://github.com/BVLC/caffe/raw/master/models/" + "bvlc_alexnet/deploy.prototxt"
-    )
-    blob_file_url = "http://dl.caffe.berkeleyvision.org/bvlc_alexnet.caffemodel"
-    proto_file = download_testdata(proto_file_url, "alexnet.prototxt", module="model")
-    blob_file = download_testdata(blob_file_url, "alexnet.caffemodel", module="model")
-    _test_network(data_process, proto_file, blob_file)
-
-
-@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/13227")
-def test_forward_Alexnet():
-    """Alexnet"""
-    data = np.random.randint(0, 256, size=(1, 3, 227, 227)).astype(np.float32)
-    _test_alexnet(data)
-
-
-#######################################################################
-# Resnet50
-# -----------
-
-
-def _test_resnet50(data):
-    """One iteration of Resnet50"""
-    mean_val = np.array([103.939, 116.779, 123.68], dtype=np.float32)
-    mean_val = np.reshape(mean_val, (1, 3, 1, 1))
-    mean_val = np.tile(mean_val, (1, 1, 224, 224))
-    data_process = data - mean_val
-    data_process = data_process.astype(np.float32)
-
-    proto_file_url = (
-        "https://github.com/fernchen/CaffeModels/raw/master/resnet/ResNet-50-deploy.prototxt"
-    )
-    blob_file_url = (
-        "https://github.com/fernchen/CaffeModels/raw/master/resnet/ResNet-50-model.caffemodel"
-    )
-
-    proto_file = download_testdata(proto_file_url, "resnet50.prototxt", module="model")
-    blob_file = download_testdata(blob_file_url, "resnet50.caffemodel", module="model")
-
-    _test_network(data_process, proto_file, blob_file)
-
-
-def test_forward_Resnet50():
-    """Resnet50"""
-    data = np.random.randint(0, 256, size=(1, 3, 224, 224)).astype(np.float32)
-    _test_resnet50(data)
-
-
-#######################################################################
-# Inceptionv4
-# -----------
-
-
-def _test_inceptionv1(data):
-    """One iteration of Inceptionv4"""
-    mean_val = np.array([103.939, 116.779, 123.68], dtype=np.float32)
-    mean_val = np.reshape(mean_val, (1, 3, 1, 1))
-    mean_val = np.tile(mean_val, (1, 1, 224, 224))
-    data_process = data - mean_val
-    data_process = data_process / 58.8
-    data_process = data_process.astype(np.float32)
-
-    proto_file_url = (
-        "https://github.com/BVLC/caffe/raw/master/models" + "/bvlc_googlenet/deploy.prototxt"
-    )
-    blob_file_url = "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel"
-    proto_file = download_testdata(proto_file_url, "inceptionv1.prototxt", module="model")
-    blob_file = download_testdata(blob_file_url, "inceptionv1.caffemodel", module="model")
-    _test_network(data_process, proto_file, blob_file)
-
-
-@pytest.mark.skip(reason="See issue https://github.com/apache/tvm/issues/13227")
-def test_forward_Inceptionv1():
-    """Inceptionv4"""
-    data = np.random.randint(0, 256, size=(1, 3, 224, 224)).astype(np.float32)
-    _test_inceptionv1(data)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/caffe2/model_zoo/__init__.py b/tests/python/frontend/caffe2/model_zoo/__init__.py
deleted file mode 100644
index 946367f9ed4f..000000000000
--- a/tests/python/frontend/caffe2/model_zoo/__init__.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""Store for caffe2 examples and common models."""
-from __future__ import absolute_import as _abs
-import os
-import sys
-import importlib
-from caffe2.python.models.download import ModelDownloader
-from . import squeezenet
-
-models = [
-    "squeezenet",
-    "resnet50",
-    "vgg19",
-]
-
-mf = ModelDownloader()
-
-
-class Model:
-    def __init__(self, model_name):
-        self.init_net, self.predict_net, self.value_info = mf.get_c2_model(model_name)
-
-
-for model in models:
-    try:
-        locals()["c2_" + model] = importlib.import_module("caffe2.python.models." + model)
-    except ImportError:
-        locals()["c2_" + model] = Model(model)
-
-# squeezenet
-def relay_squeezenet():
-    return squeezenet.get_workload()
diff --git a/tests/python/frontend/caffe2/model_zoo/squeezenet.py b/tests/python/frontend/caffe2/model_zoo/squeezenet.py
deleted file mode 100644
index 06e99567e5a8..000000000000
--- a/tests/python/frontend/caffe2/model_zoo/squeezenet.py
+++ /dev/null
@@ -1,135 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# coding: utf-8
-# pylint: disable=unused-argument
-
-"""
-Symbol of SqueezeNet
-
-Reference:
-Iandola, Forrest N., et al.
-"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and< 0.5 mb model size." (2016).
-"""
-
-from tvm import relay
-from tvm.relay.testing import create_workload
-
-# Helpers
-def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels, prefix=""):
-    net = _make_fire_conv(net, squeeze_channels, 1, 0, f"{prefix}/squeeze1x1")
-
-    left = _make_fire_conv(net, expand1x1_channels, 1, 0, f"{prefix}/expand1x1")
-    right = _make_fire_conv(net, expand3x3_channels, 3, 1, f"{prefix}/expand3x3")
-    # NOTE : Assume NCHW layout here
-    net = relay.concatenate((left, right), axis=1)
-    return net
-
-
-def _make_fire_conv(net, channels, kernel_size, padding=0, prefix=""):
-    net = relay.nn.conv2d(
-        net,
-        relay.var(f"{prefix}_weight"),
-        channels=channels,
-        kernel_size=(kernel_size, kernel_size),
-        padding=(padding, padding),
-    )
-    net = relay.nn.bias_add(net, relay.var(f"{prefix}_bias"))
-    net = relay.nn.relu(net)
-    return net
-
-
-# Net
-def get_net(batch_size, image_shape, num_classes, dtype):
-    """Get symbol of SqueezeNet
-
-    Parameters
-    ----------
-    batch_size : int
-        The batch size used in the model
-
-    image_shape : tuple
-        The input image shape
-
-    num_classes: int
-        The number of classification results
-
-    dtype : str
-        The data type
-
-    """
-    data_shape = (batch_size,) + image_shape
-    net = relay.var("data", shape=data_shape, dtype=dtype)
-    net = relay.nn.conv2d(
-        net,
-        relay.var("conv1_weight"),
-        channels=64,
-        kernel_size=(3, 3),
-        strides=(2, 2),
-        padding=(0, 0),
-    )
-    net = relay.nn.bias_add(net, relay.var("conv1_bias"))
-    net = relay.nn.relu(net)
-    net = relay.nn.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
-    net = _make_fire(net, 16, 64, 64, "fire2")
-    net = _make_fire(net, 16, 64, 64, "fire3")
-    net = relay.nn.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
-    net = _make_fire(net, 32, 128, 128, "fire4")
-    net = _make_fire(net, 32, 128, 128, "fire5")
-    net = relay.nn.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
-    net = _make_fire(net, 48, 192, 192, "fire6")
-    net = _make_fire(net, 48, 192, 192, "fire7")
-    net = _make_fire(net, 64, 256, 256, "fire8")
-    net = _make_fire(net, 64, 256, 256, "fire9")
-    net = relay.nn.dropout(net, rate=0.5)
-    net = relay.nn.conv2d(net, relay.var("conv10_weight"), channels=num_classes, kernel_size=(1, 1))
-    net = relay.nn.bias_add(net, relay.var("conv10_bias"))
-    net = relay.nn.relu(net)
-    net = relay.nn.global_avg_pool2d(net)
-    net = relay.nn.softmax(net, axis=1)
-    args = relay.analysis.free_vars(net)
-    return relay.Function(args, net)
-
-
-def get_workload(batch_size=1, image_shape=(3, 224, 224), num_classes=1000, dtype="float32"):
-    """Get benchmark workload for SqueezeNet
-
-    Parameters
-    ----------
-    batch_size : int, optional
-        The batch size used in the model
-
-    num_classes : int, optional
-        Number of classes
-
-    image_shape : tuple, optional
-        The input image shape
-
-    dtype : str, optional
-        The data type
-
-    Returns
-    -------
-    net : relay.Function
-        The computational graph
-
-    params : dict of str to NDArray
-        The parameters.
-    """
-
-    net = get_net(batch_size, image_shape, num_classes, dtype)
-    return create_workload(net)
diff --git a/tests/python/frontend/caffe2/test_forward.py b/tests/python/frontend/caffe2/test_forward.py
deleted file mode 100644
index 9758d937c254..000000000000
--- a/tests/python/frontend/caffe2/test_forward.py
+++ /dev/null
@@ -1,257 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Caffe2 testcases
-====================
-This article is a test script to test Caffe2 operator with Relay.
-"""
-from collections import namedtuple
-import numpy as np
-
-from caffe2.python import workspace, core
-from caffe2.proto import caffe2_pb2
-from model_zoo import c2_squeezenet, c2_resnet50, c2_vgg19
-import tvm
-from tvm.contrib import graph_executor
-from tvm import relay
-
-import tvm.testing
-
-
-def get_tvm_output(model, input_data, target, device, output_shape, output_dtype="float32"):
-    """Generic function to execute and get tvm output"""
-    # supporting multiple inputs in caffe2 in a bit tricky,
-    # because the input names can appear at the beginning or end of model.predict_net.external_input
-    assert isinstance(input_data, np.ndarray)
-
-    # here we use the first input blob to the first op to get the input name
-    input_names = model.predict_net.op[0].input[0]
-    shape_dict = {input_names: input_data.shape}
-    dtype_dict = {input_names: input_data.dtype}
-    mod, params = relay.frontend.from_caffe2(
-        model.init_net, model.predict_net, shape_dict, dtype_dict
-    )
-    with tvm.transform.PassContext(opt_level=3):
-        lib = relay.build(mod, target, params=params)
-
-    m = graph_executor.GraphModule(lib["default"](device))
-
-    # set inputs
-    m.set_input(input_names, tvm.nd.array(input_data.astype(input_data.dtype)))
-
-    # execute
-    m.run()
-
-    # get outputs
-    if isinstance(output_shape, list) and isinstance(output_dtype, list):
-        tvm_output_list = []
-        for i, s in enumerate(output_shape):
-            tvm_output = m.get_output(i, tvm.nd.empty((s), output_dtype[i]))
-            tvm_output_list.append(tvm_output.numpy())
-        return tvm_output_list
-    else:
-        tvm_output = m.get_output(0, tvm.nd.empty((output_shape), output_dtype))
-        return tvm_output.numpy()
-
-
-def get_caffe2_output(model, x, dtype="float32"):
-    workspace.RunNetOnce(model.init_net)
-
-    input_blob = model.predict_net.op[0].input[0]
-    workspace.FeedBlob(input_blob, x.astype(dtype))
-    workspace.RunNetOnce(model.predict_net)
-
-    output_blob = model.predict_net.external_output[0]
-    c2_output = workspace.FetchBlob(output_blob)
-    return c2_output
-
-
-def verify_caffe2_forward_impl(model, data_shape, out_shape):
-    dtype = "float32"
-    data = np.random.uniform(size=data_shape).astype(dtype)
-    c2_out = get_caffe2_output(model, data, dtype)
-    for target, dev in tvm.testing.enabled_targets():
-        tvm_out = get_tvm_output(model, data, target, dev, out_shape, dtype)
-        tvm.testing.assert_allclose(c2_out, tvm_out, rtol=1e-5, atol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_squeezenet1_1():
-    verify_caffe2_forward_impl(c2_squeezenet, (1, 3, 224, 224), (1, 1000, 1, 1))
-
-
-@tvm.testing.uses_gpu
-def test_forward_resnet50():
-    verify_caffe2_forward_impl(c2_resnet50, (1, 3, 224, 224), (1, 1000))
-
-
-@tvm.testing.uses_gpu
-def test_forward_vgg19():
-    verify_caffe2_forward_impl(c2_vgg19, (1, 3, 224, 224), (1, 1000))
-
-
-Model = namedtuple("Model", ["init_net", "predict_net"])
-
-
-@tvm.testing.uses_gpu
-def test_elementwise_add():
-    """Elewise_add"""
-    data_shape = (1, 16, 9, 9)
-    init_net = caffe2_pb2.NetDef()
-    init_net.name = "test_init_net"
-    init_net.external_output[:] = ["A", "B"]
-    init_net.op.extend(
-        [
-            core.CreateOperator(
-                "GivenTensorFill",
-                [],
-                ["A"],
-                shape=data_shape,
-                values=np.random.uniform(size=data_shape).flatten().tolist(),
-            ),
-            core.CreateOperator(
-                "GivenTensorFill",
-                [],
-                ["B"],
-                shape=data_shape,
-                values=np.random.uniform(size=data_shape).flatten().tolist(),
-            ),
-        ]
-    )
-
-    predict_net = caffe2_pb2.NetDef()
-    predict_net.name = "test_predict_net"
-    predict_net.external_input[:] = ["A", "B"]
-    predict_net.external_output[:] = ["C"]
-    predict_net.op.extend(
-        [
-            core.CreateOperator(
-                "Add",
-                ["A", "B"],
-                ["C"],
-            )
-        ]
-    )
-
-    model = Model(init_net, predict_net)
-    verify_caffe2_forward_impl(model, data_shape, data_shape)
-
-
-@tvm.testing.uses_gpu
-def test_elementwise_add_with_broadcast():
-    """Elewise_add_with_broadcast"""
-    data_shape = (1, 16, 9, 9)
-    init_net = caffe2_pb2.NetDef()
-    init_net.name = "test_init_net"
-    init_net.external_output[:] = ["A", "B"]
-    init_net.op.extend(
-        [
-            core.CreateOperator(
-                "GivenTensorFill",
-                [],
-                ["A"],
-                shape=data_shape,
-                values=np.random.uniform(size=data_shape).flatten().tolist(),
-            ),
-            core.CreateOperator(
-                "GivenTensorFill",
-                [],
-                ["B"],
-                shape=(1,),
-                values=np.random.uniform(size=1).flatten().tolist(),
-            ),
-        ]
-    )
-
-    predict_net = caffe2_pb2.NetDef()
-    predict_net.name = "test_predict_net"
-    predict_net.external_input[:] = ["A", "B"]
-    predict_net.external_output[:] = ["C"]
-    predict_net.op.extend(
-        [
-            core.CreateOperator(
-                "Add",
-                ["A", "B"],
-                ["C"],
-                broadcast=1,
-            )
-        ]
-    )
-
-    model = Model(init_net, predict_net)
-    verify_caffe2_forward_impl(model, data_shape, data_shape)
-
-
-@tvm.testing.uses_gpu
-def test_normalize_yuv():
-    """Normalize_yuv"""
-    data_shape = (1, 3, 96, 96)
-    init_net = caffe2_pb2.NetDef()
-    init_net.name = "test_init_net"
-    init_net.external_output[:] = ["A", "mean", "std"]
-    init_net.op.extend(
-        [
-            core.CreateOperator(
-                "GivenTensorFill",
-                [],
-                ["A"],
-                shape=data_shape,
-                values=np.random.uniform(size=data_shape).flatten().tolist(),
-            ),
-            core.CreateOperator(
-                "GivenTensorFill",
-                [],
-                ["mean"],
-                shape=(
-                    1,
-                    3,
-                ),
-                values=np.random.uniform(size=3).flatten().tolist(),
-            ),
-            core.CreateOperator(
-                "GivenTensorFill",
-                [],
-                ["std"],
-                shape=(
-                    1,
-                    3,
-                ),
-                values=np.random.uniform(size=3).flatten().tolist(),
-            ),
-        ]
-    )
-
-    predict_net = caffe2_pb2.NetDef()
-    predict_net.name = "test_predict_net"
-    predict_net.external_input[:] = ["A", "mean", "std"]
-    predict_net.external_output[:] = ["C"]
-    predict_net.op.extend(
-        [
-            core.CreateOperator(
-                "NormalizePlanarYUV",
-                ["A", "mean", "std"],
-                ["C"],
-            )
-        ]
-    )
-
-    model = Model(init_net, predict_net)
-    verify_caffe2_forward_impl(model, data_shape, data_shape)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/caffe2/test_graph.py b/tests/python/frontend/caffe2/test_graph.py
deleted file mode 100644
index 3bf5beff3fce..000000000000
--- a/tests/python/frontend/caffe2/test_graph.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Test graph equality of caffe2 models."""
-from model_zoo import c2_squeezenet, relay_squeezenet
-import tvm
-from tvm import relay
-from tvm.relay import transform
-
-
-def compare_graph(lhs_mod, rhs_mod):
-    lhs_mod = transform.InferType()(lhs_mod)
-    rhs_mod = transform.InferType()(rhs_mod)
-    tvm.ir.assert_structural_equal(lhs_mod["main"], rhs_mod["main"])
-
-
-def test_squeeze_net():
-    shape_dict = {"data": (1, 3, 224, 224)}
-    dtype_dict = {"data": "float32"}
-    mod, _, = relay.frontend.from_caffe2(
-        c2_squeezenet.init_net, c2_squeezenet.predict_net, shape_dict, dtype_dict
-    )
-    relay_mod, _ = relay_squeezenet()
-    compare_graph(mod, relay_mod)
-
-
-if __name__ == "__main__":
-    test_squeeze_net()
diff --git a/tests/python/frontend/coreml/model_zoo/__init__.py b/tests/python/frontend/coreml/model_zoo/__init__.py
deleted file mode 100644
index ea2f3478fde4..000000000000
--- a/tests/python/frontend/coreml/model_zoo/__init__.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""coreml model zoo for testing purposes."""
-import os
-from PIL import Image
-import numpy as np
-from tvm.contrib.download import download_testdata
-
-
-def get_mobilenet():
-    url = "https://docs-assets.developer.apple.com/coreml/models/MobileNet.mlmodel"
-    dst = "mobilenet.mlmodel"
-    real_dst = download_testdata(url, dst, module="coreml")
-    return os.path.abspath(real_dst)
-
-
-def get_resnet50():
-    url = "https://docs-assets.developer.apple.com/coreml/models/Resnet50.mlmodel"
-    dst = "resnet50.mlmodel"
-    real_dst = download_testdata(url, dst, module="coreml")
-    return os.path.abspath(real_dst)
-
-
-def get_cat_image():
-    """Get cat image"""
-    url = (
-        "https://gist.githubusercontent.com/zhreshold/"
-        + "bcda4716699ac97ea44f791c24310193/raw/fa7ef0e9c9a5daea686d6473a62aacd1a5885849/cat.png"
-    )
-    dst = "cat.png"
-    real_dst = download_testdata(url, dst, module="data")
-    img = Image.open(real_dst).resize((224, 224))
-    # CoreML's standard model image format is BGR
-    img_bgr = np.array(img)[:, :, ::-1]
-    img = np.transpose(img_bgr, (2, 0, 1))[np.newaxis, :]
-    return np.asarray(img)
diff --git a/tests/python/frontend/coreml/test_forward.py b/tests/python/frontend/coreml/test_forward.py
deleted file mode 100644
index 26ddcba6ef41..000000000000
--- a/tests/python/frontend/coreml/test_forward.py
+++ /dev/null
@@ -1,851 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-CoreML testcases
-====================
-This article is a test script to test CoreML operator with Relay.
-"""
-from os import path
-from enum import Enum
-import tempfile
-import numpy as np
-import model_zoo
-import coremltools as cm
-from coremltools.models.neural_network import NeuralNetworkBuilder
-from coremltools.models import datatypes
-from tensorflow import keras
-
-import tvm
-import tvm.topi.testing
-import tvm.testing
-from tvm.contrib import graph_executor
-from tvm.topi.testing import conv2d_nchw_python
-from tvm import relay
-
-
-def get_tvm_output(
-    func, x, params, target, device, out_shape=(1, 1000), input_name="image", dtype="float32"
-):
-    """Generic function to execute and get tvm output"""
-    with tvm.transform.PassContext(opt_level=3):
-        lib = relay.build(func, target, params=params)
-    m = graph_executor.GraphModule(lib["default"](device))
-    # set inputs
-    m.set_input(input_name, tvm.nd.array(x.astype(dtype)))
-    m.run()
-    # get outputs
-    out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
-    return out.numpy()
-
-
-def run_model_checkonly(model_file, model_name="", input_name="image"):
-    model = cm.models.MLModel(model_file)
-    x = model_zoo.get_cat_image()
-    shape_dict = {input_name: x.shape}
-    # Some Relay passes change operators on the fly. Ensuring that we generate
-    # new graph for each target.
-    for target, dev in tvm.testing.enabled_targets():
-        mod, params = relay.frontend.from_coreml(model, shape_dict)
-        tvm_output = get_tvm_output(mod["main"], x, params, target, dev)
-        print(target, dev, model_name, "prediction id: ", np.argmax(tvm_output.flat))
-
-
-@tvm.testing.uses_gpu
-def test_mobilenet_checkonly():
-    model_file = model_zoo.get_mobilenet()
-    run_model_checkonly(model_file, "mobilenet")
-
-
-@tvm.testing.uses_gpu
-def test_resnet50_checkonly():
-    model_file = model_zoo.get_resnet50()
-    run_model_checkonly(model_file, "resnet50")
-
-
-def run_tvm_graph(
-    coreml_model, target, device, input_data, input_name, output_shape, output_dtype="float32"
-):
-    """Generic function to compile on relay and execute on tvm"""
-    if isinstance(input_data, list):
-        shape_dict = {}
-        dtype_dict = {}
-        for i, inp in enumerate(input_name):
-            shape_dict[inp] = input_data[i].shape
-            dtype_dict[inp] = input_data[i].dtype
-    else:
-        shape_dict = {input_name: input_data.shape}
-        dtype_dict = {input_name: input_data.dtype}
-
-    mod, params = relay.frontend.from_coreml(coreml_model, shape_dict)
-    with tvm.transform.PassContext(opt_level=3):
-        lib = relay.build(mod, target, params=params)
-
-    m = graph_executor.GraphModule(lib["default"](device))
-    # set inputs
-    if isinstance(input_data, list):
-        for i, inp in enumerate(input_name):
-            m.set_input(inp, tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
-    else:
-        m.set_input(input_name, tvm.nd.array(input_data.astype(input_data.dtype)))
-
-    # execute
-    m.run()
-    # get outputs
-    if isinstance(output_shape, list) and isinstance(output_dtype, list):
-        tvm_output_list = []
-        for i, s in enumerate(output_shape):
-            tvm_output = m.get_output(i, tvm.nd.empty((s), output_dtype[i]))
-            tvm_output_list.append(tvm_output.numpy())
-        return tvm_output_list
-    else:
-        if not output_shape:
-            tvm_output = m.get_output(0)
-        else:
-            tvm_output = m.get_output(0, tvm.nd.empty((output_shape), output_dtype))
-        return tvm_output.numpy()
-
-
-def verify_add_layer_params(input_dim, alpha=2):
-    """Verify add layer params"""
-    dtype = "float32"
-
-    a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-    a_np2 = np.random.uniform(size=input_dim).astype(dtype)
-
-    b_np = np.add(a_np1, a_np2) + alpha
-    inputs = [("input1", datatypes.Array(*input_dim)), ("input2", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_elementwise(
-        name="Add", alpha=alpha, input_names=["input1", "input2"], output_name="output", mode="ADD"
-    )
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(
-            model, target, dev, [a_np1, a_np2], ["input1", "input2"], b_np.shape, dtype
-        )
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_add_layer_params():
-    verify_add_layer_params((1, 2, 2), 0)
-    verify_add_layer_params((1, 2, 2), 1)
-    verify_add_layer_params((1, 3, 3), 2)
-
-
-def verify_multiply_layer_params(input_dim, alpha):
-    """Verify multiply layer params"""
-    dtype = "float32"
-
-    a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-    a_np2 = np.random.uniform(size=input_dim).astype(dtype)
-
-    b_np = np.multiply(a_np1, a_np2) * alpha
-    inputs = [("input1", datatypes.Array(*input_dim)), ("input2", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_elementwise(
-        name="Mul",
-        alpha=alpha,
-        input_names=["input1", "input2"],
-        output_name="output",
-        mode="MULTIPLY",
-    )
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(
-            model, target, dev, [a_np1, a_np2], ["input1", "input2"], b_np.shape, dtype
-        )
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_multiply_layer_params():
-    verify_multiply_layer_params((1, 2, 2), 0)
-    verify_multiply_layer_params((1, 2, 2), 1)
-    verify_multiply_layer_params((1, 3, 3), 2)
-
-
-def verify_concat_layer_params(input1_dim, input2_dim):
-    """Verify concat layer params"""
-    dtype = "float32"
-
-    a_np1 = np.random.uniform(size=input1_dim).astype(dtype)
-    a_np2 = np.random.uniform(size=input2_dim).astype(dtype)
-
-    b_np = np.concatenate((a_np1, a_np2), axis=1)
-    inputs = [("input1", datatypes.Array(*input1_dim)), ("input2", datatypes.Array(*input2_dim))]
-    output = [("output", datatypes.Array(*b_np.shape))]  # pylint:disable=not-an-iterable
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_elementwise(
-        name="Concate", input_names=["input1", "input2"], output_name="output", mode="CONCAT"
-    )
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(
-            model, target, dev, [a_np1, a_np2], ["input1", "input2"], b_np.shape, dtype
-        )
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_concat_layer_params():
-    verify_concat_layer_params((1, 1, 2, 2), (1, 2, 2, 2))
-    verify_concat_layer_params((1, 2, 4, 4), (1, 3, 4, 4))
-
-
-def _verify_upsample_layer_params(input_dim, scale, mode):
-    dtype = "float32"
-
-    a_np = np.full(input_dim, 1, dtype=dtype)
-
-    if mode == "NN":
-        method = "nearest_neighbor"
-        coord_trans = "asymmetric"
-    else:
-        method = "linear"
-        coord_trans = "align_corners"
-
-    b_np = tvm.topi.testing.resize2d_python(a_np, (scale, scale), "NCHW", method, coord_trans)
-
-    input_data = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(input_data, output)
-    builder.add_upsample(
-        name="Upsample",
-        scaling_factor_h=scale,
-        scaling_factor_w=scale,
-        mode=mode,
-        input_name="input",
-        output_name="output",
-    )
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, a_np, "input", b_np.shape, dtype)
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_upsample_layer_params():
-    """Upsample Layer Params"""
-    _verify_upsample_layer_params((1, 16, 32, 32), 2, "NN")
-    _verify_upsample_layer_params((1, 4, 6, 6), 3, "BILINEAR")
-
-
-def _verify_l2_normalize(input_dim, eps):
-    dtype = "float32"
-
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    b_np = tvm.topi.testing.l2_normalize_python(a_np, eps, 1)
-
-    input_data = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(input_data, output)
-    builder.add_l2_normalize(name="L2", epsilon=eps, input_name="input", output_name="output")
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, a_np, "input", b_np.shape, dtype)
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_l2_normalize():
-    _verify_l2_normalize((1, 3, 20, 20), 0.001)
-
-
-def _verify_lrn(input_dim, size, bias, alpha, beta):
-    dtype = "float32"
-    axis = 1
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    b_np = tvm.topi.testing.lrn_python(a_np, size, axis, bias, alpha, beta)
-
-    input_data = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(input_data, output)
-    builder.add_lrn(
-        name="LRN",
-        input_name="input",
-        output_name="output",
-        alpha=alpha,
-        beta=beta,
-        k=bias,
-        local_size=size,
-    )
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, a_np, "input", b_np.shape, dtype)
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_lrn():
-    _verify_lrn((1, 3, 10, 20), 3, 1.0, 1.0, 0.5)
-
-
-def _verify_average(input_dim1, input_dim2, axis=0):
-    dtype = "float32"
-
-    a_np1 = np.random.uniform(size=input_dim1).astype(dtype)
-    a_np2 = np.random.uniform(size=input_dim2).astype(dtype)
-
-    b_np = np.mean((a_np1, a_np2), axis=axis, dtype=float)
-
-    inputs = [("input1", datatypes.Array(*input_dim1)), ("input2", datatypes.Array(*input_dim2))]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_elementwise(
-        name="MEAN", input_names=["input1", "input2"], output_name="output", mode="AVE"
-    )
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(
-            model, target, dev, [a_np1, a_np2], ["input1", "input2"], b_np.shape, dtype
-        )
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_average():
-    _verify_average((1, 3, 20, 20), (1, 3, 20, 20))
-    # disable tests for now because ValueError: setting an array element with a sequence.
-    # The requested array has an inhomogeneous shape after 1 dimensions. The detected shape
-    # was (2,) + inhomogeneous part.
-    # _verify_average((3, 20, 20), (1, 3, 20, 20))
-    # _verify_average((20, 20), (1, 3, 20, 20))
-
-
-def _verify_max(input_dim):
-    dtype = "float32"
-
-    a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-    a_np2 = np.random.uniform(size=input_dim).astype(dtype)
-    a_np3 = np.random.uniform(size=input_dim).astype(dtype)
-
-    b_np = np.max((a_np1, a_np2, a_np3), axis=0)
-
-    inputs = [
-        ("input1", datatypes.Array(*input_dim)),
-        ("input2", datatypes.Array(*input_dim)),
-        ("input3", datatypes.Array(*input_dim)),
-    ]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_elementwise(
-        name="Max", input_names=["input1", "input2", "input3"], output_name="output", mode="MAX"
-    )
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(
-            model,
-            target,
-            dev,
-            [a_np1, a_np2, a_np3],
-            ["input1", "input2", "input3"],
-            b_np.shape,
-            dtype,
-        )
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_max():
-    _verify_max((1, 3, 20, 20))
-    _verify_max((20, 20))
-
-
-def _verify_min(input_dim):
-    dtype = "float32"
-
-    a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-    a_np2 = np.random.uniform(size=input_dim).astype(dtype)
-    a_np3 = np.random.uniform(size=input_dim).astype(dtype)
-
-    b_np = np.min((a_np1, a_np2, a_np3), axis=0)
-
-    inputs = [
-        ("input1", datatypes.Array(*input_dim)),
-        ("input2", datatypes.Array(*input_dim)),
-        ("input3", datatypes.Array(*input_dim)),
-    ]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_elementwise(
-        name="Min", input_names=["input1", "input2", "input3"], output_name="output", mode="MIN"
-    )
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(
-            model,
-            target,
-            dev,
-            [a_np1, a_np2, a_np3],
-            ["input1", "input2", "input3"],
-            b_np.shape,
-            dtype,
-        )
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_min():
-    _verify_min((1, 3, 20, 20))
-    _verify_min((20, 20))
-
-
-def verify_unary_sqrt(input_dim):
-    """Verify unary sqrt"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    ref_val = np.sqrt(a_np)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_unary(name="sqrt", input_name="input", output_name="output", mode="sqrt")
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def verify_unary_rsqrt(input_dim, epsilon=0):
-    """Verify unary rsqrt"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    ref_val = 1 / np.sqrt(a_np + epsilon)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_unary(
-        name="rsqrt", input_name="input", output_name="output", mode="rsqrt", epsilon=epsilon
-    )
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def verify_unary_inverse(input_dim, epsilon=0):
-    """Verify unary inverse"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    ref_val = 1 / (a_np + epsilon)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_unary(
-        name="inverse", input_name="input", output_name="output", mode="inverse", epsilon=epsilon
-    )
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def verify_unary_power(input_dim, alpha):
-    """Verify unary power"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    ref_val = np.power(a_np, alpha)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_unary(
-        name="power", input_name="input", output_name="output", mode="power", alpha=alpha
-    )
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def verify_unary_exp(input_dim):
-    """Verify unary exp"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    ref_val = np.exp(a_np)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_unary(name="exp", input_name="input", output_name="output", mode="exp")
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def verify_unary_log(input_dim):
-    """Verify unary log"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    ref_val = np.log(a_np)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_unary(name="log", input_name="input", output_name="output", mode="log")
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def verify_unary_abs(input_dim):
-    """Verify unary abs"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(-100.0, 100.0, size=input_dim).astype(dtype)
-    ref_val = np.abs(a_np)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_unary(name="abs", input_name="input", output_name="output", mode="abs")
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def verify_unary_threshold(input_dim, alpha):
-    """Verify unary threshold"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(-100.0, 100.0, size=input_dim).astype(dtype)
-    ref_val = np.maximum(a_np, alpha)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_unary(
-        name="threshold", input_name="input", output_name="output", mode="threshold", alpha=alpha
-    )
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_unary():
-    """All unary"""
-    verify_unary_sqrt((1, 3, 20, 20))
-    verify_unary_rsqrt((1, 3, 20, 20))
-    verify_unary_rsqrt((1, 3, 20, 20), epsilon=1e-6)
-    verify_unary_inverse((1, 3, 20, 20))
-    verify_unary_inverse((1, 3, 20, 20), epsilon=1e-6)
-    verify_unary_power((1, 3, 20, 20), alpha=0.5)
-    verify_unary_power((1, 3, 20, 20), alpha=4)
-    verify_unary_exp((1, 3, 20, 20))
-    verify_unary_log((1, 3, 20, 20))
-    verify_unary_abs((1, 3, 20, 20))
-    verify_unary_threshold((1, 3, 20, 20), alpha=-6.0)
-    verify_unary_threshold((1, 3, 20, 20), alpha=5.0)
-
-
-@tvm.testing.uses_gpu
-def test_forward_reduce():
-    """Reduce"""
-
-    class ReduceAxis(Enum):
-        # pylint: disable=invalid-name
-        CHW = 0
-        HW = 1
-        C = 2
-        H = 3
-        W = 4
-
-    def _verify_reduce(input_dim, mode, axis, ref_func, dtype="float32"):
-        print(input_dim, mode, axis)
-        a_np = np.random.uniform(size=input_dim).astype(dtype)
-
-        # translate to axis from coreml format
-        if axis == ReduceAxis.CHW:
-            np_axis = (-3, -2, -1)
-        elif axis == ReduceAxis.HW:
-            np_axis = (-2, -1)
-        elif axis == ReduceAxis.C:
-            np_axis = -3
-        elif axis == ReduceAxis.H:
-            np_axis = -2
-        elif axis == ReduceAxis.W:
-            np_axis = -1
-
-        if ref_func is np.argmax:
-            ref_val = np.expand_dims(ref_func(a_np, np_axis), np_axis).astype(dtype)
-        else:
-            ref_val = ref_func(a_np, np_axis, keepdims=True)
-
-        inputs = [("input", datatypes.Array(*input_dim))]
-        output = [("output", datatypes.Array(*ref_val.shape))]
-        builder = NeuralNetworkBuilder(inputs, output)
-        builder.add_reduce(
-            name=mode, input_name="input", output_name="output", axis=axis.name, mode=mode
-        )
-
-        model = cm.models.MLModel(builder.spec)
-        for target, dev in tvm.testing.enabled_targets():
-            out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-            tvm.testing.assert_allclose(out, ref_val, rtol=1e-5, atol=1e-5)
-
-    dshapes = [[10, 10], [1, 10, 10], [1, 3, 10, 10]]
-    for dshape in dshapes:
-        for axis in ReduceAxis:
-            if len(dshape) < 3 and axis in [ReduceAxis.CHW, ReduceAxis.C]:
-                # input must have rank at least 3
-                continue
-            _verify_reduce(dshape, "sum", axis, np.sum)
-            _verify_reduce(dshape, "avg", axis, np.mean)
-            _verify_reduce(dshape, "prod", axis, np.prod)
-            _verify_reduce(dshape, "min", axis, np.min)
-            _verify_reduce(dshape, "max", axis, np.max)
-            if axis in [ReduceAxis.C, ReduceAxis.H, ReduceAxis.W]:
-                # For mode ArgMax, axis must be [-1] or [-2] or [-3]
-                _verify_reduce(dshape, "argmax", axis, np.argmax, dtype="int32")
-
-
-def verify_reshape(input_dim, target_shape, mode):
-    """Reshape"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(-100.0, 100.0, size=input_dim).astype(dtype)
-    ref_val = np.reshape(a_np, target_shape)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_reshape(
-        name="reshape",
-        input_name="input",
-        output_name="output",
-        target_shape=target_shape,
-        mode=mode,
-    )
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def test_forward_reshape():
-    for mode in [0, 1]:
-        verify_reshape((20,), (1, 2, 2, 5), mode)
-        verify_reshape((1, 3, 20, 20), (1, 12, 10, 10), mode)
-
-
-def _verify_split(input_dim, out_nums):
-    dtype = "float32"
-
-    a_np = np.random.uniform(-100.0, 100.0, size=input_dim).astype(dtype)
-    ref_val = np.split(a_np, out_nums, axis=-3)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-
-    output_names = []
-    outputs = []
-    output_shapes = []
-    for i, out in enumerate(ref_val):
-        output_name = "output" + str(i)
-        output_names = output_names + [output_name]
-        outputs = outputs + [(output_name, datatypes.Array(*out.shape))]
-        output_shapes = output_shapes + [out.shape]
-
-    builder = NeuralNetworkBuilder(inputs, outputs)
-    builder.add_split(name="split", input_name="input", output_names=output_names)
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(
-            model, target, dev, [a_np], ["input"], output_shapes, [dtype] * len(output_shapes)
-        )
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def test_forward_split():
-    """Split"""
-    _verify_split(
-        (
-            1,
-            4,
-            4,
-            4,
-        ),
-        2,
-    )
-    _verify_split(
-        (
-            1,
-            3,
-            30,
-            20,
-        ),
-        3,
-    )
-
-
-def verify_image_scaler(input_dim, blue_bias=0.0, green_bias=0.0, red_bias=0.0, image_scale=1.0):
-    """Verify image scaler"""
-    dtype = "float32"
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    # make sure it is valid image format CHW.
-    assert len(a_np.shape) == 3 and a_np.shape[0] == 3
-    b_np = np.zeros(a_np.shape, dtype=dtype)
-    b_np[0, :, :] = image_scale * a_np[0, :, :] + blue_bias
-    b_np[1, :, :] = image_scale * a_np[1, :, :] + green_bias
-    b_np[2, :, :] = image_scale * a_np[2, :, :] + red_bias
-    b_np = np.add(a_np, b_np)
-    inputs = [("input1", datatypes.Array(*input_dim)), ("input2", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.set_pre_processing_parameters(
-        image_input_names=["input1"],
-        is_bgr=True,
-        blue_bias=blue_bias,
-        green_bias=green_bias,
-        red_bias=red_bias,
-        image_scale=image_scale,
-    )
-    # add one add layer to make CoreML model format valid
-    # add layer has been tested before.
-    builder.add_elementwise(
-        name="add", input_names=["input1", "input2"], output_name="output", alpha=0, mode="ADD"
-    )
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(
-            model, target, dev, [a_np, a_np], ["input1", "input2"], b_np.shape, dtype
-        )
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_image_scaler():
-    verify_image_scaler((3, 224, 224), image_scale=0.17)
-    verify_image_scaler(
-        (3, 224, 224),
-        blue_bias=-1.7669800519943237,
-        green_bias=-1.985260009765625,
-        red_bias=-2.102560043334961,
-        image_scale=0.379,
-    )
-
-
-def verify_convolution(input_dim, filter_, padding):
-    """Verify convolution"""
-    dtype = "float32"
-    _, c, h, width = input_dim
-    out_c, _, kernel_h, kernel_w = filter_
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    w_np = np.random.uniform(size=(out_c, c, kernel_h, kernel_w)).astype(dtype)
-    w_np_cm = np.transpose(w_np, axes=(2, 3, 1, 0))
-    b_np = conv2d_nchw_python(a_np, w_np, [1, 1], padding)
-    inputs = [("input1", datatypes.Array(c, h, width))]
-    output = [("output", datatypes.Array(*b_np.shape))]  # pylint:disable=not-an-iterable
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_convolution(
-        name="conv",
-        kernel_channels=3,
-        output_channels=out_c,
-        height=kernel_h,
-        width=kernel_w,
-        stride_height=1,
-        stride_width=1,
-        border_mode=padding.lower(),
-        groups=1,
-        W=w_np_cm,
-        b=None,
-        has_bias=False,
-        is_deconv=False,
-        input_name="input1",
-        output_name="output",
-    )
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input1"], output_shape=None)
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_convolution():
-    verify_convolution((1, 3, 224, 224), filter_=(32, 3, 3, 3), padding="VALID")
-    verify_convolution((1, 3, 224, 224), filter_=(32, 3, 3, 3), padding="SAME")
-
-
-def test_can_build_keras_to_coreml_to_relay():
-    """Test multiple conversion paths and importing from a saved file."""
-    model = keras.models.Sequential()
-    model.add(
-        keras.layers.Conv2D(
-            filters=6,
-            kernel_size=(1, 1),
-            activation="relu",
-            padding="same",
-            input_shape=(3, 3, 1),
-            data_format="channels_first",
-        )
-    )
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        kmodel_fn = path.join(tmpdir, "c1mdl.h5")
-        model.save(kmodel_fn)
-
-        mdl = cm.convert(
-            kmodel_fn, convert_to="neuralnetwork", minimum_deployment_target=cm.target.macOS11
-        )
-        model_file = path.join(tmpdir, "c1.mlmodel")
-        mdl.save(model_file)
-
-        mdl = cm.models.MLModel(model_file)
-        desc = mdl.get_spec().description
-        iname = desc.input[0].name
-        ishape = desc.input[0].type.multiArrayType.shape
-        shape_dict = {}
-        for i in mdl.get_spec().description.input:
-            iname = i.name
-            ishape = i.type.multiArrayType.shape
-            shape_dict[iname] = ishape
-        mod, params = relay.frontend.from_coreml(mdl, shape_dict)
-
-        with tvm.transform.PassContext(opt_level=3):
-            relay.build(mod, "llvm", params=params)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/darknet/test_forward.py b/tests/python/frontend/darknet/test_forward.py
deleted file mode 100644
index e78e35ff5c7c..000000000000
--- a/tests/python/frontend/darknet/test_forward.py
+++ /dev/null
@@ -1,537 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=unused-argument
-"""
-Test Darknet Models
-===================
-This article is a test script to test darknet models with Relay.
-All the required models and libraries will be downloaded from the internet
-by the script.
-"""
-from cffi import FFI
-import numpy as np
-import tvm
-from tvm.contrib import graph_executor
-from tvm.contrib.download import download_testdata
-
-from tvm.relay.testing.darknet import LAYERTYPE
-from tvm.relay.testing.darknet import __darknetffi__
-from tvm.relay.frontend.darknet import ACTIVATION
-from tvm import relay
-
-REPO_URL = "https://github.com/dmlc/web-data/blob/main/darknet/"
-
-# Lazily initialized
-DARKNET_TEST_IMAGE_PATH = None
-LIB = None
-
-
-def _lib():
-    global LIB
-    lib = "libdarknet2.0.so"
-    url = REPO_URL + "lib/" + lib + "?raw=true"
-    if LIB is None:
-        LIB = __darknetffi__.dlopen(download_testdata(url, lib, module="darknet"))
-
-    return LIB
-
-
-def _darknet_test_image_path():
-    global DARKNET_TEST_IMAGE_PATH
-    if DARKNET_TEST_IMAGE_PATH is None:
-        name = "dog.jpg"
-        url = REPO_URL + "data/" + name + "?raw=true"
-        DARKNET_TEST_IMAGE_PATH = download_testdata(url, name, module="data")
-    return DARKNET_TEST_IMAGE_PATH
-
-
-def astext(program, unify_free_vars=False):
-    """check that program is parsable in text format"""
-    text = program.astext()
-    if isinstance(program, relay.Expr):
-        roundtrip_program = tvm.relay.parse_expr(text)
-    else:
-        roundtrip_program = tvm.relay.fromtext(text)
-
-    tvm.ir.assert_structural_equal(roundtrip_program, program, map_free_vars=True)
-
-
-def _read_memory_buffer(shape, data, dtype="float32"):
-    length = 1
-    for x in shape:
-        length *= x
-    data_np = np.zeros(length, dtype=dtype)
-    for i in range(length):
-        data_np[i] = data[i]
-    return data_np.reshape(shape)
-
-
-def _get_tvm_output(net, data, build_dtype="float32", states=None):
-    """Compute TVM output"""
-    dtype = "float32"
-    mod, params = relay.frontend.from_darknet(net, data.shape, dtype)
-    # verify that from_darknet creates a valid, parsable relay program
-    mod = relay.transform.InferType()(mod)
-    astext(mod)
-
-    target = "llvm"
-    lib = relay.build(mod, target, params=params)
-
-    # Execute on TVM
-    dev = tvm.cpu(0)
-    m = graph_executor.GraphModule(lib["default"](dev))
-    # set inputs
-    m.set_input("data", tvm.nd.array(data.astype(dtype)))
-    if states:
-        for name in states.keys():
-            m.set_input(name, tvm.nd.array(states[name].astype(dtype)))
-    m.run()
-    # get outputs
-    tvm_out = []
-    for i in range(m.get_num_outputs()):
-        tvm_out.append(m.get_output(i).numpy())
-    return tvm_out
-
-
-def _load_net(cfg_url, cfg_name, weights_url, weights_name):
-    cfg_path = download_testdata(cfg_url, cfg_name, module="darknet")
-    weights_path = download_testdata(weights_url, weights_name, module="darknet")
-    net = _lib().load_network(cfg_path.encode("utf-8"), weights_path.encode("utf-8"), 0)
-    return net
-
-
-def verify_darknet_frontend(net, build_dtype="float32"):
-    """Test network with given input image on both darknet and tvm"""
-
-    def get_darknet_output(net, img):
-        _lib().network_predict_image(net, img)
-        out = []
-        for i in range(net.n):
-            layer = net.layers[i]
-            if layer.type == LAYERTYPE.REGION:
-                attributes = np.array(
-                    [
-                        layer.n,
-                        layer.out_c,
-                        layer.out_h,
-                        layer.out_w,
-                        layer.classes,
-                        layer.coords,
-                        layer.background,
-                    ],
-                    dtype=np.int32,
-                )
-                out.insert(0, attributes)
-                out.insert(0, _read_memory_buffer((layer.n * 2,), layer.biases))
-                layer_outshape = (layer.batch, layer.out_c, layer.out_h, layer.out_w)
-                out.insert(0, _read_memory_buffer(layer_outshape, layer.output))
-            elif layer.type == LAYERTYPE.YOLO:
-                attributes = np.array(
-                    [layer.n, layer.out_c, layer.out_h, layer.out_w, layer.classes, layer.total],
-                    dtype=np.int32,
-                )
-                out.insert(0, attributes)
-                out.insert(0, _read_memory_buffer((layer.total * 2,), layer.biases))
-                out.insert(0, _read_memory_buffer((layer.n,), layer.mask, dtype="int32"))
-                layer_outshape = (layer.batch, layer.out_c, layer.out_h, layer.out_w)
-                out.insert(0, _read_memory_buffer(layer_outshape, layer.output))
-            elif i == net.n - 1:
-                if layer.type == LAYERTYPE.CONNECTED:
-                    darknet_outshape = (layer.batch, layer.out_c)
-                elif layer.type in [LAYERTYPE.SOFTMAX]:
-                    darknet_outshape = (layer.batch, layer.outputs)
-                else:
-                    darknet_outshape = (layer.batch, layer.out_c, layer.out_h, layer.out_w)
-                out.insert(0, _read_memory_buffer(darknet_outshape, layer.output))
-        return out
-
-    dtype = "float32"
-
-    img = _lib().letterbox_image(
-        _lib().load_image_color(_darknet_test_image_path().encode("utf-8"), 0, 0), net.w, net.h
-    )
-    darknet_output = get_darknet_output(net, img)
-    batch_size = 1
-    data = np.empty([batch_size, img.c, img.h, img.w], dtype)
-    i = 0
-    for c in range(img.c):
-        for h in range(img.h):
-            for k in range(img.w):
-                data[0][c][h][k] = img.data[i]
-                i = i + 1
-
-    tvm_out = _get_tvm_output(net, data, build_dtype)
-    for tvm_outs, darknet_out in zip(tvm_out, darknet_output):
-        tvm.testing.assert_allclose(darknet_out, tvm_outs, rtol=1e-3, atol=1e-3)
-
-
-def _test_rnn_network(net, states):
-    """Test network with given input data on both darknet and tvm"""
-
-    def get_darknet_network_predict(net, data):
-        return _lib().network_predict(net, data)
-
-    ffi = FFI()
-    np_arr = np.zeros([1, net.inputs], dtype="float32")
-    np_arr[0, 2] = 1
-    cffi_arr = ffi.cast("float*", np_arr.ctypes.data)
-    tvm_out = _get_tvm_output(net, np_arr, states=states)[0]
-    darknet_output = get_darknet_network_predict(net, cffi_arr)
-    darknet_out = np.zeros(net.outputs, dtype="float32")
-    for i in range(net.outputs):
-        darknet_out[i] = darknet_output[i]
-    last_layer = net.layers[net.n - 1]
-    darknet_outshape = (last_layer.batch, last_layer.outputs)
-    darknet_out = darknet_out.reshape(darknet_outshape)
-    tvm.testing.assert_allclose(darknet_out, tvm_out, rtol=1e-4, atol=1e-4)
-
-
-def test_forward_extraction():
-    """test extraction model"""
-    model_name = "extraction"
-    cfg_name = model_name + ".cfg"
-    weights_name = model_name + ".weights"
-    cfg_url = "https://github.com/pjreddie/darknet/blob/master/cfg/" + cfg_name + "?raw=true"
-    weights_url = "http://pjreddie.com/media/files/" + weights_name + "?raw=true"
-    net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_alexnet():
-    """test alexnet model"""
-    model_name = "alexnet"
-    cfg_name = model_name + ".cfg"
-    weights_name = model_name + ".weights"
-    cfg_url = "https://github.com/pjreddie/darknet/blob/master/cfg/" + cfg_name + "?raw=true"
-    weights_url = "http://pjreddie.com/media/files/" + weights_name + "?raw=true"
-    net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_resnet50():
-    """test resnet50 model"""
-    model_name = "resnet50"
-    cfg_name = model_name + ".cfg"
-    weights_name = model_name + ".weights"
-    cfg_url = "https://github.com/pjreddie/darknet/blob/master/cfg/" + cfg_name + "?raw=true"
-    weights_url = "http://pjreddie.com/media/files/" + weights_name + "?raw=true"
-    net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_resnext50():
-    """test resnet50 model"""
-    model_name = "resnext50"
-    cfg_name = model_name + ".cfg"
-    weights_name = model_name + ".weights"
-    cfg_url = "https://github.com/pjreddie/darknet/blob/master/cfg/" + cfg_name + "?raw=true"
-    weights_url = "http://pjreddie.com/media/files/" + weights_name + "?raw=true"
-    net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_yolov2():
-    """test yolov2 model"""
-    model_name = "yolov2"
-    cfg_name = model_name + ".cfg"
-    weights_name = model_name + ".weights"
-    cfg_url = "https://github.com/pjreddie/darknet/blob/master/cfg/" + cfg_name + "?raw=true"
-    weights_url = "http://pjreddie.com/media/files/" + weights_name + "?raw=true"
-    net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
-    build_dtype = {}
-    verify_darknet_frontend(net, build_dtype)
-    _lib().free_network(net)
-
-
-def test_forward_yolov3():
-    """test yolov3 model"""
-    model_name = "yolov3"
-    cfg_name = model_name + ".cfg"
-    weights_name = model_name + ".weights"
-    cfg_url = "https://github.com/pjreddie/darknet/blob/master/cfg/" + cfg_name + "?raw=true"
-    weights_url = "http://pjreddie.com/media/files/" + weights_name + "?raw=true"
-    net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
-    build_dtype = {}
-    verify_darknet_frontend(net, build_dtype)
-    _lib().free_network(net)
-
-
-def test_forward_convolutional():
-    """test convolutional layer"""
-    net = _lib().make_network(1)
-    layer = _lib().make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0)
-    net.layers[0] = layer
-    net.w = net.h = 224
-    _lib().resize_network(net, 224, 224)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_dense():
-    """test fully connected layer"""
-    net = _lib().make_network(1)
-    layer = _lib().make_connected_layer(1, 75, 20, 1, 0, 0)
-    net.layers[0] = layer
-    net.w = net.h = 5
-    _lib().resize_network(net, 5, 5)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_dense_batchnorm():
-    """test fully connected layer with batchnorm"""
-    net = _lib().make_network(1)
-    layer = _lib().make_connected_layer(1, 12, 2, 1, 1, 0)
-    for i in range(5):
-        layer.rolling_mean[i] = np.random.rand(1)
-        layer.rolling_variance[i] = np.random.rand(1) + 0.5
-        layer.scales[i] = np.random.rand(1)
-    net.layers[0] = layer
-    net.w = net.h = 2
-    _lib().resize_network(net, 2, 2)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_maxpooling():
-    """test maxpooling layer"""
-    net = _lib().make_network(1)
-    layer = _lib().make_maxpool_layer(1, 224, 224, 3, 2, 2, 0)
-    net.layers[0] = layer
-    net.w = net.h = 224
-    _lib().resize_network(net, 224, 224)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_avgpooling():
-    """test avgerage pooling layer"""
-    net = _lib().make_network(1)
-    layer = _lib().make_avgpool_layer(1, 224, 224, 3)
-    net.layers[0] = layer
-    net.w = net.h = 224
-    _lib().resize_network(net, 224, 224)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_conv_batch_norm():
-    """test batch normalization layer"""
-    net = _lib().make_network(1)
-    layer = _lib().make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 1, 0, 0, 0)
-    for i in range(32):
-        layer.rolling_mean[i] = np.random.rand(1)
-        layer.rolling_variance[i] = np.random.rand(1) + 0.5
-    net.layers[0] = layer
-    net.w = net.h = 224
-    _lib().resize_network(net, 224, 224)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_shortcut():
-    """test shortcut layer"""
-    net = _lib().make_network(3)
-    layer_1 = _lib().make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0)
-    layer_2 = _lib().make_convolutional_layer(1, 111, 111, 32, 32, 1, 1, 1, 0, 1, 0, 0, 0, 0)
-    layer_3 = _lib().make_shortcut_layer(1, 0, 111, 111, 32, 111, 111, 32)
-    layer_3.activation = ACTIVATION.RELU
-    layer_3.alpha = 1
-    layer_3.beta = 1
-    net.layers[0] = layer_1
-    net.layers[1] = layer_2
-    net.layers[2] = layer_3
-    net.w = net.h = 224
-    _lib().resize_network(net, 224, 224)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_reorg():
-    """test reorg layer"""
-    net = _lib().make_network(2)
-    layer_1 = _lib().make_convolutional_layer(1, 222, 222, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0)
-    layer_2 = _lib().make_reorg_layer(1, 110, 110, 32, 2, 0, 0, 0)
-    net.layers[0] = layer_1
-    net.layers[1] = layer_2
-    net.w = net.h = 222
-    _lib().resize_network(net, 222, 222)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_region():
-    """test region layer"""
-    net = _lib().make_network(2)
-    layer_1 = _lib().make_convolutional_layer(1, 19, 19, 3, 425, 1, 1, 1, 0, 1, 0, 0, 0, 0)
-    layer_2 = _lib().make_region_layer(1, 19, 19, 5, 80, 4)
-    layer_2.softmax = 1
-    net.layers[0] = layer_1
-    net.layers[1] = layer_2
-    net.w = net.h = 19
-    _lib().resize_network(net, 19, 19)
-    build_dtype = {}
-    verify_darknet_frontend(net, build_dtype)
-    _lib().free_network(net)
-
-
-def test_forward_yolo_op():
-    """test yolo layer"""
-    net = _lib().make_network(2)
-    layer_1 = _lib().make_convolutional_layer(1, 224, 224, 3, 14, 1, 3, 2, 0, 1, 0, 0, 0, 0)
-    layer_2 = _lib().make_yolo_layer(1, 111, 111, 2, 9, __darknetffi__.NULL, 2)
-    net.layers[0] = layer_1
-    net.layers[1] = layer_2
-    net.w = net.h = 224
-    _lib().resize_network(net, 224, 224)
-    build_dtype = {}
-    verify_darknet_frontend(net, build_dtype)
-    _lib().free_network(net)
-
-
-def test_forward_upsample():
-    """test upsample layer"""
-    net = _lib().make_network(1)
-    layer = _lib().make_upsample_layer(1, 19, 19, 3, 3)
-    layer.scale = 1
-    net.layers[0] = layer
-    net.w = net.h = 19
-    _lib().resize_network(net, 19, 19)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_l2normalize():
-    """test l2 normalization layer"""
-    net = _lib().make_network(1)
-    layer = _lib().make_l2norm_layer(1, 224 * 224 * 3)
-    layer.c = layer.out_c = 3
-    layer.h = layer.out_h = 224
-    layer.w = layer.out_w = 224
-    net.layers[0] = layer
-    net.w = net.h = 224
-    _lib().resize_network(net, 224, 224)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_elu():
-    """test elu activation layer"""
-    net = _lib().make_network(1)
-    layer_1 = _lib().make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0)
-    layer_1.activation = ACTIVATION.ELU
-    net.layers[0] = layer_1
-    net.w = net.h = 224
-    _lib().resize_network(net, 224, 224)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_softmax():
-    """test softmax layer"""
-    net = _lib().make_network(1)
-    layer_1 = _lib().make_softmax_layer(1, 75, 1)
-    layer_1.temperature = 1
-    net.layers[0] = layer_1
-    net.w = net.h = 5
-    _lib().resize_network(net, net.w, net.h)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_softmax_temperature():
-    """test softmax layer"""
-    net = _lib().make_network(1)
-    layer_1 = _lib().make_softmax_layer(1, 75, 1)
-    layer_1.temperature = 0.8
-    net.layers[0] = layer_1
-    net.w = net.h = 5
-    _lib().resize_network(net, net.w, net.h)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_activation_logistic():
-    """test logistic activation layer"""
-    net = _lib().make_network(1)
-    batch = 1
-    h = 224
-    width = 224
-    c = 3
-    n = 32
-    groups = 1
-    size = 3
-    stride = 2
-    padding = 0
-    activation = ACTIVATION.LOGISTIC
-    batch_normalize = 0
-    binary = 0
-    xnor = 0
-    adam = 0
-    layer_1 = _lib().make_convolutional_layer(
-        batch,
-        h,
-        width,
-        c,
-        n,
-        groups,
-        size,
-        stride,
-        padding,
-        activation,
-        batch_normalize,
-        binary,
-        xnor,
-        adam,
-    )
-    net.layers[0] = layer_1
-    net.w = width
-    net.h = h
-    _lib().resize_network(net, net.w, net.h)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_rnn():
-    """test RNN layer"""
-    net = _lib().make_network(1)
-    batch = 1
-    inputs = 4
-    outputs = 4
-    steps = 1
-    activation = ACTIVATION.RELU
-    batch_normalize = 0
-    adam = 0
-    layer_1 = _lib().make_rnn_layer(
-        batch, inputs, outputs, steps, activation, batch_normalize, adam
-    )
-    net.layers[0] = layer_1
-    net.inputs = inputs
-    net.outputs = outputs
-    net.w = net.h = 0
-    _lib().resize_network(net, net.w, net.h)
-    states = {"rnn0_state": np.zeros([1, net.inputs])}
-    _test_rnn_network(net, states)
-    _lib().free_network(net)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/keras/test_forward.py b/tests/python/frontend/keras/test_forward.py
deleted file mode 100644
index 52505e259d23..000000000000
--- a/tests/python/frontend/keras/test_forward.py
+++ /dev/null
@@ -1,926 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Unit tests for various models and operators"""
-from packaging import version as package_version
-import numpy as np
-
-try:
-    import tensorflow.compat.v1 as tf
-except ImportError:
-    import tensorflow as tf
-
-from tensorflow import keras as tf_keras
-
-# prevent Keras from using up all gpu memory
-import keras
-
-import pytest
-import tvm
-from tvm import relay
-from tvm.contrib import graph_executor
-import tvm.testing
-
-if tf.executing_eagerly():
-    GPUS = tf.config.experimental.list_physical_devices("GPU")
-    for gpu in GPUS:
-        tf.config.experimental.set_memory_growth(gpu, True)
-else:
-    from keras.backend.tensorflow_backend import set_session
-
-    CONFIG = tf.ConfigProto()
-    CONFIG.gpu_options.per_process_gpu_memory_fraction = 0.5
-    set_session(tf.Session(config=CONFIG))
-
-
-def pytest_generate_tests(metafunc):
-    """
-    This function generates the list of tests for pytest, based
-    on scenarios that will change the parameters in which the
-    tests use to run.
-    https://docs.pytest.org/en/latest/example/parametrize.html
-    """
-    idlist = []
-    argvalues = []
-    for scenario in metafunc.cls.scenarios:
-        idlist.append(scenario[0])
-        items = scenario[1].items()
-        argnames = [x[0] for x in items]
-        argvalues.append([x[1] for x in items])
-    metafunc.parametrize(argnames, argvalues, ids=idlist, scope="class")
-
-
-# Scenarios:
-# - classic keras, using keras from "import keras"
-# - tensorflow keras, using keras from "from tensorflow import keras as tf_keras"
-USING_CLASSIC_KERAS = ("keras", {"keras_mod": keras})
-USING_TENSORFLOW_KERAS = ("tf_keras", {"keras_mod": tf_keras})
-
-
-def verify_keras_frontend(keras_model, need_transpose=True, layout="NCHW"):
-    """Generic function to generate and compare Keras and TVM output"""
-    # Keras frontend currently supports tensorflow backend only.
-    assert keras.backend.backend() == "tensorflow"
-
-    if layout != "NCHW":
-        need_transpose = False
-
-    in_shapes = []
-    for layer in keras_model._input_layers:
-        if tf.executing_eagerly():
-            in_shapes.append(tuple(dim if dim is not None else 1 for dim in layer.input.shape))
-        else:
-            in_shapes.append(
-                tuple(dim.value if dim.value is not None else 1 for dim in layer.input.shape)
-            )
-
-    def get_keras_output(in_data):
-        return keras_model.predict(in_data)
-
-    def get_tvm_output(in_data, target, dev, dtype="float32"):
-        shape_dict = {name: x.shape for (name, x) in zip(keras_model.input_names, in_data)}
-        mod, params = relay.frontend.from_keras(keras_model, shape_dict, layout=layout)
-        with tvm.transform.PassContext(opt_level=3):
-            lib = relay.build(mod, target, params=params)
-        m = graph_executor.GraphModule(lib["default"](dev))
-        for name, x in zip(keras_model.input_names, in_data):
-            m.set_input(name, tvm.nd.array(x.astype(dtype)))
-        m.run()
-        return [m.get_output(i).numpy() for i in range(m.get_num_outputs())]
-
-    def to_channels_first(arr):
-        return arr.transpose([0, -1] + list(range(1, arr.ndim - 1)))
-
-    def to_channels_last(arr):
-        return arr.transpose([0] + list(range(2, arr.ndim)) + [1])
-
-    in_data = [np.random.uniform(size=shape, low=-1.0, high=1.0) for shape in in_shapes]
-    keras_out = get_keras_output(in_data)
-    keras_out = keras_out if isinstance(keras_out, list) else [keras_out]
-    for target, dev in tvm.testing.enabled_targets():
-        inputs = [to_channels_first(x) for x in in_data] if need_transpose else in_data
-        tvm_out = get_tvm_output(inputs, target, dev)
-        for kout, tout in zip(keras_out, tvm_out):
-            if need_transpose:
-                tout = to_channels_last(tout)
-            tvm.testing.assert_allclose(kout, tout, rtol=1e-5, atol=1e-5)
-
-
-def get_mobilenet(keras_mod):
-    if hasattr(keras_mod.applications, "MobileNet"):
-        # Keras 2.4.x and older
-        mobilenet_mod = keras_mod.applications.MobileNet
-    else:
-        # Keras 2.6.x and newer
-        mobilenet_mod = keras_mod.applications.mobilenet.MobileNet
-
-    return mobilenet_mod
-
-
-@tvm.testing.uses_gpu
-class TestKeras:
-    """Keras test"""
-
-    scenarios = [USING_CLASSIC_KERAS, USING_TENSORFLOW_KERAS]
-
-    def test_forward_merge(self, keras_mod):
-        """test_forward_merge"""
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        conv2d_x = keras_mod.layers.Conv2D(8, (3, 3), padding="same")(data)
-        conv2d_y = keras_mod.layers.Conv2D(8, (3, 3), padding="same")(conv2d_x)
-        conv2d_z = keras_mod.layers.Conv2D(8, (3, 3), padding="same")(conv2d_y)
-        merge_funcs = [
-            keras_mod.layers.Add(),
-            keras_mod.layers.Subtract(),
-            keras_mod.layers.Multiply(),
-            keras_mod.layers.Maximum(),
-            keras_mod.layers.Minimum(),
-            keras_mod.layers.Average(),
-            keras_mod.layers.Concatenate(),
-        ]
-        for merge_func in merge_funcs:
-            class_name = type(merge_func).__name__
-            if class_name in ("Subtract", "Dot"):
-                out = merge_func([conv2d_x, conv2d_y])
-            else:
-                out = merge_func([conv2d_x, conv2d_y, conv2d_z])
-            keras_model = keras_mod.models.Model(data, out)
-            verify_keras_frontend(keras_model)
-
-    def test_forward_concatenate(self, keras_mod):
-        """test_forward_concatenate"""
-        data1 = keras_mod.layers.Input(shape=(1, 2, 2))
-        data2 = keras_mod.layers.Input(shape=(1, 1, 2))
-        merge_func = keras_mod.layers.Concatenate(axis=2)
-        out = merge_func([data1, data2])
-        keras_model = keras_mod.models.Model([data1, data2], out)
-        verify_keras_frontend(keras_model, layout="NHWC")
-        verify_keras_frontend(keras_model, layout="NCHW")
-        # test default axis (e.g., -1)
-        data1 = keras_mod.layers.Input(shape=(1, 2, 2))
-        data2 = keras_mod.layers.Input(shape=(1, 2, 3))
-        merge_func = keras_mod.layers.Concatenate()
-        out = merge_func([data1, data2])
-        keras_model = keras_mod.models.Model([data1, data2], out)
-        verify_keras_frontend(keras_model, layout="NHWC")
-        verify_keras_frontend(keras_model, layout="NCHW")
-        # test axis at last dimension
-        data1 = keras_mod.layers.Input(shape=(1, 2, 2))
-        data2 = keras_mod.layers.Input(shape=(1, 2, 3))
-        merge_func = keras_mod.layers.Concatenate(axis=3)
-        out = merge_func([data1, data2])
-        keras_model = keras_mod.models.Model([data1, data2], out)
-        verify_keras_frontend(keras_model, layout="NHWC")
-        verify_keras_frontend(keras_model, layout="NCHW")
-
-    def test_forward_merge_dot(self, keras_mod):
-        """test_forward_merge_dot"""
-        data1 = keras_mod.layers.Input(shape=(2, 2))
-        data2 = keras_mod.layers.Input(shape=(2, 2))
-        merge_funcs = [
-            keras_mod.layers.Dot(axes=[1, 2]),
-            keras_mod.layers.Dot(axes=[2, 1]),
-            keras_mod.layers.Dot(axes=[1, 1]),
-            keras_mod.layers.Dot(axes=[2, 2]),
-            keras_mod.layers.Dot(axes=1),
-            keras_mod.layers.Dot(axes=2),
-        ]
-        for merge_func in merge_funcs:
-            out = merge_func([data1, data2])
-            keras_model = keras_mod.models.Model([data1, data2], out)
-            verify_keras_frontend(keras_model)
-
-    def test_forward_activations(self, keras_mod):
-        """test_forward_activations"""
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        act_funcs = [
-            keras_mod.layers.Activation("softmax"),
-            keras_mod.layers.Softmax(),
-            keras_mod.layers.Softmax(axis=-1),
-            keras_mod.layers.Softmax(axis=1),
-            keras_mod.layers.Softmax(axis=2),
-            keras_mod.layers.Softmax(axis=3),
-            keras_mod.layers.Activation("softplus"),
-            keras_mod.layers.Activation("relu"),
-            keras_mod.layers.Activation("softsign"),
-            keras_mod.layers.Activation("hard_sigmoid"),
-            keras_mod.layers.Activation("sigmoid"),
-            keras_mod.layers.Activation("tanh"),
-            keras_mod.layers.Activation("linear"),
-            keras_mod.layers.Activation("selu"),
-            keras_mod.layers.Activation("swish"),
-            keras_mod.layers.ReLU(),
-            keras_mod.layers.ReLU(max_value=6.0),
-            keras_mod.layers.ReLU(max_value=6.0, threshold=0.0),
-            keras_mod.layers.ReLU(max_value=6.0, threshold=1.0),
-            keras_mod.layers.ReLU(max_value=6.0, threshold=1.0, negative_slope=0.0),
-            keras_mod.layers.ReLU(max_value=6.0, threshold=1.0, negative_slope=0.5),
-            keras_mod.layers.ReLU(max_value=6.0, threshold=1.0, negative_slope=1.0),
-            keras_mod.layers.LeakyReLU(alpha=0.3),
-            keras_mod.layers.PReLU(weights=np.random.rand(1, 32, 32, 3)),
-            keras_mod.layers.ELU(alpha=0.5),
-            keras_mod.layers.ThresholdedReLU(theta=0.5),
-        ]
-        for act_func in act_funcs:
-            x = act_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model)
-            verify_keras_frontend(keras_model, need_transpose=False, layout="NHWC")
-        # Test the input dimension = 1
-        data = keras_mod.layers.Input(shape=(11,))
-        act_func = keras_mod.layers.Softmax()
-        x = act_func(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-        verify_keras_frontend(keras_model, need_transpose=False, layout="NHWC")
-
-    def test_forward_activations_except(self, keras_mod):
-        """
-        test invalid attribute alpha=None for LeakyReLU and ELU.
-        after version 2.3.1 in keras, checking was added to reject the invalid api call:
-        LeakyReLU(alpha=None) and ELU(alpha=None),
-        (see issue: https://github.com/tensorflow/tensorflow/pull/47017)
-        Thus, it's necessary to check the keras version to avoid crash at LeakyReLU(alpha=None)
-        and ELU(alpha=None)
-        """
-        if package_version.parse(keras_mod.__version__.split("-tf")[0]) <= package_version.parse(
-            "2.3.1"
-        ):
-            act_funcs = [
-                keras_mod.layers.LeakyReLU(alpha=None),
-                keras_mod.layers.ELU(2, 3, 4),
-                keras_mod.layers.ReLU(threshold=None),
-            ]
-            data = keras_mod.layers.Input(shape=(2, 3, 4))
-            for act_func in act_funcs:
-                layer = act_func(data)
-                keras_model = keras_mod.models.Model(data, layer)
-                with pytest.raises(tvm.error.OpAttributeInvalid):
-                    verify_keras_frontend(keras_model)
-
-    def test_forward_dense(self, keras_mod):
-        """test_forward_dense"""
-        data = keras_mod.layers.Input(shape=(32, 32, 1))
-        x = keras_mod.layers.Flatten()(data)
-        x = keras_mod.layers.Dropout(0.5)(x)
-        x = keras_mod.layers.Dense(10, activation="relu", kernel_initializer="uniform")(x)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-        # RNN dense
-        data = keras_mod.layers.Input(shape=(1, 32))
-        x = keras_mod.layers.Dense(32, activation="relu", kernel_initializer="uniform")(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-        data = keras_mod.layers.Input(shape=(120, 2560), name="image_set")
-        x = keras_mod.layers.Dense(1, activation="linear", name="e")(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-        data = keras_mod.layers.Input(shape=(10, 12, 2560), name="image_set")
-        x = keras_mod.layers.Dense(32, activation="linear", name="e")(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-    def test_forward_permute(self, keras_mod):
-        data = keras_mod.layers.Input(shape=(2, 3, 4))
-        x = keras_mod.layers.Permute([2, 3, 1])(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-    def test_forward_sequential(self, keras_mod):
-        """test_forward_sequential"""
-        keras_model = keras_mod.models.Sequential(
-            [
-                keras_mod.layers.Dense(16, input_dim=32, activation="relu"),
-                keras_mod.layers.Dropout(0.5),
-                keras_mod.layers.Dense(8, activation="relu"),
-                keras_mod.layers.Dropout(0.5),
-                keras_mod.layers.Dense(1, activation="sigmoid"),
-            ]
-        )
-        verify_keras_frontend(keras_model)
-
-    def test_forward_pool(self, keras_mod):
-        """test_forward_pool"""
-        data = keras_mod.layers.Input(shape=(32, 32, 1))
-        # maxpool
-        x = keras_mod.layers.MaxPooling2D((3, 3), strides=(1, 1), padding="same")(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-        # avgpool
-        y = keras_mod.layers.AveragePooling2D((3, 3), strides=(1, 1), padding="same")(data)
-        keras_model = keras_mod.models.Model(data, y)
-        verify_keras_frontend(keras_model)
-        # reject the invalid input shape
-        data = keras_mod.layers.Input(shape=(0, 3, 6, 4))
-        x = keras_mod.layers.GlobalAveragePooling3D()(data)
-        keras_model = keras_mod.models.Model(data, x)
-        with pytest.raises(ValueError):
-            verify_keras_frontend(keras_model)
-
-    def test_forward_conv1d(self, keras_mod):
-        """test_forward_conv1d"""
-        data = keras_mod.layers.Input(shape=(32, 3))
-        conv_funcs = [
-            keras_mod.layers.Conv1D(filters=10, kernel_size=(3,), strides=(2,), padding="same"),
-            keras_mod.layers.Conv1D(
-                filters=10, kernel_size=(3,), dilation_rate=(2,), padding="same"
-            ),
-            keras_mod.layers.Conv1D(filters=1, kernel_size=(3,), padding="valid", use_bias=False),
-            keras_mod.layers.Conv1D(filters=10, kernel_size=(2,), padding="valid"),
-            # Enable when relay conv1dtranspose handles NWC
-            # keras.layers.Conv1DTranspose(filters=10, kernel_size=(3), padding="valid"),
-        ]
-        for conv_func in conv_funcs:
-            x = conv_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, layout="NWC")
-
-    def test_forward_conv(self, keras_mod):
-        """test_forward_conv"""
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        conv_funcs = [
-            keras_mod.layers.Conv2D(filters=10, kernel_size=(3, 3), strides=(2, 2), padding="same"),
-            keras_mod.layers.Conv2D(
-                filters=10, kernel_size=(3, 3), dilation_rate=(2, 2), padding="same"
-            ),
-            keras_mod.layers.Conv2D(filters=1, kernel_size=(3, 3), padding="same"),
-            keras_mod.layers.DepthwiseConv2D(kernel_size=(3, 3), padding="same"),
-            keras_mod.layers.Conv2DTranspose(filters=10, kernel_size=(3, 3), padding="valid"),
-            keras_mod.layers.SeparableConv2D(filters=10, kernel_size=(3, 3), padding="same"),
-            keras_mod.layers.SeparableConv2D(filters=10, kernel_size=(3, 3), dilation_rate=(2, 2)),
-            keras_mod.layers.SeparableConv2D(filters=2, kernel_size=(3, 3), dilation_rate=2),
-        ]
-        for conv_func in conv_funcs:
-            x = conv_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model)
-
-    def test_forward_conv_transpose(self, keras_mod):
-        """test_forward_conv_transpose"""
-        data = keras_mod.layers.Input(shape=(32, 32, 128))
-        conv_funcs = [
-            keras_mod.layers.Conv2DTranspose(filters=64, kernel_size=(2, 2), padding="valid"),
-            keras_mod.layers.Conv2DTranspose(
-                filters=2, kernel_size=(3, 3), strides=(2, 2), output_padding=(1, 1)
-            ),
-        ]
-        for conv_func in conv_funcs:
-            x = conv_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, layout="NHWC")
-
-    def test_forward_batch_norm(self, keras_mod):
-        """test_forward_batch_norm"""
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        batch_norm_funcs = [
-            keras_mod.layers.BatchNormalization(
-                axis=-1,
-                momentum=0.99,
-                epsilon=0.001,
-                center=True,
-                scale=False,
-                beta_initializer="zeros",
-                gamma_initializer="ones",
-                moving_mean_initializer="zeros",
-                moving_variance_initializer="ones",
-            ),
-            keras_mod.layers.BatchNormalization(
-                axis=-1,
-                momentum=0.99,
-                epsilon=0.001,
-                center=True,
-                scale=True,
-                beta_initializer="zeros",
-                gamma_initializer="ones",
-                moving_mean_initializer="zeros",
-                moving_variance_initializer="ones",
-            ),
-            keras_mod.layers.BatchNormalization(
-                axis=-1,
-                momentum=0.99,
-                epsilon=0.001,
-                center=False,
-                scale=True,
-                beta_initializer="zeros",
-                gamma_initializer="ones",
-                moving_mean_initializer="zeros",
-                moving_variance_initializer="ones",
-            ),
-            keras_mod.layers.BatchNormalization(
-                axis=-1,
-                momentum=0.99,
-                epsilon=0.001,
-                center=False,
-                scale=False,
-                beta_initializer="zeros",
-                gamma_initializer="ones",
-                moving_mean_initializer="zeros",
-                moving_variance_initializer="ones",
-            ),
-        ]
-        for batch_norm_func in batch_norm_funcs:
-            x = batch_norm_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model)
-
-    def test_forward_upsample(self, keras_mod, interpolation="nearest"):
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        x = keras_mod.layers.UpSampling2D(size=(3, 3), interpolation=interpolation)(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-        # Height and width are not equal for the attribute size
-        data = keras_mod.layers.Input(shape=(2, 1, 3))
-        x = keras_mod.layers.UpSampling2D(size=(1, 2), interpolation=interpolation)(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-
-    def test_forward_reshape(self, keras_mod):
-        """test_forward_reshape"""
-        # input_shape len is 3, target_shape len is 3
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        x = keras_mod.layers.Reshape(target_shape=(16, 64, 3))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-        # input_shape len is 3, target_shape len is 2
-        data = keras_mod.layers.Input(shape=(32, 8, 3))
-        x = keras_mod.layers.Reshape(target_shape=(256, 3))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-        # input_shape len is 2, target_shape len is 3
-        data = keras_mod.layers.Input(shape=(256, 3))
-        x = keras_mod.layers.Reshape(target_shape=(8, 32, 3))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-        # input_shape len is 2, target_shape len is 1
-        data = keras_mod.layers.Input(shape=(2, 8))
-        x = keras_mod.layers.Reshape(target_shape=(16,))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-        # input_shape len is 1, target_shape len is 2
-        data = keras_mod.layers.Input(shape=(16,))
-        x = keras_mod.layers.Reshape(target_shape=(4, 4))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-        # input_shape len is 2, target_shape len is 2
-        data = keras_mod.layers.Input(shape=(2, 8))
-        x = keras_mod.layers.Reshape(target_shape=(4, 4))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-        # "non-square" target shape
-        data = keras_mod.layers.Input(shape=(15,))
-        x = keras_mod.layers.Reshape(target_shape=(5, 3))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-        # modify channel dim
-        data = keras_mod.layers.Input(shape=(3, 2, 4))
-        x = keras_mod.layers.Reshape(target_shape=(3, 8))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-
-    def test_forward_crop(self, keras_mod):
-        """test_forward_crop"""
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        x = keras_mod.layers.Cropping2D(cropping=((1, 1), (1, 1)))(data)
-        x = keras_mod.layers.Cropping2D(cropping=(1, 1))(x)
-        x = keras_mod.layers.Cropping2D(cropping=1)(x)
-        x = keras_mod.layers.Cropping2D(cropping=((0, 1), (1, 0)))(x)
-        x = keras_mod.layers.Cropping2D(cropping=(1, 0))(x)
-        x = keras_mod.layers.Cropping2D(cropping=0)(x)
-        x = keras_mod.layers.Add()([x, x])
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, layout="NHWC")
-        verify_keras_frontend(keras_model, layout="NHWC")
-
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        x = keras_mod.layers.Cropping2D(cropping=(2, 1))(data)
-        x = keras_mod.layers.Cropping2D(cropping=(1, 2))(x)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, layout="NHWC")
-        verify_keras_frontend(keras_model, layout="NCHW")
-
-    def test_forward_multi_inputs(self, keras_mod):
-        data1 = keras_mod.layers.Input(shape=(32, 32, 3))
-        data2 = keras_mod.layers.Input(shape=(32, 32, 3))
-        x = keras_mod.layers.Conv2D(8, (3, 3), padding="same")(data1)
-        y = keras_mod.layers.Conv2D(8, (3, 3), padding="same")(data2)
-        average_z = keras_mod.layers.Average()([x, y])
-        out = keras_mod.layers.GlobalAveragePooling2D()(average_z)
-        keras_model = keras_mod.models.Model([data1, data2], out)
-        verify_keras_frontend(keras_model)
-
-    def test_forward_multi_outputs(self, keras_mod):
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        x = keras_mod.layers.Conv2D(8, (3, 3), padding="same")(data)
-        x = keras_mod.layers.GlobalAveragePooling2D()(x)
-        y = keras_mod.layers.Conv2D(8, (3, 3), padding="same")(data)
-        y = keras_mod.layers.GlobalAveragePooling2D()(y)
-        keras_model = keras_mod.models.Model(data, [x, y])
-        verify_keras_frontend(keras_model)
-
-    def test_forward_reuse_layers(self, keras_mod):
-        """test_forward_reuse_layers"""
-        # reuse conv2d
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        conv2d = keras_mod.layers.Conv2D(8, (3, 3), padding="same")
-        x = conv2d(data)
-        y = conv2d(data)
-        add_z = keras_mod.layers.Add()([x, y])
-        out = keras_mod.layers.GlobalAveragePooling2D()(add_z)
-        keras_model = keras_mod.models.Model(data, out)
-        verify_keras_frontend(keras_model)
-        # reuse add
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        x = keras_mod.layers.Conv2D(8, (3, 3), padding="same")(data)
-        add = keras_mod.layers.Add()
-        x = add([x, x])
-        x = add([x, x])
-        out = keras_mod.layers.GlobalAveragePooling2D()(x)
-        keras_model = keras_mod.models.Model(data, out)
-        verify_keras_frontend(keras_model)
-
-    def test_forward_lstm(self, keras_mod):
-        """test_forward_lstm"""
-        data = keras_mod.layers.Input(shape=(10, 32))
-        rnn_funcs = [
-            keras_mod.layers.LSTM(16),
-            keras_mod.layers.LSTM(16, return_sequences=True),
-            keras_mod.layers.LSTM(16, go_backwards=True),
-            keras_mod.layers.LSTM(16, return_sequences=True, go_backwards=True),
-            keras_mod.layers.LSTM(16, return_sequences=True, use_bias=False),
-        ]
-        for rnn_func in rnn_funcs:
-            x = rnn_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, need_transpose=False)
-
-    def test_forward_rnn(self, keras_mod):
-        """test_forward_rnn"""
-        data = keras_mod.layers.Input(shape=(1, 32))
-        rnn_funcs = [
-            keras_mod.layers.LSTM(
-                units=16, return_state=False, recurrent_activation="sigmoid", activation="tanh"
-            ),
-            keras_mod.layers.LSTM(
-                units=16,
-                return_state=False,
-                recurrent_activation="sigmoid",
-                activation="tanh",
-                use_bias=False,
-            ),
-            keras_mod.layers.SimpleRNN(units=16, return_state=False, activation="tanh"),
-            keras_mod.layers.SimpleRNN(
-                units=16, return_state=False, activation="tanh", use_bias=False
-            ),
-            keras_mod.layers.SimpleRNN(
-                units=16, return_state=False, activation="tanh", go_backwards=True
-            ),
-            keras_mod.layers.GRU(
-                units=16,
-                return_state=False,
-                recurrent_activation="sigmoid",
-                activation="tanh",
-                reset_after=False,
-            ),
-            keras_mod.layers.GRU(
-                units=16,
-                return_state=False,
-                recurrent_activation="sigmoid",
-                activation="tanh",
-                reset_after=False,
-                use_bias=False,
-            ),
-            keras_mod.layers.GRU(
-                units=16,
-                return_state=False,
-                recurrent_activation="sigmoid",
-                activation="tanh",
-                reset_after=False,
-                use_bias=False,
-                go_backwards=True,
-            ),
-        ]
-        for rnn_func in rnn_funcs:
-            x = rnn_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, need_transpose=False)
-
-    def test_forward_vgg16(self, keras_mod, layout="NCHW"):
-        """test_forward_vgg16"""
-        if hasattr(keras_mod.applications, "VGG16"):
-            # Keras 2.4.x and older
-            vgg16_mod = keras_mod.applications.VGG16
-        else:
-            # Keras 2.6.x and newer
-            vgg16_mod = keras_mod.applications.vgg16.VGG16
-
-        keras_model = vgg16_mod(
-            include_top=True, weights="imagenet", input_shape=(224, 224, 3), classes=1000
-        )
-        verify_keras_frontend(keras_model, layout=layout)
-
-    def test_forward_xception(self, keras_mod, layout="NCHW"):
-        """test_forward_vgg16"""
-        if hasattr(keras_mod.applications, "Xception"):
-            # Keras 2.4.x and older
-            xception_mod = keras_mod.applications.Xception
-        else:
-            # Keras 2.6.x and newer
-            xception_mod = keras_mod.applications.xception.Xception
-
-        keras_model = xception_mod(
-            include_top=True, weights="imagenet", input_shape=(299, 299, 3), classes=1000
-        )
-        verify_keras_frontend(keras_model, layout=layout)
-
-    def test_forward_resnet50(self, keras_mod, layout="NCHW"):
-        """test_forward_resnet50"""
-        if hasattr(keras_mod.applications, "ResNet50"):
-            # Keras 2.4.x and older
-            resnet50_mod = keras_mod.applications.ResNet50
-        else:
-            # Keras 2.6.x and newer
-            resnet50_mod = keras_mod.applications.resnet.ResNet50
-
-        keras_model = resnet50_mod(
-            include_top=True, weights="imagenet", input_shape=(224, 224, 3), classes=1000
-        )
-        verify_keras_frontend(keras_model, layout=layout)
-
-    def test_forward_inception_v3(self, keras_mod, layout="NCHW"):
-        """test_forward_inception_v3"""
-        if hasattr(keras_mod.applications, "InceptionV3"):
-            # Keras 2.4.x and older
-            inception_v3_mod = keras_mod.applications.InceptionV3
-        else:
-            # Keras 2.6.x and newer
-            inception_v3_mod = keras_mod.applications.inception_v3.InceptionV3
-
-        keras_model = inception_v3_mod(
-            include_top=True, weights=None, input_shape=(299, 299, 3), classes=1000
-        )
-        verify_keras_frontend(keras_model, layout=layout)
-
-    def test_forward_mobilenet(self, keras_mod, layout="NCHW"):
-        mobilenet_mod = get_mobilenet(keras_mod)
-
-        keras_model = mobilenet_mod(
-            include_top=True, weights="imagenet", input_shape=(224, 224, 3), classes=1000
-        )
-        verify_keras_frontend(keras_model, layout=layout)
-
-    def test_forward_conv3d(self, keras_mod):
-        """test_forward_conv3d"""
-        data = keras_mod.layers.Input(shape=(32, 32, 32, 3))
-        conv_funcs = [
-            keras_mod.layers.Conv3D(
-                filters=10, kernel_size=(3, 3, 3), strides=(2, 2, 2), padding="same"
-            ),
-            keras_mod.layers.Conv3D(
-                filters=10, kernel_size=(3, 3, 3), dilation_rate=(2, 2, 2), padding="same"
-            ),
-            keras_mod.layers.Conv3D(
-                filters=1, kernel_size=(3, 3, 3), padding="valid", use_bias=False
-            ),
-            keras_mod.layers.Conv3D(filters=10, kernel_size=(2, 2, 2), padding="valid"),
-        ]
-        for conv_func in conv_funcs:
-            x = conv_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, layout="NDHWC")
-
-    def test_forward_conv3d_transpose(self, keras_mod):
-        """test_forward_conv3d_transpose"""
-        data = keras_mod.layers.Input(shape=(32, 32, 32, 3))
-        conv_funcs = [
-            keras_mod.layers.Conv3DTranspose(
-                filters=10, kernel_size=(3, 3, 3), strides=(2, 2, 2), padding="same"
-            ),
-            keras_mod.layers.Conv3DTranspose(
-                filters=10, kernel_size=(1, 1, 1), dilation_rate=(1, 1, 1), padding="same"
-            ),
-            keras_mod.layers.Conv3DTranspose(
-                filters=1, kernel_size=(3, 3, 3), padding="valid", use_bias=False
-            ),
-            keras_mod.layers.Conv3DTranspose(filters=10, kernel_size=(2, 2, 2), padding="valid"),
-            keras_mod.layers.Conv3DTranspose(
-                filters=2, kernel_size=(3, 3, 3), strides=(2, 2, 2), output_padding=(1, 1, 1)
-            ),
-        ]
-        for conv_func in conv_funcs:
-            x = conv_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, layout="NDHWC")
-
-    def test_forward_pool3d(self, keras_mod):
-        """test_forward_pool3d"""
-        data = keras_mod.layers.Input(shape=(32, 32, 32, 1))
-        pool_funcs = [  # maxpool
-            keras_mod.layers.MaxPooling3D(pool_size=(2, 2, 2), strides=(1, 1, 1), padding="same"),
-            keras_mod.layers.MaxPooling3D(pool_size=(3, 3, 3), strides=(2, 2, 2), padding="valid"),
-            # avgpool
-            keras_mod.layers.AveragePooling3D(
-                pool_size=(3, 3, 3), strides=(2, 2, 2), padding="same"
-            ),
-            keras_mod.layers.AveragePooling3D(
-                pool_size=(2, 2, 2), strides=(1, 1, 1), padding="valid"
-            ),
-        ]
-        for pool_func in pool_funcs:
-            x = pool_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, layout="NDHWC")
-
-    def test_forward_upsample3d(self, keras_mod):
-        data = keras_mod.layers.Input(shape=(32, 32, 32, 3))
-        x = keras_mod.layers.UpSampling3D(size=(2, 3, 4))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, layout="NDHWC")
-
-    def test_forward_zero_padding3d(self, keras_mod):
-        """test_forward_zero_padding3d"""
-        data = keras_mod.layers.Input(shape=(32, 32, 32, 3))
-        pad_funcs = [  # Integer
-            keras_mod.layers.ZeroPadding3D(padding=2),
-            # tuple of 3 ints
-            keras_mod.layers.ZeroPadding3D(padding=(1, 2, 3)),
-            # tuple of 3 tuples of 2 ints
-            keras_mod.layers.ZeroPadding3D(padding=((1, 1), (2, 2), (2, 2))),
-            # tuple of 3 tuples of 2 ints different values
-            keras_mod.layers.ZeroPadding3D(padding=((1, 2), (2, 3), (3, 2))),
-        ]
-        for pad_func in pad_funcs:
-            x = pad_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, layout="NDHWC")
-
-    def test_forward_embedding(self, keras_mod):
-        """test_forward_embedding"""
-        data = keras_mod.layers.Input(shape=(2, 4), dtype="int32")
-        x = keras_mod.layers.Embedding(10, 3)(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-        data = keras_mod.layers.Input(shape=(2, 3, 4), dtype="int32")
-        x = keras_mod.layers.Embedding(4, 5)(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-        data = keras_mod.layers.Input(shape=(6, 2, 3, 4), dtype="int32")
-        x = keras_mod.layers.Embedding(4, 5)(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-    def test_forward_repeat_vector(self, keras_mod):
-        """test_forward_repeat_vector"""
-        data = keras_mod.layers.Input(shape=(5,), dtype="float32")
-        x = keras_mod.layers.Dense(6)(data)
-        x = keras_mod.layers.RepeatVector(2)(x)
-
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-        data = keras_mod.layers.Input(shape=(10,), dtype="float32")
-        x = keras_mod.layers.RepeatVector(3)(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-        data = keras_mod.layers.Input(shape=(4,), dtype="float32")
-        x = keras_mod.layers.RepeatVector(1)(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-    def test_forward_global_pool3d(self, keras_mod):
-        """test_forward_zero_padding3d"""
-        data = keras_mod.layers.Input(shape=(32, 32, 32, 1))
-        pool_funcs = [  # global maxpool
-            keras_mod.layers.GlobalMaxPooling3D(),
-            # global avgpool
-            keras_mod.layers.GlobalAveragePooling3D(),
-        ]
-        for pool_func in pool_funcs:
-            x = pool_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, layout="NDHWC")
-
-    def test_forward_nested_layers(self, keras_mod):
-        """test_forward_nested_layers"""
-        mobilenet_mod = get_mobilenet(keras_mod)
-
-        sub_model = mobilenet_mod(include_top=False, weights="imagenet", input_shape=(224, 224, 3))
-        keras_model = keras_mod.Sequential(
-            [
-                sub_model,
-                keras_mod.layers.GlobalAveragePooling2D(),
-                keras_mod.layers.Dense(1024, activation="relu"),
-                keras_mod.layers.Dense(2, activation="sigmoid"),
-            ]
-        )
-        verify_keras_frontend(keras_model)
-
-    def test_forward_l2_normalize(self, keras_mod):
-        """test_forward_l2_normalize"""
-        data = keras_mod.layers.Input(shape=(16, 12, 8))
-        k_backend = keras_mod.backend
-        l2_funcs = [
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(v, axis=-2)),
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(x=v, axis=-1)),
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(axis=1, x=v)),
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(v, 2)),
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(v, axis=3)),
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(v, axis=(2, 3))),
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(v, (1, 2))),
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(v, axis=[-2, -1])),
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(v, [-3, -2])),
-        ]
-        for l2_func in l2_funcs:
-            x = l2_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, layout="NCHW")
-            verify_keras_frontend(keras_model, layout="NHWC")
-
-    def test_forward_time_distributed(self, keras_mod):
-        """test_forward_time_distributed"""
-        conv2d_inputs = keras_mod.Input(shape=(10, 128, 128, 3))
-        conv_2d_layer = keras_mod.layers.Conv2D(64, (3, 3))
-        conv2d_model = keras_mod.models.Model(
-            conv2d_inputs, keras_mod.layers.TimeDistributed(conv_2d_layer)(conv2d_inputs)
-        )
-        verify_keras_frontend(conv2d_model, layout="NDHWC")
-
-        dense_inputs = keras_mod.Input(shape=(5, 1))
-        dense_layer = keras_mod.layers.Dense(1)
-        dense_model = keras_mod.models.Model(
-            dense_inputs, keras_mod.layers.TimeDistributed(dense_layer)(dense_inputs)
-        )
-        verify_keras_frontend(dense_model, need_transpose=False)
-
-    def test_simplernn_with_infertype(self, keras_mod):
-        """This test case is from https://github.com/apache/tvm/issues/14868"""
-        input_shape = (2, 2, 2)
-        x = keras_mod.layers.Input(shape=input_shape[1:], dtype="float32")
-        layer = keras_mod.layers.SimpleRNN(units=4)
-        y = layer(x)
-        model = keras_mod.models.Model(x, y)
-        mod, _ = relay.frontend.from_keras(model, {model.input_names[0]: input_shape})
-        relay.transform.InferType()(mod)
-
-
-if __name__ == "__main__":
-    for k in [keras, tf_keras]:
-        sut = TestKeras()
-        sut.test_forward_concatenate(keras_mod=k)
-        sut.test_forward_merge_dot(keras_mod=k)
-        sut.test_forward_merge(keras_mod=k)
-        sut.test_forward_activations(keras_mod=k)
-        sut.test_forward_activations_except(keras_mod=k)
-        sut.test_forward_dense(keras_mod=k)
-        sut.test_forward_permute(keras_mod=k)
-        sut.test_forward_sequential(keras_mod=k)
-        sut.test_forward_pool(keras_mod=k)
-        sut.test_forward_conv(keras_mod=k)
-        sut.test_forward_conv1d(keras_mod=k)
-        sut.test_forward_batch_norm(keras_mod=k)
-        sut.test_forward_upsample(keras_mod=k, interpolation="nearest")
-        sut.test_forward_upsample(keras_mod=k, interpolation="bilinear")
-        sut.test_forward_reshape(keras_mod=k)
-        sut.test_forward_crop(keras_mod=k)
-        sut.test_forward_multi_inputs(keras_mod=k)
-        sut.test_forward_multi_outputs(keras_mod=k)
-        sut.test_forward_reuse_layers(keras_mod=k)
-        sut.test_forward_lstm(keras_mod=k)
-        sut.test_forward_rnn(keras_mod=k)
-        sut.test_forward_vgg16(keras_mod=k)
-        sut.test_forward_vgg16(keras_mod=k, layout="NHWC")
-        sut.test_forward_xception(keras_mod=k)
-        sut.test_forward_resnet50(keras_mod=k)
-        sut.test_forward_resnet50(keras_mod=k, layout="NHWC")
-        sut.test_forward_inception_v3(keras_mod=k)
-        sut.test_forward_inception_v3(keras_mod=k, layout="NHWC")
-        sut.test_forward_mobilenet(keras_mod=k)
-        sut.test_forward_mobilenet(keras_mod=k, layout="NHWC")
-        sut.test_forward_conv3d(keras_mod=k)
-        sut.test_forward_conv3d_transpose(keras_mod=k)
-        sut.test_forward_pool3d(keras_mod=k)
-        sut.test_forward_global_pool3d(keras_mod=k)
-        sut.test_forward_upsample3d(keras_mod=k)
-        sut.test_forward_zero_padding3d(keras_mod=k)
-        sut.test_forward_embedding(keras_mod=k)
-        sut.test_forward_repeat_vector(keras_mod=k)
-        sut.test_forward_l2_normalize(keras_mod=k)
-        sut.test_forward_time_distributed(keras_mod=k)
-        sut.test_simplernn_with_infertype(keras_mod=k)
diff --git a/tests/python/frontend/mxnet/model_zoo/__init__.py b/tests/python/frontend/mxnet/model_zoo/__init__.py
deleted file mode 100644
index 2c324a060d25..000000000000
--- a/tests/python/frontend/mxnet/model_zoo/__init__.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""MXNet model zoo for testing purposes."""
-from __future__ import absolute_import
-from . import mlp, vgg, resnet, dqn, inception_v3, squeezenet, dcgan
-import tvm.relay.testing
-
-# mlp
-def mx_mlp():
-    num_class = 10
-    return mlp.get_symbol(num_class)
-
-
-def relay_mlp():
-    num_class = 10
-    return tvm.relay.testing.mlp.get_workload(1, num_class)[0]
-
-
-# vgg
-def mx_vgg(num_layers):
-    num_class = 1000
-    return vgg.get_symbol(num_class, num_layers)
-
-
-def relay_vgg(num_layers):
-    num_class = 1000
-    return tvm.relay.testing.vgg.get_workload(1, num_class, num_layers=num_layers)[0]
-
-
-# resnet
-def mx_resnet(num_layers):
-    num_class = 1000
-    return resnet.get_symbol(num_class, num_layers, "3,224,224")
-
-
-def relay_resnet(num_layers):
-    num_class = 1000
-    return tvm.relay.testing.resnet.get_workload(1, num_class, num_layers=num_layers)[0]
-
-
-# dqn
-mx_dqn = dqn.get_symbol
-
-
-def relay_dqn():
-    return tvm.relay.testing.dqn.get_workload(1)[0]
-
-
-# squeezenet
-def mx_squeezenet(version):
-    return squeezenet.get_symbol(version=version)
-
-
-def relay_squeezenet(version):
-    return tvm.relay.testing.squeezenet.get_workload(1, version=version)[0]
-
-
-# inception
-mx_inception_v3 = inception_v3.get_symbol
-
-
-def relay_inception_v3():
-    return tvm.relay.testing.inception_v3.get_workload(1)[0]
-
-
-# dcgan generator
-mx_dcgan = dcgan.get_symbol
-
-
-def relay_dcgan(batch_size):
-    return tvm.relay.testing.dcgan.get_workload(batch_size=batch_size)[0]
diff --git a/tests/python/frontend/mxnet/model_zoo/dcgan.py b/tests/python/frontend/mxnet/model_zoo/dcgan.py
deleted file mode 100644
index 67c20ccc65c9..000000000000
--- a/tests/python/frontend/mxnet/model_zoo/dcgan.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=unused-argument
-"""
-The MXNet symbol of DCGAN generator
-
-Adopted from:
-https://github.com/tqchen/mxnet-gan/blob/main/mxgan/generator.py
-
-Reference:
-Radford, Alec, Luke Metz, and Soumith Chintala.
-"Unsupervised representation learning with deep convolutional generative adversarial networks."
-arXiv preprint arXiv:1511.06434 (2015).
-"""
-
-import mxnet as mx
-
-
-def deconv2d(data, ishape, oshape, kshape, name, stride=(2, 2)):
-    """a deconv layer that enlarges the feature map"""
-    target_shape = (oshape[-2], oshape[-1])
-    pad_y = (kshape[0] - 1) // 2
-    pad_x = (kshape[1] - 1) // 2
-    adj_y = (target_shape[0] + 2 * pad_y - kshape[0]) % stride[0]
-    adj_x = (target_shape[1] + 2 * pad_x - kshape[1]) % stride[1]
-
-    net = mx.sym.Deconvolution(
-        data,
-        kernel=kshape,
-        stride=stride,
-        pad=(pad_y, pad_x),
-        adj=(adj_y, adj_x),
-        num_filter=oshape[0],
-        no_bias=True,
-        name=name,
-    )
-    return net
-
-
-def deconv2d_bn_relu(data, prefix, **kwargs):
-    """a block of deconv + batch norm + relu"""
-    eps = 1e-5 + 1e-12
-
-    net = deconv2d(data, name="%s_deconv" % prefix, **kwargs)
-    net = mx.sym.BatchNorm(net, eps=eps, name="%s_bn" % prefix)
-    net = mx.sym.Activation(net, name="%s_act" % prefix, act_type="relu")
-    return net
-
-
-def get_symbol(oshape=(3, 64, 64), ngf=128, code=None):
-    """get symbol of dcgan generator"""
-    assert oshape[-1] == 64, "Only support 64x64 image"
-    assert oshape[-2] == 64, "Only support 64x64 image"
-
-    code = mx.sym.Variable("data") if code is None else code
-    net = mx.sym.FullyConnected(
-        code, name="g1", num_hidden=ngf * 8 * 4 * 4, no_bias=True, flatten=False
-    )
-    net = mx.sym.Activation(net, act_type="relu")
-    # 4 x 4
-    net = mx.sym.reshape(net, shape=(-1, ngf * 8, 4, 4))
-    # 8 x 8
-    net = deconv2d_bn_relu(
-        net, ishape=(ngf * 8, 4, 4), oshape=(ngf * 4, 8, 8), kshape=(4, 4), prefix="g2"
-    )
-    # 16x16
-    net = deconv2d_bn_relu(
-        net, ishape=(ngf * 4, 8, 8), oshape=(ngf * 2, 16, 16), kshape=(4, 4), prefix="g3"
-    )
-    # 32x32
-    net = deconv2d_bn_relu(
-        net, ishape=(ngf * 2, 16, 16), oshape=(ngf, 32, 32), kshape=(4, 4), prefix="g4"
-    )
-    # 64x64
-    net = deconv2d(net, ishape=(ngf, 32, 32), oshape=oshape[-3:], kshape=(4, 4), name="g5_deconv")
-    net = mx.sym.Activation(net, act_type="tanh")
-    return net
diff --git a/tests/python/frontend/mxnet/model_zoo/dqn.py b/tests/python/frontend/mxnet/model_zoo/dqn.py
deleted file mode 100644
index df611c701258..000000000000
--- a/tests/python/frontend/mxnet/model_zoo/dqn.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-The mxnet symbol of Nature DQN
-
-Reference:
-Mnih, Volodymyr, et al.
-"Human-level control through deep reinforcement learning."
-Nature 518.7540 (2015): 529.
-"""
-
-import mxnet as mx
-
-
-def get_symbol(num_action=18):
-    data = mx.sym.Variable(name="data")
-    net = mx.sym.Convolution(data, kernel=(8, 8), stride=(4, 4), num_filter=32, name="conv1")
-    net = mx.sym.Activation(net, act_type="relu", name="relu1")
-    net = mx.sym.Convolution(net, kernel=(4, 4), stride=(2, 2), num_filter=64, name="conv2")
-    net = mx.sym.Activation(net, act_type="relu", name="relu2")
-    net = mx.sym.Convolution(net, kernel=(3, 3), stride=(1, 1), num_filter=64, name="conv3")
-    net = mx.sym.Activation(net, act_type="relu", name="relu3")
-    net = mx.sym.FullyConnected(net, num_hidden=512, name="fc4")
-    net = mx.sym.Activation(net, act_type="relu", name="relu4")
-    net = mx.sym.FullyConnected(net, num_hidden=num_action, name="fc5", flatten=False)
-
-    return net
diff --git a/tests/python/frontend/mxnet/model_zoo/inception_v3.py b/tests/python/frontend/mxnet/model_zoo/inception_v3.py
deleted file mode 100644
index 872662a01c10..000000000000
--- a/tests/python/frontend/mxnet/model_zoo/inception_v3.py
+++ /dev/null
@@ -1,368 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Inception V3, suitable for images with around 299 x 299
-
-Reference:
-Szegedy, Christian, et al. "Rethinking the Inception Architecture for Computer Vision." arXiv preprint arXiv:1512.00567 (2015).
-
-Adopted from https://github.com/apache/incubator-mxnet/blob/master/
-             example/image-classification/symbols/inception-v3.py
-"""
-import mxnet as mx
-import numpy as np
-
-
-def Conv(data, num_filter, kernel=(1, 1), stride=(1, 1), pad=(0, 0), name=None, suffix=""):
-    conv = mx.sym.Convolution(
-        data=data,
-        num_filter=num_filter,
-        kernel=kernel,
-        stride=stride,
-        pad=pad,
-        no_bias=True,
-        name="%s%s_conv2d" % (name, suffix),
-    )
-    bn = mx.sym.BatchNorm(data=conv, eps=2e-5, name="%s%s_batchnorm" % (name, suffix))
-    act = mx.sym.Activation(data=bn, act_type="relu", name="%s%s_relu" % (name, suffix))
-    return act
-
-
-def Inception7A(
-    data, num_1x1, num_3x3_red, num_3x3_1, num_3x3_2, num_5x5_red, num_5x5, pool, proj, name
-):
-    tower_1x1 = Conv(data, num_1x1, name=("%s_conv" % name))
-    tower_5x5 = Conv(data, num_5x5_red, name=("%s_tower" % name), suffix="_conv")
-    tower_5x5 = Conv(
-        tower_5x5, num_5x5, kernel=(5, 5), pad=(2, 2), name=("%s_tower" % name), suffix="_conv_1"
-    )
-    tower_3x3 = Conv(data, num_3x3_red, name=("%s_tower_1" % name), suffix="_conv")
-    tower_3x3 = Conv(
-        tower_3x3,
-        num_3x3_1,
-        kernel=(3, 3),
-        pad=(1, 1),
-        name=("%s_tower_1" % name),
-        suffix="_conv_1",
-    )
-    tower_3x3 = Conv(
-        tower_3x3,
-        num_3x3_2,
-        kernel=(3, 3),
-        pad=(1, 1),
-        name=("%s_tower_1" % name),
-        suffix="_conv_2",
-    )
-    pooling = mx.sym.Pooling(
-        data=data,
-        kernel=(3, 3),
-        stride=(1, 1),
-        pad=(1, 1),
-        pool_type=pool,
-        name=("%s_pool_%s_pool" % (pool, name)),
-    )
-    cproj = Conv(pooling, proj, name=("%s_tower_2" % name), suffix="_conv")
-    concat = mx.sym.Concat(
-        *[tower_1x1, tower_5x5, tower_3x3, cproj], name="ch_concat_%s_chconcat" % name
-    )
-    return concat
-
-
-# First Downsample
-def Inception7B(data, num_3x3, num_d3x3_red, num_d3x3_1, num_d3x3_2, pool, name):
-    tower_3x3 = Conv(
-        data, num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=("%s_conv" % name)
-    )
-    tower_d3x3 = Conv(data, num_d3x3_red, name=("%s_tower" % name), suffix="_conv")
-    tower_d3x3 = Conv(
-        tower_d3x3,
-        num_d3x3_1,
-        kernel=(3, 3),
-        pad=(1, 1),
-        stride=(1, 1),
-        name=("%s_tower" % name),
-        suffix="_conv_1",
-    )
-    tower_d3x3 = Conv(
-        tower_d3x3,
-        num_d3x3_2,
-        kernel=(3, 3),
-        pad=(0, 0),
-        stride=(2, 2),
-        name=("%s_tower" % name),
-        suffix="_conv_2",
-    )
-    pooling = mx.sym.Pooling(
-        data=data,
-        kernel=(3, 3),
-        stride=(2, 2),
-        pad=(0, 0),
-        pool_type="max",
-        name=("max_pool_%s_pool" % name),
-    )
-    concat = mx.sym.Concat(*[tower_3x3, tower_d3x3, pooling], name="ch_concat_%s_chconcat" % name)
-    return concat
-
-
-def Inception7C(
-    data,
-    num_1x1,
-    num_d7_red,
-    num_d7_1,
-    num_d7_2,
-    num_q7_red,
-    num_q7_1,
-    num_q7_2,
-    num_q7_3,
-    num_q7_4,
-    pool,
-    proj,
-    name,
-):
-    tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=("%s_conv" % name))
-    tower_d7 = Conv(data=data, num_filter=num_d7_red, name=("%s_tower" % name), suffix="_conv")
-    tower_d7 = Conv(
-        data=tower_d7,
-        num_filter=num_d7_1,
-        kernel=(1, 7),
-        pad=(0, 3),
-        name=("%s_tower" % name),
-        suffix="_conv_1",
-    )
-    tower_d7 = Conv(
-        data=tower_d7,
-        num_filter=num_d7_2,
-        kernel=(7, 1),
-        pad=(3, 0),
-        name=("%s_tower" % name),
-        suffix="_conv_2",
-    )
-    tower_q7 = Conv(data=data, num_filter=num_q7_red, name=("%s_tower_1" % name), suffix="_conv")
-    tower_q7 = Conv(
-        data=tower_q7,
-        num_filter=num_q7_1,
-        kernel=(7, 1),
-        pad=(3, 0),
-        name=("%s_tower_1" % name),
-        suffix="_conv_1",
-    )
-    tower_q7 = Conv(
-        data=tower_q7,
-        num_filter=num_q7_2,
-        kernel=(1, 7),
-        pad=(0, 3),
-        name=("%s_tower_1" % name),
-        suffix="_conv_2",
-    )
-    tower_q7 = Conv(
-        data=tower_q7,
-        num_filter=num_q7_3,
-        kernel=(7, 1),
-        pad=(3, 0),
-        name=("%s_tower_1" % name),
-        suffix="_conv_3",
-    )
-    tower_q7 = Conv(
-        data=tower_q7,
-        num_filter=num_q7_4,
-        kernel=(1, 7),
-        pad=(0, 3),
-        name=("%s_tower_1" % name),
-        suffix="_conv_4",
-    )
-    pooling = mx.sym.Pooling(
-        data=data,
-        kernel=(3, 3),
-        stride=(1, 1),
-        pad=(1, 1),
-        pool_type=pool,
-        name=("%s_pool_%s_pool" % (pool, name)),
-    )
-    cproj = Conv(
-        data=pooling, num_filter=proj, kernel=(1, 1), name=("%s_tower_2" % name), suffix="_conv"
-    )
-    # concat
-    concat = mx.sym.Concat(
-        *[tower_1x1, tower_d7, tower_q7, cproj], name="ch_concat_%s_chconcat" % name
-    )
-    return concat
-
-
-def Inception7D(
-    data, num_3x3_red, num_3x3, num_d7_3x3_red, num_d7_1, num_d7_2, num_d7_3x3, pool, name
-):
-    tower_3x3 = Conv(data=data, num_filter=num_3x3_red, name=("%s_tower" % name), suffix="_conv")
-    tower_3x3 = Conv(
-        data=tower_3x3,
-        num_filter=num_3x3,
-        kernel=(3, 3),
-        pad=(0, 0),
-        stride=(2, 2),
-        name=("%s_tower" % name),
-        suffix="_conv_1",
-    )
-    tower_d7_3x3 = Conv(
-        data=data, num_filter=num_d7_3x3_red, name=("%s_tower_1" % name), suffix="_conv"
-    )
-    tower_d7_3x3 = Conv(
-        data=tower_d7_3x3,
-        num_filter=num_d7_1,
-        kernel=(1, 7),
-        pad=(0, 3),
-        name=("%s_tower_1" % name),
-        suffix="_conv_1",
-    )
-    tower_d7_3x3 = Conv(
-        data=tower_d7_3x3,
-        num_filter=num_d7_2,
-        kernel=(7, 1),
-        pad=(3, 0),
-        name=("%s_tower_1" % name),
-        suffix="_conv_2",
-    )
-    tower_d7_3x3 = Conv(
-        data=tower_d7_3x3,
-        num_filter=num_d7_3x3,
-        kernel=(3, 3),
-        stride=(2, 2),
-        name=("%s_tower_1" % name),
-        suffix="_conv_3",
-    )
-    pooling = mx.sym.Pooling(
-        data=data,
-        kernel=(3, 3),
-        stride=(2, 2),
-        pool_type=pool,
-        name=("%s_pool_%s_pool" % (pool, name)),
-    )
-    # concat
-    concat = mx.sym.Concat(*[tower_3x3, tower_d7_3x3, pooling], name="ch_concat_%s_chconcat" % name)
-    return concat
-
-
-def Inception7E(
-    data,
-    num_1x1,
-    num_d3_red,
-    num_d3_1,
-    num_d3_2,
-    num_3x3_d3_red,
-    num_3x3,
-    num_3x3_d3_1,
-    num_3x3_d3_2,
-    pool,
-    proj,
-    name,
-):
-    tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=("%s_conv" % name))
-    tower_d3 = Conv(data=data, num_filter=num_d3_red, name=("%s_tower" % name), suffix="_conv")
-    tower_d3_a = Conv(
-        data=tower_d3,
-        num_filter=num_d3_1,
-        kernel=(1, 3),
-        pad=(0, 1),
-        name=("%s_tower" % name),
-        suffix="_mixed_conv",
-    )
-    tower_d3_b = Conv(
-        data=tower_d3,
-        num_filter=num_d3_2,
-        kernel=(3, 1),
-        pad=(1, 0),
-        name=("%s_tower" % name),
-        suffix="_mixed_conv_1",
-    )
-    tower_3x3_d3 = Conv(
-        data=data, num_filter=num_3x3_d3_red, name=("%s_tower_1" % name), suffix="_conv"
-    )
-    tower_3x3_d3 = Conv(
-        data=tower_3x3_d3,
-        num_filter=num_3x3,
-        kernel=(3, 3),
-        pad=(1, 1),
-        name=("%s_tower_1" % name),
-        suffix="_conv_1",
-    )
-    tower_3x3_d3_a = Conv(
-        data=tower_3x3_d3,
-        num_filter=num_3x3_d3_1,
-        kernel=(1, 3),
-        pad=(0, 1),
-        name=("%s_tower_1" % name),
-        suffix="_mixed_conv",
-    )
-    tower_3x3_d3_b = Conv(
-        data=tower_3x3_d3,
-        num_filter=num_3x3_d3_2,
-        kernel=(3, 1),
-        pad=(1, 0),
-        name=("%s_tower_1" % name),
-        suffix="_mixed_conv_1",
-    )
-    pooling = mx.sym.Pooling(
-        data=data,
-        kernel=(3, 3),
-        stride=(1, 1),
-        pad=(1, 1),
-        pool_type=pool,
-        name=("%s_pool_%s_pool" % (pool, name)),
-    )
-    cproj = Conv(
-        data=pooling, num_filter=proj, kernel=(1, 1), name=("%s_tower_2" % name), suffix="_conv"
-    )
-    # concat
-    concat = mx.sym.Concat(
-        *[tower_1x1, tower_d3_a, tower_d3_b, tower_3x3_d3_a, tower_3x3_d3_b, cproj],
-        name="ch_concat_%s_chconcat" % name,
-    )
-    return concat
-
-
-def get_symbol(num_classes=1000, **kwargs):
-    data = mx.sym.Variable(name="data")
-    # stage 1
-    conv = Conv(data, 32, kernel=(3, 3), stride=(2, 2), name="conv")
-    conv_1 = Conv(conv, 32, kernel=(3, 3), name="conv_1")
-    conv_2 = Conv(conv_1, 64, kernel=(3, 3), pad=(1, 1), name="conv_2")
-    pool = mx.sym.Pooling(data=conv_2, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool")
-    # stage 2
-    conv_3 = Conv(pool, 80, kernel=(1, 1), name="conv_3")
-    conv_4 = Conv(conv_3, 192, kernel=(3, 3), name="conv_4")
-    pool1 = mx.sym.Pooling(data=conv_4, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool1")
-
-    # # stage 3
-    in3a = Inception7A(pool1, 64, 64, 96, 96, 48, 64, "avg", 32, "mixed")
-    in3b = Inception7A(in3a, 64, 64, 96, 96, 48, 64, "avg", 64, "mixed_1")
-    in3c = Inception7A(in3b, 64, 64, 96, 96, 48, 64, "avg", 64, "mixed_2")
-    in3d = Inception7B(in3c, 384, 64, 96, 96, "max", "mixed_3")
-    # stage 4
-    in4a = Inception7C(in3d, 192, 128, 128, 192, 128, 128, 128, 128, 192, "avg", 192, "mixed_4")
-    in4b = Inception7C(in4a, 192, 160, 160, 192, 160, 160, 160, 160, 192, "avg", 192, "mixed_5")
-    in4c = Inception7C(in4b, 192, 160, 160, 192, 160, 160, 160, 160, 192, "avg", 192, "mixed_6")
-    in4d = Inception7C(in4c, 192, 192, 192, 192, 192, 192, 192, 192, 192, "avg", 192, "mixed_7")
-    in4e = Inception7D(in4d, 192, 320, 192, 192, 192, 192, "max", "mixed_8")
-    # stage 5
-    in5a = Inception7E(in4e, 320, 384, 384, 384, 448, 384, 384, 384, "avg", 192, "mixed_9")
-    in5b = Inception7E(in5a, 320, 384, 384, 384, 448, 384, 384, 384, "max", 192, "mixed_10")
-    # pool
-    pool = mx.sym.Pooling(
-        data=in5b, kernel=(8, 8), stride=(1, 1), pool_type="avg", name="global_pool"
-    )
-    flatten = mx.sym.Flatten(data=pool, name="flatten")
-    fc1 = mx.sym.FullyConnected(data=flatten, num_hidden=num_classes, name="fc1", flatten=False)
-    softmax = mx.sym.SoftmaxOutput(data=fc1, name="softmax")
-    return softmax
diff --git a/tests/python/frontend/mxnet/model_zoo/mlp.py b/tests/python/frontend/mxnet/model_zoo/mlp.py
deleted file mode 100644
index 45f33f991de5..000000000000
--- a/tests/python/frontend/mxnet/model_zoo/mlp.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-a simple multilayer perceptron
-"""
-import mxnet as mx
-
-
-def get_symbol(num_classes=10, **kwargs):
-    data = mx.symbol.Variable("data")
-    data = mx.sym.Flatten(data=data)
-    try:
-        fc1 = mx.symbol.FullyConnected(data=data, name="fc1", num_hidden=128, flatten=False)
-        act1 = mx.symbol.Activation(data=fc1, name="relu1", act_type="relu")
-        fc2 = mx.symbol.FullyConnected(data=act1, name="fc2", num_hidden=64, flatten=False)
-        act2 = mx.symbol.Activation(data=fc2, name="relu2", act_type="relu")
-        fc3 = mx.symbol.FullyConnected(data=act2, name="fc3", num_hidden=num_classes, flatten=False)
-        mlp = mx.symbol.softmax(data=fc3, name="softmax")
-    except:
-        fc1 = mx.symbol.FullyConnected(data=data, name="fc1", num_hidden=128)
-        act1 = mx.symbol.Activation(data=fc1, name="relu1", act_type="relu")
-        fc2 = mx.symbol.FullyConnected(data=act1, name="fc2", num_hidden=64)
-        act2 = mx.symbol.Activation(data=fc2, name="relu2", act_type="relu")
-        fc3 = mx.symbol.FullyConnected(data=act2, name="fc3", num_hidden=num_classes)
-        mlp = mx.symbol.softmax(data=fc3, name="softmax")
-    return mlp
diff --git a/tests/python/frontend/mxnet/model_zoo/resnet.py b/tests/python/frontend/mxnet/model_zoo/resnet.py
deleted file mode 100644
index 00e68958b462..000000000000
--- a/tests/python/frontend/mxnet/model_zoo/resnet.py
+++ /dev/null
@@ -1,326 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py
-Original author Wei Wu
-
-Implemented the following paper:
-
-Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. "Identity Mappings in Deep Residual Networks"
-"""
-import mxnet as mx
-import numpy as np
-
-
-def residual_unit(
-    data,
-    num_filter,
-    stride,
-    dim_match,
-    name,
-    bottle_neck=True,
-    bn_mom=0.9,
-    workspace=256,
-    memonger=False,
-):
-    """Return ResNet Unit symbol for building ResNet
-    Parameters
-    ----------
-    data : str
-        Input data
-    num_filter : int
-        Number of output channels
-    bnf : int
-        Bottle neck channels factor with regard to num_filter
-    stride : tuple
-        Stride used in convolution
-    dim_match : Boolean
-        True means channel number between input and output is the same, otherwise means differ
-    name : str
-        Base name of the operators
-    workspace : int
-        Workspace used in convolution operator
-    """
-    if bottle_neck:
-        bn1 = mx.sym.BatchNorm(
-            data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + "_bn1"
-        )
-        act1 = mx.sym.Activation(data=bn1, act_type="relu", name=name + "_relu1")
-        conv1 = mx.sym.Convolution(
-            data=act1,
-            num_filter=int(num_filter * 0.25),
-            kernel=(1, 1),
-            stride=stride,
-            pad=(0, 0),
-            no_bias=True,
-            workspace=workspace,
-            name=name + "_conv1",
-        )
-        bn2 = mx.sym.BatchNorm(
-            data=conv1, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + "_bn2"
-        )
-        act2 = mx.sym.Activation(data=bn2, act_type="relu", name=name + "_relu2")
-        conv2 = mx.sym.Convolution(
-            data=act2,
-            num_filter=int(num_filter * 0.25),
-            kernel=(3, 3),
-            stride=(1, 1),
-            pad=(1, 1),
-            no_bias=True,
-            workspace=workspace,
-            name=name + "_conv2",
-        )
-        bn3 = mx.sym.BatchNorm(
-            data=conv2, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + "_bn3"
-        )
-        act3 = mx.sym.Activation(data=bn3, act_type="relu", name=name + "_relu3")
-        conv3 = mx.sym.Convolution(
-            data=act3,
-            num_filter=num_filter,
-            kernel=(1, 1),
-            stride=(1, 1),
-            pad=(0, 0),
-            no_bias=True,
-            workspace=workspace,
-            name=name + "_conv3",
-        )
-        if dim_match:
-            shortcut = data
-        else:
-            shortcut = mx.sym.Convolution(
-                data=act1,
-                num_filter=num_filter,
-                kernel=(1, 1),
-                stride=stride,
-                no_bias=True,
-                workspace=workspace,
-                name=name + "_sc",
-            )
-        if memonger:
-            shortcut._set_attr(mirror_stage="True")
-        return conv3 + shortcut
-    else:
-        bn1 = mx.sym.BatchNorm(
-            data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + "_bn1"
-        )
-        act1 = mx.sym.Activation(data=bn1, act_type="relu", name=name + "_relu1")
-        conv1 = mx.sym.Convolution(
-            data=act1,
-            num_filter=num_filter,
-            kernel=(3, 3),
-            stride=stride,
-            pad=(1, 1),
-            no_bias=True,
-            workspace=workspace,
-            name=name + "_conv1",
-        )
-        bn2 = mx.sym.BatchNorm(
-            data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + "_bn2"
-        )
-        act2 = mx.sym.Activation(data=bn2, act_type="relu", name=name + "_relu2")
-        conv2 = mx.sym.Convolution(
-            data=act2,
-            num_filter=num_filter,
-            kernel=(3, 3),
-            stride=(1, 1),
-            pad=(1, 1),
-            no_bias=True,
-            workspace=workspace,
-            name=name + "_conv2",
-        )
-        if dim_match:
-            shortcut = data
-        else:
-            shortcut = mx.sym.Convolution(
-                data=act1,
-                num_filter=num_filter,
-                kernel=(1, 1),
-                stride=stride,
-                no_bias=True,
-                workspace=workspace,
-                name=name + "_sc",
-            )
-        if memonger:
-            shortcut._set_attr(mirror_stage="True")
-        return conv2 + shortcut
-
-
-def resnet(
-    units,
-    num_stages,
-    filter_list,
-    num_classes,
-    image_shape,
-    bottle_neck=True,
-    bn_mom=0.9,
-    workspace=256,
-    dtype="float32",
-    memonger=False,
-):
-    """Return ResNet symbol of
-    Parameters
-    ----------
-    units : list
-        Number of units in each stage
-    num_stages : int
-        Number of stage
-    filter_list : list
-        Channel size of each stage
-    num_classes : int
-        Output size of symbol
-    dataset : str
-        Dataset type, only cifar10 and imagenet supports
-    workspace : int
-        Workspace used in convolution operator
-    dtype : str
-        Precision (float32 or float16)
-    """
-    num_unit = len(units)
-    assert num_unit == num_stages
-    data = mx.sym.Variable(name="data")
-    if dtype == "float32":
-        # data = mx.sym.identity(data=data, name='id')
-        data = data
-    else:
-        if dtype == "float16":
-            data = mx.sym.Cast(data=data, dtype=np.float16)
-    data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name="bn_data")
-    (nchannel, height, width) = image_shape
-    if height <= 32:  # such as cifar10
-        body = mx.sym.Convolution(
-            data=data,
-            num_filter=filter_list[0],
-            kernel=(3, 3),
-            stride=(1, 1),
-            pad=(1, 1),
-            no_bias=True,
-            name="conv0",
-            workspace=workspace,
-        )
-    else:  # often expected to be 224 such as imagenet
-        body = mx.sym.Convolution(
-            data=data,
-            num_filter=filter_list[0],
-            kernel=(7, 7),
-            stride=(2, 2),
-            pad=(3, 3),
-            no_bias=True,
-            name="conv0",
-            workspace=workspace,
-        )
-        body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name="bn0")
-        body = mx.sym.Activation(data=body, act_type="relu", name="relu0")
-        body = mx.sym.Pooling(data=body, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type="max")
-
-    for i in range(num_stages):
-        body = residual_unit(
-            body,
-            filter_list[i + 1],
-            (1 if i == 0 else 2, 1 if i == 0 else 2),
-            False,
-            name="stage%d_unit%d" % (i + 1, 1),
-            bottle_neck=bottle_neck,
-            workspace=workspace,
-            memonger=memonger,
-        )
-        for j in range(units[i] - 1):
-            body = residual_unit(
-                body,
-                filter_list[i + 1],
-                (1, 1),
-                True,
-                name="stage%d_unit%d" % (i + 1, j + 2),
-                bottle_neck=bottle_neck,
-                workspace=workspace,
-                memonger=memonger,
-            )
-    bn1 = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name="bn1")
-    relu1 = mx.sym.Activation(data=bn1, act_type="relu", name="relu1")
-    # Although kernel is not used here when global_pool=True, we should put one
-    pool1 = mx.sym.Pooling(
-        data=relu1, global_pool=True, kernel=(7, 7), pool_type="avg", name="pool1"
-    )
-    flat = mx.sym.Flatten(data=pool1)
-    try:
-        fc1 = mx.sym.FullyConnected(data=flat, num_hidden=num_classes, name="fc1", flatten=False)
-    except:
-        fc1 = mx.sym.FullyConnected(data=flat, num_hidden=num_classes, name="fc1")
-    if dtype == "float16":
-        fc1 = mx.sym.Cast(data=fc1, dtype=np.float32)
-    return mx.sym.softmax(data=fc1, name="softmax")
-
-
-def get_symbol(num_classes, num_layers, image_shape, conv_workspace=256, dtype="float32", **kwargs):
-    """
-    Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
-    Original author Wei Wu
-    """
-    image_shape = [int(l) for l in image_shape.split(",")]
-    (nchannel, height, width) = image_shape
-    if height <= 28:
-        num_stages = 3
-        if (num_layers - 2) % 9 == 0 and num_layers >= 164:
-            per_unit = [(num_layers - 2) // 9]
-            filter_list = [16, 64, 128, 256]
-            bottle_neck = True
-        elif (num_layers - 2) % 6 == 0 and num_layers < 164:
-            per_unit = [(num_layers - 2) // 6]
-            filter_list = [16, 16, 32, 64]
-            bottle_neck = False
-        else:
-            raise ValueError(
-                "no experiments done on num_layers {}, you can do it yourself".format(num_layers)
-            )
-        units = per_unit * num_stages
-    else:
-        if num_layers >= 50:
-            filter_list = [64, 256, 512, 1024, 2048]
-            bottle_neck = True
-        else:
-            filter_list = [64, 64, 128, 256, 512]
-            bottle_neck = False
-        num_stages = 4
-        if num_layers == 18:
-            units = [2, 2, 2, 2]
-        elif num_layers == 34:
-            units = [3, 4, 6, 3]
-        elif num_layers == 50:
-            units = [3, 4, 6, 3]
-        elif num_layers == 101:
-            units = [3, 4, 23, 3]
-        elif num_layers == 152:
-            units = [3, 8, 36, 3]
-        elif num_layers == 200:
-            units = [3, 24, 36, 3]
-        elif num_layers == 269:
-            units = [3, 30, 48, 8]
-        else:
-            raise ValueError(
-                "no experiments done on num_layers {}, you can do it yourself".format(num_layers)
-            )
-
-    return resnet(
-        units=units,
-        num_stages=num_stages,
-        filter_list=filter_list,
-        num_classes=num_classes,
-        image_shape=image_shape,
-        bottle_neck=bottle_neck,
-        workspace=conv_workspace,
-        dtype=dtype,
-    )
diff --git a/tests/python/frontend/mxnet/model_zoo/squeezenet.py b/tests/python/frontend/mxnet/model_zoo/squeezenet.py
deleted file mode 100644
index 146f7fa7e8e6..000000000000
--- a/tests/python/frontend/mxnet/model_zoo/squeezenet.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Symbol of SqueezeNet
-
-Reference:
-Iandola, Forrest N., et al.
-"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and< 0.5 mb model size." (2016).
-"""
-
-import mxnet as mx
-
-# Helpers
-def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels):
-    net = _make_fire_conv(net, squeeze_channels, 1, 0)
-
-    left = _make_fire_conv(net, expand1x1_channels, 1, 0)
-    right = _make_fire_conv(net, expand3x3_channels, 3, 1)
-    # NOTE : Assume NCHW layout here
-    net = mx.sym.concat(left, right, dim=1)
-
-    return net
-
-
-def _make_fire_conv(net, channels, kernel_size, padding=0):
-    net = mx.sym.Convolution(
-        net, num_filter=channels, kernel=(kernel_size, kernel_size), pad=(padding, padding)
-    )
-    net = mx.sym.Activation(net, act_type="relu")
-    return net
-
-
-# Net
-def get_symbol(num_classes=1000, version="1.0", **kwargs):
-    """Get symbol of SqueezeNet
-
-    Parameters
-    ----------
-    num_classes: int
-        The number of classification results
-
-    version : str, optional
-        "1.0" or "1.1" of SqueezeNet
-    """
-    assert version in [
-        "1.0",
-        "1.1",
-    ], "Unsupported SqueezeNet version {version}:" "1.0 or 1.1 expected".format(version=version)
-    net = mx.sym.Variable("data")
-    if version == "1.0":
-        net = mx.sym.Convolution(net, num_filter=96, kernel=(7, 7), stride=(2, 2), pad=(3, 3))
-        net = mx.sym.Activation(net, act_type="relu")
-        net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type="max", stride=(2, 2))
-        net = _make_fire(net, 16, 64, 64)
-        net = _make_fire(net, 16, 64, 64)
-        net = _make_fire(net, 32, 128, 128)
-        net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type="max", stride=(2, 2))
-        net = _make_fire(net, 32, 128, 128)
-        net = _make_fire(net, 48, 192, 192)
-        net = _make_fire(net, 48, 192, 192)
-        net = _make_fire(net, 64, 256, 256)
-        net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type="max", stride=(2, 2))
-        net = _make_fire(net, 64, 256, 256)
-    else:
-        net = mx.sym.Convolution(net, num_filter=64, kernel=(3, 3), stride=(2, 2), pad=(1, 1))
-        net = mx.sym.Activation(net, act_type="relu")
-        net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type="max", stride=(2, 2))
-        net = _make_fire(net, 16, 64, 64)
-        net = _make_fire(net, 16, 64, 64)
-        net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type="max", stride=(2, 2))
-        net = _make_fire(net, 32, 128, 128)
-        net = _make_fire(net, 32, 128, 128)
-        net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type="max", stride=(2, 2))
-        net = _make_fire(net, 48, 192, 192)
-        net = _make_fire(net, 48, 192, 192)
-        net = _make_fire(net, 64, 256, 256)
-        net = _make_fire(net, 64, 256, 256)
-    net = mx.sym.Dropout(net, p=0.5)
-    net = mx.sym.Convolution(net, num_filter=num_classes, kernel=(1, 1))
-    net = mx.sym.Activation(net, act_type="relu")
-    net = mx.sym.Pooling(data=net, global_pool=True, kernel=(13, 13), pool_type="avg")
-    net = mx.sym.flatten(net)
-    return mx.sym.softmax(net)
diff --git a/tests/python/frontend/mxnet/model_zoo/vgg.py b/tests/python/frontend/mxnet/model_zoo/vgg.py
deleted file mode 100644
index 157803446811..000000000000
--- a/tests/python/frontend/mxnet/model_zoo/vgg.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""References:
-
-Simonyan, Karen, and Andrew Zisserman. "Very deep convolutional networks for
-large-scale image recognition." arXiv preprint arXiv:1409.1556 (2014).
-"""
-
-import mxnet as mx
-import numpy as np
-
-
-def get_feature(internel_layer, layers, filters, batch_norm=False, **kwargs):
-    for i, num in enumerate(layers):
-        for j in range(num):
-            internel_layer = mx.sym.Convolution(
-                data=internel_layer,
-                kernel=(3, 3),
-                pad=(1, 1),
-                num_filter=filters[i],
-                name="conv%s_%s" % (i + 1, j + 1),
-            )
-            if batch_norm:
-                internel_layer = mx.symbol.BatchNorm(
-                    data=internel_layer, name="bn%s_%s" % (i + 1, j + 1)
-                )
-            internel_layer = mx.sym.Activation(
-                data=internel_layer, act_type="relu", name="relu%s_%s" % (i + 1, j + 1)
-            )
-        internel_layer = mx.sym.Pooling(
-            data=internel_layer,
-            pool_type="max",
-            kernel=(2, 2),
-            stride=(2, 2),
-            name="pool%s" % (i + 1),
-        )
-    return internel_layer
-
-
-def get_classifier(input_data, num_classes, **kwargs):
-    flatten = mx.sym.Flatten(data=input_data, name="flatten")
-    try:
-        fc6 = mx.sym.FullyConnected(data=flatten, num_hidden=4096, name="fc6", flatten=False)
-        relu6 = mx.sym.Activation(data=fc6, act_type="relu", name="relu6")
-        drop6 = mx.sym.Dropout(data=relu6, p=0.5, name="drop6")
-        fc7 = mx.sym.FullyConnected(data=drop6, num_hidden=4096, name="fc7", flatten=False)
-        relu7 = mx.sym.Activation(data=fc7, act_type="relu", name="relu7")
-        drop7 = mx.sym.Dropout(data=relu7, p=0.5, name="drop7")
-        fc8 = mx.sym.FullyConnected(data=drop7, num_hidden=num_classes, name="fc8", flatten=False)
-    except:
-        fc6 = mx.sym.FullyConnected(data=flatten, num_hidden=4096, name="fc6")
-        relu6 = mx.sym.Activation(data=fc6, act_type="relu", name="relu6")
-        drop6 = mx.sym.Dropout(data=relu6, p=0.5, name="drop6")
-        fc7 = mx.sym.FullyConnected(data=drop6, num_hidden=4096, name="fc7")
-        relu7 = mx.sym.Activation(data=fc7, act_type="relu", name="relu7")
-        drop7 = mx.sym.Dropout(data=relu7, p=0.5, name="drop7")
-        fc8 = mx.sym.FullyConnected(data=drop7, num_hidden=num_classes, name="fc8")
-    return fc8
-
-
-def get_symbol(num_classes, num_layers=11, batch_norm=False, dtype="float32", **kwargs):
-    """
-    Parameters
-    ----------
-    num_classes : int, default 1000
-        Number of classification classes.
-    num_layers : int
-        Number of layers for the variant of densenet. Options are 11, 13, 16, 19.
-    batch_norm : bool, default False
-        Use batch normalization.
-    dtype: str, float32 or float16
-        Data precision.
-    """
-    vgg_spec = {
-        11: ([1, 1, 2, 2, 2], [64, 128, 256, 512, 512]),
-        13: ([2, 2, 2, 2, 2], [64, 128, 256, 512, 512]),
-        16: ([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]),
-        19: ([2, 2, 4, 4, 4], [64, 128, 256, 512, 512]),
-    }
-    if num_layers not in vgg_spec:
-        raise ValueError(
-            "Invalide num_layers {}. Possible choices are 11,13,16,19.".format(num_layers)
-        )
-    layers, filters = vgg_spec[num_layers]
-    data = mx.sym.Variable(name="data")
-    if dtype == "float16":
-        data = mx.sym.Cast(data=data, dtype=np.float16)
-    feature = get_feature(data, layers, filters, batch_norm)
-    classifier = get_classifier(feature, num_classes)
-    if dtype == "float16":
-        classifier = mx.sym.Cast(data=classifier, dtype=np.float32)
-    symbol = mx.sym.softmax(data=classifier, name="softmax")
-    return symbol
diff --git a/tests/python/frontend/mxnet/test_forward.py b/tests/python/frontend/mxnet/test_forward.py
deleted file mode 100644
index cf206a3d5261..000000000000
--- a/tests/python/frontend/mxnet/test_forward.py
+++ /dev/null
@@ -1,2369 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import operator
-import random
-
-import numpy as np
-import pytest
-import tvm
-import tvm.testing
-from tvm import relay, te
-from tvm.contrib import graph_executor
-
-import model_zoo
-import mxnet as mx
-from mxnet import gluon
-from mxnet.gluon.model_zoo import vision
-
-
-def verify_mxnet_frontend_impl(
-    mx_symbol,
-    data_shape=(1, 3, 224, 224),
-    out_shape=(1, 1000),
-    gluon_impl=False,
-    name=None,
-    dtype="float32",
-):
-    """Use name different from test to avoid pytest picking it up"""
-    if gluon_impl:
-
-        def get_gluon_output(name, x):
-            try:
-                net = vision.get_model(name)
-            except RuntimeError:
-                pytest.skip(reason="mxnet downloads no longer supported")
-            net.collect_params().initialize(mx.init.Xavier())
-            net_sym = gluon.nn.SymbolBlock(
-                outputs=net(mx.sym.var("data")),
-                inputs=mx.sym.var("data"),
-                params=net.collect_params(),
-            )
-            out = net_sym(mx.nd.array(x.astype(dtype))).asnumpy()
-            return out, net_sym
-
-    else:
-
-        def get_mxnet_output(symbol, x, dtype="float32"):
-            from collections import namedtuple
-
-            Batch = namedtuple("Batch", ["data"])
-            mod = mx.mod.Module(symbol, label_names=None)
-            mod.bind(data_shapes=[("data", x.shape)], for_training=False)
-            mod.init_params()
-            mod.forward(Batch([mx.nd.array(x.astype(dtype))]))
-            out = mod.get_outputs()[0].asnumpy()
-            args, auxs = mod.get_params()
-            return out, args, auxs
-
-    def get_tvm_output(symbol, x, args, auxs, target, dev, dtype="float32"):
-        shape_dict = {"data": x.shape}
-        if gluon_impl:
-            mod, params = relay.frontend.from_mxnet(symbol, shape_dict)
-        else:
-            mod, params = relay.frontend.from_mxnet(
-                symbol, shape_dict, arg_params=args, aux_params=auxs
-            )
-        with tvm.transform.PassContext(opt_level=3):
-            lib = relay.build(mod, target, params=params)
-        m = graph_executor.GraphModule(lib["default"](dev))
-        # set inputs
-        m.set_input("data", tvm.nd.array(x.astype(dtype)))
-        m.run()
-        # get outputs
-        out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
-        return out.numpy()
-
-    # random input
-    x = np.random.uniform(size=data_shape)
-    if gluon_impl:
-        gluon_out, gluon_sym = get_gluon_output(name, x)
-        for target, dev in tvm.testing.enabled_targets():
-            tvm_out = get_tvm_output(gluon_sym, x, None, None, target, dev, dtype)
-            tvm.testing.assert_allclose(gluon_out, tvm_out, rtol=1e-5, atol=1e-5)
-    else:
-        mx_out, args, auxs = get_mxnet_output(mx_symbol, x, dtype)
-        assert "data" not in args
-        for target, dev in tvm.testing.enabled_targets():
-            tvm_out = get_tvm_output(mx_symbol, x, args, auxs, target, dev, dtype)
-            tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_mlp():
-    mlp = model_zoo.mx_mlp()
-    verify_mxnet_frontend_impl(mlp, data_shape=(1, 1, 28, 28), out_shape=(1, 10))
-
-
-@tvm.testing.uses_gpu
-def test_forward_vgg():
-    for n in [11]:
-        mx_sym = model_zoo.mx_vgg(n)
-        verify_mxnet_frontend_impl(mx_sym)
-
-
-@tvm.testing.uses_gpu
-def test_forward_resnet():
-    for n in [18]:
-        mx_sym = model_zoo.mx_resnet(18)
-        verify_mxnet_frontend_impl(mx_sym)
-
-
-@tvm.testing.uses_gpu
-def test_forward_leaky_relu():
-    data = mx.sym.var("data")
-    data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
-    mx_sym = mx.sym.LeakyReLU(data)
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-    mx_sym = mx.sym.LeakyReLU(data, act_type="leaky")
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-
-@tvm.testing.uses_gpu
-def test_forward_elu():
-    data = mx.sym.var("data")
-    data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
-    mx_sym = mx.sym.LeakyReLU(data, act_type="elu")
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-
-@tvm.testing.uses_gpu
-def test_forward_rrelu():
-    data = mx.sym.var("data")
-    data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
-    mx_sym = mx.sym.LeakyReLU(data, act_type="rrelu", lower_bound=0.3, upper_bound=0.7)
-    verify_mxnet_frontend_impl(mx_sym[0], (1, 3, 100, 100), (1, 6, 100, 100))
-
-
-@tvm.testing.uses_gpu
-def test_forward_prelu():
-    data = mx.sym.var("data")
-    data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
-    mx_sym = mx.sym.LeakyReLU(data, act_type="prelu")
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-
-@tvm.testing.uses_gpu
-def test_forward_gelu():
-    data = mx.sym.var("data")
-    data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
-    mx_sym = mx.sym.LeakyReLU(data, act_type="gelu")
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-
-@tvm.testing.uses_gpu
-def test_forward_softrelu():
-    data = mx.sym.var("data")
-    data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
-    mx_sym = mx.sym.Activation(data, act_type="softrelu")
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-
-@tvm.testing.uses_gpu
-def test_forward_fc_flatten():
-    # test flatten=True option in mxnet 0.11.1
-    data = mx.sym.var("data")
-    try:
-        mx_sym = mx.sym.FullyConnected(data, num_hidden=100, flatten=True)
-        verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 100))
-        mx_sym = mx.sym.FullyConnected(mx.sym.Flatten(data), num_hidden=100, flatten=False)
-        verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 100))
-    except:
-        pass
-
-
-@tvm.testing.uses_gpu
-def test_forward_clip():
-    data = mx.sym.var("data")
-    data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
-    mx_sym = mx.sym.clip(data, a_min=0, a_max=1)
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-
-@tvm.testing.uses_gpu
-def test_forward_split():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.split(data, axis=1, num_outputs=4, squeeze_axis=False)
-    verify_mxnet_frontend_impl(mx_sym, (1, 4, 2, 1), (1, 1, 2, 1))
-
-
-@tvm.testing.uses_gpu
-def test_forward_split_squeeze():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.split(data, axis=1, num_outputs=4, squeeze_axis=True)
-    verify_mxnet_frontend_impl(mx_sym, (1, 4, 2, 1), (1, 2, 1))
-
-
-@tvm.testing.uses_gpu
-def test_forward_expand_dims():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.expand_dims(data, axis=1)
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 1, 3, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_pooling():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.Pooling(data, kernel=(3, 3), pad=(1, 1), pool_type="avg")
-    verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 8, 8))
-
-    mx_sym = mx.sym.Pooling(data, kernel=(3, 3), pad=(1, 1), pool_type="max")
-    verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 8, 8))
-
-
-@tvm.testing.uses_gpu
-def test_forward_pooling3d():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.Pooling(data, kernel=(3, 3, 3), pad=(1, 1, 1), pool_type="avg")
-    verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8, 8), (1, 20, 8, 8, 8))
-
-    mx_sym = mx.sym.Pooling(data, kernel=(3, 3, 3), pad=(1, 1, 1), pool_type="max")
-    verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8, 8), (1, 20, 8, 8, 8))
-
-
-@tvm.testing.uses_gpu
-def test_forward_adaptive_pooling():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.contrib.AdaptiveAvgPooling2D(data, output_size=(1,))
-    verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 1, 1))
-
-    mx_sym = mx.sym.contrib.AdaptiveAvgPooling2D(data, output_size=(3, 3))
-    verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 3, 3))
-
-
-@tvm.testing.uses_gpu
-def test_forward_lrn():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.LRN(data, alpha=2, beta=2, knorm=1, nsize=5)
-    verify_mxnet_frontend_impl(mx_sym, (1, 10, 24, 24), (1, 10, 24, 24))
-
-
-@tvm.testing.uses_gpu
-def test_forward_ones():
-    data = mx.sym.var("data")
-    ones = mx.sym.ones(shape=(2, 3, 4), dtype="float32")
-    mx_sym = mx.sym.elemwise_add(data, ones)
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_zeros():
-    data = mx.sym.var("data")
-    zeros = mx.sym.zeros(shape=(2, 3, 4), dtype="float32")
-    mx_sym = mx.sym.elemwise_add(data, zeros)
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_ones_like():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.ones_like(data, dtype="float32")
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_make_loss():
-    data = mx.sym.var("data")
-    ones = mx.sym.ones(shape=(2, 3, 4), dtype="float32")
-    mx_sym = mx.sym.make_loss((data - ones) ** 2 / 2, dtype="float32")
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_zeros_like():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.zeros_like(data, dtype="float32")
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_argmax():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.argmax(data, axis=1)
-    verify_mxnet_frontend_impl(mx_sym, (5, 3), (5,))
-
-
-@tvm.testing.uses_gpu
-def test_forward_argmin():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.argmin(data, axis=0)
-    verify_mxnet_frontend_impl(mx_sym, (5, 4), (4,))
-
-
-@tvm.testing.uses_gpu
-def test_forward_slice():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.slice(data, begin=(0, 1), end=(2, 4))
-    verify_mxnet_frontend_impl(mx_sym, (3, 4), (2, 3))
-    mx_sym = mx.sym.slice(data, begin=(-1, 1), end=(-3, 4), step=(-1, 2))
-    verify_mxnet_frontend_impl(mx_sym, (3, 4), (2, 2))
-
-
-@tvm.testing.uses_gpu
-def test_forward_where():
-    cond = mx.sym.var("cond")
-    x = mx.sym.var("x")
-    y = mx.sym.var("y")
-    dshape = (2, 2)
-    dtype = "float32"
-    mx_sym = mx.sym.where(cond, x, y)
-    np_cond = np.array([[0, 1], [-1, 0]]).astype(dtype)
-    np_x = np.random.uniform(size=dshape).astype(dtype)
-    np_y = np.random.uniform(size=dshape).astype(dtype)
-    mx_cond = mx.nd.array(np_cond)
-    mx_x = mx.nd.array(np_x)
-    mx_y = mx.nd.array(np_y)
-    shapes = {"cond": dshape, "x": dshape, "y": dshape}
-    mod = mx.mod.Module(mx_sym, label_names=None, data_names=["cond", "x", "y"])
-    mod.bind(data_shapes=shapes.items(), for_training=False)
-    mod.init_params()
-    args, auxs = mod.get_params()
-    mx_out = mx.nd.where(mx_cond, mx_x, mx_y).asnumpy()
-
-    mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, args, auxs)
-    for target, dev in tvm.testing.enabled_targets():
-        for kind in ["graph", "debug"]:
-            op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                np_cond, np_x, np_y
-            )
-            tvm.testing.assert_allclose(op_res.numpy(), mx_out)
-
-
-@tvm.testing.uses_gpu
-def test_forward_arange():
-    def _mx_symbol(F, start, stop, step):
-        if start is None and step is None:
-            sym = F.arange(stop)
-        elif start is None:
-            sym = F.arange(stop, step=step)
-        elif step is None:
-            sym = F.arange(start, stop)
-        else:
-            sym = F.arange(start, stop, step)
-        return sym
-
-    def verify(start, stop, step):
-        ref_res = _mx_symbol(mx.nd, start, stop, step)
-        mx_sym = _mx_symbol(mx.sym, start, stop, step)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(
-                    kind, mod=mod, device=dev, target=target
-                ).evaluate()()
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify(0, 20, None)
-    verify(0, 20, 2)
-    verify(1, 20, None)
-    verify(1, 20, 2)
-    verify(1, 20, 1.5)
-    verify(1, 20.5, None)
-    verify(1, 20, 3)
-    verify(20, 1, -1)
-    verify(20, 1, -1.5)
-
-
-def _mx_symbol(F, op_name, inputs):
-    op = getattr(F, op_name)
-    return op(*inputs)
-
-
-@tvm.testing.uses_gpu
-def test_forward_broadcast_ops():
-    for op in [
-        "broadcast_add",
-        "broadcast_plus",
-        "broadcast_sub",
-        "broadcast_minus",
-        "broadcast_mul",
-        "broadcast_div",
-        "broadcast_mod",
-        "broadcast_maximum",
-        "broadcast_minimum",
-        "broadcast_equal",
-        "broadcast_not_equal",
-        "broadcast_greater",
-        "broadcast_greater_equal",
-        "broadcast_lesser",
-        "broadcast_lesser_equal",
-        "broadcast_power",
-        "broadcast_logical_or",
-        "broadcast_logical_and",
-        "broadcast_logical_xor",
-    ]:
-        a_shape = (3, 4, 5)
-        b_shape = (4, 5)
-        if op == "broadcast_mod":
-            dtype = "int32"
-            a_np = np.random.randint(1, 100, size=a_shape).astype(dtype)
-            b_np = np.random.randint(1, 100, size=b_shape).astype(dtype)
-        else:
-            dtype = "float32"
-            a_np = np.random.uniform(size=a_shape).astype(dtype)
-            b_np = np.random.uniform(size=b_shape).astype(dtype)
-        mx_sym = _mx_symbol(mx.sym, op, [mx.sym.var("a"), mx.sym.var("b")])
-        ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(a_np), mx.nd.array(b_np)])
-        shapes = {"a": a_shape, "b": b_shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    a_np, b_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-
-@tvm.testing.uses_gpu
-def test_forward_elemwise_ops():
-    for op in [
-        "elemwise_add",
-        "elemwise_sub",
-        "elemwise_mul",
-        "elemwise_div",
-        "maximum",
-        "minimum",
-        operator.lt,
-        operator.le,
-        operator.eq,
-        operator.ne,
-        operator.gt,
-        operator.ge,
-    ]:
-        shape = (3, 4, 5)
-        dtype = "float32"
-        a_np = np.random.uniform(size=shape).astype(dtype)
-        b_np = np.random.uniform(size=shape).astype(dtype)
-        if type(op) == str:
-            mx_sym = _mx_symbol(mx.sym, op, [mx.sym.var("a"), mx.sym.var("b")])
-            ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(a_np), mx.nd.array(b_np)])
-        else:
-            mx_sym = op(mx.sym.var("a"), mx.sym.var("b"))
-            ref_res = op(mx.nd.array(a_np), mx.nd.array(b_np))
-        shapes = {"a": shape, "b": shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    a_np, b_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-
-@tvm.testing.uses_gpu
-def test_forward_softmin():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.softmin(data)
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 3, 100, 100))
-
-    mx_sym = mx.sym.softmin(data, axis=2)
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 3, 100, 100))
-
-
-@tvm.testing.uses_gpu
-def test_forward_unary_ops():
-    for op in [
-        "abs",
-        "sqrt",
-        "ceil",
-        "floor",
-        "round",
-        "reciprocal",
-        "trunc",
-        "softsign",
-        "hard_sigmoid",
-        "cos",
-        "sin",
-        "tan",
-        "cosh",
-        "sinh",
-        "tanh",
-        "arccos",
-        "arcsin",
-        "arctan",
-        "arccosh",
-        "arcsinh",
-        "arctanh",
-    ]:
-        shape = (1, 3, 4, 5)
-        dtype = "float32"
-        a_np = np.random.uniform(size=shape).astype(dtype)
-        mx_sym = _mx_symbol(mx.sym, op, [mx.sym.var("a")])
-        ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(a_np)])
-        shapes = {"a": shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    a_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5, atol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_scalar_ops():
-    for op in [
-        operator.add,
-        operator.sub,
-        operator.mul,
-        operator.truediv,
-        operator.pow,
-        operator.lt,
-        operator.le,
-        operator.eq,
-        operator.ne,
-        operator.gt,
-        operator.ge,
-    ]:
-        dtype = "float32"
-        a_shape = (3, 4, 5)
-        a_np = np.random.uniform(size=a_shape).astype(dtype)
-        b_scalar = 2.3
-        mx_sym = op(mx.sym.var("a"), b_scalar)
-        ref_res = op(mx.nd.array(a_np), b_scalar)
-        shapes = {"a": a_shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    a_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-    for op in ["maximum", "minimum"]:
-        dtype = "float32"
-        a_shape = (3, 4, 5)
-        a_np = np.random.uniform(size=a_shape).astype(dtype)
-        b_scalar = 2.3
-        mx_sym = _mx_symbol(mx.sym, op, [mx.sym.var("a"), b_scalar])
-        ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(a_np), b_scalar])
-        shapes = {"a": a_shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    a_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-
-@tvm.testing.uses_gpu
-def test_forward_slice_axis():
-    def verify(shape, axis, begin, end):
-        data_np = np.random.uniform(size=shape).astype("float32")
-        ref_res = mx.nd.slice_axis(mx.nd.array(data_np), axis, begin, end)
-        mx_sym = mx.sym.slice_axis(mx.sym.var("data"), axis, begin, end)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    data_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((3, 4), 0, 1, 2)
-    verify((3, 4), 0, 1, None)
-    verify((3, 4), 1, 0, 2)
-    verify((3, 4), 1, -3, -1)
-    verify((3, 4), -1, -3, -1)
-
-
-@tvm.testing.uses_gpu
-def test_forward_slice_like():
-    def verify(x_shape, y_shape, axes):
-        x_np = np.random.uniform(size=x_shape).astype("float32")
-        y_np = np.random.uniform(size=y_shape).astype("float32")
-        if axes is None:
-            ref_res = mx.nd.slice_like(mx.nd.array(x_np), mx.nd.array(y_np))
-            mx_sym = mx.sym.slice_like(mx.sym.var("x"), mx.sym.var("y"))
-        else:
-            ref_res = mx.nd.slice_like(mx.nd.array(x_np), mx.nd.array(y_np), axes=axes)
-            mx_sym = mx.sym.slice_like(mx.sym.var("x"), mx.sym.var("y"), axes=axes)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": x_shape, "y": y_shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np, y_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((3, 4), (2, 3), None)
-    verify((3, 4), (2, 3), (0, 1))
-    verify((3, 4), (2, 3), (0))
-    verify((3, 4), (2, 3), (-1))
-
-
-@tvm.testing.uses_gpu
-def test_forward_sequence_reverse():
-    def verify(shape, seq_lengths, use_seq_lengths, seq_axis):
-        data_np = np.random.uniform(size=shape).astype("float32")
-
-        ref_res_args = [mx.nd.array(data_np), None, use_seq_lengths, seq_axis]
-        mx_sym_args = [mx.sym.var("data"), None, use_seq_lengths, seq_axis]
-        from_mxnet_args = [{"data": shape}, {"data": "float32"}]
-        in_data = [data_np]
-
-        if use_seq_lengths and seq_lengths:
-            seq_lengths_np = np.array(seq_lengths).astype("int32")
-            ref_res_args[1] = mx.nd.array(seq_lengths_np)
-            mx_sym_args[1] = mx.sym.var("seq_lengths")
-            from_mxnet_args[0].update({"seq_lengths": seq_lengths_np.shape})
-            from_mxnet_args[1].update({"seq_lengths": "int32"})
-            in_data.append(seq_lengths_np)
-
-        ref_res = mx.nd.SequenceReverse(*ref_res_args)
-        mx_sym = mx.sym.SequenceReverse(*mx_sym_args)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, *from_mxnet_args)
-
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    *in_data
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((3, 4), [1, 2, 3, 1], True, 0)
-    verify((3, 4), None, False, 0)
-    verify((3, 5, 5, 6), [1, 2, 3, 1, 3], True, 0)
-    # MXNet accepts axis value as 0 only
-    # verify((3, 4, 5, 6), None, False, 2)
-
-
-@tvm.testing.uses_gpu
-def test_forward_l2_normalize():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.L2Normalization(data, mode="channel")
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4, 5), (2, 3, 4, 5))
-
-    mx_sym = mx.sym.L2Normalization(data, mode="instance")
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4, 5), (2, 3, 4, 5))
-
-    mx_sym = mx.sym.L2Normalization(data, mode="spatial")
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4, 5), (2, 3, 4, 5))
-
-
-@tvm.testing.uses_gpu
-def test_forward_logistic_regression_output():
-    data_shape = (1, 10)
-    dtype = "float32"
-    data_np = np.random.uniform(size=data_shape).astype(dtype)
-    label_np = np.random.uniform(size=data_shape).astype(dtype)
-    mx_sym = mx.symbol.LogisticRegressionOutput(mx.sym.var("data"), mx.sym.var("label"))
-    ref_res = mx.nd.LogisticRegressionOutput(mx.nd.array(data_np), mx.nd.array(label_np))
-    shapes = {"data": data_shape}
-    mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-    for target, dev in tvm.testing.enabled_targets():
-        for kind in ["graph", "debug"]:
-            op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                data_np
-            )
-            tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-
-@tvm.testing.uses_gpu
-def test_forward_dot():
-    def verify(a_shape, b_shape, transpose_b=False):
-        dtype = "float32"
-        a_np = np.random.uniform(size=a_shape).astype(dtype)
-        b_np = np.random.uniform(size=b_shape).astype(dtype)
-        mx_sym = mx.symbol.dot(mx.sym.var("a"), mx.sym.var("b"), transpose_b=transpose_b)
-        ref_res = mx.nd.dot(mx.nd.array(a_np), mx.nd.array(b_np), transpose_b=transpose_b)
-        shapes = {"a": a_shape, "b": b_shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    a_np, b_np
-                )
-                tvm.testing.assert_allclose(
-                    op_res.numpy(), ref_res.asnumpy(), rtol=1e-05, atol=1e-05
-                )
-
-    verify((1, 256), (256, 1))
-    verify((1, 256), (1, 256), transpose_b=True)
-    verify((5,), (5,))
-    verify((3,), (3, 5))
-    verify((3,), (5, 3), transpose_b=True)
-    verify((3,), (3, 5, 3, 5))
-    verify((3,), (5, 5, 3, 3), transpose_b=True)
-    verify((10, 1), (1,))
-    verify((1, 1), (4, 3, 2, 1), transpose_b=True)
-    verify((4, 3, 2, 1), (1,))
-    verify((1, 2, 3, 4), (1, 4), transpose_b=True)
-    verify((4, 1, 1), (1, 2, 3))
-    verify((1, 1, 4), (2, 3, 4), transpose_b=True)
-
-
-@tvm.testing.uses_gpu
-def test_forward_shape_array():
-    def verify(shape):
-        x_np = np.random.uniform(size=shape).astype("float32")
-        ref_res = mx.nd.shape_array(mx.nd.array(x_np))
-        mx_sym = mx.sym.shape_array(mx.sym.var("x"))
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((1,))
-    verify((3, 4, 5))
-    verify((3, 4, 5, 6))
-
-
-@tvm.testing.uses_gpu
-def test_forward_squeeze():
-    def verify(shape, axis):
-        x_np = np.random.uniform(size=shape).astype("float32")
-        if axis is None:
-            ref_res = mx.nd.squeeze(mx.nd.array(x_np))
-            mx_sym = mx.sym.squeeze(mx.sym.var("x"))
-        else:
-            ref_res = mx.nd.squeeze(mx.nd.array(x_np), axis=axis)
-            mx_sym = mx.sym.squeeze(mx.sym.var("x"), axis=axis)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((1, 3, 1), None)
-    verify((1, 3, 1), 0)
-    verify((1, 3, 1), 2)
-    verify((1, 3, 1), (0, 2))
-
-
-@tvm.testing.uses_gpu
-def test_forward_broadcast_axis():
-    def verify(shape, axis, size):
-        x_np = np.random.uniform(size=shape).astype("float32")
-        for op in ["broadcast_axis", "broadcast_axes"]:
-            mx_sym = _mx_symbol(mx.sym, op, [mx.sym.var("x"), axis, size])
-            ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(x_np), axis, size])
-            mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-            for target, dev in tvm.testing.enabled_targets():
-                for kind in ["graph", "debug"]:
-                    op_res = relay.create_executor(
-                        kind, mod=mod, device=dev, target=target
-                    ).evaluate()(x_np)
-                    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((1, 2, 1), 2, 3)
-    verify((1, 2, 1), (0, 2), (2, 3))
-
-
-@tvm.testing.uses_gpu
-def test_forward_broadcast_to():
-    def verify(input_shape, shape):
-        x_np = np.random.uniform(size=input_shape).astype("float32")
-        ref_res = mx.nd.broadcast_to(mx.nd.array(x_np), shape=shape)
-        mx_sym = mx.sym.broadcast_to(mx.sym.var("x"), shape=shape)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": input_shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((1, 2, 3), (3, 2, 3))
-    verify((4, 1, 32, 32), (4, 8, 32, 32))
-
-
-@tvm.testing.uses_gpu
-def test_forward_broadcast_like():
-    def verify(input_shape, like_shape):
-        x_np = np.random.uniform(size=input_shape).astype("float32")
-        y_np = np.random.uniform(size=like_shape).astype("float32")
-        ref_res = mx.nd.broadcast_like(mx.nd.array(x_np), mx.nd.array(y_np))
-        mx_sym = mx.sym.broadcast_like(mx.sym.var("x"), mx.sym.var("y"))
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": input_shape, "y": like_shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np, y_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((1, 2, 3), (3, 2, 3))
-    verify((4, 1, 32, 32), (4, 8, 32, 32))
-
-
-@tvm.testing.uses_gpu
-def test_forward_logical_not():
-    a_shape = (3, 4, 5)
-    dtype = "float32"
-    a_np = np.random.uniform(size=a_shape).astype(dtype)
-    mx_sym = mx.sym.logical_not(mx.sym.var("a"))
-    ref_res = mx.nd.logical_not(mx.nd.array(a_np))
-    shapes = {"a": a_shape}
-    mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-    for target, dev in tvm.testing.enabled_targets():
-        for kind in ["graph", "debug"]:
-            op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                a_np
-            )
-            tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-
-@tvm.testing.uses_gpu
-def test_forward_full():
-    def verify(val, shape, dtype):
-        dev = mx.cpu()
-        ref_res = mx.nd.full(shape, val, dtype=dtype)
-        mx_sym = mx.sym.full(shape, val, dtype=dtype)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {})
-        for target, dev in tvm.testing.enabled_targets():
-            # Skip testing graph executor because this op will be optimized out
-            # by constant folding.
-            for kind in ["debug"]:
-                op_res = relay.create_executor(
-                    kind, mod=mod, device=dev, target=target
-                ).evaluate()()
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify(2, (3, 4), "float32")
-    verify(2, (3, 4), "int32")
-    verify(3.5, (1, 3, 4), "float32")
-
-
-@tvm.testing.uses_gpu
-def test_forward_embedding():
-    def verify(data_shape, weight_shape):
-        in_dim, out_dim = weight_shape
-        x_np = np.random.randint(0, weight_shape[0], size=data_shape).astype("float32")
-        w_np = np.random.uniform(size=weight_shape).astype("float32")
-        ref_res = mx.nd.Embedding(
-            mx.nd.array(x_np), mx.nd.array(w_np), input_dim=in_dim, output_dim=out_dim
-        )
-        mx_sym = mx.sym.Embedding(
-            mx.sym.var("x"), mx.sym.var("w"), input_dim=in_dim, output_dim=out_dim
-        )
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": data_shape, "w": weight_shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x=x_np, w=w_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((2, 2), (4, 5))
-    verify((2, 3, 4), (4, 5))
-
-
-@tvm.testing.uses_gpu
-def test_forward_smooth_l1():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.smooth_l1(data)
-    verify_mxnet_frontend_impl(mx_sym, (3, 4), (3, 4))
-    mx_sym = mx.sym.smooth_l1(data, scalar=1.0)
-    verify_mxnet_frontend_impl(mx_sym, (3, 4), (3, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_take():
-    def verify(shape, indices_src, axis, mode="clip"):
-        x_np = np.random.uniform(size=shape).astype("float32")
-        indices_np = np.array(indices_src, dtype="float32")
-        ref_res = mx.nd.take(mx.nd.array(x_np), mx.nd.array(indices_np), axis, mode)
-        mx_sym = mx.sym.take(mx.sym.var("x"), mx.sym.var("y"), axis, mode)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape, "y": indices_np.shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np, indices_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((2, 2), [[[1, 0], [0, 1]]], 0)
-    verify((2, 2), [[[1, 0], [0, 1]]], 1)
-    verify((4, 3, 5, 6), [[2, 1, 0, 0]], -2)
-    verify((3, 4), [-1, 5], 0)
-    verify((3, 4), [-1, 5], 0, mode="wrap")
-    verify((3, 4), [-1, 5], 1)
-    verify((3, 4), [-1, 5], 1, mode="wrap")
-
-
-@tvm.testing.uses_gpu
-def test_forward_gather_nd():
-    def verify(xshape, yshape, y_data, error=False):
-        x_data = np.random.uniform(size=xshape).astype("float32")
-        ref_res = mx.nd.gather_nd(mx.nd.array(x_data), mx.nd.array(y_data))
-        mx_sym = mx.sym.gather_nd(mx.sym.var("x_data"), mx.sym.var("y_data"))
-        mod, _ = relay.frontend.from_mxnet(
-            mx_sym, {"x_data": xshape, "y_data": yshape}, {"x_data": "float32", "y_data": "int32"}
-        )
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_data, y_data
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((2, 2), (2, 3), [[1, 1, 0], [0, 1, 0]])
-    verify((2, 2, 2), (2, 2), [[0, 1], [1, 0]])
-    verify((3, 2, 2), (2, 2), [[0, 1], [1, 0]])
-    verify((3, 2), (2, 2, 3), [[[0, 1, 2], [2, 0, 1]], [[0, 0, 0], [1, 1, 1]]])
-    verify((1, 4), (1, 1), [[0]])
-
-
-@tvm.testing.uses_gpu
-def test_forward_bilinear_resize():
-    # add tests including scale_height and scale_width when mxnet is updated to version 1.5
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.contrib.BilinearResize2D(data, height=5, width=10)
-    verify_mxnet_frontend_impl(mx_sym, (1, 2, 3, 4), (1, 2, 5, 10))
-
-
-@tvm.testing.uses_gpu
-def test_forward_grid_generator():
-    def verify(shape, transform_type, target_shape):
-        x = np.random.uniform(size=shape).astype("float32")
-        ref_res = mx.nd.GridGenerator(mx.nd.array(x), transform_type, target_shape)
-        mx_sym = mx.sym.GridGenerator(mx.sym.var("x"), transform_type, target_shape)
-        shape_dict = {"x": x.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5, atol=1e-5)
-
-    verify((4, 6), "affine", (16, 32))
-    verify((4, 2, 16, 16), "warp", None)
-    verify((1, 2, 16, 16), "warp", None)
-
-
-@tvm.testing.uses_gpu
-def test_forward_bilinear_sampler():
-    def verify(data_shape, grid_shape):
-        data = np.random.uniform(size=data_shape).astype("float32")
-        grid = np.random.uniform(low=-1.5, high=1.5, size=grid_shape).astype("float32")
-        ref_res = mx.nd.BilinearSampler(mx.nd.array(data), mx.nd.array(grid))
-        mx_sym = mx.sym.BilinearSampler(mx.sym.var("data"), mx.sym.var("grid"))
-        shape_dict = {"data": data.shape, "grid": grid.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    data, grid
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5, atol=1e-5)
-
-    verify((4, 4, 16, 32), (4, 2, 8, 8))
-    verify((4, 4, 16, 32), (4, 2, 32, 32))
-
-
-@tvm.testing.uses_gpu
-def test_forward_rnn_layer():
-    def verify(
-        mode,
-        seq_len,
-        input_size,
-        hidden_size,
-        num_layers,
-        batch=1,
-        init_states=True,
-        bidirectional=False,
-    ):
-        if mode == "rnn":
-            layer = gluon.rnn.RNN(hidden_size, num_layers, bidirectional=bidirectional)
-        elif mode == "gru":
-            layer = gluon.rnn.GRU(hidden_size, num_layers, bidirectional=bidirectional)
-        else:  # mode == "lstm"
-            layer = gluon.rnn.LSTM(hidden_size, num_layers, bidirectional=bidirectional)
-        num_states = 2 if mode == "lstm" else 1
-        layer.initialize()
-        layer.hybridize()
-
-        dtype = "float32"
-        directions = 2 if bidirectional else 1
-        data_np = np.random.uniform(size=(seq_len, batch, input_size)).astype(dtype)
-        data_mx = mx.nd.array(data_np)
-
-        if init_states:
-            shape_dict = {"data0": data_np.shape}
-            inputs = {"data0": data_np}
-            state_shape = (num_layers * directions, batch, hidden_size)
-            states_np = []
-            states_mx = []
-            for i in range(num_states):
-                s = np.random.uniform(size=state_shape).astype(dtype)
-                states_np.append(s)
-                states_mx.append(mx.nd.array(s))
-                shape_dict["data%s" % (i + 1)] = s.shape
-                inputs["data%s" % (i + 1)] = s
-            mx_out, mx_states = layer(data_mx, states_mx)
-            mx_res = [mx_out] + mx_states
-        else:
-            shape_dict = {"data": data_np.shape}
-            inputs = {"data": data_np}
-            mx_res = layer(data_mx)
-
-        mx_sym = layer._cached_graph[1]
-        mx_params = {}
-        for name, param in layer.collect_params().items():
-            mx_params[name] = param._reduce()
-
-        mod, params = relay.frontend.from_mxnet(mx_sym, shape=shape_dict, arg_params=mx_params)
-        for target, dev in tvm.testing.enabled_targets():
-            # only test graph executor because debug runtime is too slow
-            for kind in ["graph"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    **inputs, **params
-                )
-                if init_states:
-                    assert len(op_res) == len(mx_res)
-                    for i, val in enumerate(op_res):
-                        tvm.testing.assert_allclose(val.numpy(), mx_res[i].asnumpy(), rtol=1e-3)
-                else:
-                    tvm.testing.assert_allclose(op_res.numpy(), mx_res.asnumpy(), rtol=1e-3)
-
-    for mode in ["rnn", "gru", "lstm"]:
-        verify(mode, 1, 64, 64, 1)
-        verify(mode, 10, 64, 64, 2)
-        verify(mode, 10, 64, 32, 2)
-        verify(mode, 10, 64, 32, 2, batch=2)
-        verify(mode, 10, 32, 64, 1, bidirectional=True)
-        # The following two codeblocks need to be fixed for mxnet 1.5
-        # verify(mode, 10, 64, 64, 3, init_states=False)
-        # verify(mode, 10, 64, 64, 3, batch=2, bidirectional=True, init_states=False)
-
-
-@tvm.testing.uses_gpu
-def test_forward_Crop():
-    def verify(xshape, yshape, offset=None):
-        x_data = np.random.uniform(size=xshape).astype("float32")
-        y_data = np.random.uniform(size=yshape).astype("float32")
-        if offset is None:
-            mx_sym = mx.sym.Crop(mx.sym.var("x"), mx.sym.var("y"))
-            ref_res = mx.nd.Crop(mx.nd.array(x_data), mx.nd.array(y_data))
-        else:
-            mx_sym = mx.sym.Crop(mx.sym.var("x"), mx.sym.var("y"), offset=offset)
-            ref_res = mx.nd.Crop(mx.nd.array(x_data), mx.nd.array(y_data), offset=offset)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": xshape, "y": yshape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                func = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()
-                if offset is None or offset == (0, 0):
-                    op_res = func(x_data, y_data)
-                else:
-                    op_res = func(x_data)
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((1, 3, 40, 40), (1, 3, 20, 20))
-    verify((1, 3, 40, 40), (1, 3, 20, 20), (0, 0))
-    verify((1, 3, 40, 40), (1, 3, 20, 20), (10, 10))
-    verify((5, 32, 40, 40), (5, 32, 25, 25))
-    verify((5, 32, 40, 40), (5, 32, 25, 25), (5, 5))
-
-
-@tvm.testing.uses_gpu
-def test_forward_argsort():
-    def verify(shape, axis, is_ascend, dtype="float32"):
-        x_np = np.random.uniform(size=shape).astype("float32")
-        ref_res = mx.nd.argsort(mx.nd.array(x_np), axis=axis, is_ascend=is_ascend, dtype=dtype)
-        mx_sym = mx.sym.argsort(mx.sym.var("x"), axis=axis, is_ascend=is_ascend, dtype=dtype)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((2, 3, 4), axis=0, is_ascend=False)
-    verify((1, 4, 6), axis=1, is_ascend=True)
-    verify((3, 5, 6), axis=-3, is_ascend=False, dtype="int32")
-
-
-@tvm.testing.uses_gpu
-def test_forward_topk():
-    def verify(shape, k, axis, ret_type, is_ascend=None, dtype="float32"):
-        x_np = np.random.uniform(size=shape).astype("float32")
-        if is_ascend is None:
-            ref_res = mx.nd.topk(mx.nd.array(x_np), k=k, axis=axis, ret_typ=ret_type, dtype=dtype)
-            mx_sym = mx.sym.topk(mx.sym.var("x"), k=k, axis=axis, ret_typ=ret_type, dtype=dtype)
-        else:
-            ref_res = mx.nd.topk(
-                mx.nd.array(x_np),
-                k=k,
-                axis=axis,
-                ret_typ=ret_type,
-                is_ascend=is_ascend,
-                dtype=dtype,
-            )
-            mx_sym = mx.sym.topk(
-                mx.sym.var("x"), k=k, axis=axis, ret_typ=ret_type, is_ascend=is_ascend, dtype=dtype
-            )
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np
-                )
-                if isinstance(ref_res, list):
-                    assert len(op_res) == len(ref_res)
-                    for i, t in enumerate(op_res):
-                        tvm.testing.assert_allclose(t.numpy(), ref_res[i].asnumpy())
-                else:
-                    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((3, 4), k=1, axis=0, ret_type="both")
-    verify((3, 4), k=1, axis=-1, ret_type="indices")
-    verify((3, 5, 6), k=2, axis=2, ret_type="value", is_ascend=False)
-    verify((3, 5, 6), k=2, axis=1, ret_type="value", is_ascend=True)
-    verify((3, 5, 6), k=0, axis=2, ret_type="both", dtype="int32")
-
-
-@tvm.testing.uses_gpu
-def test_forward_sequence_mask():
-    def verify(shape, use_sequence_length, value, axis, dtype, itype):
-        data_np = np.random.uniform(size=shape).astype(dtype)
-        valid_length_np = np.random.randint(0, shape[axis], size=shape[1 - axis]).astype(itype)
-        if use_sequence_length:
-            ref_res = mx.nd.SequenceMask(
-                mx.nd.array(data_np, dtype=dtype),
-                sequence_length=mx.nd.array(valid_length_np, dtype=itype),
-                use_sequence_length=use_sequence_length,
-                value=value,
-                axis=axis,
-            )
-            mx_sym = mx.sym.SequenceMask(
-                mx.sym.var("data"),
-                sequence_length=mx.sym.var("valid_length"),
-                use_sequence_length=use_sequence_length,
-                value=value,
-                axis=axis,
-            )
-            mod, _ = relay.frontend.from_mxnet(
-                mx_sym,
-                {"data": shape, "valid_length": valid_length_np.shape},
-                dtype={"data": dtype, "valid_length": itype},
-            )
-        else:
-            ref_res = mx.nd.SequenceMask(
-                mx.nd.array(data_np, dtype=dtype),
-                use_sequence_length=use_sequence_length,
-                value=value,
-                axis=axis,
-            )
-            mx_sym = mx.sym.SequenceMask(
-                mx.sym.var("data"), use_sequence_length=use_sequence_length, value=value, axis=axis
-            )
-            mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": shape}, dtype={"data": dtype})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                if use_sequence_length is False and kind == "graph":
-                    # Disable the test for 'graph' when it's identity.
-                    continue
-                func = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()
-                if use_sequence_length:
-                    op_res = func(data_np, valid_length_np)
-                else:
-                    op_res = func(data_np)
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((5, 10), True, 0.0, 0, "float32", "float32")
-    verify((5, 4, 3), True, 1.0, 1, "float32", "float32")
-    verify((5, 4, 3), False, 1.0, 1, "float64", "float64")
-    verify((5, 4, 3, 2), True, 1.0, 0, "float32", "float32")
-
-
-@tvm.testing.uses_gpu
-def test_forward_contrib_div_sqrt_dim():
-    def verify(shape):
-        x_np = np.random.uniform(size=shape).astype("float32")
-        ref_res = mx.nd.contrib.div_sqrt_dim(mx.nd.array(x_np))
-        mx_sym = mx.sym.contrib.div_sqrt_dim(mx.sym.var("x"))
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((3, 4))
-    verify((3, 4, 5))
-
-
-@tvm.testing.uses_gpu
-def test_forward_batch_norm():
-    def verify(shape, axis=1, fix_gamma=False):
-        x = np.random.uniform(size=shape).astype("float32")
-        gamma = np.random.uniform(size=(shape[axis])).astype("float32")
-        beta = np.random.uniform(size=(shape[axis])).astype("float32")
-        moving_mean = np.random.uniform(size=(shape[axis])).astype("float32")
-        moving_var = np.abs(np.random.uniform(size=(shape[axis])).astype("float32")) + 0.5
-        ref_res = mx.nd.BatchNorm(
-            mx.nd.array(x),
-            mx.nd.array(gamma),
-            mx.nd.array(beta),
-            mx.nd.array(moving_mean),
-            mx.nd.array(moving_var),
-            axis=axis,
-            use_global_stats=True,
-            fix_gamma=fix_gamma,
-        )
-        mx_sym = mx.sym.BatchNorm(
-            mx.sym.var("x"),
-            mx.sym.var("gamma"),
-            mx.sym.var("beta"),
-            mx.sym.var("mean"),
-            mx.sym.var("var"),
-            axis=axis,
-            use_global_stats=True,
-            fix_gamma=fix_gamma,
-        )
-
-        shape_dict = {
-            "x": x.shape,
-            "gamma": gamma.shape,
-            "beta": beta.shape,
-            "mean": moving_mean.shape,
-            "var": moving_var.shape,
-        }
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        # print(mod)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x, gamma, beta, moving_mean, moving_var
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3)
-
-    verify((2, 3, 4, 5))
-    verify((2, 3, 4, 5), axis=0)
-    verify((2, 3, 4, 5), axis=-1)
-    verify((2, 3, 4, 5), fix_gamma=True)
-
-
-@tvm.testing.uses_gpu
-def test_forward_instance_norm():
-    def verify(shape, axis=1, epsilon=1e-5):
-        x = np.random.uniform(size=shape).astype("float32")
-        gamma = np.random.uniform(size=(shape[axis])).astype("float32")
-        beta = np.random.uniform(size=(shape[axis])).astype("float32")
-        ref_res = mx.nd.InstanceNorm(mx.nd.array(x), mx.nd.array(gamma), mx.nd.array(beta), epsilon)
-        mx_sym = mx.sym.InstanceNorm(
-            mx.sym.var("x"), mx.sym.var("gamma"), mx.sym.var("beta"), epsilon
-        )
-        shape_dict = {"x": x.shape, "gamma": gamma.shape, "beta": beta.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x, gamma, beta
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=2e-5, atol=1e-5)
-
-    verify((2, 3, 4, 5))
-    verify((32, 64, 80, 64))
-    verify((8, 6, 5))
-    verify((8, 7, 6, 5, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_layer_norm():
-    def verify(shape, axis=-1):
-        x = np.random.uniform(size=shape).astype("float32")
-        gamma = np.random.uniform(size=(shape[axis])).astype("float32")
-        beta = np.random.uniform(size=(shape[axis])).astype("float32")
-        ref_res = mx.nd.LayerNorm(mx.nd.array(x), mx.nd.array(gamma), mx.nd.array(beta), axis=axis)
-        mx_sym = mx.sym.LayerNorm(
-            mx.sym.var("x"), mx.sym.var("gamma"), mx.sym.var("beta"), axis=axis
-        )
-        shape_dict = {"x": x.shape, "gamma": gamma.shape, "beta": beta.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x, gamma, beta
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify((2, 5))
-    verify((2, 5), axis=0)
-    verify((2, 5, 6))
-
-
-@tvm.testing.uses_gpu
-def test_forward_group_norm():
-    def verify(shape, num_groups=1):
-        x = np.random.uniform(size=shape).astype("float32")
-        gamma = np.random.uniform(size=(shape[1])).astype("float32")
-        beta = np.random.uniform(size=(shape[1])).astype("float32")
-        ref_res = mx.nd.GroupNorm(
-            data=mx.nd.array(x),
-            gamma=mx.nd.array(gamma),
-            beta=mx.nd.array(beta),
-            num_groups=num_groups,
-        )
-        mx_sym = mx.sym.GroupNorm(
-            mx.sym.var("x"), mx.sym.var("gamma"), mx.sym.var("beta"), num_groups=num_groups
-        )
-        shape_dict = {"x": x.shape, "gamma": gamma.shape, "beta": beta.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x, gamma, beta
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify((1, 4, 2), num_groups=4)
-    # TODO(trevmorr): MXNet GroupNorm implementation is bugged for cases when num_groups != num_channels
-    # https://github.com/apache/incubator-mxnet/pull/18199
-    # verify((1, 4, 2, 3), num_groups=2)
-    # verify((1, 4, 2, 3))
-
-
-@tvm.testing.uses_gpu
-def test_forward_one_hot():
-    def verify(indices_shape, depth, on_value, off_value, dtype):
-        x = np.random.randint(0, 5, size=indices_shape)
-        ref_res = mx.nd.one_hot(mx.nd.array(x), depth, on_value, off_value, dtype)
-        mx_sym = mx.sym.one_hot(mx.sym.var("x"), depth, on_value, off_value, dtype)
-        shape_dict = {"x": x.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x.astype("float32")
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify((3,), 3, 1, 0, "int32")
-    verify((3,), 3, 1.0, 0.0, "float32")
-    verify((2, 2), 5, 2, -2, "int32")
-    verify((2, 2), 5, 0.5, -0.5, "float32")
-    verify((3, 2, 4, 5), 6, 1, 0, "int32")
-    verify((3, 2, 4, 5), 6, 1.0, 0.0, "float32")
-
-
-@tvm.testing.uses_gpu
-def test_forward_pad():
-    def verify(data_shape, out_shape, mode, pad_width, constant_value=0.0):
-        data = mx.sym.var("data")
-        mx_sym = mx.sym.pad(data, mode=mode, pad_width=pad_width, constant_value=constant_value)
-        verify_mxnet_frontend_impl(mx_sym, data_shape=data_shape, out_shape=out_shape)
-
-    verify(
-        data_shape=(1, 1, 3, 5),
-        out_shape=(1, 1, 6, 12),
-        mode="constant",
-        pad_width=(0, 0, 0, 0, 1, 2, 3, 4),
-    )
-    verify(
-        data_shape=(1, 1, 3, 5),
-        out_shape=(1, 1, 6, 12),
-        mode="constant",
-        pad_width=(0, 0, 0, 0, 1, 2, 3, 4),
-        constant_value=3.0,
-    )
-    verify(
-        data_shape=(1, 1, 3, 5),
-        out_shape=(1, 1, 6, 12),
-        mode="edge",
-        pad_width=(0, 0, 0, 0, 1, 2, 3, 4),
-    )
-    verify(
-        data_shape=(1, 1, 3, 5),
-        out_shape=(1, 1, 6, 12),
-        mode="reflect",
-        pad_width=(0, 0, 0, 0, 1, 2, 3, 4),
-    )
-    verify(
-        data_shape=(1, 1, 3, 5, 7),
-        out_shape=(1, 1, 6, 12, 18),
-        mode="constant",
-        pad_width=(0, 0, 0, 0, 1, 2, 3, 4, 5, 6),
-    )
-    verify(
-        data_shape=(1, 1, 3, 5, 7),
-        out_shape=(1, 1, 6, 12, 18),
-        mode="constant",
-        pad_width=(0, 0, 0, 0, 1, 2, 3, 4, 5, 6),
-        constant_value=3.0,
-    )
-    verify(
-        data_shape=(1, 1, 3, 5, 7),
-        out_shape=(1, 1, 6, 12, 18),
-        mode="edge",
-        pad_width=(0, 0, 0, 0, 1, 2, 3, 4, 5, 6),
-    )
-    verify(
-        data_shape=(1, 1, 3, 5, 7),
-        out_shape=(1, 1, 6, 12, 18),
-        mode="reflect",
-        pad_width=(0, 0, 0, 0, 1, 2, 3, 4, 5, 6),
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_slice():
-    def verify(data_shape, out_shape, begin, end):
-        data = mx.sym.var("data")
-        mx_sym = mx.sym.slice(data, begin=begin, end=end)
-        verify_mxnet_frontend_impl(mx_sym, data_shape=data_shape, out_shape=out_shape)
-
-    verify(data_shape=(1, 1, 10), out_shape=(1, 1, 8), begin=(0, 0, 2), end=(1, 1, 10))
-    verify(
-        data_shape=(1, 1, 10), out_shape=(1, 1, 8), begin=(None, None, 2), end=(None, None, None)
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_convolution():
-    def verify(data_shape, kernel_size, stride, pad, num_filter, is_depthwise=False):
-        if is_depthwise:
-            groups = data_shape[1]
-            weight_shape = (
-                data_shape[1],
-                num_filter // groups,
-            ) + kernel_size
-        else:
-            groups = 1
-            weight_shape = (
-                num_filter,
-                data_shape[1],
-            ) + kernel_size
-        x = np.random.uniform(size=data_shape).astype("float32")
-        weight = np.random.uniform(size=weight_shape).astype("float32")
-        bias = np.random.uniform(size=num_filter).astype("float32")
-        ref_res = mx.nd.Convolution(
-            data=mx.nd.array(x),
-            weight=mx.nd.array(weight),
-            bias=mx.nd.array(bias),
-            kernel=kernel_size,
-            stride=stride,
-            pad=pad,
-            num_filter=num_filter,
-            num_group=groups,
-        )
-        mx_sym = mx.sym.Convolution(
-            mx.sym.var("x"),
-            mx.sym.var("weight"),
-            mx.sym.var("bias"),
-            kernel=kernel_size,
-            stride=stride,
-            pad=pad,
-            num_filter=num_filter,
-            num_group=groups,
-        )
-        shape_dict = {"x": x.shape, "weight": weight.shape, "bias": bias.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x, weight, bias
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3)
-
-    verify(data_shape=(1, 1, 1024 * 16), kernel_size=(17,), stride=(2,), pad=(8,), num_filter=4)
-    verify(data_shape=(20, 1, 1024 * 16), kernel_size=(17,), stride=(2,), pad=(8,), num_filter=4)
-    verify(data_shape=(1, 8, 1024 * 16), kernel_size=(17,), stride=(2,), pad=(8,), num_filter=4)
-    verify(data_shape=(20, 8, 1024 * 16), kernel_size=(17,), stride=(2,), pad=(8,), num_filter=4)
-    verify(data_shape=(1, 1, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
-    verify(data_shape=(20, 1, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
-    verify(data_shape=(1, 8, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
-    verify(data_shape=(20, 8, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
-    verify(
-        data_shape=(1, 8, 32, 32),
-        kernel_size=(3, 3),
-        stride=(1, 1),
-        pad=(1, 1),
-        num_filter=8,
-        is_depthwise=True,
-    )
-    verify(
-        data_shape=(1, 1, 16, 16, 16),
-        kernel_size=(3, 3, 3),
-        stride=(1, 1, 1),
-        pad=(1, 1, 1),
-        num_filter=2,
-    )
-    verify(
-        data_shape=(20, 1, 16, 16, 16),
-        kernel_size=(3, 3, 3),
-        stride=(1, 1, 1),
-        pad=(1, 1, 1),
-        num_filter=2,
-    )
-    verify(
-        data_shape=(1, 8, 16, 16, 16),
-        kernel_size=(3, 3, 3),
-        stride=(2, 2, 2),
-        pad=(1, 1, 1),
-        num_filter=2,
-    )
-    verify(
-        data_shape=(20, 8, 16, 16, 16),
-        kernel_size=(3, 3, 3),
-        stride=(1, 1, 1),
-        pad=(1, 1, 1),
-        num_filter=2,
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_deconvolution():
-    def verify(data_shape, kernel_size, stride, pad, num_filter):
-        weight_shape = (data_shape[1], num_filter) + kernel_size
-        x = np.random.uniform(size=data_shape).astype("float32")
-        weight = np.random.uniform(size=weight_shape).astype("float32")
-        bias = np.random.uniform(size=num_filter).astype("float32")
-        ref_res = mx.nd.Deconvolution(
-            data=mx.nd.array(x),
-            weight=mx.nd.array(weight),
-            bias=mx.nd.array(bias),
-            kernel=kernel_size,
-            stride=stride,
-            pad=pad,
-            num_filter=num_filter,
-            no_bias=False,
-        )
-        mx_sym = mx.sym.Deconvolution(
-            mx.sym.var("x"),
-            mx.sym.var("weight"),
-            mx.sym.var("bias"),
-            kernel=kernel_size,
-            stride=stride,
-            pad=pad,
-            num_filter=num_filter,
-            no_bias=False,
-        )
-        shape_dict = {"x": x.shape, "weight": weight.shape, "bias": bias.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x, weight, bias
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify(data_shape=(1, 1, 1024 * 16), kernel_size=(17,), stride=(2,), pad=(8,), num_filter=4)
-    verify(data_shape=(20, 1, 1024 * 16), kernel_size=(17,), stride=(2,), pad=(8,), num_filter=4)
-    verify(data_shape=(1, 8, 1024 * 16), kernel_size=(17,), stride=(2,), pad=(8,), num_filter=4)
-    verify(data_shape=(20, 8, 1024 * 16), kernel_size=(17,), stride=(2,), pad=(8,), num_filter=4)
-    verify(data_shape=(1, 1, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
-    verify(data_shape=(20, 1, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
-    verify(data_shape=(1, 8, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
-    verify(data_shape=(20, 8, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
-
-
-@tvm.testing.uses_gpu
-def test_forward_cond():
-    def verify(a_np, b_np):
-        a_nd, b_nd = mx.nd.array(a_np), mx.nd.array(b_np)
-        pred = a_nd * b_nd < 5
-        then_func = lambda: (a_nd + 5) * (b_nd + 5)
-        else_func = lambda: (a_nd - 5) * (b_nd - 5)
-        ref_res = mx.nd.contrib.cond(pred, then_func, else_func)
-
-        a_sym, b_sym = mx.sym.var("a"), mx.sym.var("b")
-        pred = a_sym * b_sym < 5
-        then_func = lambda: (a_sym + 5) * (b_sym + 5)
-        else_func = lambda: (a_sym - 5) * (b_sym - 5)
-        mx_sym = mx.sym.contrib.cond(pred, then_func, else_func)
-
-        shape_dict = {"a": a_np.shape, "b": b_np.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["debug", "vm"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    a_np, b_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3)
-
-    verify(np.asarray([1.0], "float32"), np.asarray([2.0], "float32"))
-    verify(np.asarray([4.0], "float32"), np.asarray([3.0], "float32"))
-
-
-@tvm.testing.uses_gpu
-def test_forward_amp_cast():
-    def verify(from_dtype, to_dtype):
-        from_np = np.random.uniform(size=(1, 3, 18)).astype(from_dtype)
-        x_var = mx.sym.var("x", dtype=from_dtype)
-        mx_sym = mx.sym.amp_cast(x_var, dtype=to_dtype)
-        shape_dict = {"x": (1, 3, 18)}
-        dtype_dict = {"x": from_dtype}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "vm", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    from_np
-                )
-                assert op_res.dtype == to_dtype, op_res.dtype
-                tvm.testing.assert_allclose(op_res.numpy(), from_np.astype(to_dtype))
-
-    verify("float32", "float16")
-    verify("float16", "float32")
-
-
-@tvm.testing.uses_gpu
-def test_forward_amp_multicast():
-    def verify(dtypes, cast_narrow, expected_dtype):
-        x_nps = [np.random.uniform(size=(1, 3, 18)).astype(dtype) for dtype in dtypes]
-        x_vars = [mx.sym.var(str(i), dtype=dtype) for i, dtype in enumerate(dtypes)]
-        mx_sym = mx.sym.amp_multicast(*x_vars, cast_narrow=cast_narrow, num_outputs=len(dtypes))
-        shape_dict = {}
-        dtype_dict = {}
-        for i, dtype in enumerate(dtypes):
-            shape_dict[str(i)] = (1, 3, 18)
-            dtype_dict[str(i)] = dtype
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "vm", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    *x_nps
-                )
-                for i, res in enumerate(op_res):
-                    assert res.dtype == expected_dtype, res.dtype
-                    tvm.testing.assert_allclose(res.numpy(), x_nps[i].astype(expected_dtype))
-
-    verify(["float32", "float16"], False, "float32")
-    verify(["float32", "float16"], True, "float16")
-    verify(["float32", "float32"], False, "float32")
-    verify(["float32", "float32"], True, "float32")
-    verify(["float16", "float16"], False, "float16")
-    verify(["float16", "float16"], True, "float16")
-
-
-@tvm.testing.uses_gpu
-def test_forward_unravel_index():
-    def verify(x, shape, dtype):
-        a_np = np.array(x).astype(dtype)
-        mx_sym = _mx_symbol(mx.sym, "unravel_index", [mx.sym.var("a"), shape])
-        ref_res = _mx_symbol(mx.nd, "unravel_index", [mx.nd.array(a_np), shape])
-        shapes = {"a": a_np.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "vm", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    a_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    for dtype in ["int32", "int64"]:
-        verify([0, 1, 2, 3], [2, 2], dtype)
-        verify([144, 13, 45], [6, 7, 10, 2], dtype)
-        verify([456], [6, 7, 10, 2], dtype)
-
-    # In below example, 5 is out of bound for array of size 4.
-    # MXNet implementation provides different result than TVM
-    # TVM implementation is inline with Tensorflow
-    # Ideally error should be thrown just like Numpy
-    # verify([0, 1, 2, 5], [2, 2], dtype)
-
-
-@tvm.testing.uses_gpu
-def test_forward_swap_axis():
-    def _verify_swap_axis(in_shape, out_shape, dim1, dim2):
-        data = mx.sym.var("data")
-        mx_sym = mx.sym.swapaxes(data, dim1, dim2)
-        verify_mxnet_frontend_impl(mx_sym, in_shape, out_shape)
-
-    _verify_swap_axis((4, 5), (5, 4), 0, 1)
-    _verify_swap_axis((2, 4, 4, 5), (2, 5, 4, 4), 1, 3)
-    # MXNet errors out when dim1 == dim2
-    # _verify_swap_axis((4, 5), (5, 4), 0, 0)
-
-
-@tvm.testing.uses_gpu
-def test_forward_depth_to_space():
-    def verify(shape, blocksize=2):
-        x = np.random.uniform(size=shape).astype("float32")
-        ref_res = mx.nd.depth_to_space(mx.nd.array(x), blocksize)
-        mx_sym = mx.sym.depth_to_space(mx.sym.var("x"), blocksize)
-        shape_dict = {
-            "x": x.shape,
-        }
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify((1, 18, 3, 3), 3)
-
-
-@tvm.testing.uses_gpu
-def test_forward_space_to_depth():
-    def verify(shape, blocksize=2):
-        x = np.random.uniform(size=shape).astype("float32")
-        ref_res = mx.nd.space_to_depth(mx.nd.array(x), blocksize)
-        mx_sym = mx.sym.space_to_depth(mx.sym.var("x"), blocksize)
-        shape_dict = {
-            "x": x.shape,
-        }
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify((1, 1, 9, 9), 3)
-
-
-@tvm.testing.uses_gpu
-def test_forward_correlation():
-    def verify(data_shape, kernel_size, max_displacement, stride1, stride2, pad_size, is_multiply):
-        data1 = np.random.uniform(size=data_shape).astype("float32")
-        data2 = np.random.uniform(size=data_shape).astype("float32")
-        ref_res = mx.nd.Correlation(
-            data1=mx.nd.array(data1),
-            data2=mx.nd.array(data2),
-            kernel_size=kernel_size,
-            max_displacement=max_displacement,
-            stride1=stride1,
-            stride2=stride2,
-            pad_size=pad_size,
-            is_multiply=is_multiply,
-        )
-        mx_sym = mx.sym.Correlation(
-            data1=mx.sym.var("data1"),
-            data2=mx.sym.var("data2"),
-            kernel_size=kernel_size,
-            max_displacement=max_displacement,
-            stride1=stride1,
-            stride2=stride2,
-            pad_size=pad_size,
-            is_multiply=is_multiply,
-        )
-        shape_dict = {"data1": data1.shape, "data2": data2.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    data1, data2
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify(
-        (1, 3, 10, 10),
-        kernel_size=1,
-        max_displacement=4,
-        stride1=1,
-        stride2=1,
-        pad_size=4,
-        is_multiply=False,
-    )
-    verify(
-        (5, 1, 15, 15),
-        kernel_size=1,
-        max_displacement=5,
-        stride1=1,
-        stride2=1,
-        pad_size=5,
-        is_multiply=False,
-    )
-    verify(
-        (5, 1, 15, 15),
-        kernel_size=1,
-        max_displacement=5,
-        stride1=1,
-        stride2=1,
-        pad_size=5,
-        is_multiply=True,
-    )
-    verify(
-        (5, 1, 15, 15),
-        kernel_size=1,
-        max_displacement=10,
-        stride1=1,
-        stride2=2,
-        pad_size=10,
-        is_multiply=True,
-    )
-    verify(
-        (5, 1, 4, 4),
-        kernel_size=3,
-        max_displacement=1,
-        stride1=1,
-        stride2=1,
-        pad_size=2,
-        is_multiply=True,
-    )
-    verify(
-        (5, 1, 4, 4),
-        kernel_size=3,
-        max_displacement=1,
-        stride1=2,
-        stride2=1,
-        pad_size=2,
-        is_multiply=True,
-    )
-    verify(
-        (5, 1, 4, 4),
-        kernel_size=3,
-        max_displacement=1,
-        stride1=2,
-        stride2=1,
-        pad_size=2,
-        is_multiply=False,
-    )
-    verify(
-        (5, 1, 6, 4),
-        kernel_size=3,
-        max_displacement=1,
-        stride1=2,
-        stride2=1,
-        pad_size=2,
-        is_multiply=False,
-    )
-    verify(
-        (5, 1, 11, 11),
-        kernel_size=5,
-        max_displacement=1,
-        stride1=1,
-        stride2=1,
-        pad_size=2,
-        is_multiply=False,
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_arange_like():
-    def verify(data_shape, start=None, step=None, axis=None):
-        attrs = {}
-        if start is not None:
-            attrs["start"] = start
-        if step is not None:
-            attrs["step"] = step
-        if axis is not None:
-            attrs["axis"] = axis
-        data = mx.sym.var("data")
-        data_np = np.random.uniform(size=data_shape).astype("float32")
-        ref_res = mx.nd.contrib.arange_like(mx.nd.array(data_np), **attrs)
-
-        mx_sym = mx.sym.contrib.arange_like(data, **attrs)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph"]:
-                op_res = relay.create_executor(
-                    kind, mod=mod, device=dev, target=target
-                ).evaluate()()
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify(data_shape=(3,), start=0.0, step=1.0)
-    verify(data_shape=(3, 4, 5), start=0.0, step=1.0)
-    verify(data_shape=(3, 4, 5), start=0.0, step=1.0, axis=-1)
-    verify(data_shape=(3, 4, 5), start=2.0, step=3.0, axis=1)
-
-
-@tvm.testing.uses_gpu
-def test_forward_interleaved_matmul_selfatt_qk():
-    def verify(batch, seq_length, num_heads, head_dim):
-        data_shape = (seq_length, batch, num_heads * head_dim * 3)
-        data = mx.sym.var("data")
-        data_np = np.random.uniform(size=data_shape).astype("float32")
-        ref_res = mx.nd.contrib.interleaved_matmul_selfatt_qk(mx.nd.array(data_np), heads=num_heads)
-
-        mx_sym = mx.sym.contrib.interleaved_matmul_selfatt_qk(data, heads=num_heads)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    data_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-    verify(1, 10, 3, 16)
-    verify(3, 10, 6, 8)
-
-
-@tvm.testing.uses_gpu
-def test_forward_interleaved_matmul_selfatt_valatt():
-    def verify(batch, seq_length, num_heads, head_dim):
-        data_shape = (seq_length, batch, num_heads * head_dim * 3)
-        weight_shape = (batch * num_heads, seq_length, seq_length)
-        data = mx.sym.var("data")
-        weight = mx.sym.var("weight")
-        data_np = np.random.uniform(size=data_shape).astype("float32")
-        weight_np = np.random.uniform(size=weight_shape).astype("float32")
-        ref_res = mx.nd.contrib.interleaved_matmul_selfatt_valatt(
-            mx.nd.array(data_np), mx.nd.array(weight_np), heads=num_heads
-        )
-
-        mx_sym = mx.sym.contrib.interleaved_matmul_selfatt_valatt(data, weight, heads=num_heads)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape, "weight": weight_shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    data=data_np, weight=weight_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-    verify(1, 10, 4, 16)
-    verify(3, 10, 6, 8)
-
-
-@tvm.testing.uses_gpu
-def test_forward_box_nms():
-    def verify(
-        data_shape,
-        overlap_thresh=0.5,
-        valid_thresh=0,
-        topk=1,
-        coord_start=2,
-        score_index=1,
-        id_index=0,
-        force_suppress=False,
-        in_format="corner",
-    ):
-        dtype = "float32"
-        data = np.random.uniform(low=0, high=1, size=data_shape).astype(dtype)
-        ref_res = mx.nd.contrib.box_nms(
-            mx.nd.array(data),
-            overlap_thresh=overlap_thresh,
-            valid_thresh=valid_thresh,
-            topk=topk,
-            coord_start=coord_start,
-            score_index=score_index,
-            id_index=id_index,
-            force_suppress=force_suppress,
-            background_id=-1,
-            in_format=in_format,
-            out_format=in_format,
-        )
-        mx_sym = mx.sym.contrib.box_nms(
-            mx.sym.var("data"),
-            overlap_thresh=overlap_thresh,
-            valid_thresh=valid_thresh,
-            topk=topk,
-            coord_start=coord_start,
-            score_index=score_index,
-            id_index=id_index,
-            force_suppress=force_suppress,
-            background_id=-1,
-            in_format=in_format,
-            out_format=in_format,
-        )
-        shape_dict = {"data": data_shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            if tvm.contrib.thrust.can_use_thrust(
-                tvm.target.Target(target + " -libs=thrust"), "tvm.contrib.thrust.sort"
-            ):
-                target += " -libs=thrust"
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    data
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify((1, 10, 6))
-    # No valid boxes
-    verify((1, 10, 6), valid_thresh=1)
-
-
-@tvm.testing.uses_gpu
-def test_forward_box_decode():
-    def verify(data_shape, anchor_shape, stds=[1, 1, 1, 1], clip=-1, in_format="corner"):
-        dtype = "float32"
-        data = np.random.uniform(low=-2, high=2, size=data_shape).astype(dtype)
-        anchors = np.random.uniform(low=-2, high=2, size=anchor_shape).astype(dtype)
-        ref_res = mx.nd.contrib.box_decode(
-            mx.nd.array(data),
-            mx.nd.array(anchors),
-            stds[0],
-            stds[1],
-            stds[2],
-            stds[3],
-            clip,
-            in_format,
-        )
-        mx_sym = mx.sym.contrib.box_decode(
-            mx.sym.var("data"),
-            mx.sym.var("anchors"),
-            stds[0],
-            stds[1],
-            stds[2],
-            stds[3],
-            clip,
-            in_format,
-        )
-        shape_dict = {"data": data_shape, "anchors": anchor_shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    data, anchors
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify((1, 10, 4), (1, 10, 4))
-    verify((4, 10, 4), (1, 10, 4))
-    verify((1, 10, 4), (1, 10, 4), stds=[2, 3, 0.5, 1.5])
-    verify((1, 10, 4), (1, 10, 4), clip=1)
-    verify((1, 10, 4), (1, 10, 4), in_format="center")
-
-
-@tvm.testing.uses_gpu
-def test_forward_softmax():
-    def verify(data_shape, axis, use_length, length):
-        dtype = "float32"
-        x = np.random.uniform(low=-100, high=100, size=data_shape).astype(dtype)
-        if use_length:
-            ref_res = mx.nd.softmax(
-                data=mx.nd.array(x),
-                length=mx.nd.array(length, dtype="int32"),
-                axis=axis,
-                use_length=use_length,
-            )
-            mx_sym = mx.symbol.softmax(
-                data=mx.sym.var("data"),
-                length=mx.sym.var("length"),
-                axis=axis,
-                use_length=use_length,
-            )
-            shape_dict = {"data": data_shape, "length": (length.shape)}
-            dtype_dict = {"data": dtype, "length": "int32"}
-            mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype_dict)
-        else:
-            ref_res = mx.nd.softmax(data=mx.nd.array(x), axis=axis)
-            mx_sym = mx.symbol.softmax(data=mx.sym.var("data"), axis=axis)
-            shape_dict = {"data": data_shape}
-            mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                func = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()
-                if use_length:
-                    op_res = func(x, length)
-                else:
-                    op_res = func(x)
-
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify((2, 3, 5), -1, False, None)
-    verify((2, 3, 5), 2, False, None)
-    verify((2, 3), -1, True, np.array([2, 1]).astype("int32"))
-    verify((2, 3, 4), -1, True, np.array([[3, 4, 2], [2, 1, 1]]).astype("int32"))
-    verify((2, 3, 4), 2, True, np.array([[3, 4, 2], [1, 2, 1]]).astype("int32"))
-
-
-@pytest.mark.skipif(not hasattr(mx.sym.np, "pad"), reason="mx.sym.np.pad hasn't been publish yet")
-@pytest.mark.parametrize(
-    "data_shape, pad_width",
-    [
-        ((1, 1, 3, 5), ((0, 0), (0, 0), (1, 2), (3, 4))),
-        ((1, 1, 3, 5, 7), ((0, 0), (0, 0), (1, 2), (3, 4), (5, 6))),
-    ],
-)
-@pytest.mark.parametrize("mode", ["constant", "edge", "reflect"])
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32"])
-@pytest.mark.parametrize("constant_value", [0.0, 3.0])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_npi_pad(data_shape, pad_width, mode, dtype, constant_value, target, dev, kind):
-    data_np = np.random.uniform(size=data_shape).astype(dtype)
-    data = mx.sym.var("data")
-    if mode == "constant":
-        ref_res = np.pad(data_np, mode=mode, pad_width=pad_width, constant_values=constant_value)
-        mx_sym = mx.sym.np.pad(
-            data.as_np_ndarray(), mode=mode, pad_width=pad_width, constant_values=constant_value
-        )
-    else:
-        ref_res = np.pad(data_np, mode=mode, pad_width=pad_width)
-        mx_sym = mx.sym.np.pad(data.as_np_ndarray(), mode=mode, pad_width=pad_width)
-    mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape}, dtype=dtype)
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(data_np)
-    tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
-
-
-@pytest.mark.skipif(
-    not hasattr(mx.sym.np, "pad"), reason="test'll abort with Mxnet 1.x, skip for now"
-)
-@pytest.mark.parametrize("data_shape", [(2, 2, 2), (2, 7, 2)])
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32", "bool"])
-@pytest.mark.parametrize("axes", [(1, 0, 2), None])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_npi_transpose(data_shape, axes, dtype, target, dev, kind):
-    data_np = np.random.uniform(size=data_shape).astype(dtype)
-    data = mx.sym.var("data")
-    ref_res = mx.np.transpose(mx.np.array(data_np), axes=axes)
-    mx_sym = mx.sym.np.transpose(data.as_np_ndarray(), axes=axes)
-    mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape}, dtype=dtype)
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(data_np)
-    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.parametrize(
-    "data_shape1, data_shape2, axis",
-    [
-        ((2, 2), (2, 2), 1),
-        ((2, 4), (2, 3), 1),
-        ((1, 3, 2), (1, 3, 5), 2),
-        ((1, 3, 3), (1, 3, 3), 1),
-        ((1, 3), (1, 3), 0),
-    ],
-)
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32"])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_npi_concatenate(data_shape1, data_shape2, axis, dtype, target, dev, kind):
-    data_np1 = np.random.uniform(size=data_shape1).astype(dtype)
-    data_np2 = np.random.uniform(size=data_shape2).astype(dtype)
-    data1 = mx.sym.var("data1")
-    data2 = mx.sym.var("data2")
-    ref_res = mx.np.concatenate([mx.np.array(data_np1), mx.np.array(data_np2)], axis=axis)
-    mx_sym = mx.sym.np.concatenate([data1.as_np_ndarray(), data2.as_np_ndarray()], axis=axis)
-    mod, _ = relay.frontend.from_mxnet(
-        mx_sym, shape={"data1": data_shape1, "data2": data_shape2}, dtype=dtype
-    )
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-        data_np1, data_np2
-    )
-    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.parametrize(
-    "data_shape1, data_shape2, axis",
-    [
-        ((3,), (3,), 0),
-        ((3,), (3,), -1),
-        ((1, 3, 2), (1, 3, 2), 2),
-        ((1, 3, 3), (1, 3, 3), 1),
-        ((1, 3), (1, 3), 0),
-    ],
-)
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32"])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_npi_stack(data_shape1, data_shape2, axis, dtype, target, dev, kind):
-    data_np1 = np.random.uniform(size=data_shape1).astype(dtype)
-    data_np2 = np.random.uniform(size=data_shape2).astype(dtype)
-    data1 = mx.sym.var("data1")
-    data2 = mx.sym.var("data2")
-    ref_res = mx.np.stack([mx.np.array(data_np1), mx.np.array(data_np2)], axis=axis)
-    mx_sym = mx.sym.np.stack([data1.as_np_ndarray(), data2.as_np_ndarray()], axis=axis)
-    mod, _ = relay.frontend.from_mxnet(
-        mx_sym, shape={"data1": data_shape1, "data2": data_shape2}, dtype=dtype
-    )
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-        data_np1, data_np2
-    )
-    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.parametrize("data_shape", [(2, 2, 2), (2, 7, 2), (2, 2, 2, 1, 2, 3, 1), (1, 8)])
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32", "bool"])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_np_copy(data_shape, dtype, target, dev, kind):
-    data_np = np.random.uniform(size=data_shape).astype(dtype)
-    data = mx.sym.var("data")
-    ref_res = mx.np.copy(mx.np.array(data_np))
-    mx_sym = mx.sym.np.copy(data.as_np_ndarray())
-    mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape}, dtype=dtype)
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(data_np)
-    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32", "bool"])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-@pytest.mark.parametrize(
-    "data_shape,out_shape,reverse",
-    [
-        ((2, 3, 8), (-2, -2, 2, -1), False),
-        ((8, 3, 3, 3, 4, 4), (-6, 2, -1, -4), False),
-        ((8, 3, 3, 3, 4, 4), (-5, -4), False),
-        ((1, 8, 3, 3, 3, 4, 4), (-3, -5, -4), False),
-        ((8, 1, 3, 4), (-2, -3, -1), False),
-        ((8, 3, 3, 3, 3, 8), (-4, -5), True),
-        ((8, 3, 2, 4, 8), (-4, -1, 2, -6), True),
-        ((3, 2, 4, 8, 1, 1), (-4, -1, 2, -6, -5, -3), True),
-        ((2, 4, 1, 8), (-4, -3, -1, 2, -6), True),
-    ],
-)
-def test_forward_npx_reshape(data_shape, out_shape, dtype, target, reverse, dev, kind):
-    data_np = np.random.uniform(size=data_shape).astype(dtype)
-    data = mx.sym.var("data")
-    ref_res = mx.npx.reshape(mx.np.array(data_np), newshape=out_shape, reverse=reverse)
-    mx_sym = mx.sym.npx.reshape(data.as_np_ndarray(), newshape=out_shape, reverse=reverse)
-    mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape}, dtype=dtype)
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(data_np)
-    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.parametrize(
-    "data_shape", [(2, 2, 2), (2, 7, 2), (2, 2, 2, 1, 2, 3, 1), (1, 8), (2, 2), (1, 3)]
-)
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32"])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_npi_binary(data_shape, dtype, target, dev, kind):
-    ref_ops = [mx.np.power, mx.np.multiply, mx.np.add, mx.np.subtract, mx.np.less]
-    mx_ops = [
-        mx.sym.np.power,
-        mx.sym.np.multiply,
-        mx.sym.np.add,
-        mx.sym.np.subtract,
-        mx.sym.np.less,
-    ]
-    for i in range(len(ref_ops)):
-        ref_op = ref_ops[i]
-        mx_op = mx_ops[i]
-        # mx.np.power only support float type
-        if ref_op == mx.np.power and dtype not in ["float64", "float32"]:
-            continue
-        data_np1 = np.random.uniform(size=data_shape).astype(dtype)
-        data_np2 = np.random.uniform(size=data_shape).astype(dtype)
-        data1 = mx.sym.var("lhs")
-        data2 = mx.sym.var("rhs")
-        ref_res = ref_op(mx.np.array(data_np1), mx.np.array(data_np2))
-        mx_sym = mx_op(data1.as_np_ndarray(), data2.as_np_ndarray())
-        mod, _ = relay.frontend.from_mxnet(
-            mx_sym, shape={"lhs": data_shape, "rhs": data_shape}, dtype=dtype
-        )
-        op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-            data_np1, data_np2
-        )
-        tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.parametrize(
-    "data_shape", [(2, 2, 2), (2, 7, 2), (2, 2, 2, 1, 2, 3, 1), (1, 8), (2, 2), (1, 3)]
-)
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32"])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("scalar", [1.0, 2.0, 3.0, 4.0])
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_npi_binary_scalar(data_shape, dtype, scalar, target, dev, kind):
-    ref_ops = [mx.np.power, mx.np.multiply, mx.np.add, mx.np.subtract, mx.np.true_divide]
-    mx_ops = [
-        mx.sym.np.power,
-        mx.sym.np.multiply,
-        mx.sym.np.add,
-        mx.sym.np.subtract,
-        mx.sym.np.true_divide,
-    ]
-    for i in range(len(ref_ops)):
-        ref_op = ref_ops[i]
-        mx_op = mx_ops[i]
-        # mx.np.power only support float type
-        if ref_op == mx.np.power and dtype not in ["float64", "float32"]:
-            continue
-        data_np1 = np.random.uniform(size=data_shape).astype(dtype)
-        data1 = mx.sym.var("lhs")
-        ref_res = ref_op(mx.np.array(data_np1), scalar)
-        mx_sym = mx_op(data1.as_np_ndarray(), scalar)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape={"lhs": data_shape}, dtype=dtype)
-        op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-            data_np1
-        )
-        tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.parametrize(
-    "data_shape", [(2, 2, 2), (2, 7, 2), (2, 2, 2, 1, 2, 3, 1), (1, 8), (2, 2), (1, 3)]
-)
-@pytest.mark.parametrize("dtype", ["float64", "float32"])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_npi_tanh(data_shape, dtype, target, dev, kind):
-    data_np1 = np.random.uniform(size=data_shape).astype(dtype)
-    data1 = mx.sym.var("data")
-    ref_res = mx.np.tanh(mx.np.array(data_np1))
-    mx_sym = mx.sym.np.tanh(data1.as_np_ndarray())
-    mod, _ = relay.frontend.from_mxnet(mx_sym, shape={"data": data_shape}, dtype=dtype)
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(data_np1)
-    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.skipif(not hasattr(mx.np, "where"), reason="mx.np.where hasn't been publish yet")
-@pytest.mark.parametrize(
-    "data_shape,cond_shape",
-    [[(2, 2, 2), (2, 2, 2)], [(2, 7, 2), (7, 2)], [(2, 2), (1, 2)], [(1, 3), (3, 3)]],
-)
-@pytest.mark.parametrize("data_dtype", ["float64", "float32", "int64", "int32", "bool"])
-@pytest.mark.parametrize("cond_dtype", ["float64", "float32", "int64", "int32", "bool"])
-@pytest.mark.parametrize("scalar", [1.0, 2.0])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_npi_where_rscalar(
-    data_shape, cond_shape, data_dtype, cond_dtype, scalar, target, dev, kind
-):
-    if data_dtype == "bool":
-        scalar = scalar == 0.0
-    cond_np = np.random.uniform(size=cond_shape).astype(cond_dtype)
-    data_np = np.random.uniform(size=data_shape).astype(data_dtype)
-    cond = mx.sym.var("condition")
-    data = mx.sym.var("x")
-    ref_res = mx.np.where(mx.np.array(cond_np), mx.np.array(data_np), scalar)
-    mx_sym = mx.sym.np.where(cond.as_np_ndarray(), data.as_np_ndarray(), scalar)
-    dtypeDic = {}
-    dtypeDic["condition"] = cond_dtype
-    dtypeDic["x"] = data_dtype
-    mod, _ = relay.frontend.from_mxnet(
-        mx_sym, shape={"condition": cond_shape, "x": data_shape}, dtype=dtypeDic
-    )
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-        cond_np, data_np
-    )
-    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32", "bool"])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-@pytest.mark.parametrize(
-    "data_shape, axis, indices_or_sections, squeeze_axis",
-    [
-        ((3, 2, 1), 1, 2, False),
-        ((3, 2, 1), 0, 3, False),
-        ((3, 2, 1), 0, 3, True),
-        ((3, 2, 1), 0, (1, 2), False),
-    ],
-)
-def test_forward_split_v2(
-    data_shape, axis, dtype, indices_or_sections, squeeze_axis, target, dev, kind
-):
-    data_np = np.random.uniform(size=data_shape).astype(dtype)
-    data = mx.sym.var("data")
-    ref_res = mx.ndarray.split_v2(
-        mx.nd.array(data_np), indices_or_sections, axis=axis, squeeze_axis=squeeze_axis
-    )
-    mx_sym = mx.sym.split_v2(
-        data.as_nd_ndarray(), indices_or_sections, axis=axis, squeeze_axis=squeeze_axis
-    )
-    mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape}, dtype=dtype)
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(data_np)
-    op_res_ = []
-    for arr in op_res:
-        op_res_.append(arr.numpy().tolist())
-    ref_res_ = []
-    for arr in ref_res:
-        ref_res_.append(arr.asnumpy().tolist())
-    tvm.testing.assert_allclose(op_res_, ref_res_, rtol=1e-5)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/mxnet/test_graph.py b/tests/python/frontend/mxnet/test_graph.py
deleted file mode 100644
index 63ce763f1725..000000000000
--- a/tests/python/frontend/mxnet/test_graph.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import mxnet as mx
-
-import tvm
-from tvm import te
-from tvm import relay
-from tvm.relay import transform
-import model_zoo
-
-
-def compare_graph(lhs_mod, rhs_mod):
-    lhs_mod = transform.InferType()(lhs_mod)
-    rhs_mod = transform.InferType()(rhs_mod)
-    tvm.ir.assert_structural_equal(lhs_mod["main"], rhs_mod["main"])
-
-
-def test_mlp():
-    shape = {"data": (1, 1, 28, 28)}
-    mx_fun = model_zoo.mx_mlp()
-    mod, _ = relay.frontend.from_mxnet(mx_fun, shape=shape)
-    relay_fun = model_zoo.relay_mlp()
-    compare_graph(mod, relay_fun)
-
-
-def test_vgg():
-    shape = {"data": (1, 3, 224, 224)}
-    for n in [11, 13, 16, 19]:
-        mx_sym = model_zoo.mx_vgg(n)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape=shape)
-        relay_mod = model_zoo.relay_vgg(n)
-        compare_graph(mod, relay_mod)
-
-
-def test_resnet():
-    shape = {"data": (1, 3, 224, 224)}
-    for n in [18, 34, 50, 101]:
-        mx_sym = model_zoo.mx_resnet(n)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape=shape)
-        relay_mod = model_zoo.relay_resnet(n)
-        compare_graph(mod, relay_mod)
-
-
-def test_squeezenet():
-    shape = {"data": (1, 3, 224, 224)}
-    for version in ["1.0", "1.1"]:
-        mx_sym = model_zoo.mx_squeezenet(version)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape)
-        relay_mod = model_zoo.relay_squeezenet(version)
-        compare_graph(mod, relay_mod)
-
-
-def test_inception_v3():
-    shape = {"data": (1, 3, 299, 299)}
-    mx_sym = model_zoo.mx_inception_v3()
-    mod, _ = relay.frontend.from_mxnet(mx_sym, shape)
-    relay_mod = model_zoo.relay_inception_v3()
-    compare_graph(mod, relay_mod)
-
-
-def test_dqn():
-    shape = {"data": (1, 4, 84, 84)}
-    mx_sym = model_zoo.mx_dqn()
-    mod, _ = relay.frontend.from_mxnet(mx_sym, shape)
-    relay_mod = model_zoo.relay_dqn()
-    compare_graph(mod, relay_mod)
-
-
-def test_dcgan():
-    shape = {"data": (2, 100)}
-    mx_sym = model_zoo.mx_dcgan()
-    mod, _ = relay.frontend.from_mxnet(mx_sym, shape)
-    relay_mod = model_zoo.relay_dcgan(batch_size=2)
-    compare_graph(mod, relay_mod)
-
-
-def test_multi_outputs():
-    xshape = (10, 27)
-    yshape = (10, 9)
-
-    def mx_compose(F, **kwargs):
-        x = F.sym.Variable("x")
-        y = F.sym.Variable("y")
-        z = F.sym.split(x, **kwargs)
-        return F.sym.broadcast_sub(F.sym.broadcast_add(z[0], z[2]), y)
-
-    def relay_compose(F, **kwargs):
-        x = F.var("x", shape=xshape)
-        y = F.var("y", shape=yshape)
-        z = F.split(x, **kwargs)
-        z = F.subtract(F.add(z[0], z[2]), y)
-        func = relay.Function(relay.analysis.free_vars(z), z)
-        return tvm.IRModule.from_expr(func)
-
-    mx_sym = mx_compose(mx, num_outputs=3, axis=1)
-    mod, _ = relay.frontend.from_mxnet(mx_sym, shape={"x": xshape, "y": yshape})
-    relay_mod = relay_compose(relay, indices_or_sections=3, axis=1)
-    compare_graph(mod, relay_mod)
-
-
-if __name__ == "__main__":
-    test_mlp()
-    test_resnet()
-    test_vgg()
-    test_multi_outputs()
-    test_dqn()
-    test_dcgan()
-    test_squeezenet()
-    test_inception_v3()
diff --git a/tests/python/frontend/mxnet/test_qnn_ops_utils.py b/tests/python/frontend/mxnet/test_qnn_ops_utils.py
deleted file mode 100644
index adbb0a74558b..000000000000
--- a/tests/python/frontend/mxnet/test_qnn_ops_utils.py
+++ /dev/null
@@ -1,224 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import numpy as np
-import tvm
-from tvm import relay
-from tvm.contrib import graph_executor
-from tvm.relay.frontend.mxnet_qnn_op_utils import (
-    dequantize_mxnet_min_max,
-    quantize_mxnet_min_max,
-    get_mkldnn_int8_scale,
-    get_mkldnn_uint8_scale,
-    quantize_conv_bias_mkldnn_from_var,
-)
-
-
-def test_mkldnn_dequantize():
-    def dequantize_test_driver(in_dtype, quant_args, in_data, verify_output_data):
-        shape = in_data.shape
-        input_data = relay.var("input_data", shape=shape, dtype=in_dtype)
-        min_range = quant_args["min_range"]
-        max_range = quant_args["max_range"]
-        dequantized_output = dequantize_mxnet_min_max(
-            input_data, min_range=min_range, max_range=max_range, in_dtype=in_dtype
-        )
-        mod = relay.Function(relay.analysis.free_vars(dequantized_output), dequantized_output)
-        mod = tvm.IRModule.from_expr(mod)
-        with tvm.transform.PassContext(opt_level=3):
-            graph, lib, params = relay.build(mod, "llvm", params=None)
-            rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
-            rt_mod.set_input(input_data=in_data)
-            rt_mod.set_input(**params)
-            rt_mod.run()
-            res = rt_mod.get_output(0).numpy()
-            assert np.allclose(res, verify_output_data)
-            assert res.dtype == np.float32
-
-    def test_uint8_to_float32():
-        data = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]).astype("uint8").reshape((2, 5))
-        output = (
-            np.array(
-                [
-                    0.0,
-                    0.25048923,
-                    0.50097847,
-                    0.7514677,
-                    1.0019569,
-                    62.8728,
-                    63.123287,
-                    63.373775,
-                    63.624268,
-                    63.874756,
-                ]
-            )
-            .astype("float32")
-            .reshape((2, 5))
-        )
-        quant_args = {"min_range": -63.5, "max_range": 64}
-        dequantize_test_driver(
-            in_dtype="uint8", quant_args=quant_args, in_data=data, verify_output_data=output
-        )
-
-    def test_int8_to_float32():
-        data = (
-            np.array([-126, -125, -124, -123, -122, 123, 124, 125, 126, 127])
-            .astype("int8")
-            .reshape((2, 5))
-        )
-        output = (
-            np.array(
-                [
-                    -63.247063,
-                    -62.745102,
-                    -62.24314,
-                    -61.74118,
-                    -61.23922,
-                    61.74118,
-                    62.24314,
-                    62.745102,
-                    63.247063,
-                    63.749023,
-                ]
-            )
-            .astype("float32")
-            .reshape((2, 5))
-        )
-        dequantize_args = {"min_range": -63.5, "max_range": 64}
-        dequantize_test_driver(
-            in_dtype="int8", quant_args=dequantize_args, in_data=data, verify_output_data=output
-        )
-
-    test_uint8_to_float32()
-    test_int8_to_float32()
-
-
-def test_mkldnn_quantize():
-    def quantize_test_driver(out_dtype, quant_args, in_data, verify_output_data):
-        shape = in_data.shape
-        input_data = relay.var("input_data", shape=shape, dtype="float32")
-        min_range = quant_args["min_range"]
-        max_range = quant_args["max_range"]
-        quantized_output, _, _ = quantize_mxnet_min_max(
-            input_data, min_range=min_range, max_range=max_range, out_dtype=out_dtype
-        )
-        mod = relay.Function(relay.analysis.free_vars(quantized_output), quantized_output)
-        mod = tvm.IRModule.from_expr(mod)
-        with tvm.transform.PassContext(opt_level=3):
-            graph, lib, params = relay.build(mod, "llvm", params=None)
-            rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
-            rt_mod.set_input(input_data=in_data)
-            rt_mod.set_input(**params)
-            rt_mod.run()
-            res = rt_mod.get_output(0).numpy()
-            assert np.allclose(res, verify_output_data)
-            assert res.dtype == verify_output_data.dtype
-
-    def test_float32_to_uint8():
-        data = (
-            np.array(
-                [
-                    0.0,
-                    0.25048923,
-                    0.50097847,
-                    0.7514677,
-                    1.0019569,
-                    62.8728,
-                    63.123287,
-                    63.373775,
-                    63.624268,
-                    63.874756,
-                ]
-            )
-            .astype("float32")
-            .reshape((2, 5))
-        )
-        output = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]).astype("uint8").reshape((2, 5))
-
-        quant_args = {"min_range": -63.5, "max_range": 64}
-        quantize_test_driver(
-            out_dtype="uint8", quant_args=quant_args, in_data=data, verify_output_data=output
-        )
-
-    def test_float32_to_int8():
-        data = (
-            np.array(
-                [
-                    -63.247063,
-                    -62.745102,
-                    -62.24314,
-                    -61.74118,
-                    -61.23922,
-                    61.74118,
-                    62.24314,
-                    62.745102,
-                    63.247063,
-                    63.749023,
-                ]
-            )
-            .astype("float32")
-            .reshape((2, 5))
-        )
-        output = (
-            np.array([-126, -125, -124, -123, -122, 123, 124, 125, 126, 127])
-            .astype("int8")
-            .reshape((2, 5))
-        )
-
-        quant_args = {"min_range": -63.5, "max_range": 64}
-        quantize_test_driver(
-            out_dtype="int8", quant_args=quant_args, in_data=data, verify_output_data=output
-        )
-
-    test_float32_to_uint8()
-    test_float32_to_int8()
-
-
-def test_get_mkldnn_int8_scale():
-    range_min = -3.904039
-    range_max = 3.904039
-    expected = 0.03061991354976495
-    output = get_mkldnn_int8_scale(range_max=range_max, range_min=range_min)
-    assert np.allclose(output, expected)
-
-
-def test_get_mkldnn_uint8_scale():
-    range_min = 0.0
-    range_max = 55.77269
-    expected = 0.21828841189047482
-    output = get_mkldnn_uint8_scale(range_max=range_max, range_min=range_min)
-    assert np.allclose(output, expected)
-
-
-def test_quantize_conv_bias_mkldnn_from_var():
-    bias_var = relay.var("bias", shape=(3,), dtype="float32")
-    bias_scale = tvm.nd.array(np.array([0.5, 0.6, 0.7]))
-    output = quantize_conv_bias_mkldnn_from_var(bias_var, bias_scale)
-    assert isinstance(output, tvm.relay.expr.Call)
-    attrs = output.attrs
-    assert attrs.axis == 0
-    assert attrs.out_dtype == "int32"
-    assert output.op.name == "qnn.quantize"
-    assert output.args[1].data == bias_scale
-
-
-if __name__ == "__main__":
-    test_mkldnn_dequantize()
-    test_mkldnn_quantize()
-    test_get_mkldnn_int8_scale()
-    test_get_mkldnn_uint8_scale()
-    test_quantize_conv_bias_mkldnn_from_var()
diff --git a/tests/python/frontend/oneflow/test_forward.py b/tests/python/frontend/oneflow/test_forward.py
deleted file mode 100644
index fda5f1b723c7..000000000000
--- a/tests/python/frontend/oneflow/test_forward.py
+++ /dev/null
@@ -1,963 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=arguments-differ, unused-argument
-"""Unit tests for various models and operators"""
-import os
-
-import numpy as np
-import oneflow as flow
-from packaging import version as package_version
-import tvm
-import tvm.testing
-import tvm.topi.testing
-from tvm import relay
-
-MODEL_HOME = "test_model"
-
-
-def mkdir(path):
-    # init
-    path = path.strip()
-    path = path.rstrip("\\")
-
-    if not os.path.exists(path):
-        os.makedirs(path)
-    else:
-        print(f"{path} is already here")
-
-
-def rmdir(path):
-    for root, dirs, files in os.walk(path, topdown=False):
-        for name in files:
-            os.remove(os.path.join(root, name))
-        for name in dirs:
-            os.rmdir(os.path.join(root, name))
-    os.removedirs(path)
-
-
-def assert_shape(out1, out2):
-    if out1.shape != out2.shape:
-        msg = "Output shapes {} and {} don't match"
-        raise AssertionError(msg.format(out1.shape, out2.shape))
-
-
-class OneFlowGraph(flow.nn.Graph):
-    def __init__(self, module):
-        super().__init__()
-        self.m = module
-
-    def build(self, x):
-        out = self.m(x)
-        return out
-
-
-class OneFlowGraphV2(flow.nn.Graph):
-    def __init__(self, module):
-        super().__init__()
-        self.m = module
-
-    def build(self, input_1, input_2, input_3):
-        out = self.m(input_1, input_2, input_3)
-        return out
-
-
-class OneFlowGraphV3(flow.nn.Graph):
-    def __init__(self, module):
-        super().__init__()
-        self.m = module
-
-    def build(self, input_1, input_2):
-        out = self.m(input_1, input_2)
-        return out
-
-
-def get_oneflow_output(model, inputs):
-    flow_output = model(inputs)
-    return flow_output.numpy()
-
-
-def get_oneflow_concat_output(model, input1, input2, input3):
-    flow_output = model(input1, input2, input3).numpy()
-    return flow_output
-
-
-def get_oneflow_elementwise_output(model, input1, input2):
-    return model(input1, input2).numpy()
-
-
-def get_tvm_output(graph, model_path, inputs: flow.tensor, target="llvm", dtype="float32"):
-    """Generic function to execute and get tvm output"""
-    inputs_numpy = inputs.numpy()
-    if target == "llvm":
-        device = tvm.cpu(0)
-    elif target == "cuda":
-        device = tvm.cuda(0)
-
-    mod, params = relay.frontend.from_oneflow(graph, model_path)
-    with tvm.transform.PassContext(opt_level=10):
-        intrp = relay.build_module.create_executor("graph", mod, device, target)
-    tvm_output = intrp.evaluate()(tvm.nd.array(inputs_numpy.astype(dtype)), **params).numpy()
-    return tvm_output
-
-
-def get_tvm_concat_output(
-    graph,
-    model_path,
-    input1: flow.tensor,
-    input2: flow.tensor,
-    input3: flow.tensor,
-    target="llvm",
-    dtype="float32",
-):
-    """Generic function to execute and get tvm concat output"""
-    input1_numpy = input1.numpy()
-    input2_numpy = input2.numpy()
-    input3_numpy = input3.numpy()
-    if target == "llvm":
-        device = tvm.cpu(0)
-    elif target == "cuda":
-        device = tvm.cuda(0)
-
-    mod, params = relay.frontend.from_oneflow(graph, model_path)
-    with tvm.transform.PassContext(opt_level=10):
-        intrp = relay.build_module.create_executor("graph", mod, device, target)
-    tvm_output = intrp.evaluate()(
-        tvm.nd.array(input1_numpy.astype(dtype)),
-        tvm.nd.array(input2_numpy.astype(dtype)),
-        tvm.nd.array(input3_numpy.astype(dtype)),
-        **params,
-    ).numpy()
-    return tvm_output
-
-
-def get_tvm_elementwise_output(
-    graph,
-    model_path,
-    input1: flow.tensor,
-    input2: flow.tensor,
-    target="llvm",
-    dtype="float32",
-):
-    """Generic function to execute and get tvm elementwise output"""
-    input1_numpy = input1.numpy()
-    input2_numpy = input2.numpy()
-    if target == "llvm":
-        device = tvm.cpu(0)
-    elif target == "cuda":
-        device = tvm.cuda(0)
-
-    mod, params = relay.frontend.from_oneflow(graph, model_path)
-    with tvm.transform.PassContext(opt_level=10):
-        intrp = relay.build_module.create_executor("graph", mod, device, target)
-    tvm_output = intrp.evaluate()(
-        tvm.nd.array(input1_numpy.astype(dtype)),
-        tvm.nd.array(input2_numpy.astype(dtype)),
-        **params,
-    ).numpy()
-    return tvm_output
-
-
-def verify_conv(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs=flow.tensor(
-        np.random.rand(1, 3, 224, 224),
-        dtype=flow.float32,
-    ),
-    device="llvm",
-):
-    """verify_conv"""
-    if device == "cuda":
-        model.to(device)
-        inputs = inputs.to(device)
-
-    graph = OneFlowGraph(model)
-    graph._compile(inputs)
-
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_output(graph, inputs)
-    out_tvm = get_tvm_output(graph, MODEL_HOME, inputs, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-def verify_pool(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs=flow.tensor(
-        np.random.rand(1, 3, 224, 224),
-        dtype=flow.float32,
-    ),
-    device="llvm",
-):
-    """verify_pool"""
-    if device == "cuda":
-        model.to(device)
-        inputs = inputs.to(device)
-
-    graph = OneFlowGraph(model)
-    graph._compile(inputs)
-
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_output(graph, inputs)
-    out_tvm = get_tvm_output(graph, MODEL_HOME, inputs, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-def verify_normalization(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs=flow.tensor(
-        np.random.rand(1, 3, 224, 224),
-        dtype=flow.float32,
-    ),
-    device="llvm",
-):
-    """verify_normalization"""
-    if device == "cuda":
-        model.to(device)
-        inputs = inputs.to(device)
-
-    graph = OneFlowGraph(model)
-    graph._compile(inputs)
-
-    # write params
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_output(graph, inputs)
-    out_tvm = get_tvm_output(graph, MODEL_HOME, inputs, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-def verify_upsample(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs=flow.tensor(
-        np.random.rand(1, 3, 50, 50),
-        dtype=flow.float32,
-    ),
-    device="llvm",
-):
-    """verify_upsample"""
-    if device == "cuda":
-        model.to(device)
-        inputs = inputs.to(device)
-
-    graph = OneFlowGraph(model)
-    graph._compile(inputs)
-
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_output(graph, inputs)
-    out_tvm = get_tvm_output(graph, MODEL_HOME, inputs, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-def verify_convtran(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs=flow.tensor(
-        np.random.rand(1, 3, 50, 50),
-        dtype=flow.float32,
-    ),
-    device="llvm",
-):
-    """verify_convtran"""
-    if device == "cuda":
-        model.to(device)
-        inputs = inputs.to(device)
-
-    graph = OneFlowGraph(model)
-    graph._compile(inputs)
-
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_output(graph, inputs)
-    out_tvm = get_tvm_output(graph, MODEL_HOME, inputs, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-def verify_activation(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs=flow.tensor(
-        np.random.rand(10, 10),
-        dtype=flow.float32,
-    ),
-    device="llvm",
-):
-    """verify_activation"""
-    if device == "cuda":
-        model.to(device)
-        inputs = inputs.to(device)
-
-    graph = OneFlowGraph(model)
-    graph._compile(inputs)
-
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_output(graph, inputs)
-    out_tvm = get_tvm_output(graph, MODEL_HOME, inputs, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-def verify_math(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs=flow.tensor(
-        np.random.rand(100, 1),
-        dtype=flow.float32,
-    ),
-    device="llvm",
-):
-    """verify_math"""
-    if device == "cuda":
-        model.to(device)
-        inputs = inputs.to(device)
-
-    graph = OneFlowGraph(model)
-    graph._compile(inputs)
-
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_output(graph, inputs)
-    out_tvm = get_tvm_output(graph, MODEL_HOME, inputs, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-def verify_matmul(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs1=flow.tensor(np.random.randn(2, 5), dtype=flow.float32),
-    inputs2=flow.tensor(np.random.randn(5, 2), dtype=flow.float32),
-    device="llvm",
-):
-    """verify_matmul"""
-    if device == "cuda":
-        model.to(device)
-        inputs1 = inputs1.to(device)
-        inputs2 = inputs2.to(device)
-
-    graph = OneFlowGraphV3(model)
-    graph._compile(inputs1, inputs2)
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_elementwise_output(graph, inputs1, inputs2)
-    out_tvm = get_tvm_elementwise_output(graph, MODEL_HOME, inputs1, inputs2, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-def verify_concat(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs1=flow.tensor(np.random.randn(2, 5, 5, 4), dtype=flow.float32),
-    inputs2=flow.tensor(np.random.randn(2, 5, 5, 2), dtype=flow.float32),
-    inputs3=flow.tensor(np.random.randn(2, 5, 5, 3), dtype=flow.float32),
-    device="llvm",
-):
-    """verify_concat"""
-    if device == "cuda":
-        model.to(device)
-        inputs1 = inputs1.to(device)
-        inputs2 = inputs2.to(device)
-        inputs3 = inputs3.to(device)
-
-    graph = OneFlowGraphV2(model)
-    graph._compile(inputs1, inputs2, inputs3)
-
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_concat_output(graph, inputs1, inputs2, inputs3)
-    out_tvm = get_tvm_concat_output(graph, MODEL_HOME, inputs1, inputs2, inputs3, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-# defs/nn
-@tvm.testing.uses_gpu
-def test_conv2d():
-    """Conv2d"""
-
-    class Conv2dModel(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = flow.nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
-
-        def forward(self, x):
-            x = self.conv(x)
-            return x
-
-    if os.path.exists(MODEL_HOME):
-        rmdir(MODEL_HOME)
-
-    model = Conv2dModel()
-    model.eval()
-
-    for device in ["llvm"]:
-        verify_conv(model, device=device)
-
-
-@tvm.testing.uses_gpu
-def test_pool2d():
-    """Pool2d"""
-
-    class MaxPool2dModel(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.pool = flow.nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-
-        def forward(self, x):
-            x = self.pool(x)
-            return x
-
-    class AvgPool2dModel(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.pool = flow.nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
-
-        def forward(self, x):
-            x = self.pool(x)
-            return x
-
-    class AdaptiveAvgPool2dModel(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.pool = flow.nn.AdaptiveAvgPool2d((None, 7))
-
-        def forward(self, x):
-            x = self.pool(x)
-            return x
-
-    if os.path.exists(MODEL_HOME):
-        rmdir(MODEL_HOME)
-
-    model1 = MaxPool2dModel().eval()
-    model2 = AvgPool2dModel().eval()
-    model3 = AdaptiveAvgPool2dModel().eval()
-
-    for device in ["llvm"]:
-        verify_pool(model1, device=device)
-        verify_pool(model2, device=device)
-        verify_pool(model3, device=device)
-
-
-@tvm.testing.uses_gpu
-def test_normalization():
-    """Normalization"""
-
-    class BatchNorm2dModel(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.normalization = flow.nn.BatchNorm2d(3)
-
-        def forward(self, x):
-            x = self.normalization(x)
-            return x
-
-    if os.path.exists(MODEL_HOME):
-        rmdir(MODEL_HOME)
-
-    model = BatchNorm2dModel().eval()
-
-    for device in ["llvm"]:
-        verify_normalization(model, device=device)
-
-
-@tvm.testing.uses_gpu
-def test_upsample():
-    """Upsample"""
-
-    class UpsampleModel(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.upsample = flow.nn.Upsample(scale_factor=2.0, mode="nearest")
-
-        def forward(self, x):
-            x = self.upsample(x)
-            return x
-
-    class UpsampleBiliModel(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.upsample = flow.nn.UpsamplingBilinear2d(scale_factor=2.0)
-
-        def forward(self, x):
-            x = self.upsample(x)
-            return x
-
-    if os.path.exists(MODEL_HOME):
-        rmdir(MODEL_HOME)
-
-    model1 = UpsampleModel().eval()
-    model2 = UpsampleBiliModel().eval()
-
-    for device in ["llvm"]:
-        verify_upsample(model1, device=device)
-        verify_upsample(model2, device=device)
-
-
-@tvm.testing.uses_gpu
-def test_convtran():
-    """ConvTran"""
-
-    class ConvTranModel(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.convtran = flow.nn.ConvTranspose2d(3, 4, (3, 5), stride=(2, 1), padding=(4, 2))
-
-        def forward(self, x):
-            x = self.convtran(x)
-            return x
-
-    if os.path.exists(MODEL_HOME):
-        rmdir(MODEL_HOME)
-
-    model = ConvTranModel().eval()
-
-    for device in ["llvm"]:
-        verify_convtran(model, device=device)
-
-
-@tvm.testing.uses_gpu
-def test_activation():
-    """Activation"""
-
-    class Softmax(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.Softmax()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class Softplus(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.Softplus()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class Softsign(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.Softsign()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class Tanh(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.Tanh()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class ReLU(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.ReLU()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class ReLU6(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.ReLU6()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class PReLU(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.PReLU()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class SELU(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.SELU()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class SiLU(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.SiLU()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class LeakyReLU(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.LeakyReLU(0.1)
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class GELU(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.GELU()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class HardTanh(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.Hardtanh()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class TensorSoftmax(flow.nn.Module):
-        def forward(self, x):
-            x = x.softmax(dim=-1)
-            return x
-
-    class Threshold(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.Threshold(0.5, 0.2)
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    if os.path.exists(MODEL_HOME):
-        rmdir(MODEL_HOME)
-
-    model1 = Softmax().eval()
-    model2 = Softplus().eval()  # pylint: disable=unused-variable
-    model3 = Softsign().eval()
-    model4 = Tanh().eval()
-    model5 = ReLU().eval()
-    model6 = ReLU6().eval()
-    model7 = PReLU().eval()
-    model8 = SELU().eval()
-    model9 = SiLU().eval()
-    model10 = LeakyReLU().eval()
-    model11 = GELU().eval()
-    model12 = HardTanh().eval()
-    model13 = TensorSoftmax().eval()
-
-    for device in ["llvm"]:
-        verify_activation(model1, device=device)
-        verify_activation(model2, device=device)
-        verify_activation(model3, device=device)
-        verify_activation(model4, device=device)
-        verify_activation(model5, device=device)
-        verify_activation(model6, device=device)
-        verify_activation(model7, device=device)
-        verify_activation(model8, device=device)
-        verify_activation(model9, device=device)
-        verify_activation(model10, device=device)
-        verify_activation(model11, device=device)
-        verify_activation(model12, device=device)
-        verify_activation(
-            model13,
-            device=device,
-            inputs=flow.tensor(np.random.rand(1, 12, 197, 197).astype(np.float32)),
-        )
-
-    # Threshold was introduced in the version 0.8.0 of oneflow
-    if package_version.parse(flow.__version__) >= package_version.parse("0.8.0"):
-        model14 = Threshold().eval()
-        verify_activation(model14, device="llvm")
-
-
-@tvm.testing.uses_gpu
-def test_math():
-    """Math"""
-
-    class Sigmoid(flow.nn.Module):
-        def forward(self, x):
-            return flow.sigmoid(x)
-
-    class Sign(flow.nn.Module):
-        def forward(self, x):
-            return flow.sign(x)
-
-    class Reciprocal(flow.nn.Module):
-        def forward(self, x):
-            return flow.reciprocal(x)
-
-    class Pow(flow.nn.Module):
-        def forward(self, x):
-            return flow.pow(x, 2.0)
-
-    class Log(flow.nn.Module):
-        def forward(self, x):
-            return flow.log(x)
-
-    class Log2(flow.nn.Module):
-        def forward(self, x):
-            return flow.log1p(x)
-
-    class Exp(flow.nn.Module):
-        def forward(self, x):
-            return flow.exp(x)
-
-    class Exp2(flow.nn.Module):
-        def forward(self, x):
-            return flow.expm1(x)
-
-    class Variance(flow.nn.Module):
-        def forward(self, x):
-            return flow.var(x, 1, unbiased=False, keepdim=True)
-
-    model1 = Sigmoid().eval()
-    model2 = Sign().eval()
-    model3 = Log().eval()
-    model4 = Log2().eval()
-    model5 = Exp().eval()
-    model6 = Exp2().eval()
-    model7 = Reciprocal().eval()
-    model8 = Pow().eval()
-    model9 = Variance().eval()
-
-    for device in ["llvm"]:
-        verify_math(model1, device=device)
-        verify_math(model2, device=device)
-        verify_math(model3, device=device)
-        verify_math(model4, device=device)
-        verify_math(model5, device=device)
-        verify_math(model6, device=device)
-        verify_math(model7, device=device)
-        verify_math(model8, device=device)
-        verify_math(model9, device=device)
-
-
-@tvm.testing.uses_gpu
-def test_slice():
-    """Slice"""
-
-    class Slice(flow.nn.Module):
-        def forward(self, x):
-            tup_list = [[None, None, None], [0, 5, 2], [0, 6, 3]]
-            out = flow.slice(x, slice_tup_list=tup_list)
-            return out
-
-    model = Slice().eval()
-
-    for device in ["llvm"]:
-        verify_math(
-            model, device=device, inputs=flow.tensor(np.random.randn(3, 6, 9).astype(np.float32))
-        )
-
-
-@tvm.testing.uses_gpu
-def test_concat():
-    """Concat"""
-
-    class Concat(flow.nn.Module):
-        def forward(self, input_1, input_2, input_3):
-            out = flow.cat([input_1, input_2, input_3], dim=-1)
-            return out
-
-    model = Concat().eval()
-
-    for device in ["llvm"]:
-        verify_concat(model, device=device)
-
-
-@tvm.testing.uses_gpu
-def test_add_constant():
-    """ConstantAdd"""
-
-    class ConstantAdd(flow.nn.Module):
-        def forward(self, x):
-            out = flow.add(1.0, x)
-            return out
-
-    model = ConstantAdd().eval()
-
-    for device in ["llvm"]:
-        verify_math(
-            model, device=device, inputs=flow.tensor(np.random.randn(3, 6, 9).astype(np.float32))
-        )
-
-
-@tvm.testing.uses_gpu
-def test_logical():
-    class LogicalGreater(flow.nn.Module):
-        def forward(self, x):
-            return x > 1.0
-
-    model1 = LogicalGreater().eval()
-
-    for device in ["llvm"]:
-        verify_math(
-            model1, device=device, inputs=flow.tensor(np.random.randn(3, 6, 9).astype(np.float32))
-        )
-
-
-@tvm.testing.uses_gpu
-def test_expand():
-    class Expand(flow.nn.Module):
-        def forward(self, x):
-            return x.expand(2, -1, -1)
-
-    model1 = Expand().eval()
-
-    for device in ["llvm"]:
-        verify_math(
-            model1, device=device, inputs=flow.tensor(np.random.randn(1, 6, 9).astype(np.float32))
-        )
-
-
-@tvm.testing.uses_gpu
-def test_matmul():
-    """MatMul"""
-
-    class MatMul(flow.nn.Module):
-        def forward(self, x, y):
-            return flow._C.matmul(x, y)
-
-    class MatMulTranspose(flow.nn.Module):
-        def forward(self, x, y):
-            return flow._C.matmul(x, y, transpose_b=True)
-
-    class BatchMatMul(flow.nn.Module):
-        def forward(self, x, y):
-            return flow._C.batch_matmul(x, y)
-
-    class BroadCastMatMul(flow.nn.Module):
-        def forward(self, x, y):
-            return flow._C.matmul(x, y)
-
-    model1 = MatMul().eval()
-    model2 = MatMulTranspose().eval()
-    model3 = BatchMatMul().eval()
-    model4 = BroadCastMatMul().eval()
-
-    for device in ["llvm"]:
-        verify_matmul(
-            model1,
-            device=device,
-            inputs1=flow.tensor(np.random.randn(2, 3).astype(np.float32)),
-            inputs2=flow.tensor(np.random.randn(3, 3).astype(np.float32)),
-        )
-        verify_matmul(
-            model2,
-            device=device,
-            inputs1=flow.tensor(np.random.randn(1, 2).astype(np.float32)),
-            inputs2=flow.tensor(np.random.randn(3, 2).astype(np.float32)),
-        )
-        verify_matmul(
-            model3,
-            device=device,
-            inputs1=flow.tensor(np.random.randn(2, 1, 2).astype(np.float32)),
-            inputs2=flow.tensor(np.random.randn(2, 2, 3).astype(np.float32)),
-        )
-        verify_matmul(
-            model4,
-            device=device,
-            inputs1=flow.tensor(np.random.randn(3, 8, 8, 16).astype(np.float32)),
-            inputs2=flow.tensor(np.random.randn(16, 8).astype(np.float32)),
-        )
-
-
-if __name__ == "__main__":
-    test_conv2d()
-    test_pool2d()
-    test_normalization()
-    test_upsample()
-    test_convtran()
-    test_activation()
-    test_math()
-    test_slice()
-    test_concat()
-    test_add_constant()
-    test_logical()
-    test_expand()
-    test_matmul()
-    rmdir("log")
diff --git a/tests/python/frontend/oneflow/test_vision_models.py b/tests/python/frontend/oneflow/test_vision_models.py
deleted file mode 100644
index 03478dc41e33..000000000000
--- a/tests/python/frontend/oneflow/test_vision_models.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name
-# pylint: disable=arguments-differ, unused-argument
-"""Unit tests for various models and operators"""
-import os
-
-import numpy as np
-import oneflow as flow
-from flowvision.models.alexnet import alexnet
-from flowvision.models.squeezenet import squeezenet1_0
-from flowvision.models.shufflenet_v2 import shufflenet_v2_x0_5
-from flowvision.models.mobilenet import mobilenet_v2
-from flowvision.models.ghostnet import ghostnet
-from flowvision.models.vision_transformer import vit_base_patch16_224
-import tvm
-import tvm.testing
-import tvm.topi.testing
-from tvm import relay
-
-MODEL_HOME = "test_model"
-
-
-def mkdir(path):
-    # init
-    path = path.strip()
-    path = path.rstrip("\\")
-
-    if not os.path.exists(path):
-        os.makedirs(path)
-    else:
-        print(f"{path} is already here")
-
-
-def rmdir(path):
-    for root, dirs, files in os.walk(path, topdown=False):
-        for name in files:
-            os.remove(os.path.join(root, name))
-        for name in dirs:
-            os.rmdir(os.path.join(root, name))
-    os.removedirs(path)
-
-
-def assert_shape(out1, out2):
-    if out1.shape != out2.shape:
-        msg = "Output shapes {} and {} don't match"
-        raise AssertionError(msg.format(out1.shape, out2.shape))
-
-
-class OneFlowGraph(flow.nn.Graph):
-    def __init__(self, module):
-        super().__init__()
-        self.m = module
-
-    def build(self, x):
-        out = self.m(x)
-        return out
-
-
-def get_oneflow_output(model, inputs):
-    flow_output = model(inputs)
-    return flow_output.numpy()
-
-
-def get_tvm_output(graph, model_path, inputs: flow.tensor, target="llvm", dtype="float32"):
-    """Generic function to execute and get tvm output"""
-    inputs_numpy = inputs.numpy()
-    if target == "llvm":
-        device = tvm.cpu(0)
-    elif target == "cuda":
-        device = tvm.cuda(0)
-
-    mod, params = relay.frontend.from_oneflow(graph, model_path)
-    with tvm.transform.PassContext(opt_level=10):
-        intrp = relay.build_module.create_executor("graph", mod, device, target)
-    tvm_output = intrp.evaluate()(tvm.nd.array(inputs_numpy.astype(dtype)), **params).numpy()
-    return tvm_output
-
-
-def verify_model(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs=flow.tensor(
-        np.random.rand(1, 3, 224, 224),
-        dtype=flow.float32,
-    ),
-    device="llvm",
-):
-    """Generic function to generate and compare oneflow and TVM output"""
-    if device == "cuda":
-        model.to(device)
-        inputs = inputs.to(device)
-
-    graph = OneFlowGraph(model)
-    graph._compile(inputs)
-
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_output(graph, inputs)
-    out_tvm = get_tvm_output(graph, MODEL_HOME, inputs, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-@tvm.testing.uses_gpu
-def test_vision_models():
-    """Vision models test"""
-
-    if os.path.exists(MODEL_HOME):
-        rmdir(MODEL_HOME)
-
-    vision_alexnet = alexnet().eval()
-    vision_squeezenet = squeezenet1_0().eval()
-    vision_shufflenet = shufflenet_v2_x0_5().eval()
-    vision_mobilenetv2 = mobilenet_v2().eval()
-    vision_ghostnet = ghostnet().eval()
-    vision_vit = vit_base_patch16_224().eval()
-
-    for device in ["llvm"]:
-        verify_model(vision_alexnet, device=device)
-        verify_model(vision_squeezenet, device=device)
-        verify_model(vision_shufflenet, device=device)
-        verify_model(vision_mobilenetv2, device=device)
-        verify_model(vision_ghostnet, device=device)
-        verify_model(vision_vit, device=device)
-
-
-if __name__ == "__main__":
-    test_vision_models()
-    rmdir("log")
diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py
deleted file mode 100644
index a81352bb679f..000000000000
--- a/tests/python/frontend/onnx/test_forward.py
+++ /dev/null
@@ -1,8716 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=unused-argument
-"""
-ONNX testcases
-================
-This article is a test script to test ONNX operator with Relay.
-"""
-import glob
-import os
-import platform
-import re
-import copy
-import tempfile
-import pytest
-import scipy
-import numpy as np
-
-import tvm
-import tvm.testing
-import tvm.topi.testing
-from tvm import relay
-from tvm.contrib import graph_executor, utils
-from tvm.relay.frontend.common import infer_type
-from tvm.relay.build_module import bind_params_by_name
-from relay.utils.tag_span import _create_span, _set_span, _verify_structural_equal_with_span
-
-import onnx
-import onnxruntime.backend
-from onnx import TensorProto, helper, mapping, numpy_helper
-from onnxruntime.quantization import CalibrationDataReader, quantize_static
-
-import torch
-import torchvision
-from torch.nn import Linear, Module, Sequential
-
-
-def get_input_data_shape_dict(graph_def, input_data):
-    """Get input data shape"""
-    if isinstance(input_data, list):
-        input_names = {}
-        shape_dict = {}
-        for i, _ in enumerate(input_data):
-            input_names[i] = graph_def.graph.input[i].name
-            input_ = input_data[i]
-
-            if input_ is None or not hasattr(input_, "shape") or input_.shape == ():
-                # Skip adding input shape data when the input data is None;
-                # This is to enable optional arguments for onnx operators.
-                continue
-
-            elif isinstance(input_, list):
-                shape_dict[input_names[i]] = (len(input_),)
-
-            else:
-                shape_dict[input_names[i]] = input_.shape
-
-    else:
-        input_names = graph_def.graph.input[0].name
-        shape_dict = {input_names: input_data.shape}
-
-    return input_names, shape_dict
-
-
-def get_tvm_output_with_vm(
-    graph_def,
-    input_data,
-    target,
-    dev,
-    opset=None,
-    freeze_params=False,
-    convert_config=None,
-    validate_structural_equal=True,
-):
-    """Generic function to execute and get tvm output with vm executor"""
-    if not isinstance(input_data, list):
-        input_data = [input_data]
-    _, shape_dict = get_input_data_shape_dict(graph_def, input_data)
-
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_onnx(
-            graph_def,
-            shape_dict,
-            opset=opset,
-            freeze_params=freeze_params,
-            convert_config=convert_config,
-        )
-        # handle the bfloat16 so we explicitly allocate
-        # bfloat16 arrays as input
-        for i, param in enumerate(mod["main"].params):
-            if param.type_annotation.dtype == "bfloat16":
-                input_data[i] = tvm.nd.empty(input_data[i].shape, "bfloat16").copyfrom(
-                    input_data[i]
-                )
-
-    if validate_structural_equal:
-        with tvm.testing.enable_span_filling():
-            mod_with_span, _ = relay.frontend.from_onnx(
-                graph_def,
-                shape_dict,
-                opset=opset,
-                freeze_params=freeze_params,
-                convert_config=convert_config,
-            )
-        tvm.ir.assert_structural_equal(mod, mod_with_span)
-
-    result = relay.create_executor("vm", mod=mod, device=dev, target=target).evaluate()(
-        *input_data, **params
-    )
-    if isinstance(result, tvm.runtime.NDArray):
-        return result.numpy()
-    return [r.numpy() for r in result]
-
-
-def get_tvm_output(
-    graph_def,
-    input_data,
-    target,
-    dev,
-    output_shape=None,
-    output_dtype="float32",
-    opset=None,
-    opt_level=1,
-    convert_config=None,
-):
-    """Generic function to execute and get tvm output"""
-    # TODO: Resolve the issues and remove the following lines
-    input_names, shape_dict = get_input_data_shape_dict(graph_def, input_data)
-
-    mod, params = relay.frontend.from_onnx(
-        graph_def, shape_dict, opset=opset, convert_config=convert_config
-    )
-
-    with tvm.transform.PassContext(opt_level=opt_level):
-        graph, lib, params = relay.build(mod, target, params=params)
-
-    m = graph_executor.create(graph, lib, dev)
-    # set inputs
-    if isinstance(input_data, list):
-        for i, _ in enumerate(input_names):
-            # Its possible for some onnx inputs to not be needed in the tvm
-            # module, confirm its present before setting.
-            # pylint: disable=unnecessary-list-index-lookup
-            m.set_input(input_names[i], tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
-    else:
-        m.set_input(input_names, tvm.nd.array(input_data.astype(input_data.dtype)))
-
-    m.set_input(**params)
-    # execute
-    m.run()
-    # get outputs
-    if isinstance(output_shape, list):
-        tvm_output_list = []
-        for i, _ in enumerate(output_shape):
-            tvm_output = m.get_output(i)
-            tvm_output_list.append(tvm_output.numpy())
-        return tvm_output_list
-    else:
-        tvm_output = m.get_output(0)
-        return tvm_output.numpy()
-
-
-def get_onnxruntime_output(model, inputs):
-    """Generic function to generate onnxruntime output"""
-    rep = onnxruntime.backend.prepare(model.SerializeToString(), "CPU")
-    if isinstance(inputs, list) and len(inputs) == 1:
-        inp = inputs[0]
-    else:
-        inp = inputs
-    output = rep.run(inp)
-    # Unpack output if there's only a single value.
-    if len(output) == 1:
-        output = output[0]
-    return output
-
-
-def verify_with_ort_with_inputs(
-    model,
-    inputs,
-    out_shape=None,
-    target=None,
-    dev=None,
-    use_vm=False,
-    opset=None,
-    freeze_params=False,
-    dtype="float32",
-    rtol=1e-5,
-    atol=1e-5,
-    apply_softmax=False,
-    opt_level=1,
-    convert_config=None,
-):
-    """verify_with_ort_with_inputs"""
-    if opset is not None:
-        model.opset_import[0].version = opset
-
-    ort_out = get_onnxruntime_output(model, inputs)
-    if use_vm:
-        tvm_out = get_tvm_output_with_vm(
-            model,
-            inputs,
-            target,
-            dev,
-            opset=opset,
-            freeze_params=freeze_params,
-            convert_config=convert_config,
-        )
-    else:
-        tvm_out = get_tvm_output(
-            model,
-            inputs,
-            target,
-            dev,
-            out_shape,
-            dtype,
-            opset=opset,
-            opt_level=opt_level,
-            convert_config=convert_config,
-        )
-
-    if not isinstance(tvm_out, list):
-        tvm_out = [tvm_out]
-    if not isinstance(ort_out, list):
-        ort_out = [ort_out]
-    for tvm_val, ort_val in zip(tvm_out, ort_out):
-        if apply_softmax:
-            ort_val = scipy.special.softmax(ort_val)
-            tvm_val = scipy.special.softmax(tvm_val)
-        tvm.testing.assert_allclose(ort_val, tvm_val, rtol=rtol, atol=atol)
-        assert ort_val.dtype == tvm_val.dtype
-
-
-def verify_with_ort(
-    model,
-    input_shapes,
-    out_shape=None,
-    target=None,
-    dev=None,
-    use_vm=False,
-    opset=None,
-    freeze_params=False,
-    dtype="float32",
-    rtol=1e-5,
-    atol=1e-5,
-):
-    """verify_with_ort"""
-    inputs = [np.random.uniform(size=ishape).astype(dtype) for ishape in input_shapes]
-    verify_with_ort_with_inputs(
-        model,
-        inputs,
-        out_shape=out_shape,
-        target=target,
-        dev=dev,
-        use_vm=use_vm,
-        opset=opset,
-        freeze_params=freeze_params,
-        dtype=dtype,
-        rtol=rtol,
-        atol=atol,
-    )
-
-
-def quantize_and_verify_with_ort(
-    onnx_model, input_names, input_shapes, target, dev, rtol=1e-5, atol=1e-5
-):
-    """quantize_and_verify_with_ort"""
-    input_arrays = [np.random.random(shape).astype("float32") for shape in input_shapes]
-
-    class RandomDataReader(CalibrationDataReader):
-        # pylint: disable=missing-class-docstring
-        def __init__(self, n=10):
-            input_dict = dict(zip(input_names, input_shapes))
-            self.data = iter(
-                [
-                    {
-                        name: np.random.random(shape).astype("float32")
-                        for name, shape in input_dict.items()
-                    }
-                    for _ in range(n)
-                ]
-            )
-
-        def get_next(self):
-            return next(self.data, None)
-
-    t_dir = tvm.contrib.utils.tempdir()
-    model_fp32 = os.path.join(t_dir.temp_dir, "model.onnx")
-    onnx.save_model(onnx_model, model_fp32)
-    model_quant = os.path.join(t_dir.temp_dir, "model.quant.onnx")
-    _ = quantize_static(  # pylint: disable=assignment-from-no-return
-        model_fp32, model_quant, RandomDataReader()
-    )
-    # opt_level=1 will cause error with qnn lowering
-    model = onnx.load(model_quant)
-    verify_with_ort_with_inputs(
-        model, input_arrays, opt_level=2, target=target, dev=dev, use_vm=True, rtol=rtol, atol=atol
-    )
-
-
-def make_constant_node(name, data_type, dims, vals):
-    return helper.make_node(
-        "Constant",
-        inputs=[],
-        outputs=[name],
-        value=helper.make_tensor(name=name, data_type=data_type, dims=dims, vals=vals),
-    )
-
-
-def is_version_greater_than(ver):
-    return "".join(re.findall(r"(\d+\.)(\d+\.)(\d)", onnx.__version__)[0]) > "".join(
-        re.findall(r"(\d+\.)(\d+\.)(\d)", ver)[0]
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_reshape(target, dev):
-    """test_reshape"""
-    in_shape = (4, 3, 3, 4)
-    ref_shape = (6, 2, 4, 3)
-
-    ref_array = np.array(ref_shape)
-    ref_node = onnx.helper.make_node(
-        "Constant",
-        inputs=[],
-        outputs=["ref_in"],
-        value=onnx.helper.make_tensor(
-            name="const_tensor",
-            data_type=onnx.TensorProto.INT32,
-            dims=ref_array.shape,
-            vals=ref_array.flatten().astype(int),
-        ),
-    )
-    reshape_node = helper.make_node("Reshape", ["in", "ref_in"], ["out"])
-
-    graph = helper.make_graph(
-        [ref_node, reshape_node],
-        "reshape_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(ref_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="reshape_test")
-
-    x = np.random.uniform(size=in_shape).astype("int32")
-    tvm_out = get_tvm_output(model, x, target, dev, ref_shape, "float32")
-    tvm.testing.assert_allclose(ref_shape, tvm_out.shape)
-
-
-@tvm.testing.parametrize_targets
-def test_double_reshape(target, dev):
-    """test_double_reshape"""
-    in_shape = (4, 3, 3, 4)
-    ref_shape = (6, 2, 4, 3)
-
-    ref_array = np.array(ref_shape)
-    ref_node = onnx.helper.make_node(
-        "Constant",
-        inputs=[],
-        outputs=["ref_in"],
-        value=onnx.helper.make_tensor(
-            name="const_tensor",
-            data_type=onnx.TensorProto.INT32,
-            dims=ref_array.shape,
-            vals=ref_array.flatten().astype(int),
-        ),
-    )
-    reshape_node1 = helper.make_node("Reshape", ["in", "ref_in"], ["out1"])
-    reshape_node2 = helper.make_node("Reshape", ["in", "ref_in"], ["out2"])
-    add_node = helper.make_node("Add", ["out1", "out2"], ["out"])
-
-    graph = helper.make_graph(
-        [ref_node, reshape_node1, reshape_node2, add_node],
-        "reshape_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(ref_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="reshape_test")
-
-    x = np.random.uniform(size=in_shape).astype("int32")
-    tvm_out = get_tvm_output(model, x, target, dev, ref_shape, "float32")
-    tvm.testing.assert_allclose(ref_shape, tvm_out.shape)
-
-
-@tvm.testing.parametrize_targets
-def test_expand(target, dev):
-    """test_expand"""
-
-    def _test_expand(name, data, shape, ref_data, dtype="int32"):
-        shape_array = np.array(shape)
-        if dtype == "int32":
-            shape_node = onnx.helper.make_node(
-                "Constant",
-                inputs=[],
-                outputs=["shape"],
-                value=onnx.helper.make_tensor(
-                    name="const_tensor",
-                    data_type=onnx.TensorProto.INT32,
-                    dims=shape_array.shape,
-                    vals=shape_array.flatten().astype("int32"),
-                ),
-            )
-        elif dtype == "int64":
-            shape_node = onnx.helper.make_node(
-                "Constant",
-                inputs=[],
-                outputs=["shape"],
-                value=onnx.helper.make_tensor(
-                    name="const_tensor",
-                    data_type=onnx.TensorProto.INT64,
-                    dims=shape_array.shape,
-                    vals=shape_array.flatten().astype("int64"),
-                ),
-            )
-        else:
-            raise TypeError("Invalid dtype")
-        expand_node = helper.make_node("Expand", ["in", "shape"], ["out"])
-
-        graph = helper.make_graph(
-            [shape_node, expand_node],
-            "expand_test",
-            inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(data.shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(ref_data.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name=name)
-
-        tvm_out = get_tvm_output_with_vm(model, data, target, dev, freeze_params=True)
-        tvm.testing.assert_allclose(ref_data, tvm_out)
-
-    in_shape = (3, 1)
-    shape = (3, 4)
-    data = np.random.uniform(size=in_shape).astype(np.float32)
-    ref_data = np.tile(data, 4)
-    _test_expand("expand_with_dim_unchanged_test", data, shape, ref_data, "int32")
-    _test_expand("expand_with_dim_unchanged_test", data, shape, ref_data, "int64")
-
-    in_shape = (3, 1)
-    shape = (2, 1, 6)
-    data = np.random.uniform(size=in_shape).astype(np.float32)
-    ref_data = data * np.ones(shape, dtype=np.float32)
-    _test_expand("expand_larger_target_shape_test", data, shape, ref_data, "int32")
-    _test_expand("expand_larger_target_shape_test", data, shape, ref_data, "int64")
-
-    in_shape = (1, 1)
-    shape = (3,)
-    data = np.random.uniform(size=in_shape).astype(np.float32)
-    ref_data = data * np.ones(shape, dtype=np.float32)
-    _test_expand("expand_smaller_target_shape_test", data, shape, ref_data, "int32")
-    _test_expand("expand_smaller_target_shape_test", data, shape, ref_data, "int64")
-
-
-@tvm.testing.parametrize_targets
-def test_depth_to_space(target, dev):
-    """test_depth_to_space"""
-
-    def verify_depth_to_space(inshape, outshape, mode, block_size):
-        node = onnx.helper.make_node(
-            "DepthToSpace", inputs=["x"], outputs=["y"], blocksize=block_size
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "depth_to_space_test",
-            inputs=[helper.make_tensor_value_info("x", TensorProto.FLOAT, list(inshape))],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(outshape))],
-        )
-
-        model = helper.make_model(graph, producer_name="depth_to_space_test")
-
-        verify_with_ort(model, [inshape], [outshape], target, dev)
-
-    # current onnx.checker use OpSet-1 version of DepthToSpace, which doesn't have a mode argument.
-    # TO-DO, we can add mode argument to test CRD mode and DCR mode
-    # in the future when we update to a newer onnx version.
-    verify_depth_to_space((1, 8, 2, 3), (1, 2, 4, 6), mode="CRD", block_size=2)
-
-
-@tvm.testing.parametrize_targets
-def test_space_to_depth(target, dev):
-    """test_space_to_depth"""
-
-    def verify_space_to_depth(inshape, outshape, block_size):
-        node = onnx.helper.make_node(
-            "SpaceToDepth", inputs=["x"], outputs=["y"], blocksize=block_size
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "space_to_depth_test",
-            inputs=[helper.make_tensor_value_info("x", TensorProto.FLOAT, list(inshape))],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(outshape))],
-        )
-
-        model = helper.make_model(graph, producer_name="space_to_depth_test")
-
-        verify_with_ort(model, [inshape], [outshape], target, dev)
-
-    verify_space_to_depth((1, 1, 4, 6), (1, 4, 2, 3), 2)
-
-
-@tvm.testing.parametrize_targets
-def test_shape(target, dev):
-    """test_shape"""
-    in_shape = (4, 3, 3, 4)
-    ref_shape = (6, 2, 4, 3)
-
-    ref_array = np.array(ref_shape)
-    ref_node = onnx.helper.make_node(
-        "Constant",
-        inputs=[],
-        outputs=["ref_in"],
-        value=onnx.helper.make_tensor(
-            name="const_tensor",
-            data_type=onnx.TensorProto.INT32,
-            dims=ref_array.shape,
-            vals=ref_array.flatten().astype(int),
-        ),
-    )
-    reshape_node = helper.make_node("Reshape", ["in", "ref_in"], ["out"])
-
-    shape_node = helper.make_node("Shape", ["out"], ["final_out"])
-
-    graph = helper.make_graph(
-        [ref_node, reshape_node, shape_node],
-        "shape_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("final_out", TensorProto.FLOAT, list(ref_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="shape_test")
-
-    x = np.random.uniform(size=in_shape).astype("int32")
-    tvm_out = get_tvm_output(model, x, target, dev, ref_shape, "int32")
-    tvm.testing.assert_allclose(ref_shape, tvm_out)
-
-
-@tvm.testing.parametrize_targets
-def test_power(target, dev):
-    """test_power"""
-
-    def _test_power_iteration(x_shape, y_shape):
-        if isinstance(y_shape, int):
-            y_shape = [y_shape]
-
-        x = np.random.uniform(size=x_shape).astype(np.float32)
-        y = np.random.uniform(size=y_shape).astype(np.float32)
-
-        np_res = np.power(x, y).astype(np.float32)
-
-        res = helper.make_node("Pow", ["x", "y"], ["out"])
-
-        graph = helper.make_graph(
-            [res],
-            "power_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape)),
-                helper.make_tensor_value_info("y", TensorProto.FLOAT, list(y_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(np_res.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="power_test")
-
-        tvm_out = get_tvm_output(model, [x, y], target, dev, np_res.shape)
-        tvm.testing.assert_allclose(np_res, tvm_out, rtol=1e-5, atol=1e-5)
-
-    _test_power_iteration((1, 3), (1))
-    _test_power_iteration((2, 3), (2, 3))
-    _test_power_iteration((2, 3), (1, 3))
-
-
-@tvm.testing.parametrize_targets
-def test_range(target, dev):
-    """test_range"""
-
-    def verify_range(start, limit, delta, dtype):
-        dtype_map = {
-            "float32": TensorProto.FLOAT,
-            "int32": TensorProto.INT32,
-            "int64": TensorProto.INT64,
-        }
-        dtype_onnx = dtype_map[dtype]
-        y = helper.make_node("Range", ["start", "limit", "delta"], ["output"])
-        graph = helper.make_graph(
-            [y],
-            "range_test",
-            inputs=[
-                helper.make_tensor_value_info("start", dtype_onnx, []),
-                helper.make_tensor_value_info("limit", dtype_onnx, []),
-                helper.make_tensor_value_info("delta", dtype_onnx, []),
-            ],
-            outputs=[
-                helper.make_tensor_value_info(
-                    "output", dtype_onnx, np.arange(start, limit, delta).shape
-                )
-            ],
-        )
-        model = helper.make_model(graph, producer_name="range_test")
-        inputs = [np.array(x).astype(dtype) for x in [start, limit, delta]]
-        verify_with_ort_with_inputs(model, inputs, target=target, dev=dev, use_vm=True)
-
-    for t in ["float32", "int32", "int64"]:
-        verify_range(0, 10, 1, t)
-        verify_range(2, 8, 2, t)
-        verify_range(-3, 6, 4, t)
-        verify_range(-2, -7, -1, t)
-
-
-@tvm.testing.parametrize_targets
-def test_squeeze(target, dev):
-    """test_squeeze"""
-
-    def test_squeeze_once(in_shape, out_shape, axes=None):
-        y = helper.make_node("Squeeze", ["in"], ["out"], axes=axes)
-
-        graph = helper.make_graph(
-            [y],
-            "squeeze_test",
-            inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="squeeze_test")
-        x = np.random.uniform(size=in_shape).astype("float32")
-        verify_with_ort_with_inputs(model, [x], [out_shape], target=target, dev=dev, opset=11)
-
-    test_squeeze_once((1, 3, 1, 3, 1, 1), (3, 3), [0, 2, 4, 5])
-    test_squeeze_once((1, 3, 1, 3, 1, 1), (3, 3))  # empty axis.
-    test_squeeze_once((), ())  # scalar testing.
-
-
-@tvm.testing.parametrize_targets
-def test_flatten(target, dev):
-    """test_flatten"""
-
-    def verify_flatten(in_shape, axis, ref_shape):
-        flatten_node = helper.make_node("Flatten", ["in"], ["out"], axis=axis)
-
-        graph = helper.make_graph(
-            [flatten_node],
-            "flatten_test",
-            inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(ref_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="flatten_test")
-        verify_with_ort(model, [in_shape], target=target, dev=dev)
-
-    verify_flatten((1, 3, 4, 4), 1, (1, 48))
-    verify_flatten((1,), 1, (1, 1))
-
-
-@tvm.testing.parametrize_targets
-def test_unsqueeze(target, dev):
-    """test_unsqueeze"""
-    in_shape = (3, 3)
-    axis = (0, 3, 4)
-    out_shape = (1, 3, 3, 1, 1)
-    y = helper.make_node("Unsqueeze", ["in"], ["out"], axes=list(axis))
-
-    graph = helper.make_graph(
-        [y],
-        "squeeze_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="squeeze_test")
-    verify_with_ort(model, [in_shape], target=target, dev=dev, opset=11)
-
-
-@tvm.testing.parametrize_targets
-def test_unsqueeze_with_neg_axes(target, dev):
-    def verify_unsqueeze_with_neg_axes(opset=11):
-        in_shape = (2, 3, 4)
-        axis = (-2, -1)
-        out_shape = (2, 3, 4, 1, 1)
-        if opset < 13:
-            y = helper.make_node("Unsqueeze", ["in"], ["out"], axes=list(axis))
-            nodes = [y]
-        else:
-            axes = np.array(list(axis)).astype(np.int64)
-            axes = helper.make_node(
-                "Constant",
-                inputs=[],
-                outputs=["axes"],
-                value=onnx.helper.make_tensor(
-                    name="const_axes",
-                    data_type=onnx.TensorProto.INT64,
-                    dims=axes.shape,
-                    vals=axes.flatten().astype(int),
-                ),
-            )
-            y = helper.make_node("Unsqueeze", ["in", "axes"], ["out"])
-            nodes = [axes, y]
-
-        graph = helper.make_graph(
-            nodes,
-            "squeeze_test",
-            inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="squeeze_test")
-        verify_with_ort(model, [in_shape], target=target, dev=dev, opset=opset)
-
-    verify_unsqueeze_with_neg_axes()
-    verify_unsqueeze_with_neg_axes(opset=13)
-
-
-@tvm.testing.parametrize_targets
-def test_gather(target, dev):
-    """test_gather"""
-
-    def verify_gather(in_shape, indices, axis, dtype):
-        x = np.random.uniform(size=in_shape).astype(dtype)
-        indices = np.array(indices, dtype="int64")
-        out_np = np.take(x, indices, axis=axis)
-
-        y = helper.make_node("Gather", ["in", "indices"], ["out"], axis=axis)
-
-        graph = helper.make_graph(
-            [y],
-            "gather_test",
-            inputs=[
-                helper.make_tensor_value_info(
-                    "in", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], list(in_shape)
-                ),
-                helper.make_tensor_value_info("indices", TensorProto.INT64, list(indices.shape)),
-            ],
-            outputs=[
-                helper.make_tensor_value_info(
-                    "out", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], list(out_np.shape)
-                )
-            ],
-        )
-        model = helper.make_model(graph, producer_name="gather_test")
-        verify_with_ort_with_inputs(model, [x, indices], target=target, dev=dev, dtype=dtype)
-
-    verify_gather((4,), [1], 0, "int32")
-    verify_gather((1, 4), [0], 0, "int32")
-    verify_gather((4,), [[[1, 0], [0, 1]]], 0, "float32")
-    verify_gather((2, 2), [[[1, 0], [0, 1]]], 1, "int32")
-    verify_gather((3, 3, 3), [[[1, 0]]], -1, "int32")
-    verify_gather((4, 3, 5, 6), [[2, 1, 0, 0]], 0, "float32")
-
-
-@tvm.testing.parametrize_targets
-def test_dynamic_gather(target, dev):
-    """test_dynamic_gather"""
-    dtype = "float32"
-    in_shape = [2, 2]
-    indices = 1
-    axis = 1
-    x = np.random.uniform(size=in_shape).astype(dtype)
-    indices = np.array(indices, dtype="int64")
-    out_np = np.take(x, indices, axis=axis)
-
-    indices = helper.make_node(
-        "Constant",
-        inputs=[],
-        outputs=["indices"],
-        value=onnx.helper.make_tensor(
-            name="const_indices",
-            data_type=onnx.TensorProto.INT64,
-            dims=[],
-            vals=[1],
-        ),
-    )
-    y = helper.make_node("Gather", ["in", "indices"], ["out"], axis=axis)
-
-    graph = helper.make_graph(
-        [indices, y],
-        "gather_test",
-        inputs=[
-            helper.make_tensor_value_info(
-                "in", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], ["?", "?"]
-            ),
-        ],
-        outputs=[
-            helper.make_tensor_value_info(
-                "out", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], ["?"] * len(out_np.shape)
-            )
-        ],
-    )
-    model = helper.make_model(graph, producer_name="dynamic_gather_test")
-
-    mod, params = relay.frontend.from_onnx(model)
-
-    result = relay.create_executor("vm", mod=mod, device=dev, target=target).evaluate()(x, **params)
-    tvm.testing.assert_allclose(out_np, result.numpy(), rtol=1e-5, atol=1e-5)
-
-
-@tvm.testing.parametrize_targets
-def test_gatherelements(target, dev):
-    """test_gatherelements"""
-
-    def verify_gatherelements(in_shape, indices, axis):
-        x = np.random.uniform(size=in_shape).astype("float32")
-        indices = np.array(indices, dtype="int32")
-
-        y = helper.make_node("GatherElements", ["data", "indices"], ["output"], axis=axis)
-        graph = helper.make_graph(
-            [y],
-            "gather_elements_test",
-            inputs=[
-                helper.make_tensor_value_info("data", TensorProto.FLOAT, list(in_shape)),
-                helper.make_tensor_value_info("indices", TensorProto.INT32, list(indices.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("output", TensorProto.FLOAT, list(in_shape))],
-        )
-        model = helper.make_model(graph, producer_name="gather_elements_test")
-
-        verify_with_ort_with_inputs(model, [x, indices], target=target, dev=dev)
-
-    verify_gatherelements((4,), [3, 0, 2, 1], 0)
-    verify_gatherelements((2, 2), [[1, 0], [0, 1]], 0)
-    verify_gatherelements((2, 2), [[0, 0], [1, 0]], 1)
-    verify_gatherelements((2, 2), [[1, 0], [0, 1]], 1)
-
-    indices = [
-        [[1, 0, 0], [1, 0, 1], [0, 1, 1]],
-        [[1, 1, 1], [1, 2, 1], [1, 0, 1]],
-        [[1, 2, 1], [1, 2, 1], [1, 2, 1]],
-    ]
-
-    verify_gatherelements((3, 3, 3), indices, 2)
-
-
-@tvm.testing.parametrize_targets
-def test_scatter(target, dev):
-    """test_scatter"""
-
-    def verify_scatter(in_shape, indices, axis):
-        x = np.random.uniform(size=in_shape).astype("float32")
-        indices = np.array(indices, dtype="int32")
-        updates = np.random.uniform(size=indices.shape).astype("float32")
-
-        y = helper.make_node("Scatter", ["data", "indices", "updates"], ["output"], axis=axis)
-
-        graph = helper.make_graph(
-            [y],
-            "scatter_test",
-            inputs=[
-                helper.make_tensor_value_info("data", TensorProto.FLOAT, list(in_shape)),
-                helper.make_tensor_value_info("indices", TensorProto.INT32, list(indices.shape)),
-                helper.make_tensor_value_info("updates", TensorProto.FLOAT, list(indices.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("output", TensorProto.FLOAT, list(in_shape))],
-        )
-        model = helper.make_model(graph, producer_name="scatter_test")
-        # Scatter operator has been supported from version 9 and
-        # deprecated since version 11 of the default ONNX operator set
-        verify_with_ort_with_inputs(model, [x, indices, updates], target=target, dev=dev, opset=9)
-
-    verify_scatter((4,), [1], 0)
-    verify_scatter((1, 4), [[0]], 0)
-    verify_scatter((4,), [2, 3], 0)
-    verify_scatter((2, 2), [[1, 0], [0, 1]], 1)
-    verify_scatter((3, 3, 3), [[[-1, -3]]], -1)
-    verify_scatter((4, 3, 5, 6), [[[[2, 1, 0, 0]]]], 0)
-
-
-@tvm.testing.parametrize_targets
-def test_scatter_elements(target, dev):
-    """test_scatter_elements"""
-
-    def verify_scatter_elements(in_shape, indices, axis=0, reduction="update"):
-        x = np.random.uniform(size=in_shape).astype("float32")
-        indices = np.array(indices, dtype="int32")
-        updates = np.random.uniform(size=indices.shape).astype("float32")
-
-        scatter_elements_node = helper.make_node(
-            "ScatterElements",
-            ["data", "indices", "updates"],
-            ["output"],
-            axis=axis,
-            reduction=reduction,
-        )
-
-        graph = helper.make_graph(
-            [scatter_elements_node],
-            "scatter_elements_test",
-            inputs=[
-                helper.make_tensor_value_info("data", TensorProto.FLOAT, list(in_shape)),
-                helper.make_tensor_value_info("indices", TensorProto.INT32, list(indices.shape)),
-                helper.make_tensor_value_info("updates", TensorProto.FLOAT, list(indices.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("output", TensorProto.FLOAT, list(in_shape))],
-        )
-        model = helper.make_model(graph, producer_name="scatter_elements_test")
-        verify_with_ort_with_inputs(model, [x, indices, updates], target=target, dev=dev)
-
-    # Usual scatter for 1d input
-    verify_scatter_elements((4,), [2, 3])
-    # Usual scatter with specified positive axis
-    verify_scatter_elements((2, 2), [[1, 0], [0, 1]], 1)
-    # Usual scatter for 3d input with spicified negative indices and axis
-    verify_scatter_elements((3, 3, 3), [[[-1, -3]]], -1)
-    # Usual scatter for 4d input
-    verify_scatter_elements((4, 3, 5, 6), [[[[2, 1, 0, 0]]]])
-    # Scatter elements with addition reduction of duplicates
-    verify_scatter_elements(
-        (3, 3, 3),
-        [[[0, 2, 1], [1, 1, 1], [2, 1, 0]], [[0, 2, 1], [1, 1, 1], [2, 1, 0]]],
-        0,
-        "add",
-    )
-    # Scatter elements with reduction and specified axis
-    verify_scatter_elements((3, 3, 3), [[[2, 2, 2], [1, 1, 1], [0, 0, 0]]], 2, "add")
-    # Scatter elements with multiplication reduction of duplicates
-    verify_scatter_elements(
-        (3, 3, 3),
-        [[[0, 2, 1], [1, 1, 1], [2, 1, 0]], [[0, 2, 1], [1, 1, 1], [2, 1, 0]]],
-        0,
-        "mul",
-    )
-    # TODO(vvchernov): min and max options are supported from 18 version, but CI supports 17 only
-    # # Scatter elements with min reduction of duplicates
-    # verify_scatter_elements(
-    #     (3, 3, 3),
-    #     [[[0, 2, 1], [1, 1, 1], [2, 1, 0]], [[0, 2, 1], [1, 1, 1], [2, 1, 0]]],
-    #     0,
-    #     "min",
-    # )
-    # # Scatter elements with max reduction of duplicates
-    # verify_scatter_elements(
-    #     (3, 3, 3),
-    #     [[[0, 2, 1], [1, 1, 1], [2, 1, 0]], [[0, 2, 1], [1, 1, 1], [2, 1, 0]]],
-    #     0,
-    #     "max",
-    # )
-
-
-@tvm.testing.parametrize_targets
-def test_slice(target, dev):
-    """test_slice"""
-
-    def _test_slice_iteration_v1(indata, outdata, starts, ends, axes=None):
-        if axes:
-            y = helper.make_node("Slice", ["in"], ["out"], axes=axes, starts=starts, ends=ends)
-        else:
-            y = helper.make_node("Slice", ["in"], ["out"], starts=starts, ends=ends)
-
-        graph = helper.make_graph(
-            [y],
-            "slice_test",
-            inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(indata.shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(outdata.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="slice_test")
-        verify_with_ort_with_inputs(
-            model, [indata], [outdata.shape], opset=1, target=target, dev=dev
-        )
-
-    def _test_slice_iteration_v10(indata, outdata, **attrs):
-        starts = attrs["starts"]
-        ends = attrs["ends"]
-        axes = None if "axes" not in attrs else attrs["axes"]
-        steps = None if "steps" not in attrs else attrs["steps"]
-        starts = np.asarray(starts)
-        ends = np.asarray(ends)
-        inputs = [
-            helper.make_tensor_value_info("data", TensorProto.FLOAT, list(indata.shape)),
-            helper.make_tensor_value_info("starts", TensorProto.INT64, list(starts.shape)),
-            helper.make_tensor_value_info("ends", TensorProto.INT64, list(ends.shape)),
-        ]
-        initializer = [
-            helper.make_tensor("starts", TensorProto.INT64, list(starts.shape), starts),
-            helper.make_tensor("ends", TensorProto.INT64, list(ends.shape), ends),
-        ]
-        nodes = []
-
-        if "add_noop_to_input_attrs" in attrs:
-
-            def add_noop_to_input_attr(attr_name, attr):
-                output_name = attr_name + "_output"
-
-                ref_shape = list(np.array(attr).shape)
-                ref_shape.insert(0, 1)
-                ref_shape = tuple(ref_shape)
-                ref_array = np.array(ref_shape)
-                ref_node = onnx.helper.make_node(
-                    "Constant",
-                    inputs=[],
-                    outputs=["ref_in_" + attr_name],
-                    value=onnx.helper.make_tensor(
-                        name="const_tensor__1_" + attr_name,
-                        data_type=onnx.TensorProto.INT64,
-                        dims=ref_array.shape,
-                        vals=ref_array.flatten().astype(int),
-                    ),
-                )
-                in_shape = np.array(attr).shape
-                in_array = np.array(in_shape)
-                ref_node2 = onnx.helper.make_node(
-                    "Constant",
-                    inputs=[],
-                    outputs=["input_shape_" + attr_name],
-                    value=onnx.helper.make_tensor(
-                        name="const_tensor__2_" + attr_name,
-                        data_type=onnx.TensorProto.INT64,
-                        dims=in_array.shape,
-                        vals=in_array.flatten().astype(int),
-                    ),
-                )
-
-                reshape1_node = helper.make_node(
-                    "Reshape", [attr_name, "ref_in_" + attr_name], ["reshape_" + attr_name]
-                )
-                reshape2_node = helper.make_node(
-                    "Reshape", ["reshape_" + attr_name, "input_shape_" + attr_name], [output_name]
-                )
-                return [ref_node, ref_node2, reshape1_node, reshape2_node]
-
-        slice_inputs = []
-        for attr_name in ["starts", "ends", "axes", "steps"]:
-            if attr_name not in attrs:
-                continue
-            if "add_noop_to_input_attrs" in attrs and attr_name in attrs["add_noop_to_input_attrs"]:
-                nodes.extend(add_noop_to_input_attr(attr_name, attrs[attr_name]))
-                slice_inputs.append(attr_name + "_output")
-            else:
-                slice_inputs.append(attr_name)
-
-        if axes:
-            axes = np.asarray(axes)
-            inputs.append(
-                helper.make_tensor_value_info("axes", TensorProto.INT64, list(axes.shape))
-            )
-            initializer.append(
-                helper.make_tensor("axes", TensorProto.INT64, list(axes.shape), axes)
-            )
-
-        if steps:
-            assert axes is not None and len(axes) == len(steps)
-            steps = np.asarray(steps)
-            inputs.append(
-                helper.make_tensor_value_info("steps", TensorProto.INT64, list(axes.shape))
-            )
-            initializer.append(
-                helper.make_tensor("steps", TensorProto.INT64, list(steps.shape), steps)
-            )
-
-        y = helper.make_node("Slice", ["data", *slice_inputs], ["out"])
-
-        nodes.append(y)
-        graph = helper.make_graph(
-            nodes,
-            "slice_test",
-            inputs=inputs,
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(outdata.shape))],
-            initializer=initializer,
-        )
-        model = helper.make_model(graph, producer_name="slice_test")
-        verify_with_ort_with_inputs(
-            model, [indata], opset=10, freeze_params=True, use_vm=True, target=target, dev=dev
-        )
-
-    x = np.random.randn(20, 10, 5).astype(np.float32)
-    _test_slice_iteration_v1(x, x[0:3, 0:10], starts=(0, 0), ends=(3, 10), axes=(0, 1))
-    _test_slice_iteration_v1(x, x[0:3, 0:10], starts=(0, 0), ends=(10, 3), axes=(1, 0))
-    _test_slice_iteration_v1(x, x[:, :, 3:4], starts=(0, 0, 3), ends=(20, 10, 4))
-    _test_slice_iteration_v1(x, x[:, 1:1000], starts=(1,), ends=(1000,), axes=(1,))
-    _test_slice_iteration_v1(x, x[:, 0:-1], starts=(0,), ends=(-1,), axes=(1,))
-    _test_slice_iteration_v10(x, x[0:3, 0:10], starts=(0, 0), ends=(3, 10), axes=(0, 1))
-    _test_slice_iteration_v10(x, x[0:3, 0:10], starts=(0, 0), ends=(10, 3), axes=(1, 0))
-    _test_slice_iteration_v10(x, x[:, :, 3:4], starts=(0, 0, 3), ends=(20, 10, 4))
-    _test_slice_iteration_v10(x, x[:, 1:1000], starts=(1,), ends=(1000,), axes=(1,))
-    _test_slice_iteration_v10(x, x[:, 0:-1], starts=(0,), ends=(-1,), axes=(1,))
-    _test_slice_iteration_v10(x, x[:, 0:-1], starts=(0,), ends=(-1,), axes=(-1,))
-    _test_slice_iteration_v10(
-        x,
-        x[0:3, 0:10],
-        starts=(0, 0),
-        ends=(3, 10),
-        axes=(0, 1),
-        add_noop_to_input_attrs=["starts"],
-    )
-    _test_slice_iteration_v10(
-        x, x[:, :, 3:4], starts=(0, 0, 3), ends=(20, 10, 4), add_noop_to_input_attrs=["ends"]
-    )
-    _test_slice_iteration_v10(
-        x, x[:, 1:1000], starts=(1,), ends=(1000,), axes=(1,), add_noop_to_input_attrs=["axes"]
-    )
-    _test_slice_iteration_v10(
-        x,
-        x[:, 0:-1],
-        starts=(0,),
-        ends=(-1,),
-        axes=(1,),
-        add_noop_to_input_attrs=["starts", "ends"],
-    )
-    _test_slice_iteration_v10(
-        x,
-        x[0:3, 0:10],
-        starts=(0, 0),
-        ends=(3, 10),
-        axes=(0, 1),
-        add_noop_to_input_attrs=["ends", "axes"],
-    )
-    _test_slice_iteration_v10(
-        x,
-        x[:, :, 3:4],
-        starts=(0, 0, 3),
-        ends=(20, 10, 4),
-        add_noop_to_input_attrs=["starts", "axes"],
-    )
-    _test_slice_iteration_v10(
-        x,
-        x[:, 1:1000],
-        starts=(1,),
-        ends=(1000,),
-        axes=(1,),
-        add_noop_to_input_attrs=["starts", "ends", "axes"],
-    )
-    x = np.random.randn(1, 1, 1, 128).astype(np.float32)
-    _test_slice_iteration_v10(
-        x, x, starts=(0, 0), ends=(9223372036854775807, 9223372036854775807), axes=(0, 3)
-    )
-
-    x = np.random.randn(4, 4).astype(np.float32)
-    _test_slice_iteration_v10(
-        x, x[:, 1::2], starts=(1,), ends=(9223372036854775807,), axes=(1,), steps=(2,)
-    )
-    _test_slice_iteration_v10(
-        x,
-        x[0::1, 1::2],
-        starts=(0, 1),
-        ends=(4, 4),
-        axes=(0, 1),
-        steps=(1, 2),
-    )
-
-
-def _test_onnx_op_elementwise(
-    target, dev, inshape, outfunc, npargs, dtype, opname, kwargs, opset=None, verify=True
-):
-    indata = np.random.uniform(-1, 1, size=inshape).astype(dtype)
-    outdata = outfunc(indata, **npargs)
-
-    y = helper.make_node(opname, ["in"], ["out"], **kwargs)
-
-    ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-
-    graph = helper.make_graph(
-        [y],
-        opname + "_test",
-        inputs=[helper.make_tensor_value_info("in", ONNX_DTYPE, list(indata.shape))],
-        outputs=[helper.make_tensor_value_info("out", ONNX_DTYPE, list(outdata.shape))],
-    )
-
-    model = helper.make_model(graph, producer_name=opname + "_test")
-    if verify:
-        verify_with_ort_with_inputs(
-            model, [indata], [outdata.shape], opset=opset, dtype=dtype, target=target, dev=dev
-        )
-    else:
-        get_tvm_output(
-            model,
-            [indata],
-            target,
-            dev,
-            [outdata.shape],
-            dtype,
-            opset=opset,
-            opt_level=3,
-        )
-
-
-@tvm.testing.parametrize_targets
-def test_floor(target, dev):
-    _test_onnx_op_elementwise(target, dev, (2, 4, 5, 6), np.floor, {}, "float32", "Floor", {})
-
-
-@tvm.testing.parametrize_targets
-def test_ceil(target, dev):
-    _test_onnx_op_elementwise(target, dev, (2, 4, 5, 6), np.ceil, {}, "float32", "Ceil", {})
-
-
-@tvm.testing.parametrize_targets
-def test_clip(target, dev):
-    """test_clip"""
-    _test_onnx_op_elementwise(
-        target,
-        dev,
-        (2, 4, 5, 6),
-        np.clip,
-        {"a_min": -1.0, "a_max": 1.0},
-        "float32",
-        "Clip",
-        {"min": -1.0, "max": 1.0},
-        opset=6,
-    )
-
-    _test_onnx_op_elementwise(
-        target,
-        dev,
-        (2, 4, 5, 6),
-        np.clip,
-        {"a_min": -np.inf, "a_max": 1.0},
-        "float32",
-        "Clip",
-        {"max": 1.0},
-        opset=6,
-    )
-
-    _test_onnx_op_elementwise(
-        target,
-        dev,
-        (2, 4, 5, 6),
-        np.clip,
-        {"a_min": -1.0, "a_max": np.inf},
-        "float32",
-        "Clip",
-        {"min": -1.0},
-        opset=6,
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_clip_min_max_as_inputs(target, dev):
-    """test_clip_min_max_as_inputs"""
-    input_shape = (2, 4, 5, 6)
-    nodes = [
-        make_constant_node("min", onnx.TensorProto.FLOAT, (), [0.0]),
-        make_constant_node("max", onnx.TensorProto.FLOAT, (), [6.0]),
-    ]
-    input_names = ["in", "min", "max"]
-    nodes.append(helper.make_node("Clip", inputs=input_names, outputs=["out"]))
-    graph = helper.make_graph(
-        nodes,
-        "clip_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(input_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(input_shape))],
-    )
-    model = helper.make_model(graph, producer_name="clip_test")
-
-    verify_with_ort(model, [input_shape], out_shape=[input_shape], target=target, dev=dev)
-
-
-@tvm.testing.parametrize_targets
-def test_round(target, dev):
-    _test_onnx_op_elementwise(target, dev, (2, 4, 5, 6), np.round, {}, "float32", "Round", {})
-    _test_onnx_op_elementwise(
-        target, dev, (2, 4, 5, 6), np.round, {}, "float64", "Round", {}, verify=False
-    )  # TODO: enable verification once ORT supports float64
-
-
-def _test_finite_ops(target, dev, inshape, outfunc, npargs, dtype, opname, kwargs):
-    indata = np.random.choice(a=[np.nan, np.inf, -np.inf, 0.5, 1.0, 0], size=inshape).astype(dtype)
-
-    outdata = outfunc(indata, **npargs)
-    y = helper.make_node(opname, ["in"], ["out"], **kwargs)
-
-    graph = helper.make_graph(
-        [y],
-        opname + "_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(indata.shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.BOOL, list(outdata.shape))],
-    )
-
-    model = helper.make_model(graph, producer_name=opname + "_test")
-    verify_with_ort_with_inputs(
-        model, [indata], [outdata.shape], dtype=dtype, target=target, dev=dev
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_isinf(target, dev):
-    _test_finite_ops(target, dev, (2, 4, 5, 6), np.isinf, {}, "float32", "IsInf", {})
-
-
-@tvm.testing.parametrize_targets
-def test_isnan(target, dev):
-    """test_isnan"""
-    _test_finite_ops(target, dev, (2, 4, 5, 6), np.isnan, {}, "float32", "IsNaN", {})
-
-
-@tvm.testing.parametrize_targets
-def test_gather_nd(target, dev):
-    """test_gather_nd"""
-
-    def verify_gather_nd(in_shape, indices, out_shape, dtype="float32", batch_dims=0, opset=11):
-        x = np.random.uniform(size=in_shape).astype(dtype)
-        indices = np.array(indices, dtype="int64")
-
-        y = helper.make_node("GatherND", ["in", "indices"], ["out"])
-
-        if opset >= 12:
-            batch_dims_attr = helper.make_attribute("batch_dims", batch_dims)
-            y.attribute.append(batch_dims_attr)
-
-        graph = helper.make_graph(
-            [y],
-            "gather_test",
-            inputs=[
-                helper.make_tensor_value_info(
-                    "in", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], list(in_shape)
-                ),
-                helper.make_tensor_value_info("indices", TensorProto.INT64, list(indices.shape)),
-            ],
-            outputs=[
-                helper.make_tensor_value_info(
-                    "out", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], list(out_shape)
-                )
-            ],
-        )
-        model = helper.make_model(graph, producer_name="gather_test")
-        verify_with_ort_with_inputs(
-            model, [x, indices], [out_shape], opset=opset, target=target, dev=dev
-        )
-
-    verify_gather_nd([2, 2], [[0, 0], [1, 1]], [2], "int32")
-    verify_gather_nd([2, 2], [[1], [0]], [2, 2])
-    verify_gather_nd([2, 2, 2], [[0, 1], [1, 0]], [2, 2])
-    verify_gather_nd([2, 2, 2], [[[0, 1]], [[1, 0]]], [2, 1, 2])
-
-    if is_version_greater_than("1.6.0"):
-        verify_gather_nd([2, 2, 2], [[1], [0]], [2, 2], batch_dims=1, opset=12)
-        verify_gather_nd(
-            (3, 2, 2, 3, 4),
-            np.random.randint(low=0, high=2, size=(3, 2, 3), dtype="int64"),
-            (3, 2),
-            batch_dims=2,
-            opset=12,
-        )
-
-
-@tvm.testing.parametrize_targets
-def test_onehot(target, dev):
-    """test_onehot"""
-    indices_shape = [10]
-    indices_array = np.random.randint(low=0, high=9, size=indices_shape, dtype="int32")
-    depth = 10
-    values = np.asarray([0, 1]).astype("int32")
-    out_np = np.eye(depth)[indices_array.reshape(-1)]
-
-    onehot_node = helper.make_node("OneHot", ["indices", "depth", "values"], ["out"])
-
-    graph = helper.make_graph(
-        [onehot_node],
-        "onehot_test",
-        inputs=[
-            helper.make_tensor_value_info("indices", TensorProto.INT32, indices_shape),
-            helper.make_tensor_value_info("depth", TensorProto.INT32, [1]),
-            helper.make_tensor_value_info("values", TensorProto.INT32, values.shape),
-        ],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.INT32, out_np.shape)],
-    )
-
-    model = helper.make_model(graph, producer_name="onehot_test")
-
-    # TODO(jwfromm): Replace test against np with test against onnxrt once we update versions.
-    tvm_out = get_tvm_output_with_vm(
-        model, [indices_array, np.array([depth]).astype("int32"), values], target, dev
-    )
-    tvm.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5)
-
-
-@tvm.testing.parametrize_targets
-def test_gemm(target, dev):
-    """test_gemm"""
-
-    def verify_gemm(a_shape, b_shape, c_shape=None, freeze_params=False, dtype="float32"):
-        out_shape = [a_shape[0], b_shape[1]]
-        a_array = np.random.uniform(size=a_shape).astype(dtype)
-        b_array = np.random.uniform(size=b_shape).astype(dtype)
-        input_names = ["a", "b"]
-        ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-        input_nodes = [
-            helper.make_tensor_value_info("a", ONNX_DTYPE, list(a_shape)),
-            helper.make_tensor_value_info("b", ONNX_DTYPE, list(b_shape)),
-        ]
-        input_values = [a_array, b_array]
-        if c_shape is not None:
-            c_array = np.random.uniform(size=c_shape).astype(dtype)
-            input_names.append("c")
-            input_nodes.append(helper.make_tensor_value_info("c", ONNX_DTYPE, list(c_shape)))
-            input_values.append(c_array)
-
-        gemm_node = helper.make_node("Gemm", input_names, ["out"])
-
-        graph = helper.make_graph(
-            [gemm_node],
-            "gemm_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("out", ONNX_DTYPE, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="gemm_test")
-        atol = 1e-5
-        rtol = 1e-5
-        if dtype == "float16":
-            atol = 1e-3
-            rtol = 1e-3
-        verify_with_ort_with_inputs(
-            model,
-            input_values,
-            freeze_params=freeze_params,
-            dtype=dtype,
-            atol=atol,
-            rtol=rtol,
-            target=target,
-            dev=dev,
-        )
-
-    verify_gemm(a_shape=(4, 3), b_shape=(3, 4))
-    verify_gemm(a_shape=(4, 3), b_shape=(3, 4), c_shape=(4,))
-    verify_gemm(a_shape=(4, 3), b_shape=(3, 4), c_shape=(4,), freeze_params=True)
-    verify_gemm(a_shape=(4, 3), b_shape=(3, 4), c_shape=(4,), freeze_params=True, dtype="float16")
-
-
-@tvm.testing.parametrize_targets
-def test_matmul(target, dev):
-    """test_matmul"""
-
-    def test_one_matmul(a_shape, b_shape):
-        out_shape = np.matmul(np.zeros(a_shape), np.zeros(b_shape)).shape
-
-        a_array = np.random.uniform(size=a_shape).astype("float32")
-        b_array = np.random.uniform(size=b_shape).astype("float32")
-
-        mul_node = helper.make_node("MatMul", ["a", "b"], ["out"])
-
-        graph = helper.make_graph(
-            [mul_node],
-            "matmul_test",
-            inputs=[
-                helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape)),
-                helper.make_tensor_value_info("b", TensorProto.FLOAT, list(b_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="matmul_test")
-        verify_with_ort_with_inputs(model, [a_array, b_array], target=target, dev=dev)
-
-    test_one_matmul((4, 3), (3, 4))
-    test_one_matmul((3,), (3, 1))
-    test_one_matmul((1, 3), (3,))
-    test_one_matmul((3,), (3,))
-
-
-@tvm.testing.parametrize_targets
-def test_batch_matmul(target, dev):
-    """test_batch_matmul"""
-
-    def verify_batch_matmul(a_shape, b_shape, out_shape, convert_config=None):
-        a_array = np.random.uniform(size=a_shape).astype("float32")
-        b_array = np.random.uniform(size=b_shape).astype("float32")
-
-        mul_node = helper.make_node("MatMul", ["a", "b"], ["out"])
-
-        graph = helper.make_graph(
-            [mul_node],
-            "matmul_test",
-            inputs=[
-                helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape)),
-                helper.make_tensor_value_info("b", TensorProto.FLOAT, list(b_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, out_shape)],
-        )
-
-        model = helper.make_model(graph, producer_name="matmul_test")
-        verify_with_ort_with_inputs(
-            model,
-            [a_array, b_array],
-            use_vm=True,
-            target=target,
-            dev=dev,
-            convert_config=convert_config,
-        )
-
-    verify_batch_matmul((2, 3, 4, 3), (2, 3, 3, 4), (2, 3, 4, 4))
-    verify_batch_matmul((2, 4, 3), (3, 4), (2, 4, 4))
-    verify_batch_matmul((2, 3, 4, 3), (3, 4), (2, 3, 4, 4))
-    # Test implicit broadcasting.
-    verify_batch_matmul((5,), (5, 5, 4), (5, 4))
-    verify_batch_matmul((5, 4, 5), (5,), (5, 4))
-    verify_batch_matmul((4, 3), (2, 3, 4), (2, 4, 4))
-    verify_batch_matmul((2, 4, 3), (1, 3, 4), (2, 4, 4))
-    verify_batch_matmul((1, 4, 3), (2, 3, 4), (2, 4, 4))
-    verify_batch_matmul((4, 32, 16), (16, 32), (4, 32, 32))
-    verify_batch_matmul((4, 32, 16, 32), (32, 16), (4, 32, 16, 16))
-    verify_batch_matmul((4, 32, 16, 32), (1, 32, 32, 16), (4, 32, 16, 16))
-    verify_batch_matmul((4, 1, 16, 32), (1, 32, 32, 16), (4, 32, 16, 16))
-    # Test transb=False
-    verify_batch_matmul(
-        (2, 3, 4, 3),
-        (2, 3, 3, 4),
-        (2, 3, 4, 4),
-        convert_config={"use_nt_batch_matmul": False},
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_use_nt_batch_matmul(target, dev):
-    """test_use_nt_batch_matmul"""
-    a_shape = (2, 3, 4)
-    b_shape = (2, 4, 3)
-    out_shape = [2, 3, 3]
-    a_array = np.random.uniform(size=a_shape).astype("float32")
-    b_array = np.random.uniform(size=b_shape).astype("float32")
-
-    for use_nt_batch_matmul in [True, False]:
-        mul_node = helper.make_node("MatMul", ["a", "b"], ["out"])
-
-        graph = helper.make_graph(
-            [mul_node],
-            "matmul_test",
-            inputs=[
-                helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape)),
-                helper.make_tensor_value_info("b", TensorProto.FLOAT, list(b_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="matmul_test")
-        _, shape_dict = get_input_data_shape_dict(model, [a_array, b_array])
-
-        mod, _ = relay.frontend.from_onnx(
-            model, shape_dict, convert_config={"use_nt_batch_matmul": use_nt_batch_matmul}
-        )
-        has_transpose_op = "transpose" in str(mod)
-        # use_nt_batch_matmul implies, TVM converts qualified onnx `matmul`
-        # to `transpose(weight) + nn.batch_matmul_NT`, otherwise to `nn.batch_matmul`
-        assert has_transpose_op == use_nt_batch_matmul
-
-
-@tvm.testing.parametrize_targets
-def test_matmulinteger16(target, dev):
-    """test_matmulinteger16"""
-
-    def verify_matmulinteger16(a_shape, b_shape, out_shape):
-        a_dtype = "int16"
-        b_dtype = "int16"
-        low = np.iinfo(np.int16).min
-        high = np.iinfo(np.int16).max
-
-        a_proto = TensorProto.INT16
-        b_proto = TensorProto.INT16
-        out_proto = TensorProto.INT32
-        a_array = np.random.randint(low, high, size=a_shape).astype(a_dtype)
-        b_array = np.random.randint(low, high, size=b_shape).astype(b_dtype)
-
-        mul_node = helper.make_node("MatMulInteger16", ["a", "b"], ["out"], domain="com.microsoft")
-
-        graph = helper.make_graph(
-            [mul_node],
-            "matmuli16_test",
-            inputs=[
-                helper.make_tensor_value_info("a", a_proto, list(a_shape)),
-                helper.make_tensor_value_info("b", b_proto, list(b_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", out_proto, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="matmuli16_test")
-        verify_with_ort_with_inputs(model, [a_array, b_array], target=target, dev=dev)
-
-    # 2D computation to verify matmul op
-    verify_matmulinteger16((4, 3), (3, 4), (4, 4))
-    verify_matmulinteger16((5, 7), (7, 8), (5, 8))
-    # Verify 3D matmul using batch_matmul op
-    verify_matmulinteger16((2, 4, 3), (1, 3, 4), (2, 4, 4))
-    verify_matmulinteger16((1, 4, 3), (2, 3, 4), (2, 4, 4))
-    # Test implicit broadcasting
-    verify_matmulinteger16((2, 3, 5, 3), (2, 3, 3, 5), (2, 3, 5, 5))
-    verify_matmulinteger16((2, 7, 3), (3, 7), (2, 7, 7))
-    verify_matmulinteger16((2, 3, 4, 3), (3, 4), (2, 3, 4, 4))
-
-
-def verify_simple_dynamic_model(a_shape, b_shape, target, dev):
-    """verify_simple_dynamic_model"""
-
-    def verify_model(model, a_shape, b_shape):
-        a_array = np.random.uniform(size=a_shape).astype("float32")
-        b_array = np.random.uniform(size=b_shape).astype("float32")
-        # matmul
-        out_np = np.matmul(a_array, b_array)
-        # relu
-        out_np[out_np < 0] = 0
-
-        tvm_out = model(a_array, b_array).numpy()
-        tvm.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5)
-
-    mul_node = helper.make_node("MatMul", ["a", "b"], ["out"])
-    relu_node = helper.make_node("Relu", ["out"], ["relu"])
-
-    a_array = np.random.uniform(size=a_shape).astype("float32")
-    b_array = np.random.uniform(size=b_shape).astype("float32")
-    # matmul
-    out_np = np.matmul(a_array, b_array)
-
-    graph = helper.make_graph(
-        [mul_node, relu_node],
-        "matmul_test",
-        inputs=[
-            helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape)),
-            helper.make_tensor_value_info("b", TensorProto.FLOAT, list(b_shape)),
-        ],
-        outputs=[helper.make_tensor_value_info("relu", TensorProto.FLOAT, list(out_np.shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="matmul_test")
-
-    a_anys = [relay.Any()] * len(a_shape)
-    b_anys = [relay.Any()] * len(b_shape)
-
-    mod, _ = relay.frontend.from_onnx(model, {"a": a_anys, "b": b_anys})
-    model = relay.create_executor("vm", mod=mod, device=dev, target=target).evaluate()
-    verify_model(model, a_shape, b_shape)
-    verify_model(model, [a * 2 for a in a_shape], [b * 2 for b in b_shape])
-    verify_model(model, [a * 3 for a in a_shape], [b * 3 for b in b_shape])
-
-
-# TODO(mbrookhart, electriclilies): Add CUDA as a target once batch matmul is fixed
-@tvm.testing.parametrize_targets("llvm")
-def test_batch_matmul_dynamic_model(target, dev):
-    verify_simple_dynamic_model((2, 3, 4, 3), (2, 3, 3, 4), target, dev)
-    verify_simple_dynamic_model((2, 4, 3), (3, 4), target, dev)
-    verify_simple_dynamic_model((2, 3, 4, 3), (3, 4), target, dev)
-
-
-@tvm.testing.parametrize_targets
-def test_lrn(target, dev):
-    """test_lrn"""
-
-    def verify_lrn(shape, nsize, dtype, alpha=None, beta=None, bias=None):
-        in_array = np.random.uniform(size=shape).astype(dtype)
-
-        if alpha is None and beta is None and bias is None:
-            alpha = 0.0001
-            beta = 0.75
-            bias = 1.0
-            node = onnx.helper.make_node("LRN", inputs=["in"], outputs=["out"], size=nsize)
-        else:
-            node = onnx.helper.make_node(
-                "LRN", inputs=["in"], outputs=["out"], alpha=alpha, beta=beta, bias=bias, size=nsize
-            )
-
-        graph = helper.make_graph(
-            [node],
-            "lrn_test",
-            inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(shape))],
-        )
-        model = helper.make_model(graph, producer_name="lrn_test")
-        verify_with_ort_with_inputs(model, [in_array], target=target, dev=dev)
-
-    verify_lrn((5, 5, 5, 5), 3, "float32")
-    verify_lrn((5, 5, 5, 5), 3, "float32", alpha=0.0002, beta=0.5, bias=2.0)
-
-
-@tvm.testing.parametrize_targets
-def test_instance_norm(target, dev):
-    """test_instance_norm"""
-
-    def verify_instance_norm(shape, axis=1):
-        x = np.random.randn(*shape).astype(np.float32)
-        gamma = np.random.randn(shape[1]).astype(np.float32)
-        beta = np.random.randn(shape[1]).astype(np.float32)
-        epsilon = 1e-5
-
-        node = onnx.helper.make_node(
-            "InstanceNormalization",
-            inputs=["x", "gamma", "beta"],
-            outputs=["y"],
-            epsilon=epsilon,
-        )
-        graph = helper.make_graph(
-            [node],
-            "instance_norm_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(shape)),
-                helper.make_tensor_value_info("gamma", TensorProto.FLOAT, (shape[1],)),
-                helper.make_tensor_value_info("beta", TensorProto.FLOAT, (shape[1],)),
-            ],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(shape))],
-        )
-        model = helper.make_model(graph, producer_name="instance_norm_test")
-        verify_with_ort_with_inputs(
-            model, [x, gamma, beta], out_shape=[shape], target=target, dev=dev
-        )
-
-    verify_instance_norm((2, 3, 4, 5))
-    verify_instance_norm((32, 64, 80, 64))
-    verify_instance_norm((8, 6, 5))
-    verify_instance_norm((8, 7, 6, 5, 4))
-
-
-@tvm.testing.parametrize_targets
-def test_upsample_nearest(target, dev):
-    """test_upsample_nearest"""
-    scale = 2
-    in_shape = (1, 1, 3, 3)
-    out_shape = (1, 1, 3 * scale, 3 * scale)
-    y = helper.make_node("Upsample", ["in"], ["out"], mode="nearest", scales=[1.0, 1.0, 2.0, 2.0])
-
-    in_array = np.random.uniform(size=in_shape).astype(np.float32)
-
-    graph = helper.make_graph(
-        [y],
-        "upsample_nearest_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="upsample_nearest_test")
-    verify_with_ort_with_inputs(model, [in_array], [out_shape], opset=7, target=target, dev=dev)
-
-
-@tvm.testing.parametrize_targets
-def test_upsample_nearest_default(target, dev):
-    """test_upsample_nearest_default"""
-    scale = 2
-    in_shape = (1, 1, 3, 3)
-    out_shape = (1, 1, 3 * scale, 3 * scale)
-    y = helper.make_node("Upsample", ["in"], ["out"], scales=[1.0, 1.0, 2.0, 2.0])
-
-    in_array = np.random.uniform(size=in_shape).astype(np.float32)
-
-    graph = helper.make_graph(
-        [y],
-        "upsample_nearest_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="upsample_nearest_test")
-    verify_with_ort_with_inputs(model, [in_array], [out_shape], opset=7, target=target, dev=dev)
-
-
-@tvm.testing.parametrize_targets
-def test_upsample3d_nearest(target, dev):
-    """test_upsample3d_nearest"""
-    scale = 2
-    in_shape = (1, 1, 3, 3, 3)
-    out_shape = (1, 1, 3 * scale, 3 * scale, 3 * scale)
-    y = helper.make_node(
-        "Upsample", ["in"], ["out"], mode="nearest", scales=[1.0, 1.0, 2.0, 2.0, 2.0]
-    )
-
-    in_array = np.random.uniform(size=in_shape).astype(np.float32)
-
-    graph = helper.make_graph(
-        [y],
-        "upsample_nearest_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="upsample_nearest_test")
-    # Upsample is deprecated after opset 9
-    verify_with_ort_with_inputs(model, [in_array], [out_shape], opset=7, target=target, dev=dev)
-
-
-@tvm.testing.parametrize_targets
-def test_upsample_bilinear(target, dev):
-    """test_upsample_bilinear"""
-    scale = 2
-    in_shape = (1, 1, 3, 3)
-    out_shape = (1, 1, 3 * scale, 3 * scale)
-    y = helper.make_node("Upsample", ["in"], ["out"], mode="linear", scales=[1.0, 1.0, 2.0, 2.0])
-
-    in_array = np.random.uniform(size=in_shape).astype(np.float32)
-
-    graph = helper.make_graph(
-        [y],
-        "upsample_bilinear_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="upsample_bilinear_test")
-    verify_with_ort_with_inputs(model, [in_array], [out_shape], opset=7, target=target, dev=dev)
-
-
-@tvm.testing.parametrize_targets
-def test_upsample3d_trilinear(target, dev):
-    """test_upsample3d_trilinear"""
-    scale = 2
-    in_shape = (1, 1, 3, 3, 3)
-    out_shape = (1, 1, 3 * scale, 3 * scale, 3 * scale)
-    y = helper.make_node("Upsample", ["in", "scales"], ["out"], mode="linear")
-    scales = [1.0, 1.0, 2.0, 2.0, 2.0]
-    in_array = np.random.uniform(size=in_shape).astype(np.float32)
-    out_array = tvm.topi.testing.resize3d_python(
-        in_array,
-        (scale, scale, scale),
-        "NCDHW",
-        "linear",
-        coordinate_transformation_mode="asymmetric",
-    )
-
-    ref_array = np.array(scales)
-    ref_node = helper.make_node(
-        "Constant",
-        inputs=[],
-        outputs=["scales"],
-        value=onnx.helper.make_tensor(
-            name="const_tensor",
-            data_type=TensorProto.FLOAT,
-            dims=ref_array.shape,
-            vals=ref_array.flatten().astype(float),
-        ),
-    )
-
-    graph = helper.make_graph(
-        [ref_node, y],
-        "upsample_trilinear_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="upsample_trilinear_test")
-    # TODO(jwfromm): Trilinear upsampling not supported in 1.0.0 onnxruntime.
-    # Replace topi comparison with verify_with_ort once we update.
-    tvm_out = get_tvm_output(model, in_array, target, dev, out_shape, "float32")
-    tvm.testing.assert_allclose(out_array, tvm_out, rtol=1e-5, atol=1e-5)
-
-
-# TODO: Fix softmax with dynamic input on cuda and enable this test
-@tvm.testing.known_failing_targets("cuda")
-@tvm.testing.parametrize_targets
-def test_softmax(target, dev):
-    """test_softmax"""
-
-    def verify_softmax(inshape, axis, opset=None, dynamic=False):
-        opname = "Softmax"
-        outshape = inshape
-        node_list = []
-        input_node_list = [helper.make_tensor_value_info("in", TensorProto.FLOAT, list(inshape))]
-        output_node_list = [helper.make_tensor_value_info("out", TensorProto.FLOAT, list(outshape))]
-        input_list = [np.random.uniform(size=inshape).astype(np.float32)]
-        softmax_inputs = ["in"]
-
-        if dynamic:
-            input_node_list.append(
-                helper.make_tensor_value_info("shape", TensorProto.INT64, [len(inshape)])
-            )
-            input_list.append(np.asarray(inshape))
-            reshape_node = helper.make_node("Reshape", ["in", "shape"], ["dynamic_in"])
-            softmax_inputs[0] = "dynamic_in"
-            node_list += [reshape_node]
-
-        y = helper.make_node(opname, softmax_inputs, ["out"])
-        if axis is not None:
-            axis_attr = helper.make_attribute("axis", axis)
-            y.attribute.append(axis_attr)
-        node_list.append(y)
-
-        graph = helper.make_graph(
-            node_list,
-            opname + "_test",
-            inputs=input_node_list,
-            outputs=output_node_list,
-        )
-
-        model = helper.make_model(graph, producer_name=opname + "_test")
-        verify_with_ort_with_inputs(
-            model, input_list, use_vm=True, opset=opset, target=target, dev=dev
-        )
-
-    verify_softmax((1, 10), None)
-    verify_softmax((1, 10), 1)
-    verify_softmax((1, 2, 3, 10), 0)
-    verify_softmax((1, 2, 3, 10), 2)
-    verify_softmax((1, 2, 3, 4, 10), 3)
-    verify_softmax((1, 2, 3, 4, 10), 4)
-    verify_softmax((1, 10), -1, dynamic=True)
-    verify_softmax((1, 2, 3, 10), -1, dynamic=True)
-    verify_softmax((1, 10), -1, opset=8, dynamic=True)
-    verify_softmax((1, 2, 3, 10), -1, opset=8, dynamic=True)
-
-
-@tvm.testing.parametrize_targets
-def test_forward_min(target, dev):
-    """test_forward_min"""
-
-    def verify_min(input_dim):
-        dtype = "float32"
-
-        a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-        a_np2 = np.random.uniform(size=input_dim).astype(dtype)
-        a_np3 = np.random.uniform(size=input_dim).astype(dtype)
-
-        min_node = helper.make_node("Min", ["a_np1", "a_np2", "a_np3"], ["out"])
-
-        graph = helper.make_graph(
-            [min_node],
-            "Min_test",
-            inputs=[
-                helper.make_tensor_value_info("a_np1", TensorProto.FLOAT, list(input_dim)),
-                helper.make_tensor_value_info("a_np2", TensorProto.FLOAT, list(input_dim)),
-                helper.make_tensor_value_info("a_np3", TensorProto.FLOAT, list(input_dim)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(input_dim))],
-        )
-
-        model = helper.make_model(graph, producer_name="Min_test")
-        verify_with_ort_with_inputs(model, [a_np1, a_np2, a_np3], target=target, dev=dev)
-
-    verify_min((1, 3, 20, 20))
-    verify_min((20, 20))
-
-
-@tvm.testing.parametrize_targets
-def test_forward_max(target, dev):
-    """test_forward_max"""
-
-    def verify_max(input_dim):
-        dtype = "float32"
-
-        a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-        a_np2 = np.random.uniform(size=input_dim).astype(dtype)
-        a_np3 = np.random.uniform(size=input_dim).astype(dtype)
-
-        max_node = helper.make_node("Max", ["a_np1", "a_np2", "a_np3"], ["out"])
-
-        graph = helper.make_graph(
-            [max_node],
-            "Max_test",
-            inputs=[
-                helper.make_tensor_value_info("a_np1", TensorProto.FLOAT, list(input_dim)),
-                helper.make_tensor_value_info("a_np2", TensorProto.FLOAT, list(input_dim)),
-                helper.make_tensor_value_info("a_np3", TensorProto.FLOAT, list(input_dim)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(input_dim))],
-        )
-
-        model = helper.make_model(graph, producer_name="Max_test")
-        verify_with_ort_with_inputs(model, [a_np1, a_np2, a_np3], target=target, dev=dev)
-
-    verify_max((1, 3, 20, 20))
-    verify_max((20, 20))
-
-
-@tvm.testing.parametrize_targets
-def test_forward_mean(target, dev):
-    """test_forward_mean"""
-
-    def verify_mean(input_dim):
-        dtype = "float32"
-
-        a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-        a_np2 = np.random.uniform(size=input_dim).astype(dtype)
-        a_np3 = np.random.uniform(size=input_dim).astype(dtype)
-
-        mean_node = helper.make_node("Mean", ["a_np1", "a_np2", "a_np3"], ["out"])
-
-        graph = helper.make_graph(
-            [mean_node],
-            "Mean_test",
-            inputs=[
-                helper.make_tensor_value_info("a_np1", TensorProto.FLOAT, list(input_dim)),
-                helper.make_tensor_value_info("a_np2", TensorProto.FLOAT, list(input_dim)),
-                helper.make_tensor_value_info("a_np3", TensorProto.FLOAT, list(input_dim)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(input_dim))],
-        )
-
-        model = helper.make_model(graph, producer_name="Mean_test")
-        verify_with_ort_with_inputs(model, [a_np1, a_np2, a_np3], target=target, dev=dev)
-
-    verify_mean((1, 3, 20, 20))
-    verify_mean((20, 20))
-
-
-@tvm.testing.parametrize_targets
-def test_forward_hardsigmoid(target, dev):
-    """test_forward_hardsigmoid"""
-
-    def verify_hardsigmoid(input_dim, alpha, beta):
-        dtype = "float32"
-
-        a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-
-        hardsigmoid_node = helper.make_node(
-            "HardSigmoid", ["a_np1"], ["out"], alpha=alpha, beta=beta
-        )
-
-        graph = helper.make_graph(
-            [hardsigmoid_node],
-            "HardSigmoid_test",
-            inputs=[helper.make_tensor_value_info("a_np1", TensorProto.FLOAT, list(input_dim))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(input_dim))],
-        )
-
-        model = helper.make_model(graph, producer_name="HardSigmoid_test")
-        verify_with_ort_with_inputs(model, [a_np1], target=target, dev=dev)
-
-    verify_hardsigmoid((1, 3, 20, 20), 0.5, 0.6)
-    verify_hardsigmoid((20, 20), 0.3, 0.4)
-
-
-# TODO (mbrookhart, electriclilies) Fix argmin on GPU and enable this test
-@tvm.testing.known_failing_targets("cuda")
-@tvm.testing.parametrize_targets
-def test_forward_arg_min_max(target, dev):
-    """test_forward_arg_min_max"""
-
-    def verify_argreduce(input_dim, op_name, axis=None, keepdims=None):
-        a_np1 = np.random.uniform(-10, 10, input_dim).astype(np.int32)
-        out_shape = list(a_np1.shape)
-        def_axis = axis if axis is not None else 0
-        if keepdims == 1 or keepdims is None:
-            out_shape[def_axis] = 1
-        else:
-            out_shape.pop(def_axis)
-
-        node = onnx.helper.make_node(op_name, inputs=["a_np1"], outputs=["out"])
-
-        if keepdims is not None:
-            keepdims_attr = helper.make_attribute("keepdims", keepdims)
-            node.attribute.append(keepdims_attr)
-        if axis is not None:
-            axis_attr = helper.make_attribute("axis", axis)
-            node.attribute.append(axis_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "argreduce_test",
-            inputs=[helper.make_tensor_value_info("a_np1", TensorProto.INT32, list(a_np1.shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.INT64, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="argreduce_test")
-        verify_with_ort_with_inputs(model, [a_np1], target=target, dev=dev)
-
-    # Verify argmin and argmax
-    verify_argreduce([3, 4, 4], "ArgMin")
-    verify_argreduce([3, 4, 4], "ArgMax")
-    verify_argreduce([3, 4, 4], "ArgMin", axis=1)
-    verify_argreduce([3, 4, 4], "ArgMax", axis=0)
-    verify_argreduce([3, 4, 4], "ArgMin", keepdims=0)
-    verify_argreduce([3, 4, 4], "ArgMax", keepdims=1)
-    for axis in [None, 0, 1, 2]:
-        for keepdims in [None, True, False]:
-            verify_argreduce([3, 4, 4], "ArgMin", axis, keepdims)
-            verify_argreduce([3, 4, 4], "ArgMax", axis, keepdims)
-
-
-@tvm.testing.parametrize_targets
-def test_constantofshape(target, dev):
-    """test_constantofshape"""
-
-    def verify_constantofshape(input_dim, value, dtype):
-        fill_node = helper.make_node(
-            "ConstantOfShape",
-            ["input"],
-            ["output"],
-            value=helper.make_tensor(
-                "value", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], (1,), (value,)
-            ),
-        )
-
-        inputs = [helper.make_tensor_value_info("input", TensorProto.INT64, [len(input_dim)])]
-
-        graph = helper.make_graph(
-            [fill_node],
-            "fill_test",
-            inputs,
-            outputs=[
-                helper.make_tensor_value_info(
-                    "output", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], input_dim
-                )
-            ],
-        )
-
-        model = helper.make_model(graph, producer_name="fill_test")
-        input_np = np.array(input_dim).astype("int64")
-        verify_with_ort_with_inputs(model, [input_np], use_vm=True, target=target, dev=dev)
-
-    verify_constantofshape((2, 3, 4, 5), 10, "float32")
-    verify_constantofshape((3, 3), 0, "int32")
-    verify_constantofshape((1, 2, 3), -1, "float32")
-
-
-@tvm.testing.parametrize_targets
-def test_pad(target, dev):
-    """test_pad"""
-
-    def verify_pad(indata, pads, mode="constant", value=0.0):
-        indata = np.array(indata).astype(np.float32)
-        #  numpy expect result
-        len_dim = len(pads) // 2
-        np_pads = [(pads[i], pads[i + len_dim]) for i in range(len_dim)]
-        #  onnx graph
-        if mode in ["edge", "reflect"]:
-            outdata = np.pad(indata, pad_width=np_pads, mode=mode)
-            node = helper.make_node(
-                "Pad",
-                inputs=["input"],
-                outputs=["output"],
-                mode=mode,
-                pads=pads,
-            )
-        else:
-            outdata = np.pad(indata, pad_width=np_pads, mode="constant", constant_values=value)
-            node = helper.make_node(
-                "Pad", inputs=["input"], outputs=["output"], mode="constant", pads=pads, value=value
-            )
-        graph = helper.make_graph(
-            [node],
-            "pad_test",
-            inputs=[helper.make_tensor_value_info("input", TensorProto.FLOAT, list(indata.shape))],
-            outputs=[
-                helper.make_tensor_value_info("output", TensorProto.FLOAT, list(outdata.shape))
-            ],
-        )
-        model = helper.make_model(graph, producer_name="pad_test")
-        verify_with_ort_with_inputs(
-            model, [indata], [outdata.shape], dtype="float32", opset=2, target=target, dev=dev
-        )
-
-    def verify_pad_v11(indata, pads, mode="constant", value=0.0):
-        indata = np.array(indata).astype(np.float32)
-        #  numpy expect result
-        len_dim = len(pads) // 2
-        np_pads = [(pads[i], pads[i + len_dim]) for i in range(len_dim)]
-        pads = np.array(pads)
-        #  onnx graph
-        if mode in ["edge", "reflect"]:
-            inputs = [indata]
-            outdata = np.pad(indata, pad_width=np_pads, mode=mode)
-            node = helper.make_node("Pad", inputs=["input", "pads"], outputs=["output"], mode=mode)
-            graph = helper.make_graph(
-                [node],
-                "pad_test",
-                inputs=[
-                    helper.make_tensor_value_info("input", TensorProto.FLOAT, list(indata.shape)),
-                    helper.make_tensor_value_info("pads", TensorProto.INT64, (len(pads),)),
-                ],
-                initializer=[helper.make_tensor("pads", TensorProto.INT64, (len(pads),), pads)],
-                outputs=[
-                    helper.make_tensor_value_info("output", TensorProto.FLOAT, list(outdata.shape))
-                ],
-            )
-        else:
-            inputs = [indata]
-            outdata = np.pad(indata, pad_width=np_pads, mode="constant", constant_values=value)
-            node = helper.make_node(
-                "Pad",
-                inputs=["input", "pads", "constant_value"],
-                outputs=["output"],
-                mode="constant",
-            )
-            graph = helper.make_graph(
-                [node],
-                "pad_test",
-                inputs=[
-                    helper.make_tensor_value_info("input", TensorProto.FLOAT, list(indata.shape)),
-                    helper.make_tensor_value_info("pads", TensorProto.INT64, (len(pads),)),
-                    helper.make_tensor_value_info("constant_value", TensorProto.FLOAT, (1,)),
-                ],
-                initializer=[
-                    helper.make_tensor("pads", TensorProto.INT64, (len(pads),), pads),
-                    helper.make_tensor("constant_value", TensorProto.FLOAT, (1,), [value]),
-                ],
-                outputs=[
-                    helper.make_tensor_value_info("output", TensorProto.FLOAT, list(outdata.shape))
-                ],
-            )
-        model = helper.make_model(graph, producer_name="pad_test")
-        verify_with_ort_with_inputs(model, inputs, opset=11, use_vm=True, target=target, dev=dev)
-
-    verify_pad(np.random.randn(2, 2).astype(np.float32), [0, 1, 0, 0], "constant", 0.0)
-    verify_pad(np.random.randn(2, 3).astype(np.float32), [1, 0, 0, 1], "constant", 0.0)
-    verify_pad(np.random.randn(3, 2).astype(np.float32), [0, 0, 1, 0], "constant", 5.0)
-    verify_pad(np.random.randn(1, 3, 4, 5).astype(np.float32), [0, 0, 1, 1, 0, 0, 1, 1], "edge")
-    verify_pad(np.random.randn(1, 3, 4, 5).astype(np.float32), [0, 0, 1, 1, 0, 0, 1, 1], "reflect")
-
-    verify_pad_v11(np.random.randn(2, 2).astype(np.float32), [0, 1, 0, 0], "constant", 0.0)
-    verify_pad_v11(np.random.randn(2, 3).astype(np.float32), [1, 0, 0, 1], "constant", 0.0)
-    verify_pad_v11(np.random.randn(3, 2).astype(np.float32), [0, 0, 1, 0], "constant", 5.0)
-    verify_pad_v11(np.random.randn(1, 3, 4, 5).astype(np.float32), [0, 0, 1, 1, 0, 0, 1, 1], "edge")
-    verify_pad_v11(
-        np.random.randn(1, 3, 4, 5).astype(np.float32), [0, 0, 1, 1, 0, 0, 1, 1], "reflect"
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_all_reduce_funcs(target, dev):
-    """test_all_reduce_funcs"""
-
-    def verify_reduce_func(func, data, axis, keepdims):
-        inshape = data.shape
-        outshape = np.sum(data, axis=axis, keepdims=keepdims == 1).shape
-
-        if axis:
-            node = onnx.helper.make_node(
-                func, inputs=["x"], outputs=["y"], axes=axis, keepdims=keepdims
-            )
-        else:
-            node = onnx.helper.make_node(func, inputs=["x"], outputs=["y"], keepdims=keepdims)
-
-        graph = helper.make_graph(
-            [node],
-            "reduce_test",
-            inputs=[helper.make_tensor_value_info("x", TensorProto.FLOAT, list(inshape))],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(outshape))],
-        )
-
-        model = helper.make_model(graph, producer_name="reduce_test")
-
-        verify_with_ort_with_inputs(
-            model,
-            [data],
-            [outshape],
-            opset=11,
-            target=target,
-            dev=dev,
-            rtol=1e-4,
-            atol=1e-4,
-        )
-
-    funcs = [
-        "ReduceMax",
-        "ReduceMean",
-        "ReduceMin",
-        "ReduceProd",
-        "ReduceSum",
-        "ReduceSumSquare",
-        "ReduceLogSum",
-        "ReduceLogSumExp",
-        "ReduceL1",
-        "ReduceL2",
-    ]
-
-    for func in funcs:
-        verify_reduce_func(func, np.array(1.0).astype(np.float32), axis=None, keepdims=False)
-
-        for keepdims in [True, False]:
-            verify_reduce_func(
-                func, np.random.randn(3, 2, 2).astype(np.float32), axis=None, keepdims=keepdims
-            )
-
-            verify_reduce_func(
-                func, np.random.randn(3, 2, 3).astype(np.float32), axis=None, keepdims=keepdims
-            )
-
-            verify_reduce_func(
-                func, np.random.randn(3, 3, 3).astype(np.float32), axis=(1,), keepdims=keepdims
-            )
-
-            verify_reduce_func(
-                func, np.random.randn(3, 3, 3, 1).astype(np.float32), axis=(1, 2), keepdims=keepdims
-            )
-
-            verify_reduce_func(
-                func, np.random.randn(3, 3, 3, 1).astype(np.float32), axis=(1,), keepdims=keepdims
-            )
-
-            verify_reduce_func(
-                func, np.random.randn(1, 3, 4, 1).astype(np.float32), axis=(1,), keepdims=keepdims
-            )
-
-
-@tvm.testing.parametrize_targets
-def test_split(target, dev):
-    """test_split"""
-
-    def verify_split(indata, outdatas, split, axis=0, pass_split=True, opset=11):
-        indata = np.array(indata).astype(np.float32)
-        outdatas = [np.array(o).astype(np.float32) for o in outdatas]
-        inputs = [helper.make_tensor_value_info("input", TensorProto.FLOAT, list(indata.shape))]
-        input_names = ["input"]
-        initializer = []
-
-        if split:
-            split_index = range(len(split))
-        else:
-            split_index = range(len(outdatas))
-
-        if pass_split:
-            if opset >= 13:
-                input_names.append("split")
-                np_split = np.array(split).astype(np.int64)
-                inputs.append(
-                    helper.make_tensor_value_info("split", TensorProto.INT64, list(np_split.shape))
-                )
-                # TODO(mbrookhart): Support dynamic split, edit this test case to remove split from
-                # the initializer and add it back to the input data
-                indata = [indata]  # , np_split]
-                initializer.append(
-                    helper.make_tensor("split", TensorProto.INT64, list(np_split.shape), np_split)
-                )
-        node = helper.make_node(
-            "Split",
-            inputs=input_names,
-            outputs=[f"output_{i}" for i in range(len(split_index))],
-            axis=axis,
-        )
-
-        if pass_split and opset < 13:
-            split_attr = helper.make_attribute("split", split)
-            node.attribute.append(split_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "split_test",
-            inputs=inputs,
-            initializer=initializer,
-            outputs=[
-                helper.make_tensor_value_info(
-                    f"output_{i}", TensorProto.FLOAT, list(outdatas[i].shape)
-                )
-                for i in range(len(split_index))
-            ],
-        )
-        model = helper.make_model(graph, producer_name="split_test")
-        verify_with_ort_with_inputs(
-            model,
-            indata,
-            out_shape=list(range(len(split_index))),
-            opset=opset,
-            target=target,
-            dev=dev,
-            use_vm=True,
-            freeze_params=(opset >= 13),
-        )
-
-    # 1D
-    verify_split([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], [2, 2, 2], 0)
-    verify_split(
-        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], [2, 2, 2], 0, False
-    )
-    verify_split([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], [[1.0, 2.0], [3.0], [4.0, 5.0, 6.0]], [2, 1, 3], 0)
-    verify_split(
-        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], [[1.0, 2.0], [3.0], [4.0, 5.0, 6.0]], [2, 1, 3], 0, opset=13
-    )
-    # 2D
-    verify_split(
-        [[1.0, 2.0, 3.0, 4.0], [7.0, 8.0, 9.0, 10.0]],
-        [[[1.0, 2.0], [7.0, 8.0]], [[3.0, 4.0], [9.0, 10.0]]],
-        [2, 2],
-        1,
-    )
-    verify_split(
-        [[1.0, 2.0, 3.0, 4.0], [7.0, 8.0, 9.0, 10.0]],
-        [[[1.0, 2.0], [7.0, 8.0]], [[3.0, 4.0], [9.0, 10.0]]],
-        [2, 2],
-        1,
-        opset=13,
-    )
-    # Split evenly (unstack)
-    verify_split([1, 2, 3], [[1], [2], [3]], False, 0, False)
-    # Split a single value to a single value
-    verify_split([1], [[1]], [1], pass_split=True)
-    # Test that the default case modifies nothing when split list has length one
-    verify_split([[1.0, 2.0]], [[1.0, 2.0]], [2], 1)
-    verify_split([[1.0, 2.0]], [[1.0, 2.0]], [1], 0)
-
-
-@tvm.testing.parametrize_targets
-def test_binary_ops(target, dev):
-    """test_binary_ops"""
-    in_shape = (1, 2, 3, 3)
-    dtype = "float32"
-    out_shape = in_shape
-
-    def verify_binary_ops(op, x, y, out_type="float32"):
-        out = helper.make_node(op, ["in1", "in2"], ["out"])
-        graph = helper.make_graph(
-            [out],
-            "_test",
-            inputs=[
-                helper.make_tensor_value_info("in1", TensorProto.FLOAT, x.shape),
-                helper.make_tensor_value_info("in2", TensorProto.FLOAT, y.shape),
-            ],
-            outputs=[
-                helper.make_tensor_value_info(
-                    "out", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(out_type)], list(out_shape)
-                )
-            ],
-        )
-        model = helper.make_model(graph, producer_name="_test")
-        verify_with_ort_with_inputs(model, [x, y], target=target, dev=dev)
-
-    x = np.random.uniform(size=in_shape).astype(dtype)
-    y = np.random.uniform(size=in_shape).astype(dtype)
-    z_array = np.random.uniform(size=(3,)).astype(dtype)
-    verify_binary_ops("Add", x, y)
-    verify_binary_ops("Add", x, z_array)
-    verify_binary_ops("Sub", x, y)
-    verify_binary_ops("Sub", x, z_array)
-    verify_binary_ops("Mul", x, y)
-    verify_binary_ops("Mul", x, z_array)
-    verify_binary_ops("Div", x, y)
-    verify_binary_ops("Div", x, z_array)
-    verify_binary_ops("Sum", x, y)
-    verify_binary_ops("Sum", x, z_array)
-    verify_binary_ops("Greater", x, y, "bool")
-    verify_binary_ops("Greater", x, z_array, "bool")
-    verify_binary_ops("GreaterOrEqual", x, y, "bool")
-    verify_binary_ops("GreaterOrEqual", x, z_array, "bool")
-    verify_binary_ops("Less", x, y, "bool")
-    verify_binary_ops("Less", x, z_array, "bool")
-    verify_binary_ops("LessOrEqual", x, y, "bool")
-    verify_binary_ops("LessOrEqual", x, z_array, "bool")
-    verify_binary_ops("Equal", x, y, "bool")
-    verify_binary_ops("Equal", x, z_array, "bool")
-
-
-@tvm.testing.parametrize_targets
-def test_unary_ops(target, dev):
-    """test_unary_ops"""
-    in_shape = (1, 2, 3, 3)
-    _ = "float32"
-    out_shape = in_shape
-
-    def verify_unary_ops(op, x, rtol=1e-5, atol=1e-5, dtype="float32"):
-        x = x.astype(dtype)
-        ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-        out = helper.make_node(op, ["in1"], ["out"])
-        graph = helper.make_graph(
-            [out],
-            "_test",
-            inputs=[
-                helper.make_tensor_value_info("in1", ONNX_DTYPE, list(in_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", ONNX_DTYPE, list(out_shape))],
-        )
-        model = helper.make_model(graph, producer_name="_test")
-        verify_with_ort_with_inputs(model, [x], rtol=rtol, atol=atol, target=target, dev=dev)
-
-    x = np.random.uniform(size=in_shape)
-    verify_unary_ops("Neg", x)
-    verify_unary_ops("Abs", x)
-    verify_unary_ops("Reciprocal", x)
-    verify_unary_ops("Reciprocal", x, dtype="float16")
-    verify_unary_ops("Sqrt", x)
-    verify_unary_ops("Relu", x)
-    verify_unary_ops("Exp", x)
-    verify_unary_ops("Log", x)
-    verify_unary_ops("Log", x)
-    verify_unary_ops("Acos", x)
-    verify_unary_ops("Acosh", x)
-    verify_unary_ops("Asin", x)
-    verify_unary_ops("Asinh", x)
-    verify_unary_ops("Atan", x)
-    verify_unary_ops("Atanh", x)
-    verify_unary_ops("Cos", x)
-    verify_unary_ops("Cosh", x)
-    verify_unary_ops("Sin", x)
-    verify_unary_ops("Sinh", x)
-    verify_unary_ops("Tan", x)
-    verify_unary_ops("Tanh", x)
-    verify_unary_ops("Sigmoid", x)
-    verify_unary_ops("Softsign", x)
-
-
-@tvm.testing.parametrize_targets
-def test_leaky_relu(target, dev):
-    def leaky_relu_x(x, alpha):
-        return np.where(x >= 0, x, x * alpha)
-
-    _test_onnx_op_elementwise(
-        target,
-        dev,
-        (2, 4, 5, 6),
-        leaky_relu_x,
-        {"alpha": 0.25},
-        "float32",
-        "LeakyRelu",
-        {"alpha": 0.25},
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_elu(target, dev):
-    def elu_x(x, alpha):
-        return np.where(x > 0, x, alpha * (np.exp(x) - 1.0))
-
-    _test_onnx_op_elementwise(
-        target, dev, (2, 4, 5, 6), elu_x, {"alpha": 0.25}, "float32", "Elu", {"alpha": 0.25}
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_selu(target, dev):
-    def selu_x(x, alpha, gamma):
-        return gamma * np.where(x > 0, x, alpha * (np.exp(x) - 1.0))
-
-    _test_onnx_op_elementwise(
-        target,
-        dev,
-        (2, 4, 5, 6),
-        selu_x,
-        {"alpha": 0.25, "gamma": 0.3},
-        "float32",
-        "Selu",
-        {"alpha": 0.25, "gamma": 0.3},
-    )
-
-
-@pytest.mark.skip("Currently ONNX Runtime in CI does not support domain version of 18")
-@tvm.testing.parametrize_targets
-def test_mish(target, dev):
-    def mish_x(x):
-        return x * np.tanh(np.log1p(np.exp(x)))
-
-    _test_onnx_op_elementwise(target, dev, (2, 4, 5, 6), mish_x, {}, "float64", "Mish", {})
-
-
-@tvm.testing.parametrize_targets
-def test_prelu(target, dev):
-    """test_prelu"""
-
-    def verify_prelu(x_shape, a_shape):
-        node = helper.make_node("PRelu", inputs=["X", "slope"], outputs=["Y"])
-
-        graph = helper.make_graph(
-            [node],
-            "prelu_test",
-            inputs=[
-                helper.make_tensor_value_info("X", TensorProto.FLOAT, list(x_shape)),
-                helper.make_tensor_value_info("slope", TensorProto.FLOAT, list(a_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(x_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="prelu_test")
-
-        verify_with_ort(
-            model,
-            [x_shape, a_shape],
-            out_shape=[list(x_shape)],
-            use_vm=True,
-            target=target,
-            dev=dev,
-        )
-
-    verify_prelu([3, 4, 5, 6], [1, 4, 1, 1])
-    verify_prelu([1, 8, 5, 6], [1, 8, 1, 1])
-    verify_prelu([2, 12, 16, 16], [1, 12, 1, 1])
-    verify_prelu([2, 12, 16, 16], [1])  # Test alpha broadcasting.
-    verify_prelu([3, 1], [3, 1])  # Test non NCHW workload.
-
-
-@tvm.testing.parametrize_targets
-def test_thresholded_relu(target, dev):
-    def thresholded_relu_x(x, alpha):
-        out_np = np.clip(x, alpha, np.inf)
-        out_np[out_np == alpha] = 0
-        return out_np
-
-    _test_onnx_op_elementwise(
-        target,
-        dev,
-        (2, 4, 5, 6),
-        thresholded_relu_x,
-        {"alpha": 0.25},
-        "float32",
-        "ThresholdedRelu",
-        {"alpha": 0.25},
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_logsoftmax(target, dev):
-    _test_onnx_op_elementwise(
-        target,
-        dev,
-        (1, 4),
-        tvm.topi.testing.log_softmax_python,
-        {},
-        "float32",
-        "LogSoftmax",
-        {"axis": 1},
-    )
-
-
-def check_torch_conversion(model, input_size, target, dev):
-    dummy_input = torch.randn(*input_size)
-    file_name = f"{model.__name__}.onnx"
-    # Set verbose=True for more output
-    torch.onnx.export(model(), dummy_input, file_name, export_params=True, verbose=False)
-    onnx_model = onnx.load(file_name)
-    input_data = np.random.uniform(size=input_size).astype("float32")
-    verify_with_ort_with_inputs(
-        onnx_model, [input_data], apply_softmax=True, target=target, dev=dev
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_resnet(target, dev):
-    check_torch_conversion(torchvision.models.resnet18, (1, 3, 224, 224), target, dev)
-    # check_torch_conversion(torchvision.models.resnet101, (1,3,224,224))
-
-
-# def test_alexnet():
-# Torch's ONNX export does not support the adaptive pooling used by AlexNet?
-# check_torch_conversion(torchvision.models.alexnet, (1,3,224,224))
-
-# Torch's ONNX export does not support the adaptive pooling used by vgg16?
-# def test_vgg16():
-#     check_torch_conversion(torchvision.models.vgg16, (1,3,224,224))
-
-# TODO(@jroesch): Update Torch + ONNX to support this import.
-# def test_squeezenet():
-#     # Torch's ONNX export does not support the max pooling used by Squezenet
-#     check_torch_conversion(torchvision.models.squeezenet1_0, (1,3,224,224))
-
-
-@tvm.testing.parametrize_targets
-def test_densenet(target, dev):
-    check_torch_conversion(torchvision.models.densenet161, (1, 3, 224, 224), target, dev)
-
-
-@tvm.testing.parametrize_targets
-def test_inception(target, dev):
-    check_torch_conversion(torchvision.models.inception_v3, (1, 3, 224, 224), target, dev)
-
-
-# TODO(@jroesch): Update Torch + ONNX to support this import.
-# def test_googlenet():
-#     check_torch_conversion(torchvision.models.googlenet, (1,3,224,224))
-
-# TODO(@jroesch): Update Torch + ONNX to support this import.
-# def test_shufflenetv2():
-#     check_torch_conversion(torchvision.models.shufflenetv2, (1,3,224,224))
-
-
-@tvm.testing.parametrize_targets
-def test_sign(target, dev):
-    def sign_x(x):
-        return np.sign(x)
-
-    _test_onnx_op_elementwise(target, dev, (3, 4, 5, 6), sign_x, {}, "float32", "Sign", {})
-
-
-@tvm.testing.parametrize_targets
-def test_not(target, dev):
-    """test_not"""
-
-    def verify_not(indata, dtype):
-        x = indata.astype(dtype)
-
-        node = helper.make_node(
-            "Not",
-            inputs=["in"],
-            outputs=["out"],
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "not_test",
-            inputs=[helper.make_tensor_value_info("in", TensorProto.BOOL, list(x.shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.BOOL, list(x.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="not_test")
-        verify_with_ort_with_inputs(model, [x], target=target, dev=dev)
-
-    # 2d
-    verify_not(indata=(np.random.randn(3, 4) > 0), dtype=bool)
-    # 3d
-    verify_not(indata=(np.random.randn(3, 4, 5) > 0), dtype=bool)
-    # 4d
-    verify_not(indata=(np.random.randn(3, 4, 5, 6) > 0), dtype=bool)
-
-
-@tvm.testing.parametrize_targets
-def test_and(target, dev):
-    """test_and"""
-
-    def verify_and(indata, dtype):
-        x = indata[0].astype(dtype)
-        y = indata[1].astype(dtype)
-        outdata = np.logical_and(x, y)
-
-        node = helper.make_node(
-            "And",
-            inputs=["in1", "in2"],
-            outputs=["out"],
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "and_test",
-            inputs=[
-                helper.make_tensor_value_info("in1", TensorProto.BOOL, list(x.shape)),
-                helper.make_tensor_value_info("in2", TensorProto.BOOL, list(y.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.BOOL, list(outdata.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="and_test")
-        verify_with_ort_with_inputs(model, [x, y], [outdata.shape], target=target, dev=dev)
-
-    # 2d
-    x = np.random.randn(3, 4) > 0
-    y = np.random.randn(3, 4) > 0
-    verify_and(indata=[x, y], dtype=bool)
-
-    # 3d
-    x = np.random.randn(3, 4, 5) > 0
-    y = np.random.randn(3, 4, 5) > 0
-    verify_and(indata=[x, y], dtype=bool)
-
-    # 4d
-    x = np.random.randn(3, 4, 5, 6) > 0
-    y = np.random.randn(3, 4, 5, 6) > 0
-    verify_and(indata=[x, y], dtype=bool)
-
-    # 3d vs 1d
-    x = np.random.randn(3, 4, 5) > 0
-    y = np.random.randn(5) > 0
-    verify_and(indata=[x, y], dtype=bool)
-
-    # 3d vs 2d
-    x = np.random.randn(3, 4, 5) > 0
-    y = np.random.randn(4, 5) > 0
-    verify_and(indata=[x, y], dtype=bool)
-
-
-@tvm.testing.parametrize_targets
-def test_tile(target, dev):
-    """test_tile"""
-
-    def verify_tile_v6(indata, repeats, outdata):
-        node = helper.make_node("Tile", inputs=["input", "repeats"], outputs=["out"])
-        graph = helper.make_graph(
-            [node],
-            "tile_test",
-            inputs=[
-                helper.make_tensor_value_info("input", TensorProto.FLOAT, list(indata.shape)),
-                helper.make_tensor_value_info("repeats", TensorProto.INT64, list(repeats.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(outdata.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="tile_test")
-        verify_with_ort_with_inputs(
-            model, [indata, repeats], use_vm=True, opset=6, target=target, dev=dev
-        )
-
-    x = np.random.rand(2, 3, 4, 5).astype(np.float32)
-    repeats = np.random.randint(low=1, high=10, size=(np.ndim(x),)).astype(np.int64)
-    z_array = np.tile(x, repeats)
-    verify_tile_v6(x, repeats, z_array)
-
-
-@tvm.testing.parametrize_targets
-def test_erf(target, dev):
-    """test_erf"""
-
-    def verify_erf(indata, outdata):
-        node = helper.make_node("Erf", inputs=["in"], outputs=["out"])
-        graph = helper.make_graph(
-            [node],
-            "erf_test",
-            inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(indata.shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(outdata.shape))],
-        )
-        model = helper.make_model(graph, producer_name="erf_test")
-        verify_with_ort_with_inputs(model, [indata], [outdata.shape], target=target, dev=dev)
-
-    x = np.random.rand(2, 3, 4, 6).astype(np.float32)
-    z_array = scipy.special.erf(x)
-    verify_erf(x, z_array)
-
-
-@tvm.testing.parametrize_targets
-def test_where(target, dev):
-    """test_where"""
-
-    def verify_where(condition, x, y, dtype, outdata, dynamic=False):
-        node_list = []
-        where_inputs = ["condition", "x", "y"]
-        if dynamic:
-            shape_node = helper.make_node("Shape", ["x"], ["shape"])
-            reshape_node = helper.make_node("Reshape", ["x", "shape"], ["X"])
-            where_inputs[1] = "X"
-            node_list += [shape_node, reshape_node]
-        node = helper.make_node("Where", inputs=where_inputs, outputs=["out"])
-        node_list.append(node)
-        graph = helper.make_graph(
-            node_list,
-            "where_test",
-            inputs=[
-                helper.make_tensor_value_info("condition", TensorProto.BOOL, list(condition.shape)),
-                helper.make_tensor_value_info("x", dtype, list(x.shape)),
-                helper.make_tensor_value_info("y", dtype, list(y.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", dtype, list(outdata.shape))],
-        )
-        model = helper.make_model(graph, producer_name="where_test")
-        verify_with_ort_with_inputs(
-            model, [condition, x, y], [outdata.shape], use_vm=True, target=target, dev=dev
-        )
-
-    condition = np.array([[1, 0], [1, 1]], dtype=bool)
-    x = np.array([[1, 2], [3, 4]], dtype=np.int64)
-    y = np.array([[9, 8], [7, 6]], dtype=np.int64)
-    outdata = np.where(condition, x, y)
-    verify_where(condition, x, y, TensorProto.INT64, outdata)
-
-    x = np.array([[1, 2], [3, 4]], dtype=np.float32)
-    y = np.array([[9, 8], [7, 6]], dtype=np.float32)
-    outdata = np.where(condition, x, y)
-    verify_where(condition, x, y, TensorProto.FLOAT, outdata)
-
-    x = np.array(1, dtype=np.float32)
-    y = np.array([2], dtype=np.float32)
-    outdata = np.where(condition, x, y)
-    verify_where(condition, x, y, TensorProto.FLOAT, outdata)
-
-    x = np.array([2], dtype=np.float32)
-    y = np.array(1, dtype=np.float32)
-    outdata = np.where(condition, x, y)
-    verify_where(condition, x, y, TensorProto.FLOAT, outdata)
-
-    condition = np.array(1, dtype=bool)
-    x = np.array([[1, 2], [3, 4]], dtype=np.float32)
-    y = np.array([[5, 6], [7, 8]], dtype=np.float32)
-    outdata = np.where(condition, x, y)
-    verify_where(condition, x, y, TensorProto.FLOAT, outdata)
-
-    x = np.array([[1, 2], [3, 4]], dtype=np.float32)
-    y = np.array([[1], [7]], dtype=np.float32)
-    outdata = np.where(condition, x, y)
-    verify_where(condition, x, y, TensorProto.FLOAT, outdata)
-    verify_where(condition, x, y, TensorProto.FLOAT, outdata, dynamic=True)
-
-    condition = np.random.uniform(size=(3, 1)) < 0.5
-    x = np.random.uniform(size=2).astype(np.float32)
-    y = np.random.uniform(size=2).astype(np.float32)
-    outdata = np.where(condition, x, y)
-    verify_where(condition, x, y, TensorProto.FLOAT, outdata)
-
-
-@tvm.testing.parametrize_targets
-def test_or(target, dev):
-    """test_or"""
-
-    def verify_or(indata, dtype):
-        x = indata[0].astype(dtype)
-        y = indata[1].astype(dtype)
-        outdata = np.logical_or(x, y)
-
-        node = helper.make_node(
-            "Or",
-            inputs=["in1", "in2"],
-            outputs=["out"],
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "or_test",
-            inputs=[
-                helper.make_tensor_value_info("in1", TensorProto.BOOL, list(x.shape)),
-                helper.make_tensor_value_info("in2", TensorProto.BOOL, list(y.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.BOOL, list(outdata.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="or_test")
-        verify_with_ort_with_inputs(model, [x, y], [outdata.shape], target=target, dev=dev)
-
-    # 2d
-    x = np.random.randn(3, 4) > 0
-    y = np.random.randn(3, 4) > 0
-    verify_or(indata=[x, y], dtype=bool)
-
-    # 3d
-    x = np.random.randn(3, 4, 5) > 0
-    y = np.random.randn(3, 4, 5) > 0
-    verify_or(indata=[x, y], dtype=bool)
-
-    # 4d
-    x = np.random.randn(3, 4, 5, 6) > 0
-    y = np.random.randn(3, 4, 5, 6) > 0
-    verify_or(indata=[x, y], dtype=bool)
-
-    # 3d vs 1d
-    x = np.random.randn(3, 4, 5) > 0
-    y = np.random.randn(5) > 0
-    verify_or(indata=[x, y], dtype=bool)
-
-    # 3d vs 2d
-    x = np.random.randn(3, 4, 5) > 0
-    y = np.random.randn(4, 5) > 0
-    verify_or(indata=[x, y], dtype=bool)
-
-
-@tvm.testing.parametrize_targets
-def test_batch_norm(target, dev):
-    """test_batch_norm"""
-
-    def verify_batch_norm(in_shape):
-        batchnorm = onnx.helper.make_node(
-            "BatchNormalization", inputs=["x", "scale", "B", "mean", "var"], outputs=["Y"]
-        )
-
-        graph = helper.make_graph(
-            [batchnorm],
-            "batchnorm_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(in_shape)),
-                helper.make_tensor_value_info("scale", TensorProto.FLOAT, [in_shape[1]]),
-                helper.make_tensor_value_info("B", TensorProto.FLOAT, [in_shape[1]]),
-                helper.make_tensor_value_info("mean", TensorProto.FLOAT, [in_shape[1]]),
-                helper.make_tensor_value_info("var", TensorProto.FLOAT, [in_shape[1]]),
-            ],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(in_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="batchnorm_test")
-        # X, scale, b, mean, var
-        inshapes = [in_shape, in_shape[1], in_shape[1], in_shape[1], in_shape[1]]
-        verify_with_ort(model, inshapes, out_shape=[in_shape], target=target, dev=dev)
-
-    verify_batch_norm([1, 3, 224, 224])
-    verify_batch_norm([1, 3, 24, 24])
-    verify_batch_norm([16, 3, 24, 24])
-    verify_batch_norm([16, 16, 24, 24])
-    verify_batch_norm([16, 16, 10, 10])
-
-
-@tvm.testing.parametrize_targets
-def test_batch_norm_dynamic_subgraph(target, dev):
-    """test_batch_norm_dynamic_subgraph"""
-
-    def verify_batch_norm_dynamic_subgraph(in_shape, o_shape):
-
-        batchnorm = onnx.helper.make_node(
-            "BatchNormalization", inputs=["x", "scale", "B", "mean", "var"], outputs=["Y"]
-        )
-
-        shape_node = helper.make_node("Shape", ["Y"], ["shape"])
-        reshape_node = helper.make_node("Reshape", ["in", "shape"], ["out"])
-        graph = helper.make_graph(
-            [batchnorm, shape_node, reshape_node],
-            "batchnorm_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(in_shape)),
-                helper.make_tensor_value_info("in", TensorProto.FLOAT, list(o_shape)),
-                helper.make_tensor_value_info("scale", TensorProto.FLOAT, [in_shape[1]]),
-                helper.make_tensor_value_info("B", TensorProto.FLOAT, [in_shape[1]]),
-                helper.make_tensor_value_info("mean", TensorProto.FLOAT, [in_shape[1]]),
-                helper.make_tensor_value_info("var", TensorProto.FLOAT, [in_shape[1]]),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(in_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="batchnorm_test")
-
-        # X, inp, scale, b, mean, var
-        inshapes = [in_shape, o_shape, in_shape[1], in_shape[1], in_shape[1], in_shape[1]]
-        verify_with_ort(model, inshapes, out_shape=[in_shape], use_vm=True, target=target, dev=dev)
-
-    verify_batch_norm_dynamic_subgraph([16, 16, 10, 10], [160, 160])
-
-
-@tvm.testing.parametrize_targets
-def test_conv(target, dev):
-    """test_conv"""
-
-    def verify_conv(
-        x_shape,
-        w_shape,
-        y_shape,
-        padding,
-        kernel_shape,
-        strides,
-        dilations,
-        group=1,
-        auto_pad="NOTSET",
-        unset_pad=False,
-    ):
-        if unset_pad:
-            node = helper.make_node(
-                "Conv",
-                inputs=["x", "W"],
-                outputs=["y"],
-                kernel_shape=kernel_shape,
-                # Default values for other attributes:
-                strides=strides,
-                dilations=dilations,
-                group=group,
-            )
-        elif padding is None:
-            ## autopadding with unset default attributes
-            kwargs = {}
-            if not all(list(s == 1 for s in strides)):
-                kwargs["strides"] = strides
-            if not all(list(d == 1 for d in dilations)):
-                kwargs["dilations"] = dilations
-
-            node = helper.make_node(
-                "Conv",
-                inputs=["x", "W"],
-                outputs=["y"],
-                # Default values for other attributes:
-                auto_pad=auto_pad,
-                group=group,
-                **kwargs,
-            )
-        else:
-            node = helper.make_node(
-                "Conv",
-                inputs=["x", "W"],
-                outputs=["y"],
-                kernel_shape=kernel_shape,
-                # Default values for other attributes:
-                strides=strides,
-                dilations=dilations,
-                group=group,
-                pads=padding,
-            )
-
-        graph = helper.make_graph(
-            [node],
-            "conv_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape)),
-                helper.make_tensor_value_info("W", TensorProto.FLOAT, list(w_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(y_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="conv_test")
-
-        verify_with_ort(
-            model,
-            [x_shape, w_shape],
-            [y_shape],
-            use_vm=True,
-            target=target,
-            dev=dev,
-        )
-
-    def repeat(num, dims):
-        return tuple(num for _ in range(dims))
-
-    for dims in [1, 2, 3]:
-        # Convolution with padding
-        verify_conv(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            (1, 1) + repeat(5, dims),
-            2 * repeat(1, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-        )
-        # Convolution with asymmetric padding
-        verify_conv(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            (1, 1) + repeat(4, dims),
-            repeat(0, dims) + repeat(1, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-        )
-        # Convolution without padding
-        verify_conv(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            (1, 1) + repeat(3, dims),
-            2 * repeat(0, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-        )
-        # Convolution with autopadding
-        verify_conv(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            (1, 1) + repeat(5, dims),
-            None,
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-            auto_pad="SAME_UPPER",
-        )
-        # Convolution with valid autopadding
-        verify_conv(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            (1, 1) + repeat(3, dims),
-            None,
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-            auto_pad="VALID",
-        )
-        # Convolution with unset padding
-        verify_conv(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            (1, 1) + repeat(3, dims),
-            2 * repeat(0, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-            True,
-        )
-        # Convolution with non uniform stride
-        verify_conv(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            (1, 1) + repeat(3, dims),
-            None,
-            repeat(3, dims),
-            repeat(2, dims),
-            repeat(1, dims),
-            auto_pad="SAME_UPPER",
-        )
-        # Convolution with dilation
-        verify_conv(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            (1, 1) + repeat(5, dims),
-            2 * repeat(2, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(2, dims),
-        )
-
-    # TODO(jwfromm): Merge with other tests once group_conv3d is supported.
-    for dims in [1, 2, 3]:
-        # Group Convolution
-        verify_conv(
-            (1, 8) + repeat(5, dims),
-            (8, 1) + repeat(3, dims),
-            (1, 8) + repeat(5, dims),
-            2 * repeat(1, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-            group=8,
-        )
-
-        verify_conv(
-            (1, 12) + repeat(5, dims),
-            (30, 4) + repeat(3, dims),
-            (1, 30) + repeat(5, dims),
-            2 * repeat(1, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-            group=3,
-        )
-
-
-@tvm.testing.parametrize_targets
-def test_convtranspose(target, dev):
-    """test_convtranspose"""
-
-    def verify_convtranspose_with_output_shape(
-        x_shape,
-        w_shape,
-        output_shape,
-        kernel_shape,
-        strides,
-        dilations,
-        auto_pad="SAME_UPPER",
-        group=1,
-    ):
-        node = helper.make_node(
-            "ConvTranspose",
-            inputs=["x", "W"],
-            outputs=["y"],
-            kernel_shape=kernel_shape,
-            # Default values for other attributes:
-            strides=strides,
-            dilations=dilations,
-            output_shape=output_shape,
-            auto_pad=auto_pad,
-        )
-
-        if group is not None:
-            group_attr = helper.make_attribute("group", group)
-            node.attribute.append(group_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "ConvTranspose_with_output_shape_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape)),
-                helper.make_tensor_value_info("W", TensorProto.FLOAT, list(w_shape)),
-            ],
-            outputs=[
-                helper.make_tensor_value_info("y", TensorProto.FLOAT, [1, 1] + list(output_shape))
-            ],
-        )
-
-        model = helper.make_model(graph, producer_name="convtranspose_output_shape_test")
-
-        verify_with_ort(model, [x_shape, w_shape], use_vm=True, target=target, dev=dev)
-
-    def verify_convtranspose_with_padding(
-        x_shape,
-        w_shape,
-        padding,
-        kernel_shape,
-        strides,
-        dilations,
-        auto_pad="NOTSET",
-        unset_pad=False,
-        group=1,
-    ):
-        node = helper.make_node(
-            "ConvTranspose",
-            inputs=["x", "W"],
-            outputs=["y"],
-            kernel_shape=kernel_shape,
-            # Default values for other attributes:
-            strides=strides,
-            dilations=dilations,
-        )
-        if not unset_pad:
-            if padding is None:
-                pad_attr = helper.make_attribute("auto_pad", auto_pad)
-            else:
-                pad_attr = helper.make_attribute("pads", padding)
-            node.attribute.append(pad_attr)
-
-        if group is not None:
-            group_attr = helper.make_attribute("group", group)
-            node.attribute.append(group_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "convtranspose_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape)),
-                helper.make_tensor_value_info("W", TensorProto.FLOAT, list(w_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, ["?"] * len(x_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="convtranspose_pad_test")
-
-        verify_with_ort(model, [x_shape, w_shape], use_vm=True, target=target, dev=dev)
-
-    def verify_convtranspose(x_shape, w_shape, y_shape, p, group=1):
-        node = onnx.helper.make_node(
-            "ConvTranspose",
-            inputs=["x", "W"],
-            outputs=["y"],
-            strides=[3, 2],
-            kernel_shape=[3, 3],
-            pads=p,
-        )
-
-        if group is not None:
-            group_attr = helper.make_attribute("group", group)
-            node.attribute.append(group_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "verify_convtranspose_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape)),
-                helper.make_tensor_value_info("W", TensorProto.FLOAT, list(w_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(y_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="convtranspose_test")
-        verify_with_ort(model, [x_shape, w_shape], y_shape, opset=11, target=target, dev=dev)
-
-    # Convolution Transpose with padding
-    # (1, 1, 3, 3) input tensor
-    # (1, 2, 3, 3) tensor for convolution weights
-    # (1, 2, 7, 3) output tensor
-    # [1, 2, 1, 2] list for pads
-    verify_convtranspose((1, 1, 3, 3), (1, 2, 3, 3), (1, 2, 7, 3), [1, 2, 1, 2])
-    # Test undefined groups.
-    verify_convtranspose((1, 1, 3, 3), (1, 2, 3, 3), (1, 2, 7, 3), [1, 2, 1, 2], group=None)
-
-    if "llvm" in target:
-        # GPU does not support groups != 1 for convtranspose, so only test llvm
-        # Test depthwise-convolution
-        verify_convtranspose((1, 10, 3, 3), (10, 1, 3, 3), (1, 10, 7, 3), [1, 2, 1, 2], group=10)
-
-        # Test grouped-convolution
-        verify_convtranspose((1, 10, 3, 3), (10, 1, 3, 3), (1, 5, 7, 3), [1, 2, 1, 2], group=5)
-
-    def repeat(num, dims):
-        return tuple(num for _ in range(dims))
-
-    # Once onnxruntime update is complete
-    for dims in [1, 2, 3]:
-        # Convolution with padding
-        verify_convtranspose_with_padding(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            2 * repeat(1, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-        )
-        # Convolution without padding
-        verify_convtranspose_with_padding(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            2 * repeat(0, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-        )
-        # Convolution with unset padding
-        verify_convtranspose_with_padding(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            2 * repeat(0, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-            True,
-        )
-        # Convolution with autopadding
-        verify_convtranspose_with_padding(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            None,
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-            auto_pad="SAME_UPPER",
-        )
-        # Convolution with valid autopadding
-        verify_convtranspose_with_padding(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            None,
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-            auto_pad="VALID",
-        )
-        # Convolution with non uniform stride
-        verify_convtranspose_with_padding(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            None,
-            repeat(3, dims),
-            repeat(2, dims),
-            repeat(1, dims),
-            auto_pad="SAME_UPPER",
-        )
-        # Convolution with default stride
-        verify_convtranspose_with_padding(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            2 * repeat(1, dims),
-            repeat(3, dims),
-            None,
-            repeat(1, dims),
-        )
-        # Convolution with dilation
-        # TODO(mbrookhart): Relay doesn't currently support convtranspose with dilation
-        # verify_convtranspose_with_padding(
-        #     (1, 1) + repeat(5, D),
-        #     (1, 1) + repeat(3, D),
-        #     2 * repeat(2, D),
-        #     repeat(3, D),
-        #     repeat(1, D),
-        #     repeat(2, D),
-        # )
-
-    # Convolution with output_shape
-    for dims in [1, 2, 3]:
-        for num in range(60, 66):
-            verify_convtranspose_with_output_shape(
-                (1, 1) + repeat(32, dims),
-                (1, 1) + repeat(4, dims),
-                repeat(num, dims),
-                repeat(4, dims),
-                repeat(2, dims),
-                repeat(1, dims),
-            )
-
-            verify_convtranspose_with_output_shape(
-                (1, 1) + repeat(32, dims),
-                (1, 1) + repeat(4, dims),
-                repeat(num, dims),
-                repeat(4, dims),
-                repeat(2, dims),
-                repeat(1, dims),
-                auto_pad="SAME_LOWER",
-            )
-
-            verify_convtranspose_with_output_shape(
-                (1, 1) + repeat(32, dims),
-                (1, 2) + repeat(4, dims),
-                repeat(num, dims),
-                repeat(4, dims),
-                repeat(2, dims),
-                repeat(1, dims),
-                auto_pad="SAME_UPPER",
-            )
-
-    verify_convtranspose_with_output_shape(
-        (1, 1, 3, 3),
-        (1, 2, 3, 3),
-        (6, 6),
-        (3, 3),
-        (2, 2),
-        (1, 1),
-        auto_pad="SAME_UPPER",
-    )
-
-    verify_convtranspose_with_output_shape(
-        (1, 1, 3, 3),
-        (1, 2, 3, 3),
-        (6, 6),
-        (3, 3),
-        (2, 2),
-        (1, 1),
-        auto_pad="SAME_LOWER",
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_unsqueeze_constant(target, dev):
-    """test_unsqueeze_constant"""
-
-    class Flatten(Module):
-        def forward(self, input_):
-            return input_.view(input_.size(0), -1)
-
-    with tempfile.NamedTemporaryFile() as f:
-        file_name = f.name
-        input_size = (1, 16, 32, 32)
-        dummy_input = torch.randn(*input_size)
-        layer = Sequential(Flatten(), Linear(16 * 32 * 32, 64))
-        torch.onnx.export(layer, dummy_input, file_name, export_params=True)
-
-        onnx_model = onnx.load(file_name)
-        relay.frontend.from_onnx(onnx_model, {"onnx::Reshape_0": input_size})
-
-
-@tvm.testing.parametrize_targets
-def test_pooling(target, dev):
-    """test_pooling"""
-
-    def verify_pooling(x_shape, kernel_shape, strides, pads, out_shape, mode, auto_pad="NOTSET"):
-        _ = np.random.uniform(size=x_shape).astype("float32")
-
-        if mode == "max":
-            node_type = "MaxPool"
-        elif mode == "average":
-            node_type = "AveragePool"
-        else:
-            raise ValueError(f"Pool method {mode} is not supported.")
-
-        pool_node = helper.make_node(
-            node_type, inputs=["x"], outputs=["y"], kernel_shape=kernel_shape, strides=strides
-        )
-
-        if pads is None:
-            pad_attr = helper.make_attribute("auto_pad", auto_pad)
-        else:
-            pad_attr = helper.make_attribute("pads", pads)
-        pool_node.attribute.append(pad_attr)
-
-        if mode == "max":
-            storage_attr = helper.make_attribute("storage_order", 0)
-            pool_node.attribute.append(storage_attr)
-
-        graph = helper.make_graph(
-            [pool_node],
-            "pooling_test",
-            inputs=[helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape))],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="pooling_test")
-        verify_with_ort(
-            model,
-            [x_shape],
-            [out_shape],
-            use_vm=False,
-            target=target,
-            dev=dev,
-        )
-
-    for mode in ["max", "average"]:
-        # Pool1D
-        verify_pooling(
-            x_shape=[1, 1, 32],
-            kernel_shape=[3],
-            strides=[1],
-            pads=[1, 1],
-            out_shape=[1, 1, 32],
-            mode=mode,
-        )
-        # Pool2D
-        verify_pooling(
-            x_shape=[1, 1, 32, 32],
-            kernel_shape=[3, 3],
-            strides=[1, 1],
-            pads=[1, 1, 1, 1],
-            out_shape=[1, 1, 32, 32],
-            mode=mode,
-        )
-
-        # Pool1D with stride
-        verify_pooling(
-            x_shape=[1, 1, 32],
-            kernel_shape=[3],
-            strides=[2],
-            pads=[1, 1],
-            out_shape=[1, 1, 16],
-            mode=mode,
-        )
-        # Pool2D with stride
-        verify_pooling(
-            x_shape=[1, 1, 32, 32],
-            kernel_shape=[3, 3],
-            strides=[2, 2],
-            pads=[1, 1, 1, 1],
-            out_shape=[1, 1, 16, 16],
-            mode=mode,
-        )
-
-        # Pool1D with stride and autopadding
-        verify_pooling(
-            x_shape=[1, 1, 32],
-            kernel_shape=[3],
-            strides=[2],
-            pads=None,
-            out_shape=[1, 1, 16],
-            mode=mode,
-            auto_pad="SAME_UPPER",
-        )
-        # Pool2D with stride and autopadding
-        verify_pooling(
-            x_shape=[1, 1, 32, 32],
-            kernel_shape=[3, 3],
-            strides=[2, 2],
-            pads=None,
-            out_shape=[1, 1, 16, 16],
-            mode=mode,
-            auto_pad="SAME_UPPER",
-        )
-
-        # Pool3D with stride
-        verify_pooling(
-            x_shape=[1, 1, 32, 32, 32],
-            kernel_shape=[3, 3, 3],
-            strides=[2, 2, 2],
-            pads=[1, 1, 1, 1, 1, 1],
-            out_shape=[1, 1, 16, 16, 16],
-            mode=mode,
-        )
-
-        # Pool3D with stride and autopadding
-        verify_pooling(
-            x_shape=[1, 1, 32, 32, 32],
-            kernel_shape=[3, 3, 3],
-            strides=[2, 2, 2],
-            pads=None,
-            out_shape=[1, 1, 16, 16, 16],
-            mode=mode,
-            auto_pad="SAME_UPPER",
-        )
-
-
-@tvm.testing.parametrize_targets
-def test_global_pooling(target, dev):
-    """test_global_pooling"""
-
-    def verify_global_pooling(x_shape, mode):
-        out_shape = x_shape[:2] + [1] * (len(x_shape) - 2)
-
-        if mode == "max":
-            node_type = "GlobalMaxPool"
-        elif mode == "average":
-            node_type = "GlobalAveragePool"
-        else:
-            raise ValueError(f"Pool method {mode} is not supported.")
-
-        pool_node = helper.make_node(node_type, inputs=["x"], outputs=["y"])
-
-        graph = helper.make_graph(
-            [pool_node],
-            "global_pooling_test",
-            inputs=[helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape))],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="global_pooling_test")
-        verify_with_ort(
-            model,
-            [x_shape],
-            [out_shape],
-            use_vm=False,
-            target=target,
-            dev=dev,
-        )
-
-    # Test each pooling mode across all N-D inputs.
-    for mode in ["average", "max"]:
-        # 1D Pooling (NCW)
-        verify_global_pooling([1, 8, 8], mode)
-        verify_global_pooling([4, 1, 4], mode)
-        # 2D Pooling (NCHW)
-        verify_global_pooling([1, 8, 8, 8], mode)
-        verify_global_pooling([4, 1, 6, 4], mode)
-        # 3D Pooling (NCDHW)
-        verify_global_pooling([1, 8, 6, 8, 8], mode)
-        verify_global_pooling([4, 1, 2, 6, 4], mode)
-
-
-@pytest.mark.skip("flaky")
-@tvm.testing.parametrize_targets
-def test_qlinear_average_pool(target, dev):
-    """test_qlinear_average_pool"""
-
-    def verify_qlinear_average_pool(
-        x_shape, kernel_shape, strides, pads, out_shape, auto_pad="NOTSET"
-    ):
-        input_nodes = [
-            helper.make_tensor_value_info("X", TensorProto.FLOAT, list(x_shape)),
-        ]
-
-        output_nodes = [
-            helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(out_shape)),
-        ]
-
-        input_names = ["X"]
-
-        node = helper.make_node(
-            "AveragePool",
-            inputs=input_names,
-            outputs=["Y"],
-            kernel_shape=kernel_shape,
-            strides=strides,
-        )
-
-        if pads is None:
-            pad_attr = helper.make_attribute("auto_pad", auto_pad)
-        else:
-            pad_attr = helper.make_attribute("pads", pads)
-        node.attribute.append(pad_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "qlinear_average_pool_test",
-            inputs=input_nodes,
-            outputs=output_nodes,
-        )
-
-        model = helper.make_model(graph, producer_name="qlinear_average_pool_Test")
-        quantize_and_verify_with_ort(model, input_names, [x_shape], target, dev)
-
-    # Pool1D
-    verify_qlinear_average_pool(
-        x_shape=[1, 1, 32],
-        kernel_shape=[3],
-        strides=[1],
-        pads=[1, 1],
-        out_shape=[1, 1, 32],
-    )
-    # Pool2D
-    verify_qlinear_average_pool(
-        x_shape=[1, 1, 32, 32],
-        kernel_shape=[3, 3],
-        strides=[1, 1],
-        pads=[1, 1, 1, 1],
-        out_shape=[1, 1, 32, 32],
-    )
-
-    # Pool1D with stride
-    verify_qlinear_average_pool(
-        x_shape=[1, 1, 32],
-        kernel_shape=[3],
-        strides=[2],
-        pads=[1, 1],
-        out_shape=[1, 1, 16],
-    )
-    # Pool2D with stride
-    verify_qlinear_average_pool(
-        x_shape=[1, 1, 32, 32],
-        kernel_shape=[3, 3],
-        strides=[2, 2],
-        pads=[1, 1, 1, 1],
-        out_shape=[1, 1, 16, 16],
-    )
-
-    # Pool1D with stride and autopadding
-    verify_qlinear_average_pool(
-        x_shape=[1, 1, 32],
-        kernel_shape=[3],
-        strides=[2],
-        pads=None,
-        out_shape=[1, 1, 16],
-        auto_pad="SAME_UPPER",
-    )
-    # Pool2D with stride and autopadding
-    verify_qlinear_average_pool(
-        x_shape=[1, 1, 32, 32],
-        kernel_shape=[3, 3],
-        strides=[2, 2],
-        pads=None,
-        out_shape=[1, 1, 16, 16],
-        auto_pad="SAME_UPPER",
-    )
-
-    # Pool3D with stride
-    verify_qlinear_average_pool(
-        x_shape=[1, 1, 32, 32, 32],
-        kernel_shape=[3, 3, 3],
-        strides=[2, 2, 2],
-        pads=[1, 1, 1, 1, 1, 1],
-        out_shape=[1, 1, 16, 16, 16],
-    )
-
-    # Pool3D with stride and autopadding
-    verify_qlinear_average_pool(
-        x_shape=[1, 1, 32, 32, 32],
-        kernel_shape=[3, 3, 3],
-        strides=[2, 2, 2],
-        pads=None,
-        out_shape=[1, 1, 16, 16, 16],
-        auto_pad="SAME_UPPER",
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_qlinear_global_average_pool(target, dev):
-    """test_qlinear_global_average_pool"""
-
-    def verify_qlinear_global_average_pool(x_shape):
-        out_shape = x_shape[:2] + [1] * (len(x_shape) - 2)
-
-        node_type = "GlobalAveragePool"
-
-        input_names = ["X"]
-
-        pool_node = helper.make_node(node_type, inputs=input_names, outputs=["Y"])
-
-        graph = helper.make_graph(
-            [pool_node],
-            "qlinear_global_average_pool_test",
-            inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, list(x_shape))],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="qlinear_global_average_pool_test")
-        quantize_and_verify_with_ort(model, input_names, [x_shape], target, dev)
-
-    # 1D Pooling (NCW)
-    verify_qlinear_global_average_pool([1, 8, 8])
-    verify_qlinear_global_average_pool([4, 1, 4])
-
-    # 2D Pooling (NCHW)
-    verify_qlinear_global_average_pool([1, 8, 8, 8])
-    verify_qlinear_global_average_pool([4, 1, 6, 4])
-
-    # 3D Pooling (NCDHW)
-    verify_qlinear_global_average_pool([1, 8, 6, 8, 8])
-    verify_qlinear_global_average_pool([4, 1, 2, 6, 4])
-
-
-@tvm.testing.parametrize_targets
-def test_mod(target, dev):
-    """test_mod"""
-
-    def verify_mod(x_shape, y_shape, fmod, out_shape, dtype="float32"):
-        x_np = np.random.uniform(-100.0, 100.0, x_shape).astype(dtype)
-        y_np = np.random.uniform(-100.0, 100.0, y_shape).astype(dtype)
-        y_np = np.where(y_np == 0, 1, y_np)  # remove 0's to avoid division by zero error
-
-        mod_node = helper.make_node("Mod", inputs=["x", "y"], outputs=["z"], fmod=fmod)
-
-        onnx_dtype = TensorProto.FLOAT if dtype == "float32" else TensorProto.INT32
-        graph = helper.make_graph(
-            [mod_node],
-            "mod_test",
-            inputs=[
-                helper.make_tensor_value_info("x", onnx_dtype, list(x_shape)),
-                helper.make_tensor_value_info("y", onnx_dtype, list(y_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("z", onnx_dtype, list(out_shape))],
-        )
-        model = helper.make_model(graph, producer_name="mod_test")
-        verify_with_ort_with_inputs(model, [x_np, y_np], [out_shape], target=target, dev=dev)
-
-    # Mod
-    verify_mod(
-        x_shape=[1, 32, 32], y_shape=[1, 1, 32], fmod=0, out_shape=(1, 32, 32), dtype="int32"
-    )
-    verify_mod(
-        x_shape=[1, 32, 32, 32],
-        y_shape=[1, 32, 32, 32],
-        fmod=0,
-        out_shape=(1, 32, 32, 32),
-        dtype="int32",
-    )
-
-    # fmod
-    verify_mod(
-        x_shape=[1, 32, 32], y_shape=[1, 32, 32], fmod=1, out_shape=(1, 32, 32), dtype="int32"
-    )
-    verify_mod(x_shape=[1, 1, 32, 32], y_shape=[1, 32, 32, 32], fmod=1, out_shape=(1, 32, 32, 32))
-    verify_mod(x_shape=[1, 32, 32, 32], y_shape=[1, 1, 32, 32], fmod=1, out_shape=(1, 32, 32, 32))
-    verify_mod(
-        x_shape=[1, 32, 32, 32],
-        y_shape=[1, 32, 32, 32],
-        fmod=1,
-        out_shape=(1, 32, 32, 32),
-        dtype="int32",
-    )
-    verify_mod(x_shape=[1, 32, 32, 32], y_shape=[1, 32, 32, 32], fmod=1, out_shape=(1, 32, 32, 32))
-
-
-@tvm.testing.parametrize_targets
-def test_xor(target, dev):
-    """test_xor"""
-
-    def verify_xor(x_shape, y_shape):
-        x_np = np.random.choice(a=[False, True], size=x_shape).astype("bool")
-        y_np = np.random.choice(a=[False, True], size=y_shape).astype("bool")
-
-        np_out = np.logical_xor(x_np, y_np)
-        out_shape = np_out.shape
-
-        xor_node = helper.make_node("Xor", inputs=["x", "y"], outputs=["z"])
-
-        onnx_dtype = TensorProto.BOOL
-        graph = helper.make_graph(
-            [xor_node],
-            "xor_test",
-            inputs=[
-                helper.make_tensor_value_info("x", onnx_dtype, list(x_shape)),
-                helper.make_tensor_value_info("y", onnx_dtype, list(y_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("z", onnx_dtype, list(out_shape))],
-        )
-        model = helper.make_model(graph, producer_name="xor_test")
-        verify_with_ort_with_inputs(model, [x_np, y_np], [out_shape], target=target, dev=dev)
-
-    # XOR
-    verify_xor(x_shape=[1, 32, 32], y_shape=[1, 32, 32])
-
-    # Xor broadcast
-    verify_xor(x_shape=[1, 32, 32], y_shape=[1, 1, 32])
-
-
-@tvm.testing.parametrize_targets
-def test_max_roi_pool(target, dev):
-    """test_max_roi_pool"""
-
-    def verify_max_roi_pool(x_shape, rois_shape, pooled_shape, spatial_scale, out_shape):
-        if spatial_scale is None:
-            pool_node = helper.make_node(
-                "MaxRoiPool", inputs=["x", "rois"], outputs=["y"], pooled_shape=pooled_shape
-            )
-        else:
-            pool_node = helper.make_node(
-                "MaxRoiPool",
-                inputs=["x", "rois"],
-                outputs=["y"],
-                pooled_shape=pooled_shape,
-                spatial_scale=spatial_scale,
-            )
-
-        graph = helper.make_graph(
-            [pool_node],
-            "pool_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape)),
-                helper.make_tensor_value_info("rois", TensorProto.FLOAT, list(rois_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="pool_test")
-        verify_with_ort(model, [x_shape, rois_shape], [out_shape], target=target, dev=dev)
-
-    verify_max_roi_pool(
-        x_shape=[1, 3, 6, 6],
-        rois_shape=[3, 5],
-        pooled_shape=[1, 1],
-        spatial_scale=None,
-        out_shape=[3, 3, 1, 1],
-    )
-
-    verify_max_roi_pool(
-        x_shape=[1, 3, 10, 10],
-        rois_shape=[4, 5],
-        pooled_shape=[2, 2],
-        spatial_scale=2.0,
-        out_shape=[4, 3, 2, 2],
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_lppool(target, dev):
-    """test_lppool"""
-
-    def verify_lppool(x_shape, kernel_shape, p, strides, pads, out_shape, auto_pad="NOTSET"):
-        kwargs = {}
-        if p is not None:
-            kwargs["p"] = p
-        if pads is None:
-            pool_node = helper.make_node(
-                "LpPool",
-                inputs=["x"],
-                outputs=["y"],
-                kernel_shape=kernel_shape,
-                auto_pad=auto_pad,
-                strides=strides,
-                **kwargs,
-            )
-        else:
-            pool_node = helper.make_node(
-                "LpPool",
-                inputs=["x"],
-                outputs=["y"],
-                kernel_shape=kernel_shape,
-                pads=pads,
-                strides=strides,
-                **kwargs,
-            )
-
-        graph = helper.make_graph(
-            [pool_node],
-            "lppool_test",
-            inputs=[helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape))],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="lppool_test")
-        verify_with_ort(
-            model,
-            [x_shape],
-            [out_shape],
-            use_vm=True,
-            target=target,
-            dev=dev,
-        )
-
-    # Pool1D
-    verify_lppool(
-        x_shape=[1, 1, 32], kernel_shape=[3], p=2, strides=[1], pads=[1, 1], out_shape=[1, 1, 32]
-    )
-
-    # Pool2D
-    verify_lppool(
-        x_shape=[1, 1, 32, 32],
-        kernel_shape=[3, 3],
-        p=2,
-        strides=[1, 1],
-        pads=[1, 1, 1, 1],
-        out_shape=[1, 1, 32, 32],
-    )
-
-    # Pool1D with stride
-    verify_lppool(
-        x_shape=[1, 1, 32], kernel_shape=[3], p=2, strides=[2], pads=[1, 1], out_shape=[1, 1, 16]
-    )
-
-    # Pool2D with stride
-    verify_lppool(
-        x_shape=[1, 1, 32, 32],
-        kernel_shape=[3, 3],
-        p=2,
-        strides=[2, 2],
-        pads=[1, 1, 1, 1],
-        out_shape=[1, 1, 16, 16],
-    )
-
-    # Pool1D with stride and autopadding
-    verify_lppool(
-        x_shape=[1, 1, 32],
-        kernel_shape=[3],
-        p=2,
-        strides=[2],
-        pads=None,
-        out_shape=[1, 1, 16],
-        auto_pad="SAME_UPPER",
-    )
-
-    # Pool2D with stride and autopadding
-    verify_lppool(
-        x_shape=[1, 1, 32, 32],
-        kernel_shape=[3, 3],
-        p=2,
-        strides=[2, 2],
-        pads=None,
-        out_shape=[1, 1, 16, 16],
-        auto_pad="SAME_UPPER",
-    )
-
-    # Pool2D with empty stride
-    verify_lppool(
-        x_shape=[1, 3, 32, 32],
-        kernel_shape=[2, 2],
-        p=4,
-        strides=None,
-        pads=None,
-        out_shape=[1, 3, 32, 32],
-        auto_pad="SAME_LOWER",
-    )
-
-    # Pool3D with stride
-    verify_lppool(
-        x_shape=[1, 1, 32, 32, 32],
-        kernel_shape=[3, 3, 3],
-        p=2,
-        strides=[2, 2, 2],
-        pads=[1, 1, 1, 1, 1, 1],
-        out_shape=[1, 1, 16, 16, 16],
-    )
-
-    # Pool3D with stride and autopadding
-    verify_lppool(
-        x_shape=[1, 1, 32, 32, 32],
-        kernel_shape=[3, 3, 3],
-        p=2,
-        strides=[2, 2, 2],
-        pads=None,
-        out_shape=[1, 1, 16, 16, 16],
-        auto_pad="SAME_UPPER",
-    )
-    # Pool2D with empty p
-    verify_lppool(
-        x_shape=[1, 1, 32, 32],
-        kernel_shape=[3, 3],
-        p=None,
-        strides=[1, 1],
-        pads=[1, 1, 1, 1],
-        out_shape=[1, 1, 32, 32],
-    )
-
-
-def verify_global_lppool(x_shape, p, out_shape, target, dev):
-    """verify_global_lppool"""
-    pool_node = helper.make_node(
-        "GlobalLpPool",
-        inputs=["x"],
-        outputs=["y"],
-        p=p,
-    )
-
-    graph = helper.make_graph(
-        [pool_node],
-        "global_lppool_test",
-        inputs=[helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape))],
-        outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(out_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="global_lppool_test")
-    verify_with_ort(model, [x_shape], out_shape, use_vm=True, target=target, dev=dev)
-
-
-@tvm.testing.parametrize_targets
-def test_global_lppool(target, dev):
-    """test_global_lppool"""
-    # LpPool1D
-    verify_global_lppool(x_shape=[1, 15, 16], p=2, out_shape=[1, 15, 1], target=target, dev=dev)
-
-    # LpPool2D
-    verify_global_lppool(
-        x_shape=[1, 15, 32, 32], p=2, out_shape=[1, 15, 1, 1], target=target, dev=dev
-    )
-
-    # LpPool2D
-    verify_global_lppool(
-        x_shape=[1, 15, 32, 32], p=3, out_shape=[1, 15, 1, 1], target=target, dev=dev
-    )
-
-    # LpPool3D
-    verify_global_lppool(
-        x_shape=[1, 15, 3, 32, 32], p=2, out_shape=[1, 15, 1, 1, 1], target=target, dev=dev
-    )
-
-
-def verify_rnn(
-    seq_length,
-    batch_size,
-    input_size,
-    hidden_size,
-    rnn_type="LSTM",
-    use_bias=False,
-    activations=None,
-    alphas=None,
-    betas=None,
-    use_initial_state=False,
-    use_peep=False,
-    linear_before_reset=False,
-    directions=1,
-    layout=0,
-    rtol=1e-5,
-    atol=1e-5,
-    target=None,
-    dev=None,
-    use_sequence_lens=False,
-):
-    """verify_rnn"""
-    if rnn_type == "RNN":
-        multiplier = 1
-    elif rnn_type == "LSTM":
-        multiplier = 4
-    elif rnn_type == "GRU":
-        multiplier = 3
-    else:
-        raise NotImplementedError(f"{rnn_type} RNNs not yet supported.")
-
-    if directions not in [1, 2]:
-        raise ValueError(f"Direction should be either 1 or 2 (for bidirectional LSTMs)")
-
-    def get_inputs():
-        input_names = []
-        input_values = []
-        input_tensors = []
-
-        def register(np_arr, name, shape=None):
-            input_values.append(np_arr)
-            input_names.append(name)
-
-            # Map of numpy dtypes to the protobuf equivalent
-            dtype_map = {
-                "float32": TensorProto.FLOAT,
-                "int32": TensorProto.INT32,
-                "int8": TensorProto.INT8,
-            }
-
-            if np_arr.dtype.name not in dtype_map:
-                raise ValueError(f"Unknown dtype we don't know how to handle {np.dtype.name}")
-            if shape is None:
-                shape = list(np_arr.shape)
-            proto_type = dtype_map[np_arr.dtype.name]
-            input_tensors.append(helper.make_tensor_value_info(name, proto_type, shape))
-
-        if layout == 1:
-            x_np = np.random.uniform(size=(batch_size, seq_length, input_size)).astype("float32")
-        else:
-            x_np = np.random.uniform(size=(seq_length, batch_size, input_size)).astype("float32")
-        w_np = np.random.uniform(size=(directions, multiplier * hidden_size, input_size)).astype(
-            "float32"
-        )
-        r_np = np.random.uniform(size=(directions, multiplier * hidden_size, hidden_size)).astype(
-            "float32"
-        )
-        register(x_np, "X")
-        register(w_np, "W")
-        register(r_np, "R")
-
-        if use_bias:
-            b_np = np.random.uniform(size=(directions, multiplier * 2 * hidden_size)).astype(
-                "float32"
-            )
-            register(b_np, "B")
-
-        if use_sequence_lens:
-            sequence_np = np.random.uniform(0, seq_length, size=(batch_size)).astype("int32")
-            register(sequence_np, "sequence_lens")
-
-        if use_initial_state:
-            assert use_bias is True, "Initial states must have bias specified."
-
-            if not use_sequence_lens:
-                sequence_np = np.repeat(seq_length, batch_size).astype("int32")
-                register(sequence_np, "sequence_lens")
-
-            if layout == 1:
-                initial_h_np = np.random.uniform(size=(batch_size, directions, hidden_size)).astype(
-                    "float32"
-                )
-            else:
-                initial_h_np = np.random.uniform(size=(directions, batch_size, hidden_size)).astype(
-                    "float32"
-                )
-            register(initial_h_np, "initial_h")
-
-            if rnn_type == "LSTM":
-                if layout == 1:
-                    initial_c_np = np.random.uniform(
-                        size=(batch_size, directions, hidden_size)
-                    ).astype("float32")
-                else:
-                    initial_c_np = np.random.uniform(
-                        size=(directions, batch_size, hidden_size)
-                    ).astype("float32")
-                register(initial_c_np, "initial_c")
-
-        if use_peep and rnn_type == "LSTM":
-            assert use_initial_state is True, "Peepholes require initial state to be specified."
-            p_np = np.random.uniform(size=(directions, 3 * hidden_size)).astype("float32")
-            register(p_np, "P")
-
-        return input_names, input_tensors, input_values
-
-    input_names, input_tensors, input_values = get_inputs()
-
-    def get_outputs():
-        output_names = []
-        graph_outputs = []
-        output_shapes = []
-
-        def register(name, shape, proto_type):
-            output_names.append(name)
-            graph_outputs.append(helper.make_tensor_value_info(name, proto_type, list(shape)))
-            output_shapes.append(list(shape))
-
-        if layout == 1:
-            register("Y", [directions, seq_length, batch_size, hidden_size], TensorProto.FLOAT)
-            register("Y_h", [batch_size, directions, hidden_size], TensorProto.FLOAT)
-        else:
-            register("Y", [seq_length, directions, batch_size, hidden_size], TensorProto.FLOAT)
-            register("Y_h", [directions, batch_size, hidden_size], TensorProto.FLOAT)
-
-        if rnn_type == "LSTM":
-            if layout == 1:
-                register("Y_c", [batch_size, directions, hidden_size], TensorProto.FLOAT)
-            else:
-                register("Y_c", [directions, batch_size, hidden_size], TensorProto.FLOAT)
-
-        return output_names, graph_outputs, output_shapes
-
-    output_names, graph_outputs, output_shapes = get_outputs()
-
-    rnn_node = helper.make_node(
-        rnn_type, inputs=input_names, outputs=output_names, hidden_size=hidden_size
-    )
-    if activations is not None:
-        activations_attr = helper.make_attribute("activations", activations)
-        rnn_node.attribute.append(activations_attr)
-    if directions == 2:
-        direction_attr = helper.make_attribute("direction", "bidirectional")
-        rnn_node.attribute.append(direction_attr)
-    if alphas is not None:
-        alphas_attr = helper.make_attribute("activation_alpha", alphas)
-        rnn_node.attribute.append(alphas_attr)
-    if betas is not None:
-        betas_attr = helper.make_attribute("activation_beta", betas)
-        rnn_node.attribute.append(betas_attr)
-    if linear_before_reset and rnn_type == "GRU":
-        lbr_attr = helper.make_attribute("linear_before_reset", 1)
-        rnn_node.attribute.append(lbr_attr)
-    if layout == 1:
-        layout_attr = helper.make_attribute("layout", 1)
-        rnn_node.attribute.append(layout_attr)
-
-    graph = helper.make_graph([rnn_node], "rnn_test", inputs=input_tensors, outputs=graph_outputs)
-
-    model = helper.make_model(graph, producer_name="rnn_test")
-
-    verify_with_ort_with_inputs(
-        model, input_values, output_shapes, atol=atol, rtol=rtol, target=target, dev=dev
-    )
-
-
-def verify_rnn_helper(target, dev, rnn_type):
-    num_activations = 1
-    if rnn_type == "GRU":
-        num_activations = 2
-    elif rnn_type == "LSTM":
-        num_activations = 3
-
-    for directions in [1, 2]:
-        # No bias.
-        verify_rnn(
-            seq_length=2,
-            batch_size=1,
-            input_size=16,
-            hidden_size=32,
-            use_bias=False,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-        # large batch.
-        verify_rnn(
-            seq_length=4,
-            batch_size=8,
-            input_size=16,
-            hidden_size=32,
-            use_bias=True,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-        # Non power of two.
-        verify_rnn(
-            seq_length=3,
-            batch_size=3,
-            input_size=16,
-            hidden_size=40,
-            use_bias=True,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-        # Long sequence.
-        verify_rnn(
-            seq_length=8,
-            batch_size=1,
-            input_size=16,
-            hidden_size=32,
-            use_bias=True,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-        # Large hidden.
-        verify_rnn(
-            seq_length=2,
-            batch_size=1,
-            input_size=16,
-            hidden_size=128,
-            use_bias=True,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-        # Large input.
-        verify_rnn(
-            seq_length=2,
-            batch_size=1,
-            input_size=64,
-            hidden_size=32,
-            use_bias=True,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-
-        # Different activation testing.
-        # Default value hardsigmoid.
-        # TODO: onnxruntime <= v1.12.0 has wrong default value of all activation functions
-        if rnn_type != "RNN":
-            activations = ["HardSigmoid", "Tanh", "Tanh"][0:num_activations] * directions
-            verify_rnn(
-                seq_length=2,
-                batch_size=1,
-                input_size=16,
-                hidden_size=32,
-                use_bias=False,
-                activations=activations,
-                rnn_type=rnn_type,
-                directions=directions,
-                target=target,
-                dev=dev,
-            )
-        # Multiple parametrized activations.
-        activations = ["HardSigmoid", "LeakyRelu", "Tanh"][0:num_activations] * directions
-        alphas = [2.0, 0.5, 0.0][0:num_activations] * directions
-        betas = [0.3, 0.0, 0.0][0:num_activations] * directions
-        verify_rnn(
-            seq_length=2,
-            batch_size=1,
-            input_size=16,
-            hidden_size=32,
-            use_bias=False,
-            activations=activations,
-            alphas=alphas,
-            betas=betas,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-        # All parametrized with new Affine activation.
-        activations = ["Affine", "LeakyRelu", "HardSigmoid"][0:num_activations] * directions
-        alphas = [0.8, 2.0, 0.5][0:num_activations] * directions
-        betas = [0.0, 0.3, 0.0][0:num_activations] * directions
-        verify_rnn(
-            seq_length=2,
-            batch_size=1,
-            input_size=16,
-            hidden_size=32,
-            use_bias=False,
-            activations=activations,
-            alphas=alphas,
-            betas=betas,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-
-        # Testing with initial state
-        verify_rnn(
-            seq_length=2,
-            batch_size=1,
-            input_size=16,
-            hidden_size=32,
-            use_bias=True,
-            use_initial_state=True,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-
-        # Testing layout
-        # TODO: onnxruntime <= 1.12.0 doesn't support layout == 1
-        # verify_rnn(
-        #     seq_length=2,
-        #     batch_size=1,
-        #     input_size=16,
-        #     hidden_size=32,
-        #     use_bias=True,
-        #     rnn_type="RNN",
-        #     directions=directions,
-        #     layout=1,
-        #     target=target,
-        #     dev=dev,
-        # )
-
-        # Testing with initial state
-        if rnn_type == "GRU":
-            verify_rnn(
-                seq_length=2,
-                batch_size=1,
-                input_size=16,
-                hidden_size=32,
-                use_bias=True,
-                use_initial_state=True,
-                rnn_type=rnn_type,
-                directions=directions,
-                target=target,
-                dev=dev,
-                use_sequence_lens=True,
-            )
-            verify_rnn(
-                seq_length=8,
-                batch_size=8,
-                input_size=16,
-                hidden_size=32,
-                use_bias=True,
-                use_initial_state=True,
-                rnn_type=rnn_type,
-                directions=directions,
-                target=target,
-                dev=dev,
-                use_sequence_lens=True,
-            )
-
-        # Testing with peepholes
-        if rnn_type == "LSTM":
-            verify_rnn(
-                seq_length=2,
-                batch_size=1,
-                input_size=16,
-                hidden_size=32,
-                use_bias=True,
-                use_initial_state=True,
-                use_peep=True,
-                rnn_type="LSTM",
-                directions=directions,
-                target=target,
-                dev=dev,
-            )
-
-
-@tvm.testing.parametrize_targets
-def test_rnn(target, dev):
-    verify_rnn_helper(target, dev, "RNN")
-
-
-@tvm.testing.parametrize_targets
-def test_lstm(target, dev):
-    verify_rnn_helper(target, dev, "LSTM")
-
-
-@tvm.testing.parametrize_targets
-def test_gru(target, dev):
-    verify_rnn_helper(target, dev, "GRU")
-
-
-@tvm.testing.parametrize_targets
-def test_resize(target, dev):
-    """test_resize"""
-
-    def verify(ishape, oshape, scales, mode, coord_trans="asymmetric", alpha=0.5, exclude=False):
-        nodes = [
-            make_constant_node("roi", onnx.TensorProto.FLOAT, (0,), []),
-            make_constant_node("scales", onnx.TensorProto.FLOAT, (len(scales),), scales),
-        ]
-        input_names = ["X", "roi", "scales"]
-
-        if oshape != []:
-            nodes.append(
-                make_constant_node("sizes", onnx.TensorProto.INT64, (len(oshape),), oshape)
-            )
-            input_names.append("sizes")
-        nodes.append(
-            helper.make_node(
-                "Resize",
-                inputs=input_names,
-                outputs=["Y"],
-                mode=mode,
-                coordinate_transformation_mode=coord_trans,
-                cubic_coeff_a=alpha,
-                exclude_outside=exclude,
-            )
-        )
-
-        if oshape == []:
-            oshape = [round(dim * scale) for (dim, scale) in zip(ishape, scales)]
-        graph = helper.make_graph(
-            nodes,
-            "resize_test",
-            inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, ishape)],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, oshape)],
-        )
-
-        model = helper.make_model(graph, producer_name="resize_test")
-
-        verify_with_ort(
-            model,
-            [ishape],
-            [oshape],
-            use_vm=True,
-            opset=11,
-            freeze_params=True,
-            target=target,
-            dev=dev,
-        )
-
-    for ndim in [1, 2, 3]:
-        method = "nearest"
-        for coord_trans in ["asymmetric", "align_corners", "half_pixel"]:
-            # upsampling
-            verify([1, 16] + [32] * ndim, [1, 16] + [64] * ndim, [], method, coord_trans)
-            # downsampling
-            verify([1, 16] + [32] * ndim, [1, 16] + [16] * ndim, [], method, coord_trans)
-            # scales are specified instead of sizes
-            verify([1, 16] + [32] * ndim, [], [1, 1] + [0.5] * ndim, method, coord_trans)
-            verify([1, 16] + [32] * ndim, [], [1, 1] + [2] * ndim, method, coord_trans)
-            verify([1, 16] + [32] * ndim, [], [1, 1] + [2] * ndim, None, coord_trans)
-
-        method = "linear"
-        # upsampling
-        verify([1, 16] + [32] * ndim, [1, 16] + [64] * ndim, [], method)
-        # downsampling
-        verify([1, 16] + [32] * ndim, [1, 16] + [16] * ndim, [], method)
-        # scales are specified instead of sizes
-        verify([1, 16] + [32] * ndim, [], [1, 1] + [0.5] * ndim, method)
-        verify([1, 16] + [32] * ndim, [], [1, 1] + [2] * ndim, method)
-
-        if ndim == 2:
-            # ONNX Runtime only supports cubic interpolation for 2D images
-            method = "cubic"
-            for alpha in [0.5, 0.75]:
-                for exclude in [True, False]:
-                    # upsampling
-                    verify(
-                        [1, 16] + [32] * ndim,
-                        [1, 16] + [64] * ndim,
-                        [],
-                        method,
-                        alpha=alpha,
-                        exclude=exclude,
-                    )
-                    # downsampling
-                    verify(
-                        [1, 16] + [32] * ndim,
-                        [1, 16] + [16] * ndim,
-                        [],
-                        method,
-                        alpha=alpha,
-                        exclude=exclude,
-                    )
-                    # scales are specified instead of sizes
-                    verify(
-                        [1, 16] + [32] * ndim,
-                        [],
-                        [1, 1] + [0.5] * ndim,
-                        method,
-                        alpha=alpha,
-                        exclude=exclude,
-                    )
-                    verify(
-                        [1, 16] + [32] * ndim,
-                        [],
-                        [1, 1] + [2] * ndim,
-                        method,
-                        alpha=alpha,
-                        exclude=exclude,
-                    )
-
-    def verify_opset_10(ishape, scales, mode):
-        nodes = [
-            make_constant_node("scales", onnx.TensorProto.FLOAT, (len(scales),), scales),
-        ]
-        input_names = ["X", "scales"]
-        nodes.append(
-            helper.make_node(
-                "Resize",
-                inputs=input_names,
-                outputs=["Y"],
-                mode=mode,
-            )
-        )
-
-        oshape = [round(dim * scale) for (dim, scale) in zip(ishape, scales)]
-        graph = helper.make_graph(
-            nodes,
-            "resize_test",
-            inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, ishape)],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, oshape)],
-        )
-
-        model = helper.make_model(graph, producer_name="resize_test")
-        verify_with_ort(
-            model,
-            [ishape],
-            [oshape],
-            use_vm=True,
-            freeze_params=True,
-            opset=10,
-            target=target,
-            dev=dev,
-        )
-
-    verify_opset_10([1, 16, 32, 32], [1, 1, 2, 2], "nearest")
-    verify_opset_10([1, 16, 32, 32], [1, 1, 0.5, 0.5], "linear")
-
-
-@tvm.testing.parametrize_targets
-def test_nonzero(target, dev):
-    """test_nonzero"""
-
-    def verify_nonzero(indata, outdata, dtype):
-        node = helper.make_node(
-            "NonZero",
-            inputs=["X"],
-            outputs=["Y"],
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "nonzero_test",
-            inputs=[helper.make_tensor_value_info("X", TensorProto.INT64, list(indata.shape))],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.INT64, list(outdata.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="nonzero_test")
-
-        verify_with_ort_with_inputs(
-            model, [indata], dtype="int64", use_vm=True, opset=9, target=target, dev=dev
-        )
-
-    input_data = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    result = np.array((np.nonzero(input_data)))  # expected output [[0, 1, 1], [0, 0, 1]]
-    verify_nonzero(input_data, result, dtype=np.int64)
-
-    input_data = np.array([[3, 0, 0], [0, 4, 0], [5, 6, 0]], dtype=np.int64)
-    result = np.array((np.nonzero(input_data)))  # expected output [[0, 1, 2, 2], [0, 1, 0, 1]]
-    verify_nonzero(input_data, result, dtype=np.int64)
-
-
-@tvm.testing.parametrize_targets
-def test_topk(target, dev):
-    """test_topk"""
-
-    def verify_topk(input_dims, k, axis=-1):
-        output_dims = list(input_dims)
-        output_dims[axis] = k
-
-        node = helper.make_node("TopK", inputs=["X", "K"], outputs=["Values", "Indices"], axis=axis)
-
-        graph = helper.make_graph(
-            [node],
-            "topk_test",
-            inputs=[
-                helper.make_tensor_value_info("X", TensorProto.FLOAT, list(input_dims)),
-                helper.make_tensor_value_info(
-                    "K",
-                    TensorProto.INT64,
-                    [
-                        1,
-                    ],
-                ),
-            ],
-            outputs=[
-                helper.make_tensor_value_info("Values", TensorProto.FLOAT, output_dims),
-                helper.make_tensor_value_info("Indices", TensorProto.INT64, output_dims),
-            ],
-        )
-
-        model = helper.make_model(graph, producer_name="topk_test")
-
-        indata = np.random.uniform(-10, 10, input_dims).astype(np.float32)
-        verify_with_ort_with_inputs(
-            model, [indata, np.array([k])], use_vm=True, target=target, dev=dev
-        )
-
-    for n in [12, 32]:
-        for shape in [[n], [n, n], [n, n, n]]:
-            for k in [1, 5, 10]:
-                verify_topk(shape, k)
-
-        verify_topk([n, n, n], 5, 0)
-        verify_topk([n, n, n], 5, 1)
-        verify_topk([n, n, n], 5, 2)
-
-
-@tvm.testing.parametrize_targets
-def test_roi_align(target, dev):
-    """test_roi_align"""
-
-    def verify_roi_align(
-        input_dims,
-        num_roi,
-        output_height,
-        output_width,
-        sampling_ratio=0,
-        spatial_scale=1.0,
-        mode="avg",
-    ):
-        output_dims = [num_roi, input_dims[1], output_height, output_width]
-
-        node = helper.make_node(
-            "RoiAlign",
-            coordinate_transformation_mode="output_half_pixel",
-            inputs=["X", "rois", "batch_indices"],
-            outputs=["Y"],
-            mode=mode,
-            output_height=output_height,
-            output_width=output_width,
-            sampling_ratio=sampling_ratio,
-            spatial_scale=spatial_scale,
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "roialign_test",
-            inputs=[
-                helper.make_tensor_value_info("X", TensorProto.FLOAT, list(input_dims)),
-                helper.make_tensor_value_info("rois", TensorProto.FLOAT, [num_roi, 4]),
-                helper.make_tensor_value_info(
-                    "batch_indices",
-                    TensorProto.INT64,
-                    [
-                        num_roi,
-                    ],
-                ),
-            ],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, output_dims)],
-        )
-
-        model = helper.make_model(graph, producer_name="roialign_test")
-
-        np_data = np.random.uniform(size=input_dims).astype("float32")
-        np_rois = np.random.uniform(size=[num_roi, 4]).astype("float32") * input_dims[2]
-        np_batch_indices = np.random.randint(low=0, high=input_dims[0], size=num_roi)
-
-        verify_with_ort_with_inputs(
-            model,
-            [np_data, np_rois, np_batch_indices],
-            out_shape=[output_dims],
-            target=target,
-            dev=dev,
-        )
-
-    verify_roi_align((1, 4, 16, 16), 32, 7, 7, sampling_ratio=0, spatial_scale=1.0)
-    verify_roi_align((4, 4, 16, 32), 32, 7, 7, sampling_ratio=0, spatial_scale=1.0)
-    verify_roi_align((1, 8, 16, 16), 32, 7, 7, sampling_ratio=0, spatial_scale=1.0)
-    verify_roi_align((1, 4, 8, 8), 32, 7, 7, sampling_ratio=0, spatial_scale=1.0)
-    verify_roi_align((1, 4, 16, 16), 16, 5, 7, sampling_ratio=0, spatial_scale=1.0)
-    verify_roi_align((1, 4, 16, 12), 8, 7, 3, sampling_ratio=0, spatial_scale=1.0)
-    verify_roi_align((1, 4, 16, 16), 32, 7, 7, sampling_ratio=0, spatial_scale=0.5)
-    verify_roi_align((3, 4, 12, 16), 32, 7, 7, sampling_ratio=0, spatial_scale=1.5)
-    verify_roi_align((5, 4, 16, 14), 32, 7, 7, sampling_ratio=1, spatial_scale=1.0)
-    verify_roi_align((1, 4, 16, 16), 32, 7, 7, sampling_ratio=2, spatial_scale=1.0)
-
-    # ONNX implementation of roi_align with max mode is incorrect, so we don't compare outputs here.
-
-
-@tvm.testing.parametrize_targets
-def test_non_max_suppression(target, dev):
-    """test_non_max_suppression"""
-
-    def verify_nms(
-        boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, output_dims
-    ):
-        input_names = ["boxes", "scores", "max_output_boxes_per_class", "iou_threshold"]
-        input_nodes = [
-            helper.make_tensor_value_info("boxes", TensorProto.FLOAT, boxes.shape),
-            helper.make_tensor_value_info("scores", TensorProto.FLOAT, scores.shape),
-            helper.make_tensor_value_info(
-                "max_output_boxes_per_class", TensorProto.INT64, max_output_boxes_per_class.shape
-            ),
-            helper.make_tensor_value_info("iou_threshold", TensorProto.FLOAT, iou_threshold.shape),
-        ]
-        inputs = [boxes, scores, max_output_boxes_per_class, iou_threshold]
-        if score_threshold is not None:
-            input_names.append("score_threshold")
-            input_nodes.append(
-                helper.make_tensor_value_info(
-                    "score_threshold", TensorProto.FLOAT, score_threshold.shape
-                )
-            )
-            inputs.append(score_threshold)
-        node = helper.make_node(
-            "NonMaxSuppression",
-            inputs=input_names,
-            outputs=["Y"],
-            center_point_box=0,
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "nms_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.INT64, output_dims)],
-        )
-
-        model = helper.make_model(graph, producer_name="nms_test")
-
-        verify_with_ort_with_inputs(model, inputs, use_vm=True, target=target, dev=dev)
-
-    boxes = np.array(
-        [
-            [
-                [0.0, 0.0, 0.3, 0.3],
-                [0.0, 0.0, 0.4, 0.4],
-                [0.0, 0.0, 0.5, 0.5],
-                [0.5, 0.5, 0.9, 0.9],
-                [0.5, 0.5, 1.0, 1.0],
-            ],
-            [
-                [0.0, 0.0, 0.3, 0.3],
-                [0.0, 0.0, 0.4, 0.4],
-                [0.5, 0.5, 0.95, 0.95],
-                [0.5, 0.5, 0.96, 0.96],
-                [0.5, 0.5, 1.0, 1.0],
-            ],
-        ]
-    ).astype("float32")
-
-    scores = np.array(
-        [
-            [[0.1, 0.2, 0.6, 0.3, 0.9], [0.1, 0.2, 0.6, 0.3, 0.9]],
-            [[0.1, 0.2, 0.6, 0.3, 0.9], [0.1, 0.2, 0.6, 0.3, 0.9]],
-        ]
-    ).astype("float32")
-    max_output_boxes_per_class = np.array(2).astype("int64")
-    iou_threshold = np.array(0.8).astype("float32")
-    output_dims = [8, 3]
-    verify_nms(boxes, scores, max_output_boxes_per_class, iou_threshold, None, output_dims)
-
-    boxes = np.array(
-        [
-            [
-                [0.0, 0.0, 1.0, 1.0],
-                [0.0, 0.1, 1.0, 1.1],
-                [0.0, -0.1, 1.0, 0.9],
-                [0.0, 10.0, 1.0, 11.0],
-                [0.0, 10.1, 1.0, 11.1],
-                [0.0, 100.0, 1.0, 101.0],
-            ]
-        ]
-    ).astype(np.float32)
-    scores = np.array([[[0.9, 0.75, 0.6, 0.95, 0.5, 0.3]]]).astype(np.float32)
-    max_output_boxes_per_class = np.array([3]).astype(np.int64)
-    iou_threshold = np.array([0.5]).astype(np.float32)
-    score_threshold = np.array([0.4]).astype(np.float32)
-    output_dims = [2, 3]
-    verify_nms(
-        boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, output_dims
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_loop(target, dev):
-    """test_loop"""
-
-    def verify_cond_loop():
-        y_in = helper.make_tensor_value_info("y_in", TensorProto.FLOAT, [1])
-        y_out = helper.make_tensor_value_info("y_out", TensorProto.FLOAT, [1])
-        scan_out = helper.make_tensor_value_info("scan_out", TensorProto.FLOAT, [1])
-        cond_in = helper.make_tensor_value_info("cond_in", TensorProto.BOOL, [])
-        cond_out = helper.make_tensor_value_info("cond_out", TensorProto.BOOL, [])
-        iter_count = helper.make_tensor_value_info("iter_count", TensorProto.INT64, [])
-
-        y = np.array([-2]).astype(np.float32)
-
-        five_const_node = helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=["five"],
-            value=helper.make_tensor(
-                name="const_tensor_five", data_type=TensorProto.FLOAT, dims=(), vals=[5]
-            ),
-        )
-
-        iter_cast_node = helper.make_node(
-            "Cast", inputs=["iter_count"], outputs=["iter_cast"], to=onnx.TensorProto.FLOAT
-        )
-
-        y_add_node = helper.make_node("Add", inputs=["y_in", "iter_cast"], outputs=["y_out"])
-
-        less_node = helper.make_node("Less", inputs=["y_out", "five"], outputs=["cond_less"])
-
-        squeeze_node = helper.make_node("Squeeze", inputs=["cond_less"], outputs=["cond_squeeze"])
-
-        cond_cast_node = helper.make_node(
-            "Cast", inputs=["cond_squeeze"], outputs=["cond_out"], to=onnx.TensorProto.BOOL
-        )
-
-        scan_identity_node = helper.make_node("Identity", inputs=["y_out"], outputs=["scan_out"])
-
-        loop_body = helper.make_graph(
-            [
-                five_const_node,
-                iter_cast_node,
-                y_add_node,
-                less_node,
-                squeeze_node,
-                cond_cast_node,
-                scan_identity_node,
-            ],
-            "loop_body",
-            [iter_count, cond_in, y_in],
-            [cond_out, y_out, scan_out],
-        )
-
-        loop_node = helper.make_node(
-            "Loop",
-            inputs=["trip_count", "cond", "y"],
-            outputs=["res_y", "res_scan"],
-            body=loop_body,
-        )
-
-        trip_count = np.array(5).astype(np.int64)
-        _ = np.array([13]).astype(np.float32)
-        cond = np.array(1).astype(bool)
-        loop_graph = onnx.helper.make_graph(
-            [loop_node],
-            "loop_outer",
-            inputs=[
-                onnx.helper.make_tensor_value_info("trip_count", onnx.TensorProto.INT64, []),
-                onnx.helper.make_tensor_value_info("cond", onnx.TensorProto.BOOL, []),
-                onnx.helper.make_tensor_value_info("y", onnx.TensorProto.FLOAT, [1]),
-            ],
-            outputs=[
-                onnx.helper.make_tensor_value_info("res_y", onnx.TensorProto.FLOAT, [1]),
-                onnx.helper.make_tensor_value_info("res_scan", onnx.TensorProto.FLOAT, [5, 1]),
-            ],
-        )
-        loop_model = onnx.helper.make_model(loop_graph)
-
-        # Set a high trip count so that condition trips first.
-        trip_count = np.array(40).astype(np.int64)
-        cond = np.array(1).astype(bool)
-        input_vals = [trip_count, cond, y]
-        verify_with_ort_with_inputs(
-            loop_model,
-            input_vals,
-            use_vm=True,
-            freeze_params=True,
-            opset=11,
-            target=target,
-            dev=dev,
-        )
-
-    def verify_count_loop():
-        y_in = helper.make_tensor_value_info("y_in", TensorProto.FLOAT, [])
-        y_out = helper.make_tensor_value_info("y_out", TensorProto.FLOAT, [])
-        scan_out = helper.make_tensor_value_info("scan_out", TensorProto.FLOAT, [])
-        cond_in = helper.make_tensor_value_info("cond_in", TensorProto.BOOL, [])
-        cond_out = helper.make_tensor_value_info("cond_out", TensorProto.BOOL, [])
-        iter_count = helper.make_tensor_value_info("iter_count", TensorProto.INT64, [])
-
-        y = np.array(-2).astype(np.float32)
-
-        iter_cast_node = helper.make_node(
-            "Cast", inputs=["iter_count"], outputs=["iter_cast"], to=onnx.TensorProto.FLOAT
-        )
-
-        y_add_node = helper.make_node("Add", inputs=["y_in", "iter_cast"], outputs=["y_out"])
-
-        identity_node = helper.make_node("Identity", inputs=["cond_in"], outputs=["cond_out"])
-
-        scan_identity_node = helper.make_node("Identity", inputs=["y_out"], outputs=["scan_out"])
-
-        loop_body = helper.make_graph(
-            [identity_node, iter_cast_node, y_add_node, scan_identity_node],
-            "loop_body",
-            [iter_count, cond_in, y_in],
-            [cond_out, y_out, scan_out],
-        )
-
-        loop_node = helper.make_node(
-            "Loop",
-            inputs=["trip_count", "cond", "y"],
-            outputs=["res_y", "res_scan"],
-            body=loop_body,
-        )
-
-        trip_count = np.array(5).astype(np.int64)
-        _ = np.array([13]).astype(np.float32)
-        cond = np.array(1).astype(bool)
-        loop_graph = onnx.helper.make_graph(
-            [loop_node],
-            "loop_outer",
-            inputs=[
-                onnx.helper.make_tensor_value_info("trip_count", onnx.TensorProto.INT64, []),
-                onnx.helper.make_tensor_value_info("cond", onnx.TensorProto.BOOL, []),
-                onnx.helper.make_tensor_value_info("y", onnx.TensorProto.FLOAT, []),
-            ],
-            outputs=[
-                onnx.helper.make_tensor_value_info("res_y", onnx.TensorProto.FLOAT, []),
-                onnx.helper.make_tensor_value_info("res_scan", onnx.TensorProto.FLOAT, [5]),
-            ],
-        )
-        loop_model = onnx.helper.make_model(loop_graph)
-
-        trip_count = np.array(5).astype(np.int64)
-        cond = np.array(1).astype(bool)
-        input_vals = [trip_count, cond, y]
-        verify_with_ort_with_inputs(
-            loop_model,
-            input_vals,
-            use_vm=True,
-            freeze_params=True,
-            opset=11,
-            target=target,
-            dev=dev,
-        )
-
-    def verify_tensor_loop(shapeless_output=False):
-        y_in = helper.make_tensor_value_info("y_in", TensorProto.FLOAT, [3, 3, 3, 3])
-        y_out = helper.make_tensor_value_info("y_out", TensorProto.FLOAT, [3, 3, 3, 3])
-        scan_out = helper.make_tensor_value_info("scan_out", TensorProto.FLOAT, [3, 3, 3, 3])
-        cond_in = helper.make_tensor_value_info("cond_in", TensorProto.BOOL, [])
-        cond_out = helper.make_tensor_value_info("cond_out", TensorProto.BOOL, [])
-        iter_count = helper.make_tensor_value_info("iter_count", TensorProto.INT64, [])
-
-        y = np.random.normal(size=[3, 3, 3, 3]).astype(np.float32)
-
-        iter_cast_node = helper.make_node(
-            "Cast", inputs=["iter_count"], outputs=["iter_cast"], to=onnx.TensorProto.FLOAT
-        )
-
-        y_add_node = helper.make_node("Add", inputs=["y_in", "iter_cast"], outputs=["y_out"])
-
-        identity_node = helper.make_node("Identity", inputs=["cond_in"], outputs=["cond_out"])
-
-        scan_identity_node = helper.make_node("Identity", inputs=["y_out"], outputs=["scan_out"])
-
-        loop_body = helper.make_graph(
-            [identity_node, iter_cast_node, y_add_node, scan_identity_node],
-            "loop_body",
-            [iter_count, cond_in, y_in],
-            [cond_out, y_out, scan_out],
-        )
-
-        loop_node = helper.make_node(
-            "Loop",
-            inputs=["trip_count", "cond", "y"],
-            outputs=["res_y", "res_scan"],
-            body=loop_body,
-        )
-
-        trip_count = np.array(5).astype(np.int64)
-        cond = np.array(1).astype(bool)
-
-        # Allow testing of malformed nodes since pytorch likes to create these.
-        if shapeless_output:
-            scan_shape = None
-        else:
-            scan_shape = [5, 3, 3, 3, 3]
-
-        loop_graph = onnx.helper.make_graph(
-            [loop_node],
-            "loop_outer",
-            inputs=[
-                onnx.helper.make_tensor_value_info("trip_count", onnx.TensorProto.INT64, []),
-                onnx.helper.make_tensor_value_info("cond", onnx.TensorProto.BOOL, []),
-                onnx.helper.make_tensor_value_info("y", onnx.TensorProto.FLOAT, [3, 3, 3, 3]),
-            ],
-            outputs=[
-                onnx.helper.make_tensor_value_info("res_y", onnx.TensorProto.FLOAT, [3, 3, 3, 3]),
-                onnx.helper.make_tensor_value_info("res_scan", onnx.TensorProto.FLOAT, scan_shape),
-            ],
-        )
-        loop_model = onnx.helper.make_model(loop_graph)
-
-        trip_count = np.array(5).astype(np.int64)
-        cond = np.array(1).astype(bool)
-        input_vals = [trip_count, cond, y]
-        verify_with_ort_with_inputs(
-            loop_model,
-            input_vals,
-            use_vm=True,
-            freeze_params=True,
-            opset=11,
-            target=target,
-            dev=dev,
-        )
-
-    # Test a loop that exits once a condition is met.
-    verify_cond_loop()
-    # Test a loop that exits after a fixed number of iterations with scalar outputs.
-    verify_count_loop()
-    # Test a loop that uses an array output.
-    verify_tensor_loop()
-    # Test a loop that is malformed and has no output shape defined.
-    verify_tensor_loop(shapeless_output=True)
-
-
-@tvm.testing.parametrize_targets
-def test_if(target, dev):
-    """test_if"""
-
-    def verify_if(cond_array, num_outputs):
-        # Given a bool scalar input cond.
-        # return constant tensor x if cond is True, otherwise return constant tensor y.
-
-        def append_constant_nodes(nodes, outputs, expected, name):
-            outputs.append(onnx.helper.make_tensor_value_info(name, onnx.TensorProto.FLOAT, [5]))
-
-            expected.append(np.random.randn(5).astype("float32"))
-
-            nodes.append(
-                onnx.helper.make_node(
-                    "Constant",
-                    inputs=[],
-                    outputs=[name],
-                    value=numpy_helper.from_array(expected[-1]),
-                )
-            )
-
-        if_outputs = []
-        graph_outputs = []
-
-        then_nodes, then_outs, then_expected = [], [], []
-        else_nodes, else_outs, else_expected = [], [], []
-
-        for i in range(num_outputs):
-            append_constant_nodes(then_nodes, then_outs, then_expected, f"then_out{i}")
-            append_constant_nodes(else_nodes, else_outs, else_expected, f"else_out{i}")
-
-            if_outputs.append(f"res{i}")
-            graph_outputs.append(
-                onnx.helper.make_tensor_value_info(f"res{i}", onnx.TensorProto.FLOAT, [5]),
-            )
-
-        then_body = onnx.helper.make_graph(then_nodes, "then_body", [], then_outs)
-        else_body = onnx.helper.make_graph(else_nodes, "else_body", [], else_outs)
-
-        if_node = onnx.helper.make_node(
-            "If", inputs=["cond"], outputs=if_outputs, then_branch=then_body, else_branch=else_body
-        )
-
-        if_graph = onnx.helper.make_graph(
-            [if_node],
-            "if_outer",
-            inputs=[
-                onnx.helper.make_tensor_value_info("cond", onnx.TensorProto.BOOL, []),
-            ],
-            outputs=graph_outputs,
-        )
-
-        if_model = onnx.helper.make_model(if_graph)
-        if cond_array:
-            cond = np.array([1]).astype("bool")
-        else:
-            cond = np.array(1).astype("bool")
-        correct_out = then_expected if cond else else_expected
-
-        # TODO(jwfromm): Onnxruntime 1.0.0 is buggy with If statements. Replace this with
-        # verify_with_ort once we update versions.
-        tvm_out = get_tvm_output_with_vm(if_model, [cond], target, dev, freeze_params=True)
-        if not isinstance(tvm_out, list):
-            tvm_out = [tvm_out]
-        for i, _ in enumerate(tvm_out):
-            tvm.testing.assert_allclose(
-                correct_out[i],
-                tvm_out[i],  # pylint: disable=unnecessary-list-index-lookup
-                rtol=1e-05,
-                atol=1e-05,
-            )
-
-    # Confirm that if works with cond as an array or scalar.
-    verify_if(cond_array=False, num_outputs=1)
-    verify_if(cond_array=False, num_outputs=2)
-    verify_if(cond_array=True, num_outputs=1)
-    verify_if(cond_array=True, num_outputs=2)
-
-
-@tvm.testing.parametrize_targets
-def test_graph_input_use_in_if(target, dev):
-    """test_graph_input_use_in_if"""
-
-    def verify_if(num_nested, cond):
-        # return "graph input" if cond is True, else return constant(-1).
-
-        input_tensor = helper.make_tensor_value_info("graph_input", TensorProto.FLOAT, [1])
-        output_tensor = helper.make_tensor_value_info("graph_output", TensorProto.FLOAT, [1])
-        constant_node = make_constant_node("const_val", TensorProto.FLOAT, [1], [-1])
-        cond_tensor = helper.make_tensor_value_info("cond", TensorProto.BOOL, [1])
-        inner_if_node = None
-        for i in range(num_nested):
-            identity_node = helper.make_node(
-                "Identity",
-                inputs=["const_val"],
-                outputs=[f"const{i}"],
-                name=f"depth{i}'th else identity",
-            )
-            else_branch = helper.make_graph(
-                [identity_node],
-                f"else{i}_body",
-                inputs=[],
-                outputs=[helper.make_tensor_value_info(f"const{i}", TensorProto.FLOAT, [1])],
-            )
-            out_name = f"if_output{i}" if i != (num_nested - 1) else "graph_output"
-
-            if i == 0:
-                identity_node = helper.make_node(
-                    "Identity",
-                    inputs=["graph_input"],
-                    outputs=[f"input_identity{i}"],
-                    name=f"depth{i}'th then identity",
-                )
-                then_branch = helper.make_graph(
-                    [identity_node],
-                    f"then{i}_body",
-                    inputs=[],
-                    outputs=[
-                        helper.make_tensor_value_info(f"input_identity{i}", TensorProto.FLOAT, [1])
-                    ],
-                )
-                if_node = helper.make_node(
-                    "If",
-                    inputs=["cond"],
-                    outputs=[out_name],
-                    then_branch=then_branch,
-                    else_branch=else_branch,
-                    name=f"depth{i}'s If node",
-                )
-                inner_if_node = if_node
-            else:
-                then_branch = helper.make_graph(
-                    [inner_if_node],
-                    f"then{i}_body",
-                    inputs=[],
-                    outputs=[
-                        helper.make_tensor_value_info(f"if_output{i-1}", TensorProto.FLOAT, [1])
-                    ],
-                )
-                if_node = helper.make_node(
-                    "If",
-                    inputs=["cond"],
-                    outputs=[out_name],
-                    then_branch=then_branch,
-                    else_branch=else_branch,
-                    name=f"depth{i}'s If node",
-                )
-                inner_if_node = if_node
-        graph_nodes = [constant_node, inner_if_node]
-        graph = helper.make_graph(
-            graph_nodes,
-            "input_use_in_if_test",
-            inputs=[input_tensor, cond_tensor],
-            outputs=[output_tensor],
-        )
-        model = helper.make_model(graph, producer_name="input_use_in_if_test")
-
-        verify_with_ort_with_inputs(
-            model,
-            [np.array([3.0], dtype="float32"), np.array([cond])],
-            dtype="float32",
-            use_vm=True,
-            opset=14,
-            target=target,
-            dev=dev,
-        )
-
-    # Confirm that if works with cond as an array or scalar.
-    verify_if(num_nested=1, cond=True)
-    verify_if(num_nested=1, cond=False)
-    verify_if(num_nested=2, cond=True)
-    verify_if(num_nested=2, cond=False)
-
-
-@tvm.testing.parametrize_targets
-def test_size(target, dev):
-    """test_size"""
-
-    def verify_size(indata):
-        node = helper.make_node(
-            "Size",
-            inputs=["X"],
-            outputs=["Y"],
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "size_test",
-            inputs=[helper.make_tensor_value_info("X", TensorProto.INT64, list(indata.shape))],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.INT64, [])],
-        )
-
-        model = helper.make_model(graph, producer_name="size_test")
-
-        verify_with_ort_with_inputs(
-            model, [indata], dtype="int64", use_vm=True, opset=11, target=target, dev=dev
-        )
-
-    input_data = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    verify_size(input_data)
-
-    input_data = np.array([[3, 0, 0], [0, 4, 0], [5, 6, 0]], dtype=np.int64)
-    verify_size(input_data)
-
-
-@tvm.testing.parametrize_targets
-def test_maxunpool(target, dev):
-    """test_maxunpool"""
-
-    def verify_maxunpool(data, indices, kernel_shape, strides, output_shape=None, pads=None):
-        input_names = ["xT", "xI"]
-        input_info = [
-            helper.make_tensor_value_info("xT", TensorProto.FLOAT, list(data.shape)),
-            helper.make_tensor_value_info("xI", TensorProto.INT64, list(indices.shape)),
-        ]
-        input_values = [data, indices]
-        if output_shape is not None:
-            input_names.append("output_shape")
-            input_info.append(
-                helper.make_tensor_value_info(
-                    "output_shape", TensorProto.INT64, list(output_shape.shape)
-                )
-            )
-            input_values.append(output_shape)
-        else:
-            # Compute expected output shape
-            output_shape = np.asarray(([1, 1] + list(strides))) * np.asarray(list(data.shape))
-            output_shape += np.asarray(([0, 0] + list(kernel_shape))) - np.asarray(
-                ([0, 0] + list(strides))
-            )
-            if pads is not None:
-                output_shape -= np.asarray(
-                    [0, 0] + list(np.sum(np.reshape(list(pads), [-1, 2]), axis=-1))
-                )
-        output_shape = [int(i) for i in output_shape]
-
-        node = helper.make_node(
-            "MaxUnpool", inputs=input_names, outputs=["y"], kernel_shape=kernel_shape
-        )
-
-        if pads is not None:
-            pad_attr = helper.make_attribute("pads", pads)
-            node.attribute.append(pad_attr)
-
-        if strides is not None:
-            strides_attr = helper.make_attribute("strides", strides)
-            node.attribute.append(strides_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "maxunpool_test",
-            inputs=input_info,
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, output_shape)],
-        )
-
-        model = helper.make_model(graph, producer_name="size_test")
-
-        verify_with_ort_with_inputs(
-            model, input_values, use_vm=True, opset=11, target=target, dev=dev
-        )
-
-    # Basic test
-    x_t = np.array([[[[5, 6], [7, 8]]]], dtype=np.float32)
-    x_i = np.array([[[[0, 7], [13, 15]]]], dtype=np.int64)
-    verify_maxunpool(x_t, x_i, [2, 2], strides=[2, 2])
-    # Small stride
-    verify_maxunpool(x_t, x_i, [2, 2], strides=[1, 1])
-    # Big kernel
-    verify_maxunpool(x_t, x_i, [3, 3], strides=[2, 2])
-    # With output shape
-    output_shape = np.array((1, 1, 5, 5), dtype=np.int64)
-    verify_maxunpool(x_t, x_i, [2, 2], strides=[2, 2], output_shape=output_shape)
-    # With explicit reverse padding
-    pads = np.asarray([1, 1, 1, 1]).astype(np.int64)
-    verify_maxunpool(x_t, x_i, [2, 2], strides=[2, 2], pads=pads)
-
-
-@tvm.testing.parametrize_targets
-def test_softplus(target, dev):
-    """test_softplus"""
-
-    def verify_softplus(indata):
-        node = helper.make_node(
-            "Softplus",
-            inputs=["X"],
-            outputs=["Y"],
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "softplus_test",
-            inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, list(indata.shape))],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(indata.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="softplus_test")
-
-        verify_with_ort_with_inputs(
-            model, [indata], dtype="float32", use_vm=True, opset=11, target=target, dev=dev
-        )
-
-    # Simple case with all signs.
-    input_data = np.array([[-1, 0, 1]], dtype=np.float32)
-    verify_softplus(input_data)
-    # More fancy case.
-    input_data = np.random.randn(1, 32, 32, 3).astype("float32")
-    verify_softplus(input_data)
-
-
-@tvm.testing.parametrize_targets
-def test_cumsum(target, dev):
-    """test_cumsum"""
-
-    def verify_cumsum(indata, axis, exclusive=0, reverse=0, dtype="float32"):
-        cumsum_node = onnx.helper.make_node(
-            "CumSum",
-            inputs=["X", "axis"],
-            outputs=["Y"],
-        )
-        if exclusive != 0:
-            exclusive_attr = helper.make_attribute("exclusive", exclusive)
-            cumsum_node.attribute.append(exclusive_attr)
-        if reverse != 0:
-            reverse_attr = helper.make_attribute("reverse", reverse)
-            cumsum_node.attribute.append(reverse_attr)
-        nodes = [
-            make_constant_node("axis", onnx.TensorProto.INT32, [1], [axis]),
-            cumsum_node,
-        ]
-        if dtype == "float32":
-            tensor_type = TensorProto.FLOAT
-        else:
-            tensor_type = TensorProto.INT32
-            dtype = "int32"
-
-        graph = helper.make_graph(
-            nodes,
-            "cumsum_test",
-            inputs=[
-                helper.make_tensor_value_info("X", tensor_type, list(indata.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("Y", tensor_type, list(indata.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="cumsum_test")
-
-        verify_with_ort_with_inputs(
-            model, [indata], dtype=dtype, use_vm=True, opset=11, target=target, dev=dev
-        )
-
-    data = (
-        np.array(
-            [
-                1.0,
-                2.0,
-                3.0,
-                4.0,
-                5.0,
-                6.0,
-                7.0,
-                8.0,
-                9.0,
-                10.0,
-                11.0,
-                12.0,
-            ]
-        )
-        .astype(np.float32)
-        .reshape((3, 4))
-    )
-
-    verify_cumsum(data, 0)
-    verify_cumsum(data, 1)
-    verify_cumsum(data, 0, 1, 0)
-    verify_cumsum(data, 1, 1, 0)
-    verify_cumsum(data, 0, 0, 1)
-    verify_cumsum(data, 1, 0, 1)
-    verify_cumsum(data, 1, 1, 1)
-    data = np.random.randn(1, 32, 32, 3).astype("float32")
-    verify_cumsum(data, 1)
-    data = np.random.randn(1, 32, 32, 3).astype("int32")
-    verify_cumsum(data, 0, dtype="int32")
-    verify_cumsum(data, 1, dtype="int32")
-    verify_cumsum(data, 0, 1, 0, dtype="int32")
-    verify_cumsum(data, 1, 1, 0, dtype="int32")
-    verify_cumsum(data, 0, 0, 1, dtype="int32")
-    verify_cumsum(data, 1, 0, 1, dtype="int32")
-    verify_cumsum(data, 1, 1, 1, dtype="int32")
-
-
-@tvm.testing.parametrize_targets
-def test_eyelike(target, dev):
-    """test_eyelike"""
-
-    def verify_eyelike(indata, dynamic=False):
-        node_list = []
-        eyelike_inputs = ["X"]
-        input_node_list = [
-            helper.make_tensor_value_info("X", TensorProto.FLOAT, list(indata.shape))
-        ]
-        input_list = [indata]
-
-        if dynamic:
-            input_node_list.append(
-                helper.make_tensor_value_info("shape", TensorProto.INT64, [len(indata.shape)])
-            )
-            input_list.append(np.asarray(indata.shape))
-            reshape_node = helper.make_node("Reshape", ["X", "shape"], ["X_dyn"])
-            eyelike_inputs[0] = "X_dyn"
-            node_list += [reshape_node]
-
-        node = helper.make_node(
-            "EyeLike",
-            inputs=eyelike_inputs,
-            outputs=["Y"],
-        )
-        node_list.append(node)
-
-        graph = helper.make_graph(
-            node_list,
-            "eyelike_test",
-            inputs=input_node_list,
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(indata.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="eyelike_test")
-        verify_with_ort_with_inputs(
-            model, input_list, dtype="float32", opset=9, target=target, dev=dev, use_vm=True
-        )
-
-    input_data = np.zeros((5, 5), dtype=np.float32)
-    verify_eyelike(input_data)
-    verify_eyelike(input_data, True)
-
-
-# The following parametrized tests loads the tests that ONNX ships as
-# serialized ONNX files, inputs, and outputs. The goal of this test
-# is to ensure the ONNX importer is in line with the ONNX specification.
-# To allow these tests to run in CI before all pass, a number of tests
-# that are not yet supported are skipped.
-
-onnx_test_node_dir = os.path.join(os.path.dirname(onnx.__file__), "backend", "test", "data", "node")
-
-onnx_test_folders = sorted(
-    dirname
-    for dirname in os.listdir(onnx_test_node_dir)
-    if dirname.startswith("test") and os.path.isdir(os.path.join(onnx_test_node_dir, dirname))
-)
-
-unsupported_onnx_tests = [
-    "test_batchnorm_epsilon_training_mode",
-    "test_batchnorm_example_training_mode",
-    "test_bernoulli",
-    "test_bernoulli_expanded",
-    "test_bernoulli_double",
-    "test_bernoulli_double_expanded",
-    "test_bernoulli_seed",
-    "test_bernoulli_seed_expanded",
-    "test_blackmanwindow",
-    "test_blackmanwindow_expanded",
-    "test_blackmanwindow_symmetric",
-    "test_blackmanwindow_symmetric_expanded",
-    # the follow cast and castlike cases have lowering issues
-    "test_cast_FLOAT_to_STRING",
-    "test_cast_STRING_to_FLOAT",
-    "test_castlike_FLOAT_to_STRING",
-    "test_castlike_FLOAT_to_STRING_expanded",
-    "test_castlike_STRING_to_FLOAT",
-    "test_castlike_STRING_to_FLOAT_expanded",
-    # the following cast and castlike cases segfault
-    "test_cast_DOUBLE_to_FLOAT16",
-    "test_castlike_DOUBLE_to_FLOAT16",
-    "test_castlike_DOUBLE_to_FLOAT16_expanded",
-    "test_convtranspose_dilations",
-    "test_cumsum_1d",
-    "test_cumsum_1d_exclusive",
-    "test_cumsum_1d_reverse",
-    "test_cumsum_1d_reverse_exclusive",
-    "test_cumsum_2d_axis_0",
-    "test_cumsum_2d_axis_1",
-    "test_cumsum_2d_negative_axis",
-    "test_det_2d",
-    "test_det_nd",
-    "test_dropout_default",
-    "test_dropout_default_mask",
-    "test_dropout_default_mask_ratio",
-    "test_dropout_default_ratio",
-    "test_gru_batchwise",
-    "test_hammingwindow",
-    "test_hammingwindow_expanded",
-    "test_hammingwindow_symmetric",
-    "test_hammingwindow_symmetric_expanded",
-    "test_hannwindow",
-    "test_hannwindow_expanded",
-    "test_hannwindow_symmetric",
-    "test_hannwindow_symmetric_expanded",
-    "test_identity_opt",
-    "test_identity_sequence",
-    "test_if_opt",
-    "test_if_seq",
-    "test_loop13_seq",
-    "test_loop16_seq_none",
-    "test_lstm_batchwise",
-    "test_maxpool_with_argmax_2d_precomputed_pads",
-    "test_maxpool_with_argmax_2d_precomputed_strides",
-    "test_maxunpool_export_with_output_shape",
-    "test_melweightmatrix",
-    # This test fails llvm with a lowering error:
-    "test_nllloss_NCd1d2d3_none_no_weight_negative_ii_expanded",
-    "test_qlinearmatmul_3D",
-    "test_range_float_type_positive_delta_expanded",
-    "test_range_int32_type_negative_delta_expanded",
-    "test_reduce_sum_do_not_keepdims_example",
-    "test_reduce_sum_do_not_keepdims_random",
-    "test_reduce_sum_keepdims_example",
-    "test_reduce_sum_keepdims_random",
-    "test_reduce_sum_negative_axes_keepdims_example",
-    "test_reduce_sum_negative_axes_keepdims_random",
-    "test_roialign_aligned_true",
-    "test_sequence_insert_at_back",
-    "test_sequence_insert_at_front",
-    "test_sequence_map_add_1_sequence_1_tensor",
-    "test_sequence_map_add_1_sequence_1_tensor_expanded",
-    "test_sequence_map_add_2_sequences",
-    "test_sequence_map_add_2_sequences_expanded",
-    "test_sequence_map_extract_shapes",
-    "test_sequence_map_extract_shapes_expanded",
-    "test_sequence_map_identity_1_sequence",
-    "test_sequence_map_identity_1_sequence_1_tensor",
-    "test_sequence_map_identity_1_sequence_1_tensor_expanded",
-    "test_sequence_map_identity_1_sequence_expanded",
-    "test_sequence_map_identity_2_sequences",
-    "test_sequence_map_identity_2_sequences_expanded",
-    "test_simple_rnn_batchwise",
-    "test_simple_rnn_defaults",
-    "test_simple_rnn_with_initial_bias",
-    "test_split_variable_parts_1d",
-    "test_split_variable_parts_2d",
-    "test_split_variable_parts_default_axis",
-    "test_split_zero_size_splits",
-    "test_stft",
-    "test_stft_with_window",
-    "test_strnormalizer_export_monday_casesensintive_lower",
-    "test_strnormalizer_export_monday_casesensintive_nochangecase",
-    "test_strnormalizer_export_monday_casesensintive_upper",
-    "test_strnormalizer_export_monday_empty_output",
-    "test_strnormalizer_export_monday_insensintive_upper_twodim",
-    "test_strnormalizer_nostopwords_nochangecase",
-    "test_tfidfvectorizer_tf_batch_onlybigrams_skip0",
-    "test_tfidfvectorizer_tf_batch_onlybigrams_skip5",
-    "test_tfidfvectorizer_tf_batch_uniandbigrams_skip5",
-    "test_tfidfvectorizer_tf_only_bigrams_skip0",
-    "test_tfidfvectorizer_tf_onlybigrams_levelempty",
-    "test_tfidfvectorizer_tf_onlybigrams_skip5",
-    "test_tfidfvectorizer_tf_uniandbigrams_skip5",
-    "test_training_dropout",
-    "test_training_dropout_default",
-    "test_training_dropout_default_mask",
-    "test_training_dropout_mask",
-    "test_training_dropout_zero_ratio",
-    "test_training_dropout_zero_ratio_mask",
-    "test_tril_zero",
-    "test_triu_zero",
-    "test_unique_sorted_with_axis",
-    "test_unique_sorted_with_axis_3d",
-    "test_unique_sorted_with_negative_axis",
-    "test_upsample_nearest",
-    "test_upsample_nearest_default",
-]
-
-
-target_skips = {
-    "cuda": [
-        "test_range_float_type_positive_delta_expanded",
-        "test_range_int32_type_positive_delta_expanded",
-        "test_mod_mixed_sign_float16",
-        "test_qlinearconv",
-        "test_qlinearmatmul",
-        "test_resize_upsample_sizes_nearest",
-    ]
-}
-
-
-def _load_proto(proto_filename, target_list, model_type_proto):
-    with open(proto_filename, "rb") as fin:
-        protobuf_content = fin.read()
-        if model_type_proto.HasField("sequence_type"):
-            sequence = onnx.SequenceProto()
-            sequence.ParseFromString(protobuf_content)
-            target_list.append(numpy_helper.to_list(sequence))
-        elif model_type_proto.HasField("tensor_type"):
-            tensor = onnx.TensorProto()
-            tensor.ParseFromString(protobuf_content)
-            target_list.append(numpy_helper.to_array(tensor))
-        elif model_type_proto.HasField("optional_type"):
-            optional = onnx.OptionalProto()
-            optional.ParseFromString(protobuf_content)
-            target_list.append(numpy_helper.to_optional(optional))
-        else:
-            raise ValueError(
-                "Loading proto of that specific type (Map/Sparse Tensor) is currently not supported"
-            )
-
-
-def is_ort_version_lower_than(ver):
-    import onnxruntime as ort
-
-    v11, v12, v13 = tuple(int(v) for v in ort.__version__.split("."))
-    v21, v22, v23 = tuple(int(v) for v in ver.split("."))
-
-    return (v11 < v21) or (v11 == v21 and v12 < v22) or ((v11, v12) == (v21, v22) and v13 < v23)
-
-
-@pytest.mark.parametrize("onnx_test", onnx_test_folders)
-@tvm.testing.parametrize_targets
-def test_onnx_nodes(target, dev, onnx_test):
-    """test_onnx_nodes"""
-    if platform.machine() == "aarch64" and onnx_test == "test_resize_upsample_sizes_nearest":
-        pytest.skip("Currently failing on AArch64")
-
-    target_kind = tvm.target.Target(target).kind.name
-
-    if onnx_test in unsupported_onnx_tests:
-        pytest.skip(f"Onnx test '{onnx_test}' not yet supported by TVM")
-
-    target_specific_skips = target_skips.get(target_kind, [])
-    if onnx_test in target_specific_skips:
-        pytest.skip(f"Onnx test '{onnx_test}' not yet supported by TVM on {target_kind} targets")
-
-    if is_ort_version_lower_than("1.13.1") and onnx_test == "test_convtranspose_autopad_same":
-        pytest.skip(
-            f"Onnx test '{onnx_test}' expected to fail for onnxruntime version lower than 1.13.1 "
-            "due to different interpretation of auto_pad parameters SAME_UPPER and SAME_LOWER."
-        )
-
-    test_dir = os.path.join(onnx_test_node_dir, onnx_test)
-
-    atol = 1e-5
-    rtol = 1e-5
-    if "roialign" in test_dir:
-        # for some reason the ONNX test crops the
-        # roialign results to 4 decimal places
-        atol = 1e-4
-
-    if "to_BFLOAT16" in test_dir:
-        # the tolerance here is for the comparison in uint16 space, but is not as significant
-        # of a delta in bfloat16 space because it's representing the mantissa being off by 1
-        atol = 1
-
-    if "_sce_" in test_dir:
-        # complicated loss functions like SoftmaxCrossEntropy can have minor variations
-        # in accuracy depending on implementation
-        atol = 1e-4
-
-    if "bicubic" in test_dir:
-        # satisfies onnx precision for bicubic interpolation
-        atol = 1e-4
-
-    if "dft" in test_dir:
-        atol = 1e-3
-
-    model = onnx.load(os.path.join(test_dir, "model.onnx"))
-    for test_data_dir in glob.glob(os.path.join(test_dir, "test_data_set*")):
-        inputs = []
-        n_inputs = len(glob.glob(os.path.join(test_data_dir, "input_*.pb")))
-        for i in range(n_inputs):
-            input_file = os.path.join(test_data_dir, f"input_{i}.pb")
-            _load_proto(input_file, inputs, model.graph.input[i].type)
-
-        outputs = []
-        n_outputs = len(glob.glob(os.path.join(test_data_dir, "output_*.pb")))
-        for i in range(n_outputs):
-            output_file = os.path.join(test_data_dir, f"output_{i}.pb")
-            _load_proto(output_file, outputs, model.graph.output[i].type)
-
-    tvm_val = get_tvm_output_with_vm(model, inputs, target, dev)
-    if len(outputs) == 1:
-        tvm.testing.assert_allclose(outputs[0], tvm_val, rtol=rtol, atol=atol)
-    else:
-        for output, val in zip(outputs, tvm_val):
-            tvm.testing.assert_allclose(output, val, rtol=rtol, atol=atol)
-
-
-def test_wrong_input():
-    """test_wrong_input"""
-    node = helper.make_node(
-        "Softplus",
-        inputs=["X"],
-        outputs=["Y"],
-    )
-
-    graph = helper.make_graph(
-        [node],
-        "softplus_test",
-        inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, list([5]))],
-        outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list([5]))],
-    )
-    model = helper.make_model(graph, producer_name="softplus_test")
-
-    # Check that the graph can import correctly with proper shape definitions.
-    correct_shape_dict = {"X": [5]}
-    relay.frontend.from_onnx(model, shape=correct_shape_dict)
-
-    # Check that an assertion is triggered when an input not in the graph is provided.
-    wrong_shape_dict = {"Z": [5]}
-    with pytest.raises(AssertionError):
-        relay.frontend.from_onnx(model, shape=wrong_shape_dict)
-
-
-@pytest.mark.skip(reason="unsupported op numel")
-@tvm.testing.parametrize_targets
-def test_aten(target, dev):
-    """test_aten"""
-    torch.set_grad_enabled(False)
-
-    def _convert_to_onnx(model, inputs):
-        file_name = "aten_model.onnx"
-        torch.onnx.export(
-            model,
-            inputs,
-            file_name,
-            export_params=True,
-            verbose=False,
-            opset_version=10,
-            operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN,
-        )
-        onnx_model = onnx.load(file_name)
-        return onnx_model
-
-    def verify_embedding_bag(num_embedding, embedding_dim, data_shape, num_bags=None):
-        dummy_data = torch.randint(0, num_embedding - 1, data_shape)
-        tvm_inputs = [dummy_data.numpy()]
-        model = torch.nn.EmbeddingBag(num_embedding, embedding_dim)
-        onnx_model = _convert_to_onnx(model, dummy_data)
-        torch_out = model(dummy_data)
-        tvm_out = get_tvm_output_with_vm(
-            onnx_model,
-            tvm_inputs,
-            freeze_params=True,
-            target=target,
-            dev=dev,
-        )
-        tvm.testing.assert_allclose(torch_out.numpy(), tvm_out, atol=5e-7)
-
-    verify_embedding_bag(10, 3, [2, 10])
-    verify_embedding_bag(32, 2, [3, 3])
-
-
-@tvm.testing.parametrize_targets
-def test_index_put(target, dev):
-    """test_index_put"""
-
-    class IndexPutModel(torch.nn.Module):
-        def __init__(self, indices, values, accumulate):
-            super().__init__()
-            self.indices = indices
-            self.values = values
-            self.accumulate = accumulate
-
-        def forward(self, x):
-            return x.index_put(self.indices, self.values, self.accumulate)
-
-    def _convert_to_onnx(model, dummy_data):
-        file_name = "aten_model.onnx"
-        torch.onnx.export(
-            model,
-            dummy_data,
-            file_name,
-            export_params=True,
-            verbose=False,
-            opset_version=11,
-            operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK,
-        )
-        onnx_model = onnx.load(file_name)
-        return onnx_model
-
-    def verify_index_put(data_shape, indices, accumulate):
-        dummy_data = torch.ones(data_shape)
-        tvm_inputs = [dummy_data.numpy()]
-        values = torch.rand(indices[0].size())
-        model = IndexPutModel(indices, values, accumulate)
-        onnx_model = _convert_to_onnx(model, dummy_data)
-        torch_out = model(dummy_data)
-
-        tvm_out = get_tvm_output_with_vm(onnx_model, tvm_inputs, target, dev, freeze_params=True)
-        tvm.testing.assert_allclose(torch_out.numpy(), tvm_out)
-
-    shape = (3, 5)
-    xidx = torch.tensor([0, 1, 2, 2])
-    yidx = torch.tensor([0, 1, 3, 4])
-    verify_index_put(shape, [xidx, yidx], True)
-
-    shape = (3, 5, 3)
-    xidx = torch.tensor([0, 1, 2, 2, 0])
-    yidx = torch.tensor([0, 1, 3, 4, 0])
-    zidx = torch.tensor([0, 1, 1, 2, 0])
-    verify_index_put(shape, [xidx, yidx, zidx], False)
-
-    def verify_index_put_slice(data_shape, value_shape, accumulate):
-        dummy_data = torch.ones(data_shape)
-        tvm_inputs = [dummy_data.numpy()]
-        indices = []
-        index_shape = [1] * len(value_shape)
-        index_shape[0] = -1
-        for _, v_shape in enumerate(value_shape):
-            indices.append(torch.arange(0, v_shape).reshape(tuple(index_shape)))
-            index_shape.pop()
-        values = torch.rand(value_shape)
-
-        model = IndexPutModel(indices, values, accumulate)
-        onnx_model = _convert_to_onnx(model, dummy_data)
-        torch_out = model(dummy_data)
-
-        tvm_out = get_tvm_output_with_vm(onnx_model, tvm_inputs, target, dev, freeze_params=True)
-        tvm.testing.assert_allclose(torch_out.numpy(), tvm_out)
-
-    verify_index_put_slice((3, 3), (2, 2), False)
-    verify_index_put_slice((2, 3, 4), (1, 2, 3), True)
-    verify_index_put_slice((2, 3, 4, 5), (1, 2, 3, 1), False)
-
-
-@tvm.testing.parametrize_targets
-def test_reverse_sequence(target, dev):
-    """test_reverse_sequence"""
-
-    def verify_reverse_sequence(x, sequence_lens, batch_axis, time_axis):
-        node = onnx.helper.make_node(
-            "ReverseSequence",
-            inputs=["x", "sequence_lens"],
-            outputs=["y"],
-            time_axis=time_axis,
-            batch_axis=batch_axis,
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "reverse_sequence_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x.shape)),
-                helper.make_tensor_value_info(
-                    "sequence_lens", TensorProto.INT64, list(sequence_lens.shape)
-                ),
-            ],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(x.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="reverse_sequence_test")
-        verify_with_ort_with_inputs(model, [x, sequence_lens], [x.shape], target=target, dev=dev)
-
-    x = np.array(
-        [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]],
-        dtype=np.float32,
-    )
-    sequence_lens = np.array([1, 2, 3, 4], dtype=np.int64)
-    verify_reverse_sequence(x, sequence_lens, 0, 1)
-
-    sequence_lens = np.array([4, 3, 2, 1], dtype=np.int64)
-    verify_reverse_sequence(x, sequence_lens, 1, 0)
-
-
-@pytest.mark.parametrize("op_name", ["Gelu", "FastGelu"], scope="session")
-@pytest.mark.parametrize("data_type", ["float16", "float32"], scope="session")
-@tvm.testing.parametrize_targets
-def test_gelu(target, dev, data_type, op_name):
-    """test_gelu"""
-    dtype = np.dtype(data_type)
-    tensor_type = mapping.NP_TYPE_TO_TENSOR_TYPE[dtype]
-    absolute_tolerance = 1e-3 if data_type == "float16" else 1e-5
-
-    def verify_gelu(x):
-        node = onnx.helper.make_node(
-            op_name,
-            inputs=["x"],
-            outputs=["y"],
-            domain="com.microsoft",
-        )
-
-        graph = helper.make_graph(
-            [node],
-            f"{op_name}_test",
-            inputs=[helper.make_tensor_value_info("x", tensor_type, list(x.shape))],
-            outputs=[helper.make_tensor_value_info("y", tensor_type, list(x.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name=f"{op_name}_test")
-        verify_with_ort_with_inputs(
-            model, [x], [x.shape], atol=absolute_tolerance, dtype=data_type, target=target, dev=dev
-        )
-
-    x = np.array([-1.0, 0, 1.0, 100.0, -100.0, 1000.0, -1000.0], dtype=dtype)
-    verify_gelu(x)
-    x = np.array([[1, 2], [3, 4]], dtype=dtype)
-    verify_gelu(x)
-
-
-@pytest.mark.parametrize("op_name", ["BiasGelu", "FastGelu"], scope="session")
-@pytest.mark.parametrize("data_type", ["float16", "float32"], scope="session")
-@tvm.testing.parametrize_targets
-def test_biasgelu(target, dev, data_type, op_name):
-    """test_biasgelu"""
-    dtype = np.dtype(data_type)
-    tensor_type = mapping.NP_TYPE_TO_TENSOR_TYPE[dtype]
-    absolute_tolerance = 1e-2 if data_type == "float16" else 1e-5
-
-    def verify_biasgelu(x, bias):
-        node = onnx.helper.make_node(
-            op_name,
-            inputs=["x", "bias"],
-            outputs=["y"],
-            domain="com.microsoft",
-        )
-
-        graph = helper.make_graph(
-            [node],
-            f"{op_name}_test",
-            inputs=[
-                helper.make_tensor_value_info("x", tensor_type, list(x.shape)),
-                helper.make_tensor_value_info("bias", tensor_type, list(bias.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("y", tensor_type, list(x.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name=f"{op_name}_test")
-        verify_with_ort_with_inputs(
-            model,
-            [x, bias],
-            [x.shape],
-            atol=absolute_tolerance,
-            dtype=data_type,
-            target=target,
-            dev=dev,
-        )
-
-    x = np.array([-1.0, 0, 1.0, 100.0, -100.0, 1000.0, -1000.0], dtype=dtype)
-    bias = np.repeat(2.0, 7).astype(dtype)
-    verify_biasgelu(x, bias)
-
-    x = np.array([[1, 2], [3, 4]], dtype=dtype)
-    bias = np.array([0.3, 4.0], dtype=dtype)
-    verify_biasgelu(x, bias)
-
-
-@tvm.testing.parametrize_targets
-def test_embedlayernormalization(target, dev):
-    """test_embedlayernormalization"""
-
-    def verify_embedlayernormalization(
-        input_ids,
-        segment_ids,
-        word_embedding,
-        position_embedding,
-        segment_embedding,
-        gamma,
-        beta,
-    ):
-        node = onnx.helper.make_node(
-            "EmbedLayerNormalization",
-            inputs=[
-                "input_ids",
-                "" if segment_ids is None else "segment_ids",
-                "word_embedding",
-                "position_embedding",
-                "" if segment_embedding is None else "segment_embedding",
-                "gamma",
-                "beta",
-            ],
-            outputs=["output", "mask_index"],
-            domain="com.microsoft",
-        )
-
-        node.attribute.append(onnx.helper.make_attribute("epsilon", 1e-4))
-
-        segment_ids_shape = [] if segment_ids is None else segment_ids.shape
-        segment_embedding_shape = [] if segment_embedding is None else segment_embedding.shape
-
-        graph = helper.make_graph(
-            [node],
-            "embedlayernormalization_test",
-            inputs=[
-                helper.make_tensor_value_info(
-                    "input_ids", TensorProto.INT32, list(input_ids.shape)
-                ),
-                helper.make_tensor_value_info("segment_ids", TensorProto.INT32, segment_ids_shape),
-                helper.make_tensor_value_info(
-                    "word_embedding", TensorProto.FLOAT, list(word_embedding.shape)
-                ),
-                helper.make_tensor_value_info(
-                    "position_embedding", TensorProto.FLOAT, list(position_embedding.shape)
-                ),
-                helper.make_tensor_value_info(
-                    "segment_embedding", TensorProto.FLOAT, segment_embedding_shape
-                ),
-                helper.make_tensor_value_info("gamma", TensorProto.FLOAT, list(gamma.shape)),
-                helper.make_tensor_value_info("beta", TensorProto.FLOAT, list(beta.shape)),
-            ],
-            outputs=[
-                helper.make_tensor_value_info(
-                    "output", TensorProto.FLOAT, list((batch_size, sequence_length, hidden_size))
-                ),
-                helper.make_tensor_value_info("mask_index", TensorProto.INT32, [batch_size]),
-            ],
-        )
-
-        model = helper.make_model(graph, producer_name="embedlayernormalization_test")
-
-        # TODO(@anwang2009): onnxruntime v1.9.0 requires empty list for optional argument,
-        # but v1.10.0+ requires None instead.
-        verify_with_ort_with_inputs(
-            model,
-            [
-                input_ids,
-                np.empty(0, dtype="int32") if segment_ids is None else segment_ids,
-                word_embedding,
-                position_embedding,
-                np.empty(0, dtype="float32") if segment_embedding is None else segment_embedding,
-                gamma,
-                beta,
-            ],
-            [
-                (batch_size, sequence_length, hidden_size),
-                batch_size,
-            ],
-            target=target,
-            dev=dev,
-            rtol=1e-4,
-            atol=1e-4,
-        )
-
-    hidden_size = 384
-    batch_size = 4
-    sequence_length = 3
-    vocab_size = 5
-
-    input_ids = np.full((batch_size, sequence_length), 3).astype("int32")
-    segment_ids = np.zeros((batch_size, sequence_length)).astype("int32")
-    word_embedding = np.full((vocab_size, hidden_size), 1).astype("float32")
-    position_embedding = np.full((sequence_length, hidden_size), 2).astype("float32")
-    segment_embedding = np.full((vocab_size, hidden_size), 3).astype("float32")
-
-    gamma = np.random.uniform(0.5, 0.7, hidden_size).astype("float32")
-    beta = np.random.randn(hidden_size).astype("float32") * 0.1
-
-    verify_embedlayernormalization(
-        input_ids, segment_ids, word_embedding, position_embedding, segment_embedding, gamma, beta
-    )
-
-    # Test with undefined segment embedding
-    verify_embedlayernormalization(
-        input_ids, None, word_embedding, position_embedding, None, gamma, beta
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_attention(target, dev):
-    """test_attention"""
-
-    def verify_attention(_unidirectional, _input, _weight, _bias, _mask_index=None, _past=None):
-        input_names = ["input", "weight", "bias"]
-        if _mask_index is not None:
-            input_names.append("mask_index")
-        if _past is not None:
-            input_names.append("past")
-
-        node = onnx.helper.make_node(
-            "Attention",
-            inputs=input_names,
-            outputs=["output", "present"],
-            domain="com.microsoft",
-            num_heads=num_heads,
-            unidirectional=_unidirectional,
-        )
-
-        past_shape = (2, batch_size, num_heads, past_sequence_length, head_size)
-        present_output_shape = (2, batch_size, num_heads, sequence_length, head_size)
-
-        inputs_info = [
-            helper.make_tensor_value_info("input", TensorProto.FLOAT, list(_input.shape)),
-            helper.make_tensor_value_info("weight", TensorProto.FLOAT, list(_weight.shape)),
-            helper.make_tensor_value_info("bias", TensorProto.FLOAT, list(_bias.shape)),
-        ]
-        if _mask_index is not None:
-            inputs_info.append(
-                helper.make_tensor_value_info(
-                    "mask_index", TensorProto.INT32, list(_mask_index.shape)
-                ),
-            )
-        if _past is not None:
-            inputs_info.append(
-                helper.make_tensor_value_info("past", TensorProto.FLOAT, list(past_shape))
-            )
-
-        graph = helper.make_graph(
-            [node],
-            "attention_test",
-            inputs=inputs_info,
-            outputs=[
-                helper.make_tensor_value_info("output", TensorProto.FLOAT, list(_input.shape)),
-                helper.make_tensor_value_info(
-                    "present", TensorProto.FLOAT, list(present_output_shape)
-                ),
-            ],
-        )
-
-        model = helper.make_model(graph, producer_name="attention_test")
-
-        inputs = [_input, _weight, _bias]
-        if _mask_index is not None:
-            inputs.append(_mask_index)
-        if _past is not None:
-            inputs.append(_past)
-
-        # "present" output should be nullptr when the "past" input isn't included,
-        # but ort requires an output shape to be specified?
-        verify_with_ort_with_inputs(
-            model,
-            inputs,
-            [_input.shape, present_output_shape],
-            target=target,
-            dev=dev,
-            rtol=1e-4,
-            atol=1e-4,
-        )
-
-    batch_size = 11
-    num_heads = 13
-    head_size = 37
-    sequence_length = 7
-    input_hidden_size = 147
-    weight_hidden_size = num_heads * head_size
-    past_sequence_length = 17
-
-    total_sequence_length = past_sequence_length + sequence_length
-
-    # Required inputs
-    input_array = np.random.normal(size=(batch_size, sequence_length, input_hidden_size)).astype(
-        "float32"
-    )
-    weight = (
-        np.random.normal(size=(input_hidden_size, 3 * weight_hidden_size)).astype("float32") * 0.1
-    )
-    bias = np.random.randn(3 * weight_hidden_size).astype("float32")
-
-    # Optional inputs
-    past = np.random.random((2, batch_size, num_heads, past_sequence_length, head_size)).astype(
-        "float32"
-    )
-
-    for unidirectional in [0, 1]:
-        for have_past in [False, True]:
-            if not have_past:
-                mask_index = np.random.randint(0, 2, (batch_size, sequence_length)).astype("int32")
-                verify_attention(unidirectional, input_array, weight, bias, mask_index)
-            else:
-                mask_index = np.random.randint(0, 2, (batch_size, total_sequence_length)).astype(
-                    "int32"
-                )
-                verify_attention(unidirectional, input_array, weight, bias, mask_index, past)
-
-
-@tvm.testing.parametrize_targets
-def test_qattention(target, dev):
-    """test_qattention"""
-
-    def verify_attention(
-        _unidirectional,
-        _input,
-        _weight,
-        _bias,
-        _input_scale,
-        _weight_scale,
-        _mask_index=None,
-        _input_zero_point=None,
-        _weight_zero_point=None,
-        _past=None,
-    ):
-        input_names = ["input", "weight", "bias", "input_scale", "weight_scale"]
-        if _mask_index is not None:
-            input_names.append("mask_index")
-        if _input_zero_point is not None:
-            input_names.append("input_zero_point")
-        if _weight_zero_point is not None:
-            input_names.append("weight_zero_point")
-        if _past is not None:
-            input_names.append("past")
-
-        node = onnx.helper.make_node(
-            "QAttention",
-            inputs=input_names,
-            outputs=["output", "present"],
-            domain="com.microsoft",
-            num_heads=num_heads,
-            unidirectional=_unidirectional,
-        )
-
-        past_shape = (2, batch_size, num_heads, past_sequence_length, head_size)
-        present_output_shape = (
-            2,
-            batch_size,
-            num_heads,
-            past_sequence_length + sequence_length,
-            head_size,
-        )
-
-        inputs_info = [
-            helper.make_tensor_value_info("input", TensorProto.UINT8, list(_input.shape)),
-            helper.make_tensor_value_info("weight", TensorProto.UINT8, list(_weight.shape)),
-            helper.make_tensor_value_info("bias", TensorProto.FLOAT, list(_bias.shape)),
-            helper.make_tensor_value_info("input_scale", TensorProto.FLOAT, ()),
-            helper.make_tensor_value_info("weight_scale", TensorProto.FLOAT, ()),
-        ]
-        if _mask_index is not None:
-            inputs_info.append(
-                helper.make_tensor_value_info(
-                    "mask_index", TensorProto.INT32, list(_mask_index.shape)
-                )
-            )
-        if _input_zero_point is not None:
-            inputs_info.append(
-                helper.make_tensor_value_info("input_zero_point", TensorProto.UINT8, ())
-            )
-        if _weight_zero_point is not None:
-            inputs_info.append(
-                helper.make_tensor_value_info("weight_zero_point", TensorProto.UINT8, ())
-            )
-        if _past is not None:
-            inputs_info.append(
-                helper.make_tensor_value_info("past", TensorProto.FLOAT, list(past_shape))
-            )
-
-        graph = helper.make_graph(
-            [node],
-            "qattention_test",
-            inputs=inputs_info,
-            outputs=[
-                helper.make_tensor_value_info("output", TensorProto.FLOAT, list(_input.shape)),
-                helper.make_tensor_value_info(
-                    "present", TensorProto.FLOAT, list(present_output_shape)
-                ),
-            ],
-        )
-
-        model = helper.make_model(graph, producer_name="qattention_test")
-
-        inputs = [_input, _weight, _bias, _input_scale, _weight_scale]
-        if _mask_index is not None:
-            inputs.append(_mask_index)
-        if _input_zero_point is not None:
-            inputs.append(_input_zero_point)
-        if _weight_zero_point is not None:
-            inputs.append(_weight_zero_point)
-        if _past is not None:
-            inputs.append(_past)
-
-        verify_with_ort_with_inputs(
-            model,
-            inputs,
-            [_input.shape, present_output_shape],
-            target=target,
-            dev=dev,
-            rtol=1e-3,
-            atol=1e-3,
-        )
-
-    batch_size = 11
-    num_heads = 13
-    head_size = 37
-    sequence_length = 7
-    input_hidden_size = 147
-    weight_hidden_size = num_heads * head_size
-    past_sequence_length = 17
-
-    total_sequence_length = past_sequence_length + sequence_length
-
-    # Required inputs
-    input_array = np.random.randint(
-        0, 255, (batch_size, sequence_length, input_hidden_size)
-    ).astype("uint8")
-    weight = np.random.randint(0, 255, (input_hidden_size, 3 * weight_hidden_size)).astype("uint8")
-    bias = np.random.randn(3 * weight_hidden_size).astype("float32")
-    input_scale = np.random.random(1).astype("float32")
-    weight_scale = np.random.random(1).astype("float32")
-
-    # Optional inputs
-    input_zero_point = np.random.randint(0, 255, 1).astype("uint8")
-    weight_zero_point = np.random.randint(0, 255, 1).astype("uint8")
-    past = np.random.random((2, batch_size, num_heads, past_sequence_length, head_size)).astype(
-        "float32"
-    )
-
-    for unidirectional in [0, 1]:
-        for have_past in [False, True]:
-            if not have_past:
-                mask_index = np.random.randint(0, 2, (batch_size, sequence_length)).astype("int32")
-
-                verify_attention(
-                    unidirectional,
-                    input_array,
-                    weight,
-                    bias,
-                    input_scale,
-                    weight_scale,
-                    mask_index,
-                )
-                verify_attention(
-                    unidirectional,
-                    input_array,
-                    weight,
-                    bias,
-                    input_scale,
-                    weight_scale,
-                    mask_index,
-                    input_zero_point,
-                )
-                verify_attention(
-                    unidirectional,
-                    input_array,
-                    weight,
-                    bias,
-                    input_scale,
-                    weight_scale,
-                    mask_index,
-                    input_zero_point,
-                    weight_zero_point,
-                )
-            else:
-                mask_index = np.random.randint(0, 2, (batch_size, total_sequence_length)).astype(
-                    "int32"
-                )
-
-                verify_attention(
-                    unidirectional,
-                    input_array,
-                    weight,
-                    bias,
-                    input_scale,
-                    weight_scale,
-                    mask_index,
-                    input_zero_point,
-                    weight_zero_point,
-                    past,
-                )
-
-
-@tvm.testing.parametrize_targets
-def test_skiplayernormalization(target, dev):
-    """test_skiplayernormalization"""
-
-    def verify_skiplayernormalization(input_, skip, gamma, beta, bias):
-        node = onnx.helper.make_node(
-            "SkipLayerNormalization",
-            inputs=["input", "skip", "gamma", "beta", "bias"],
-            outputs=["output"],
-            domain="com.microsoft",
-        )
-
-        node.attribute.append(onnx.helper.make_attribute("epsilon", 1e-4))
-
-        graph = helper.make_graph(
-            [node],
-            "skiplayernormalization_test",
-            inputs=[
-                helper.make_tensor_value_info("input", TensorProto.FLOAT, list(input_.shape)),
-                helper.make_tensor_value_info("skip", TensorProto.FLOAT, list(skip.shape)),
-                helper.make_tensor_value_info("gamma", TensorProto.FLOAT, list(gamma.shape)),
-                helper.make_tensor_value_info("beta", TensorProto.FLOAT, list(beta.shape)),
-                helper.make_tensor_value_info("bias", TensorProto.FLOAT, list(bias.shape)),
-            ],
-            outputs=[
-                helper.make_tensor_value_info("output", TensorProto.FLOAT, list(input_.shape)),
-            ],
-        )
-
-        model = helper.make_model(graph, producer_name="skiplayernormalization_test")
-        verify_with_ort_with_inputs(
-            model, [input_, skip, gamma, beta, bias], [input_.shape], target=target, dev=dev
-        )
-
-    hidden_size = 384
-    batch_size = 4
-    sequence_length = 4
-
-    dtype = "float32"
-    input_array = np.random.random((batch_size, sequence_length, hidden_size)).astype(dtype)
-    skip = np.random.random((batch_size, sequence_length, hidden_size)).astype(dtype)
-    gamma = np.random.uniform(0.5, 0.7, hidden_size).astype(dtype)
-    beta = np.random.randn(hidden_size).astype(dtype) * 0.1
-    bias = np.random.randn(hidden_size).astype(dtype)
-
-    verify_skiplayernormalization(input_array, skip, gamma, beta, bias)
-
-
-@tvm.testing.known_failing_targets("cuda")
-@tvm.testing.parametrize_targets
-def test_qgemm(target, dev):
-    """test_qgemm"""
-
-    def verify_qgemm(
-        a_shape,
-        b_shape,
-        y_shape,
-        C=False,
-        y_zp=False,
-        b_per_tensor_quantization=False,
-        alpha=1.0,
-        transA=0,
-        transB=1,
-    ):
-        a_array = np.random.randint(low=0, high=255, size=a_shape).astype("uint8")
-        b_array = np.random.uniform(low=0, high=255, size=b_shape).astype("uint8")
-
-        input_nodes = [
-            helper.make_tensor_value_info("a", TensorProto.UINT8, list(a_shape)),
-            helper.make_tensor_value_info("b", TensorProto.UINT8, list(b_shape)),
-        ]
-
-        initializer = [
-            helper.make_tensor("a_scale", TensorProto.FLOAT, (), [np.random.rand()]),
-            helper.make_tensor("a_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]),
-        ]
-
-        input_names = [
-            "a",
-            "a_scale",
-            "a_zero_point",
-            "b",
-            "b_scale",
-            "b_zero_point",
-        ]
-        input_values = [a_array, b_array]
-
-        if b_per_tensor_quantization:
-            initializer.append(
-                helper.make_tensor("b_scale", TensorProto.FLOAT, (), [np.random.rand()])
-            )
-            initializer.append(
-                helper.make_tensor(
-                    "b_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]
-                )
-            )
-        else:  # per_colume_quantization
-            shape_value = b_shape[0] if transB else b_shape[1]
-            b_scale_array = np.random.random(shape_value).astype("float32")
-            w_zero_point_array = np.random.randint(0, 255, size=shape_value).astype("uint8")
-            initializer.append(
-                helper.make_tensor(
-                    "b_scale", TensorProto.FLOAT, list(b_scale_array.shape), b_scale_array
-                )
-            )
-            initializer.append(
-                helper.make_tensor(
-                    "b_zero_point",
-                    TensorProto.UINT8,
-                    list(w_zero_point_array.shape),
-                    w_zero_point_array,
-                )
-            )
-
-        output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, list(y_shape))
-
-        if C is True:
-            C_shape = (b_shape[0] if transB else b_shape[1],)
-            C_array = np.random.randint(low=0, high=65536, size=C_shape).astype("int32")
-            input_nodes.append(helper.make_tensor_value_info("C", TensorProto.INT32, list(C_shape)))
-            input_names.append("C")
-            input_values.append(C_array)
-
-        if y_zp is True:
-            input_names.append("y_scale")
-            initializer.append(
-                helper.make_tensor("y_scale", TensorProto.FLOAT, (), [np.random.rand()])
-            )
-
-            input_names.append("y_zero_point")
-            initializer.append(
-                helper.make_tensor(
-                    "y_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]
-                )
-            )
-
-            output_tensor = helper.make_tensor_value_info(
-                "output", TensorProto.UINT8, list(y_shape)
-            )
-
-        kwargs = {}
-        kwargs["alpha"] = alpha
-        kwargs["transA"] = transA
-        kwargs["transB"] = transB
-
-        node = helper.make_node(
-            "QGemm",
-            inputs=input_names,
-            outputs=["output"],
-            domain="com.microsoft",
-            # Default values for other attributes:
-            **kwargs,
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "QGemm",
-            inputs=input_nodes,
-            outputs=[output_tensor],
-            initializer=initializer,
-        )
-        model = helper.make_model(
-            graph,
-            producer_name="QGemm",
-            opset_imports=[
-                onnx.helper.make_opsetid("com.microsoft", 1),
-            ],
-        )
-
-        verify_with_ort_with_inputs(model, input_values, target=target, dev=dev)
-
-    # B per tensor quantization
-    verify_qgemm(
-        (20, 30),
-        (50, 30),
-        (20, 50),
-        True,
-        True,
-        True,
-    )
-
-    # B per column  quantization
-    verify_qgemm(
-        (20, 30),
-        (50, 30),
-        (20, 50),
-        True,
-        True,
-        False,
-    )
-
-    # test alpha
-    verify_qgemm(
-        (20, 30),
-        (50, 30),
-        (20, 50),
-        True,
-        True,
-        True,
-        0.5,
-    )
-
-    # test transpose A
-    verify_qgemm(
-        (20, 50),
-        (20, 80),
-        (50, 80),
-        True,
-        True,
-        True,
-        0.5,
-        1,
-        0,
-    )
-
-
-@tvm.testing.known_failing_targets("cuda")
-@tvm.testing.parametrize_targets
-def test_qlinearconv(target, dev):
-    """test_qlinearconv"""
-
-    def verify_qlinearconv(
-        x_shape,
-        w_shape,
-        y_shape,
-        padding,
-        kernel_shape,
-        strides,
-        dilations,
-        auto_pad="NOTSET",
-        bias=False,
-        per_channel_quantization=False,
-    ):
-
-        x_array = np.random.randint(low=0, high=255, size=x_shape).astype("uint8")
-        w_array = np.random.uniform(low=0, high=255, size=w_shape).astype("uint8")
-
-        initializer = [
-            helper.make_tensor("x_scale", TensorProto.FLOAT, (), [np.random.rand()]),
-            helper.make_tensor("x_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]),
-            helper.make_tensor("y_scale", TensorProto.FLOAT, (), [np.random.rand()]),
-            helper.make_tensor("y_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]),
-        ]
-
-        input_nodes = [
-            helper.make_tensor_value_info("x", TensorProto.UINT8, list(x_shape)),
-            helper.make_tensor_value_info("w", TensorProto.UINT8, list(w_shape)),
-        ]
-        input_names = [
-            "x",
-            "x_scale",
-            "x_zero_point",
-            "w",
-            "w_scale",
-            "w_zero_point",
-            "y_scale",
-            "y_zero_point",
-        ]
-        input_values = [x_array, w_array]
-
-        if per_channel_quantization:
-            w_scale_array = np.random.random(w_shape[0]).astype("float32")
-            w_zero_point_array = np.random.randint(0, 255, size=w_shape[0]).astype("uint8")
-
-            initializer.append(
-                helper.make_tensor("w_scale", TensorProto.FLOAT, [w_shape[0]], w_scale_array)
-            )
-            initializer.append(
-                helper.make_tensor(
-                    "w_zero_point", TensorProto.UINT8, [w_shape[0]], w_zero_point_array
-                )
-            )
-        else:
-            initializer.append(
-                helper.make_tensor("w_scale", TensorProto.FLOAT, (), [np.random.rand()])
-            )
-            initializer.append(
-                helper.make_tensor(
-                    "w_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]
-                )
-            )
-
-        if bias is True:
-            b_shape = w_shape[0:1]
-            b_array = np.random.randint(low=0, high=65536, size=b_shape).astype("int32")
-            input_nodes.append(helper.make_tensor_value_info("B", TensorProto.INT32, list(b_shape)))
-            input_names.append("B")
-            input_values.append(b_array)
-
-        if padding is None:
-            ## autopadding with unset default attributes
-            kwargs = {}
-            if not all(list(s == 1 for s in strides)):
-                kwargs["strides"] = strides
-            if not all(list(d == 1 for d in dilations)):
-                kwargs["dilations"] = dilations
-
-            node = helper.make_node(
-                "QLinearConv",
-                inputs=input_names,
-                outputs=["y"],
-                # Default values for other attributes:
-                auto_pad=auto_pad,
-                **kwargs,
-            )
-        else:
-            node = helper.make_node(
-                "QLinearConv",
-                inputs=input_names,
-                outputs=["y"],
-                kernel_shape=kernel_shape,
-                # Default values for other attributes:
-                strides=strides,
-                dilations=dilations,
-                # groups=1
-                pads=padding,
-            )
-
-        graph = helper.make_graph(
-            [node],
-            "conv_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("y", TensorProto.UINT8, list(y_shape))],
-            initializer=initializer,
-        )
-        model = helper.make_model(graph, producer_name="qlinearconv_test")
-        # opt_level=1 will cause error
-        verify_with_ort_with_inputs(model, input_values, opt_level=2, target=target, dev=dev)
-
-    def repeat(num, dims):
-        return tuple(num for _ in range(dims))
-
-    # only support QLinearConv2d because only support qnn.conv2d
-    dims = 2
-
-    # Convolution with padding
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(5, dims),
-        2 * repeat(1, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-    )
-
-    # Convolution with bias
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(5, dims),
-        2 * repeat(1, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-        bias=True,
-    )
-
-    # Convolution with asymmetric padding
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(4, dims),
-        repeat(0, dims) + repeat(1, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-    )
-    # Convolution without padding
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(3, dims),
-        2 * repeat(0, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-    )
-    # Convolution with autopadding
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(5, dims),
-        None,
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-        auto_pad="SAME_UPPER",
-    )
-    # Convolution with valid autopadding
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(3, dims),
-        None,
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-        auto_pad="VALID",
-    )
-    # Convolution with non uniform stride
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(3, dims),
-        None,
-        repeat(3, dims),
-        repeat(2, dims),
-        repeat(1, dims),
-        auto_pad="SAME_UPPER",
-    )
-    # Convolution with dilation
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(5, dims),
-        2 * repeat(2, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(2, dims),
-    )
-    # Convolution with per channel quantization
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(3, dims),
-        None,
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-        per_channel_quantization=True,
-    )
-
-
-# TODO(vvchernov): fix problem with quantization on cuda
-@tvm.testing.known_failing_targets("cuda")
-@tvm.testing.parametrize_targets
-def test_qlinearmatmul(target, dev):
-    """test_qlinearmatmul"""
-
-    def verify_qlinearmatmul(
-        x_shape,
-        w_shape,
-        y_shape,
-        x_dtype="uint8",
-        w_dtype="uint8",
-    ):
-        def get_randint_numpy_scalar(dtype="uint8"):
-            if dtype == "uint8":
-                return np.random.randint(0, 255)
-            else:  # "int8"
-                return np.random.randint(-128, 127)
-
-        if x_dtype == "uint8":
-            x_array = np.random.randint(low=0, high=255, size=x_shape).astype("uint8")
-        else:  # "int8"
-            x_array = np.random.randint(low=-128, high=127, size=x_shape).astype("int8")
-        if w_dtype == "uint8":
-            w_array = np.random.uniform(low=0, high=255, size=w_shape).astype("uint8")
-        else:  # "int8"
-            w_array = np.random.uniform(low=-128, high=127, size=w_shape).astype("int8")
-
-        x_proto_type = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(x_dtype)]
-        w_proto_type = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(w_dtype)]
-
-        y_dtype = "int8"
-        if x_dtype == "uint8" and w_dtype == "uint8":
-            y_dtype = "uint8"
-        y_proto_type = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(y_dtype)]
-
-        initializer = [
-            helper.make_tensor("x_scale", TensorProto.FLOAT, (), [np.random.rand()]),
-            # TODO: 0 value for int8?
-            helper.make_tensor(
-                "x_zero_point", x_proto_type, (), [get_randint_numpy_scalar(x_dtype)]
-            ),
-            helper.make_tensor("w_scale", TensorProto.FLOAT, (), [np.random.rand()]),
-            # TODO: 0 value for int8?
-            helper.make_tensor(
-                "w_zero_point", w_proto_type, (), [get_randint_numpy_scalar(w_dtype)]
-            ),
-            helper.make_tensor("y_scale", TensorProto.FLOAT, (), [np.random.rand()]),
-            helper.make_tensor(
-                "y_zero_point", y_proto_type, (), [get_randint_numpy_scalar(y_dtype)]
-            ),
-        ]
-
-        input_nodes = [
-            helper.make_tensor_value_info("x", x_proto_type, list(x_shape)),
-            helper.make_tensor_value_info("w", w_proto_type, list(w_shape)),
-        ]
-        input_names = [
-            "x",
-            "x_scale",
-            "x_zero_point",
-            "w",
-            "w_scale",
-            "w_zero_point",
-            "y_scale",
-            "y_zero_point",
-        ]
-        input_values = [x_array, w_array]
-
-        node = helper.make_node(
-            "QLinearMatMul",
-            inputs=input_names,
-            outputs=["y"],
-        )
-
-        y_proto_type = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype("int8")]
-        if x_dtype == "uint8" and w_dtype == "uint8":
-            y_proto_type = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype("uint8")]
-
-        graph = helper.make_graph(
-            [node],
-            "qmatmul_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("y", y_proto_type, list(y_shape))],
-            initializer=initializer,
-        )
-        model = helper.make_model(graph, producer_name="qlinearmatmul_test")
-        # opt_level=1 will cause error
-        verify_with_ort_with_inputs(model, input_values, opt_level=2, target=target, dev=dev)
-
-    # Default matmul both ranks = 2 (x_dtype = "uint8", w_dtype = "uint8")
-    verify_qlinearmatmul((2, 3), (3, 2), (2, 2))
-
-    # Default matmul both ranks = 2 (x_dtype = "int8", w_dtype = "int8")
-    verify_qlinearmatmul((2, 3), (3, 2), (2, 2), "int8", "int8")
-
-    # TODO(vvchernov): problems on ONNX Runtime side and type check (onnx.py:L4763) on TVM side
-    # Default matmul both ranks = 2 (x_dtype = "uint8", w_dtype = "int8")
-    # verify_qlinearmatmul((2, 3), (3, 2), (2, 2), "uint8", "int8")
-
-    # TODO(vvchernov): problems on ONNX Runtime side and type check (onnx.py:L4763) on TVM side
-    # Default matmul both ranks = 2 (x_dtype = "int8", w_dtype = "uint8")
-    # verify_qlinearmatmul((2, 3), (3, 2), (2, 2), "int8", "uint8")
-
-    # Reduced matmul: x_ranks = 1, w_rank = 2 (x_dtype = "uint8", w_dtype = "uint8")
-    verify_qlinearmatmul((3,), (3, 2), (2,))
-
-    # Special case matmul: x_ranks = 3, w_rank = 2 (x_dtype = "uint8", w_dtype = "uint8")
-    verify_qlinearmatmul((2, 3, 4), (4, 3), (2, 3, 3))
-
-    # GPT2-style matmul both ranks = 4 (x_dtype = "uint8", w_dtype = "uint8")
-    verify_qlinearmatmul((2, 4, 3, 3), (2, 4, 3, 3), (2, 4, 3, 3))
-
-    # Asymetric matmul: x_ranks = 4, w_rank = 3 (x_dtype = "uint8", w_dtype = "uint8")
-    verify_qlinearmatmul((2, 4, 3, 3), (4, 3, 3), (2, 4, 3, 3))
-
-    # Asymetric matmul: x_ranks = 2, w_rank = 3 (x_dtype = "uint8", w_dtype = "uint8")
-    # verify_qlinearmatmul((3, 3), (4, 3, 3), (4, 3, 3))
-
-
-@tvm.testing.parametrize_targets
-def test_qlinearconcat(target, dev):
-    """test_qlinearconcat"""
-
-    def verify_qlinearconcat(shapes, out_shape, axis=None):
-        input_names = []
-        input_values = []
-        input_nodes = []
-        for i, shape in enumerate(shapes):
-            tensor_name = chr(ord("a") + i)
-            node = helper.make_tensor_value_info(tensor_name, TensorProto.FLOAT, list(shape))
-
-            input_names.append(tensor_name)
-            input_values.append(np.random.random(shape).astype("float32"))
-            input_nodes.append(node)
-
-        node = helper.make_node("Concat", input_names, ["C"])
-        if axis is not None:
-            axis_attr = helper.make_attribute("axis", axis)
-            node.attribute.append(axis_attr)
-        graph = helper.make_graph(
-            [node],
-            "qlinearconcat_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("C", TensorProto.FLOAT, list(out_shape))],
-        )
-        model = helper.make_model(graph, producer_name="qlinearconcat_test")
-        quantize_and_verify_with_ort(model, input_names, shapes, target, dev)
-
-    verify_qlinearconcat([[2, 1], [2, 1]], [4, 1], 0)
-    verify_qlinearconcat([[2, 1], [2, 1]], [2, 2], 1)
-    verify_qlinearconcat([[1, 2], [2, 2], [3, 2]], [6, 2], 0)
-
-
-@tvm.testing.parametrize_targets
-def test_qlinearadd(target, dev):
-    """test_qlinearadd"""
-
-    def verify_qlinearadd(a_shape, b_shape, c_shape):
-
-        _ = np.random.random(a_shape).astype("float32")
-        _ = np.random.random(b_shape).astype("float32")
-
-        input_nodes = [
-            helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape)),
-            helper.make_tensor_value_info("b", TensorProto.FLOAT, list(b_shape)),
-        ]
-        input_names = [
-            "a",
-            "b",
-        ]
-
-        node = helper.make_node("Add", ["a", "b"], ["C"])
-        graph = helper.make_graph(
-            [node],
-            "qlinearadd_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("C", TensorProto.FLOAT, list(c_shape))],
-        )
-        model = helper.make_model(graph, producer_name="qlinearadd_test")
-        quantize_and_verify_with_ort(model, input_names, [a_shape, b_shape], target, dev)
-
-    verify_qlinearadd([4, 2], [4, 2], [4, 2])
-    verify_qlinearadd([4, 2], [2], [4, 2])
-    verify_qlinearadd([5, 1, 7], [2, 7], [5, 2, 7])
-
-
-@tvm.testing.parametrize_targets
-def test_qlinearmul(target, dev):
-    """test_qlinearmul"""
-
-    def verify_qlinearmul(a_shape, b_shape, c_shape):
-
-        _ = np.random.random(a_shape).astype("float32")
-        _ = np.random.random(b_shape).astype("float32")
-
-        input_nodes = [
-            helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape)),
-            helper.make_tensor_value_info("b", TensorProto.FLOAT, list(b_shape)),
-        ]
-        input_names = [
-            "a",
-            "b",
-        ]
-
-        node = helper.make_node("Mul", input_names, ["C"])
-        graph = helper.make_graph(
-            [node],
-            "qlinearmul_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("C", TensorProto.FLOAT, list(c_shape))],
-        )
-        model = helper.make_model(graph, producer_name="qlinearmul_test")
-        quantize_and_verify_with_ort(model, input_names, [a_shape, b_shape], target, dev)
-
-    verify_qlinearmul([7], [7], [7])
-    verify_qlinearmul([4, 2], [4, 2], [4, 2])
-    verify_qlinearmul([4, 2], [2], [4, 2])
-    verify_qlinearmul([5, 1, 7], [2, 7], [5, 2, 7])
-
-
-@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/11375")
-@tvm.testing.parametrize_targets
-def test_qlinearleakyrelu(target, dev):
-    """test_qlinearleakyrelu"""
-
-    def verify_qlinearleakyrelu(inshape, kwargs):
-
-        in_array = np.random.random(inshape).astype("float32")
-        node = helper.make_node("LeakyRelu", ["X"], ["Y"], **kwargs)
-
-        graph = helper.make_graph(
-            [node],
-            "qlinearRelu_test",
-            inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, list(in_array.shape))],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(in_array.shape))],
-        )
-        model = helper.make_model(graph, producer_name="qlinearRelu_test")
-        args = (model, ["X"], [in_array.shape], target, dev)
-        if dev == "cuda":
-            quantize_and_verify_with_ort(*args, rtol=1e-2, atol=1e-2)
-        else:
-            quantize_and_verify_with_ort(*args)
-
-    verify_qlinearleakyrelu([2, 4, 5, 6], {"alpha": 0.25})
-    verify_qlinearleakyrelu([6, 5, 6, 7], {"alpha": 0.35})
-    verify_qlinearleakyrelu([5, 1, 4, 6], {"alpha": 0.65})
-
-
-@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/11375")
-@tvm.testing.parametrize_targets
-def test_qlinearsigmoid(target, dev):
-    """test_qlinearsigmoid"""
-
-    def verify_qlinearsigmoid(a_shape):
-
-        _ = np.random.random(a_shape).astype("float32")
-
-        input_nodes = [helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape))]
-
-        node = helper.make_node("Sigmoid", ["a"], ["B"])
-        graph = helper.make_graph(
-            [node],
-            "qlinearsigmoid_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("B", TensorProto.FLOAT, list(a_shape))],
-        )
-        model = helper.make_model(graph, producer_name="qlinearsigmoid_test")
-        quantize_and_verify_with_ort(model, ["a"], [a_shape], target, dev)
-
-    verify_qlinearsigmoid([4, 2])
-    verify_qlinearsigmoid([5])
-    verify_qlinearsigmoid([3, 4, 5])
-    verify_qlinearsigmoid([])
-
-
-@tvm.testing.parametrize_targets
-def test_qlinearsoftmax(target, dev):
-    """test_qlinearsoftmax"""
-
-    def verify_qlinearsoftmax(a_shape):
-
-        _ = np.random.random(a_shape).astype("float32")
-
-        input_nodes = [helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape))]
-
-        node = helper.make_node("Softmax", ["a"], ["B"])
-        graph = helper.make_graph(
-            [node],
-            "qlinearsoftmax_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("B", TensorProto.FLOAT, list(a_shape))],
-        )
-        model = helper.make_model(graph, producer_name="qlinearsoftmax_test")
-        quantize_and_verify_with_ort(model, ["a"], [a_shape], target, dev)
-
-    verify_qlinearsoftmax([4, 2])
-    verify_qlinearsoftmax([5])
-    verify_qlinearsoftmax([3, 4, 5])
-
-
-@tvm.testing.parametrize_targets("llvm")
-def test_random_bernoulli(target, dev):
-    """test_random_bernoulli"""
-
-    def _get_tvm_output(
-        inputs,
-        out_dtype="int32",
-        seed=None,
-        target=target,
-        dev=dev,
-        use_vm=False,
-        freeze_params=False,
-    ):
-        def get_bernoulli_model(shape, in_dtype="float32", out_dtype="int32", seed=None):
-            onnx_itype = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(in_dtype)]
-            onnx_otype = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(out_dtype)]
-            node = helper.make_node(
-                "Bernoulli",
-                ["input"],
-                ["output"],
-            )
-            dtype_attr = helper.make_attribute("dtype", onnx_otype)
-            node.attribute.append(dtype_attr)
-            if seed is not None:
-                seed_attr = helper.make_attribute("seed", float(seed))
-                node.attribute.append(seed_attr)
-
-            graph = helper.make_graph(
-                [node],
-                "random_bernoulli_test",
-                inputs=[helper.make_tensor_value_info("input", onnx_itype, list(shape))],
-                outputs=[helper.make_tensor_value_info("output", onnx_otype, list(shape))],
-            )
-            return helper.make_model(graph, producer_name="random_bernoulli_test")
-
-        shape = inputs.shape
-        in_dtype = inputs.dtype
-        model = get_bernoulli_model(shape, in_dtype, out_dtype, seed)
-
-        if use_vm:
-            return get_tvm_output_with_vm(
-                model,
-                inputs,
-                target,
-                dev,
-                freeze_params=freeze_params,
-            )
-        else:
-            return get_tvm_output(
-                model,
-                inputs,
-                target,
-                dev,
-            )
-
-    def binom_test(input, ideal_mean, threshold=0.05):
-        # This test is strictly appropriate when input probabilities are all identical.
-        # In that case, it should lead to flaky failures in only one run in a million (p>=1e-6).
-        # The test should be over-conservative when input probabilities are not identical.
-        # (i.e., It should have a rate of flaky failures lower than one run in a million.)
-        # If this test starts repeatedly throwing flaky failures, consult a statistician
-        # in addition to your regular debugging.
-        bnm_test_res = scipy.stats.binomtest(
-            k=np.sum(input, dtype="int32"), n=len(input), p=ideal_mean
-        )
-        return bnm_test_res.pvalue > threshold
-
-    def verify_bernoulli(
-        inputs=None,
-        shape=[],
-        in_dtype="float32",
-        out_dtype="int32",
-        seed=None,
-        target=target,
-        dev=dev,
-        use_vm=False,
-        freeze_params=False,
-        in_out_equal=False,
-    ):
-        if inputs is None:
-            assert len(shape) != 0
-            inputs = np.random.uniform(size=shape).astype(in_dtype)
-
-        tvm_out = _get_tvm_output(
-            inputs,
-            out_dtype,
-            seed,
-            target,
-            dev,
-            use_vm,
-            freeze_params,
-        )
-
-        if isinstance(tvm_out, list):
-            tvm_out = tvm_out[0]
-        # check that values are 0 or 1
-        tvm_flat = tvm_out.flatten()
-        assert np.array_equal(tvm_flat, tvm_flat.astype("bool"))
-        if in_out_equal:
-            tvm.testing.assert_allclose(inputs, tvm_out)
-        else:
-            # check that mean value is close to the theoretical one by binomial test
-            ideal_mean = np.mean(inputs)
-            repeats = 3
-            check = False
-            for i in range(repeats):
-                if binom_test(tvm_flat, ideal_mean):
-                    check = True
-                    break
-                else:
-                    # repeat with new seed
-                    seed = np.random.randint(1e6)
-                    tvm_flat = _get_tvm_output(
-                        inputs,
-                        out_dtype,
-                        seed,
-                        target,
-                        dev,
-                        use_vm,
-                        freeze_params,
-                    ).flatten()
-            assert check, "Binomial test failed"
-
-    # Test input sequence of 0 and 1
-    inputs = np.random.randint(2, size=[10000]).astype("float32")
-    verify_bernoulli(inputs, in_out_equal=True)
-
-    # Binomial test input with 0.5 values
-    val_num = 10000
-    inputs = np.ones([val_num], dtype="float32") * 0.5
-    verify_bernoulli(inputs)
-
-    # Binomial test input with 0.1 values
-    inputs = np.ones([val_num], dtype="float32") * 0.1
-    verify_bernoulli(inputs)
-
-    # Simple test
-    verify_bernoulli(shape=[val_num])
-
-    # Floating output type
-    verify_bernoulli(shape=[val_num], out_dtype="float32")
-
-    # Double input type
-    verify_bernoulli(shape=[val_num], in_dtype="float64")
-
-    # Test N-D tensor generation
-    verify_bernoulli(shape=[2, 4, 100, 100])
-
-    # Test with seed
-    verify_bernoulli(shape=[val_num], seed=np.random.randint(1e6))
-
-    # Test result determinism with the same seeds
-    inputs = np.random.uniform(size=[val_num])
-    fixed_seed = np.random.randint(1e6)
-    tvm_out_1 = _get_tvm_output(inputs, seed=fixed_seed)
-    tvm_out_2 = _get_tvm_output(inputs, seed=fixed_seed)
-    tvm.testing.assert_allclose(tvm_out_1, tvm_out_2)
-
-
-@tvm.testing.parametrize_targets("llvm")
-def test_random_uniform(target, dev):
-    """test_random_uniform"""
-
-    def get_random_uniform(shape, dtype="float32", high=1.0, low=0.0, seed=None):
-        ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-        node = helper.make_node(
-            "RandomUniform", [], ["out"], shape=shape, dtype=ONNX_DTYPE, high=high, low=low
-        )
-        if seed is not None:
-            seed_attr = helper.make_attribute("seed", seed)
-            node.attribute.append(seed_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "random_uniform_test",
-            inputs=[],
-            outputs=[helper.make_tensor_value_info("out", ONNX_DTYPE, shape)],
-        )
-        model = helper.make_model(graph, producer_name="random_uniform_test")
-        return get_tvm_output_with_vm(
-            model,
-            [],
-            target=target,
-            dev=dev,
-            validate_structural_equal=(seed is not None),
-        )
-
-    # Check that function runs and produces proper shape.
-    vals = get_random_uniform([10], dtype="float32")
-    assert list(vals.shape) == [10]
-    assert vals.dtype == "float32"
-
-    # Test N-D tensor generation.
-    vals = get_random_uniform([1, 3, 100, 100], dtype="float32")
-    assert list(vals.shape) == [1, 3, 100, 100]
-
-    # Check that bounds aren't exceeded.
-    vals = get_random_uniform(shape=[100], high=100.0, low=-100.0)
-    assert list(vals.shape) == [100]
-    assert all(vals >= -100) and all(vals <= 100)
-
-    # Check that a fixed seed produces the same values when run twice.
-    vals_1 = get_random_uniform(shape=[10], seed=1)
-    vals_2 = get_random_uniform(shape=[10], seed=1)
-    assert all(vals_1 == vals_2)
-
-    # Test against an expected output with a fixed seed.
-    real = get_random_uniform(shape=[10], seed=5.0)
-    expected = np.asarray(
-        [
-            0.043976,
-            0.96656,
-            0.292199,
-            0.904297,
-            0.25167,
-            0.521778,
-            0.778985,
-            0.085463,
-            0.939846,
-            0.194201,
-        ]
-    )
-    tvm.testing.assert_allclose(real, expected, rtol=1e-5)
-
-
-@tvm.testing.parametrize_targets("llvm")
-def test_random_uniform_like(target, dev):
-    """test_random_uniform_like"""
-
-    def get_random_uniform_like(input_, shape, dtype=None, high=1.0, low=0.0, seed=None):
-        node = helper.make_node("RandomUniformLike", ["in"], ["out"], high=high, low=low)
-        if seed is not None:
-            seed_attr = helper.make_attribute("seed", seed)
-            node.attribute.append(seed_attr)
-
-        ONNX_DTYPE = None
-        if dtype is not None:
-            ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-            dtype_attr = helper.make_attribute("dtype", ONNX_DTYPE)
-            node.attribute.append(dtype_attr)
-        else:
-            dtype = input_.dtype
-            ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-
-        graph = helper.make_graph(
-            [node],
-            "random_uniform_test",
-            inputs=[helper.make_tensor_value_info("in", ONNX_DTYPE, shape)],
-            outputs=[helper.make_tensor_value_info("out", ONNX_DTYPE, shape)],
-        )
-        model = helper.make_model(graph, producer_name="random_uniform_like_test")
-        return get_tvm_output_with_vm(
-            model,
-            [input_],
-            target=target,
-            dev=dev,
-            validate_structural_equal=(seed is not None),
-        )
-
-    # Check that function runs and produces proper shape and dtype.
-    shape = [10]
-    input_array = np.random.random(shape).astype("float32")
-    vals = get_random_uniform_like(input_array, shape, dtype="float32")
-    assert list(vals.shape) == [10]
-    assert vals.dtype == "float32"
-
-    # Test N-D tensor generation.
-    shape = [1, 3, 100, 100]
-    input_array = np.random.random(shape).astype("float32")
-    vals = get_random_uniform_like(input_array, shape, dtype="float64")
-    assert list(vals.shape) == shape
-    assert vals.dtype == "float64"
-
-    # Check that bounds aren't exceeded.
-    shape = [100]
-    input_array = np.random.random(shape).astype("float64")
-    vals = get_random_uniform_like(input_array, shape, high=100.0, low=-100.0)
-    assert list(vals.shape) == shape
-    assert all(vals >= -100) and all(vals <= 100)
-
-    # Test against an expected output with a fixed seed.
-    shape = [10]
-    input_array = np.random.random(shape).astype("float32")
-    real = get_random_uniform_like(input_array, shape=[10], seed=5.0)
-    expected = np.asarray(
-        [
-            0.043976,
-            0.96656,
-            0.292199,
-            0.904297,
-            0.25167,
-            0.521778,
-            0.778985,
-            0.085463,
-            0.939846,
-            0.194201,
-        ]
-    )
-    tvm.testing.assert_allclose(real, expected, rtol=1e-5)
-
-
-@tvm.testing.parametrize_targets("llvm")
-def test_random_normal(target, dev):
-    """test_random_normal"""
-
-    def get_random_normal(shape, dtype="float32", scale=1.0, mean=0.0, seed=None):
-        ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-        node = helper.make_node(
-            "RandomNormal", [], ["out"], shape=shape, dtype=ONNX_DTYPE, scale=scale, mean=mean
-        )
-        if seed is not None:
-            seed_attr = helper.make_attribute("seed", seed)
-            node.attribute.append(seed_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "random_normal_test",
-            inputs=[],
-            outputs=[helper.make_tensor_value_info("out", ONNX_DTYPE, shape)],
-        )
-        model = helper.make_model(graph, producer_name="random_normal_test")
-        return get_tvm_output_with_vm(
-            model,
-            [],
-            target=target,
-            dev=dev,
-            validate_structural_equal=(seed is not None),
-        )
-
-    # Test N-D tensor generation.
-    vals = get_random_normal([1, 3, 100, 100], dtype="float32")
-    assert list(vals.shape) == [1, 3, 100, 100]
-    tvm.testing.assert_allclose(vals.mean(), 0.0, rtol=0.1, atol=0.1)
-    tvm.testing.assert_allclose(np.std(vals), 1.0, rtol=0.1, atol=0.1)
-
-    # Test mean=2.0 scale=10.0
-    vals = get_random_normal([1, 3, 100, 100], mean=2.0, scale=10.0, dtype="float32")
-    assert list(vals.shape) == [1, 3, 100, 100]
-    tvm.testing.assert_allclose(vals.mean(), 2.0, rtol=0.1, atol=0.1)
-    tvm.testing.assert_allclose(np.std(vals), 10.0, rtol=0.1, atol=0.1)
-
-    # Check that a fixed seed produces the same values when run twice.
-    vals_1 = get_random_normal(shape=[10], seed=1.0)
-    vals_2 = get_random_normal(shape=[10], seed=1.0)
-    assert all(vals_1 == vals_2)
-
-
-@tvm.testing.parametrize_targets("llvm")
-def test_random_normal_like(target, dev):
-    """test_random_normal_like"""
-
-    def get_random_normal_like(input_, shape, dtype="float32", scale=1.0, mean=0.0, seed=None):
-        ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-        node = helper.make_node(
-            "RandomNormalLike", ["in"], ["out"], dtype=ONNX_DTYPE, scale=scale, mean=mean
-        )
-        if seed is not None:
-            seed_attr = helper.make_attribute("seed", seed)
-            node.attribute.append(seed_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "random_normal_like_test",
-            inputs=[helper.make_tensor_value_info("in", ONNX_DTYPE, shape)],
-            outputs=[helper.make_tensor_value_info("out", ONNX_DTYPE, shape)],
-        )
-        model = helper.make_model(graph, producer_name="random_normal_like_test")
-        return get_tvm_output_with_vm(
-            model,
-            [input_],
-            target=target,
-            dev=dev,
-            validate_structural_equal=(seed is not None),
-        )
-
-    # Test N-D tensor generation.
-    shape = [1, 3, 100, 100]
-    input_array = np.random.random(shape).astype("float32")
-    vals = get_random_normal_like(input_array, [1, 3, 100, 100], dtype="float32")
-    assert list(vals.shape) == [1, 3, 100, 100]
-    tvm.testing.assert_allclose(vals.mean(), 0.0, rtol=0.1, atol=0.1)
-    tvm.testing.assert_allclose(np.std(vals), 1.0, rtol=0.1, atol=0.1)
-
-    # Test mean=2.0 scale=10.0
-    shape = [1, 3, 100, 100]
-    input_array = np.random.random(shape).astype("float32")
-    vals = get_random_normal_like(
-        input_array, [1, 3, 100, 100], mean=2.0, scale=10.0, dtype="float32"
-    )
-    assert list(vals.shape) == [1, 3, 100, 100]
-    tvm.testing.assert_allclose(vals.mean(), 2.0, rtol=0.1, atol=0.1)
-    tvm.testing.assert_allclose(np.std(vals), 10.0, rtol=0.1, atol=0.1)
-
-
-@tvm.testing.parametrize_targets("llvm")
-def test_multinomial(target, dev):
-    def get_multinomial(input, shape, sample_size, seed=None):
-        IN_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype("float32")]
-        OUT_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype("int32")]
-        node = helper.make_node("Multinomial", ["in"], ["out"], sample_size=sample_size)
-        if seed is not None:
-            seed_attr = helper.make_attribute("seed", seed)
-            node.attribute.append(seed_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "multinomial_test",
-            inputs=[helper.make_tensor_value_info("in", IN_DTYPE, shape)],
-            outputs=[helper.make_tensor_value_info("out", OUT_DTYPE, shape)],
-        )
-        model = helper.make_model(graph, producer_name="multinomial_test")
-        return get_tvm_output_with_vm(
-            model,
-            [input],
-            target=target,
-            dev=dev,
-            validate_structural_equal=(seed is not None),
-        )
-
-    # Test N-D tensor generation.
-    shape = [3]
-    sample_size = 2
-    probs = np.random.random(shape).astype("float32")
-    indices = get_multinomial(probs, shape, sample_size)
-    # Since specific values are random, we'll check that the output shape is
-    # correct and the values chosen are all valid indices.
-    assert list(indices.shape) == [sample_size]
-    assert np.max(indices) < shape[-1]
-
-    # Test 2d multinomial
-    shape = [10, 5]
-    sample_size = 4
-    probs = np.random.random(shape).astype("float32")
-    indices = get_multinomial(probs, shape, sample_size)
-    assert list(indices.shape) == [10, sample_size]
-    assert np.max(indices) < shape[-1]
-
-
-@tvm.testing.parametrize_targets
-def test_convinteger(target, dev):
-    """test_convinteger"""
-
-    def verify_convinteger(
-        x_shape,
-        w_shape,
-        y_shape,
-        padding,
-        kernel_shape,
-        strides,
-        dilations,
-        auto_pad="NOTSET",
-        dtype="uint8",
-    ):
-        x_array = np.random.randint(low=0, high=255, size=x_shape).astype(dtype)
-        w_array = np.random.uniform(low=0, high=255, size=w_shape).astype(dtype)
-        x_zero_point_array = np.random.randint(0, 255, size=[1]).astype(dtype)
-        w_zero_point_array = np.random.randint(0, 255, size=[1]).astype(dtype)
-
-        ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-        input_nodes = [
-            helper.make_tensor_value_info("x", ONNX_DTYPE, list(x_shape)),
-            helper.make_tensor_value_info("w", ONNX_DTYPE, list(w_shape)),
-        ]
-        initializer = [
-            helper.make_tensor("x_zero_point", ONNX_DTYPE, [], x_zero_point_array),
-            helper.make_tensor("w_zero_point", ONNX_DTYPE, [], w_zero_point_array),
-        ]
-        input_names = ["x", "w", "x_zero_point", "w_zero_point"]
-        input_values = [x_array, w_array]
-
-        if padding is None:
-            ## autopadding with unset default attributes
-            kwargs = {}
-            if not all(list(s == 1 for s in strides)):
-                kwargs["strides"] = strides
-            if not all(list(d == 1 for d in dilations)):
-                kwargs["dilations"] = dilations
-
-            node = helper.make_node(
-                "ConvInteger",
-                inputs=input_names,
-                outputs=["y"],
-                # Default values for other attributes:
-                auto_pad=auto_pad,
-                **kwargs,
-            )
-        else:
-            node = helper.make_node(
-                "ConvInteger",
-                inputs=input_names,
-                outputs=["y"],
-                kernel_shape=kernel_shape,
-                # Default values for other attributes:
-                strides=strides,
-                dilations=dilations,
-                # groups=1
-                pads=padding,
-            )
-
-        graph = helper.make_graph(
-            [node],
-            "convinteger_test",
-            inputs=input_nodes,
-            initializer=initializer,
-            outputs=[helper.make_tensor_value_info("y", TensorProto.INT32, list(y_shape))],
-        )
-        model = helper.make_model(graph, producer_name="convinteger_test")
-        # opt_level=1 will cause error
-        verify_with_ort_with_inputs(model, input_values, target=target, dev=dev, opt_level=2)
-
-    def repeat(num, dims):
-        return tuple(num for _ in range(dims))
-
-    # only support 2D ConvInteger because we only support qnn.conv2d for now.
-    dims = 2
-
-    # Convolution with padding
-    verify_convinteger(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(5, dims),
-        2 * repeat(1, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-    )
-
-    # Convolution with asymmetric padding
-    verify_convinteger(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(4, dims),
-        repeat(0, dims) + repeat(1, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-    )
-    # Convolution without padding
-    verify_convinteger(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(3, dims),
-        2 * repeat(0, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-    )
-    # Convolution with autopadding
-    verify_convinteger(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(5, dims),
-        None,
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-        auto_pad="SAME_UPPER",
-    )
-    # Convolution with valid autopadding
-    verify_convinteger(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(3, dims),
-        None,
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-        auto_pad="VALID",
-    )
-    # Convolution with non uniform stride
-    verify_convinteger(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(3, dims),
-        None,
-        repeat(3, dims),
-        repeat(2, dims),
-        repeat(1, dims),
-        auto_pad="SAME_UPPER",
-    )
-    # Convolution with dilation
-    verify_convinteger(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(5, dims),
-        2 * repeat(2, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(2, dims),
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_bitshift(target, dev):
-    """test_bitshift"""
-
-    def verify_bitshift(in_shape, shift_shape, high=1000000000, in_dtype="uint64"):
-        in_shape = list(in_shape)
-        shift_shape = list(shift_shape)
-
-        # Create an input for each tensor.
-        tensor_values = [
-            np.random.randint(high, size=in_shape).astype(in_dtype),
-            np.random.randint(16, size=shift_shape).astype(in_dtype),
-            np.random.randint(16, size=shift_shape).astype(in_dtype),
-        ]
-
-        bitshift_left_node = helper.make_node(
-            "BitShift",
-            inputs=["input", "shift_left"],
-            outputs=["shifted"],
-            direction="LEFT",
-        )
-
-        bitshift_right_node = helper.make_node(
-            "BitShift",
-            inputs=["shifted", "shift_right"],
-            outputs=["output"],
-            direction="RIGHT",
-        )
-
-        # Create input and output tensors.
-        proto_type = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(in_dtype)]
-        graph_inputs = [
-            helper.make_tensor_value_info("input", proto_type, in_shape),
-            helper.make_tensor_value_info("shift_left", proto_type, shift_shape),
-            helper.make_tensor_value_info("shift_right", proto_type, shift_shape),
-        ]
-
-        graph_outputs = [helper.make_tensor_value_info("output", proto_type, in_shape)]
-
-        graph_nodes = [bitshift_left_node, bitshift_right_node]
-
-        graph = helper.make_graph(
-            graph_nodes,
-            "BitShift_test",
-            inputs=graph_inputs,
-            outputs=graph_outputs,
-        )
-        model = helper.make_model(
-            graph,
-            producer_name="BitShift_test",
-        )
-
-        verify_with_ort_with_inputs(model, tensor_values, target=target, dev=dev)
-
-    shape = (100, 4, 2)
-    broadcast_shape = (100, 1, 1)
-    # Common bitwise test
-    verify_bitshift(shape, shape)
-    # Bitwise test with broadcasting
-    verify_bitshift(shape, broadcast_shape)
-
-
-# TODO(vvchernov): return test back than ONNX Runtime in CI will support domain version of 18
-@pytest.mark.skip("Currently ONNX Runtime in CI does not support domain version of 18")
-@tvm.testing.parametrize_targets
-def test_bitwise(target, dev):
-    """test_bitwise"""
-
-    def verify_bitwise_ops(A_shape, B_shape, C_shape, D_shape, high=128, in_dtype="int32"):
-        A_shape = list(A_shape)
-        B_shape = list(B_shape)
-        C_shape = list(C_shape)
-        D_shape = list(D_shape)
-
-        # Create an input for each tensor.
-        tensor_values = [
-            np.random.randint(high, size=A_shape).astype(in_dtype),
-            np.random.randint(high, size=B_shape).astype(in_dtype),
-            np.random.randint(high, size=C_shape).astype(in_dtype),
-            np.random.randint(high, size=D_shape).astype(in_dtype),
-        ]
-
-        or_node = helper.make_node(
-            "BitwiseOr",
-            inputs=["A", "B"],
-            outputs=["OR"],
-        )
-
-        and_node = helper.make_node(
-            "BitwiseAnd",
-            inputs=["OR", "C"],
-            outputs=["AND"],
-        )
-
-        xor_node = helper.make_node(
-            "BitwiseXor",
-            inputs=["AND", "D"],
-            outputs=["XOR"],
-        )
-
-        not_node = helper.make_node(
-            "BitwiseNot",
-            inputs=["XOR"],
-            outputs=["output"],
-        )
-
-        # Create input and output tensors.
-        proto_type = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(in_dtype)]
-        graph_inputs = [
-            helper.make_tensor_value_info("A", proto_type, A_shape),
-            helper.make_tensor_value_info("B", proto_type, B_shape),
-            helper.make_tensor_value_info("C", proto_type, C_shape),
-            helper.make_tensor_value_info("D", proto_type, D_shape),
-        ]
-
-        graph_outputs = [
-            helper.make_tensor_value_info("output", proto_type, A_shape),
-        ]
-
-        graph_nodes = [
-            or_node,
-            and_node,
-            xor_node,
-            not_node,
-        ]
-
-        graph = helper.make_graph(
-            graph_nodes,
-            "Bitwise_test",
-            inputs=graph_inputs,
-            outputs=graph_outputs,
-        )
-        model = helper.make_model(
-            graph,
-            producer_name="Bitwise_test",
-        )
-
-        verify_with_ort_with_inputs(model, tensor_values, target=target, dev=dev)
-
-    shape = (100, 4, 2)
-    broadcast_shape = (100, 1, 1)
-    dtypes = ["int8", "uint8", "int32", "uint32"]
-    high_vals = [128, 128, 2147483648, 2147483648]
-    for high, dtype in zip(high_vals, dtypes):
-        # Common bitwise test
-        verify_bitwise_ops(shape, shape, shape, shape, high, dtype)
-        # Bitwise test with broadcasting
-        verify_bitwise_ops(shape, broadcast_shape, broadcast_shape, broadcast_shape, high, dtype)
-
-
-@tvm.testing.parametrize_targets
-def test_scan(target, dev):
-    """test_scan"""
-
-    def verify_scan(
-        input_shapes,
-        output_shapes,
-        num_scan_inputs,
-        scan_input_axes,
-        scan_input_directions,
-        scan_output_axes,
-        scan_output_directions,
-        opset,
-    ):
-
-        body_input_shapes = copy.deepcopy(input_shapes)
-        num_state_inputs = len(input_shapes) - num_scan_inputs
-
-        if opset == 8:
-            for i in range(len(input_shapes)):
-                body_input_shapes[i].pop(0)
-            for i in range(num_state_inputs, len(input_shapes)):
-                body_input_shapes[i].pop(0)
-        else:
-            for i in range(num_state_inputs, len(input_shapes)):
-                body_input_shapes[i].pop(scan_input_axes[i - num_state_inputs])
-
-        initial0 = onnx.helper.make_tensor_value_info(
-            "initial0", onnx.TensorProto.FLOAT, body_input_shapes[0]
-        )
-        initial1 = onnx.helper.make_tensor_value_info(
-            "initial1", onnx.TensorProto.FLOAT, body_input_shapes[1]
-        )
-        input0 = onnx.helper.make_tensor_value_info(
-            "input0", onnx.TensorProto.FLOAT, body_input_shapes[2]
-        )
-        input1 = onnx.helper.make_tensor_value_info(
-            "input1", onnx.TensorProto.FLOAT, body_input_shapes[3]
-        )
-        input2 = onnx.helper.make_tensor_value_info(
-            "input2", onnx.TensorProto.FLOAT, body_input_shapes[4]
-        )
-        state0 = onnx.helper.make_tensor_value_info(
-            "state0", onnx.TensorProto.FLOAT, body_input_shapes[0]
-        )
-        scan_out0 = onnx.helper.make_tensor_value_info(
-            "scan_out0", onnx.TensorProto.FLOAT, body_input_shapes[0]
-        )
-        state1 = onnx.helper.make_tensor_value_info(
-            "state1", onnx.TensorProto.FLOAT, body_input_shapes[1]
-        )
-        scan_out1 = onnx.helper.make_tensor_value_info(
-            "scan_out1", onnx.TensorProto.FLOAT, body_input_shapes[1]
-        )
-        add_node = onnx.helper.make_node(
-            "Add",
-            inputs=["initial0", "input0"],
-            outputs=["state0"],
-        )
-        id_node_0 = onnx.helper.make_node(
-            "Identity",
-            inputs=["state0"],
-            outputs=["scan_out0"],
-        )
-        matmul_node = onnx.helper.make_node(
-            "MatMul",
-            inputs=["input1", "input2"],
-            outputs=["matmul_out"],
-        )
-        sub_node = onnx.helper.make_node(
-            "Sub",
-            inputs=["initial1", "matmul_out"],
-            outputs=["state1"],
-        )
-        id_node_1 = onnx.helper.make_node(
-            "Identity",
-            inputs=["state1"],
-            outputs=["scan_out1"],
-        )
-        scan_body = onnx.helper.make_graph(
-            [add_node, id_node_0, matmul_node, sub_node, id_node_1],
-            "scan_body",
-            [initial0, initial1, input0, input1, input2],
-            [state0, state1, scan_out0, scan_out1],
-        )
-        # create scan op node
-        scan_node = None
-        if opset == 8:
-            scan_node = onnx.helper.make_node(
-                "Scan",
-                inputs=["", "init0", "init1", "in0", "in1", "in2"],
-                outputs=["s0", "s1", "scan0", "scan1"],
-                num_scan_inputs=num_scan_inputs,
-                body=scan_body,
-            )
-        else:
-            scan_node = onnx.helper.make_node(
-                "Scan",
-                inputs=["init0", "init1", "in0", "in1", "in2"],
-                outputs=["s0", "s1", "scan0", "scan1"],
-                num_scan_inputs=num_scan_inputs,
-                body=scan_body,
-                scan_input_axes=scan_input_axes,
-                scan_input_directions=scan_input_directions,
-                scan_output_axes=scan_output_axes,
-                scan_output_directions=scan_output_directions,
-            )
-        input_info = [
-            helper.make_tensor_value_info("init0", TensorProto.FLOAT, input_shapes[0]),
-            helper.make_tensor_value_info("init1", TensorProto.FLOAT, input_shapes[1]),
-            helper.make_tensor_value_info("in0", TensorProto.FLOAT, input_shapes[2]),
-            helper.make_tensor_value_info("in1", TensorProto.FLOAT, input_shapes[3]),
-            helper.make_tensor_value_info("in2", TensorProto.FLOAT, input_shapes[4]),
-        ]
-        out_info = [
-            helper.make_tensor_value_info("s0", TensorProto.FLOAT, output_shapes[0]),
-            helper.make_tensor_value_info("s1", TensorProto.FLOAT, output_shapes[1]),
-            helper.make_tensor_value_info("scan0", TensorProto.FLOAT, output_shapes[2]),
-            helper.make_tensor_value_info("scan1", TensorProto.FLOAT, output_shapes[3]),
-        ]
-        graph = helper.make_graph(
-            nodes=[scan_node],
-            name="scan_test",
-            inputs=input_info,
-            outputs=out_info,
-        )
-        model = onnx.helper.make_model(graph, producer_name="scan-test")
-        init0 = np.random.uniform(low=0, high=255, size=input_shapes[0]).astype(np.float32)
-        init1 = np.random.uniform(low=0, high=255, size=input_shapes[1]).astype(np.float32)
-        in0 = np.random.uniform(low=0, high=255, size=input_shapes[2]).astype(np.float32)
-        in1 = np.random.uniform(low=0, high=255, size=input_shapes[3]).astype(np.float32)
-        in2 = np.random.uniform(low=0, high=255, size=input_shapes[4]).astype(np.float32)
-        input_values = [init0, init1, in0, in1, in2]
-
-        verify_with_ort_with_inputs(
-            model,
-            input_values,
-            target=target,
-            dev=dev,
-            opt_level=2,
-            use_vm=True,
-            opset=opset,
-        )
-
-    # opset 8
-    input_shapes = [[2, 6, 7, 8], [2, 3, 3], [2, 5, 6, 7, 8], [2, 5, 3, 4], [2, 5, 4, 3]]
-    output_shapes = [[2, 6, 7, 8], [2, 3, 3], [2, 5, 6, 7, 8], [2, 5, 3, 3]]
-    # input_shapes, output_shapes, num_scan_inputs, scan_input_axes, scan_input_directions,
-    # scan_output_axes, scan_output_directions, opset
-    verify_scan(input_shapes, output_shapes, 3, [0] * 3, [0] * 3, [0] * 2, [0] * 2, 8)
-    # opset 9
-    input_shapes = [[6, 7, 8], [3, 3], [5, 6, 7, 8], [5, 3, 4], [5, 4, 3]]
-    output_shapes = [[6, 7, 8], [3, 3], [5, 6, 7, 8], [5, 3, 3]]
-    verify_scan(input_shapes, output_shapes, 3, [0] * 3, [0] * 3, [0] * 2, [0] * 2, 9)
-
-    input_shapes = [[6, 7, 8], [3, 3], [5, 6, 7, 8], [3, 4, 5], [4, 5, 3]]
-    output_shapes = [[6, 7, 8], [3, 3], [6, 5, 7, 8], [3, 5, 3]]
-    verify_scan(input_shapes, output_shapes, 3, [0, 2, 1], [1] * 3, [1] * 2, [1] * 2, 9)
-    # Negative axes
-    input_shapes = [[6, 7, 8], [3, 3], [5, 6, 7, 8], [3, 4, 5], [4, 5, 3]]
-    output_shapes = [[6, 7, 8], [3, 3], [6, 5, 7, 8], [3, 5, 3]]
-    verify_scan(input_shapes, output_shapes, 3, [-4, -1, -2], [1] * 3, [-3, -2], [1] * 2, 9)
-
-
-@tvm.testing.parametrize_targets
-def test_linear_regressor(target, dev):
-    """test_linear_regressor"""
-
-    def verify_linear_regressor(a_shape, c_shape, i_shape, targets=1, batch=1):
-        a_array = np.random.uniform(size=a_shape).astype("float32")
-        out_shape = (batch, targets)
-
-        coefficients = np.random.uniform(size=c_shape).astype("float32")
-        intercepts = np.random.uniform(size=i_shape).astype("float32")
-
-        mul_node = helper.make_node(
-            "LinearRegressor",
-            ["a"],
-            ["out"],
-            coefficients=coefficients,
-            intercepts=intercepts,
-            targets=targets,
-            domain="ai.onnx.ml",
-        )
-
-        graph = helper.make_graph(
-            [mul_node],
-            "LinearRegressor_test",
-            inputs=[
-                helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, out_shape)],
-        )
-        model = helper.make_model(
-            graph,
-            producer_name="LinearRegressor_test",
-            opset_imports=[
-                onnx.helper.make_opsetid("ai.onnx.ml", 1),
-            ],
-        )
-        verify_with_ort_with_inputs(model, [a_array], target=target, dev=dev)
-
-    verify_linear_regressor((1, 3), (3), (1))
-    verify_linear_regressor((2, 10), (10), (1), batch=2)
-    verify_linear_regressor((1, 3), (30), (10), targets=10)
-    verify_linear_regressor((10, 3), (30), (10), targets=10, batch=10)
-    verify_linear_regressor((1, 4), (3), (1))
-
-
-@tvm.testing.parametrize_targets
-def test_dft(target, dev):
-    """test_dft"""
-
-    def verify_dft(
-        _axis,
-        _inverse,
-        _onesided,
-        _dft_length,
-        _input_shape,
-        _output_shape,
-    ):
-        input_names = ["input"]
-        if _dft_length is not None:
-            input_names.append("dft_length")
-
-        node = onnx.helper.make_node(
-            "DFT",
-            inputs=input_names,
-            outputs=["output"],
-            axis=_axis,
-            inverse=_inverse,
-            onesided=_onesided,
-        )
-
-        nodes = []
-        if _dft_length is not None:
-            nodes.append(
-                make_constant_node("dft_length", TensorProto.INT32, [], [_dft_length]),
-            )
-        nodes.append(node)
-
-        graph = helper.make_graph(
-            nodes,
-            "dft_test",
-            inputs=[
-                helper.make_tensor_value_info("input", TensorProto.FLOAT, _input_shape),
-            ],
-            outputs=[
-                helper.make_tensor_value_info("output", TensorProto.FLOAT, _output_shape),
-            ],
-        )
-
-        model = helper.make_model(graph, producer_name="dft_test")
-
-        _input = np.random.normal(size=_input_shape).astype("float32")
-        verify_with_ort_with_inputs(
-            model,
-            [_input],
-            [_input_shape],
-            target=target,
-            dev=dev,
-            rtol=1e-4,
-            atol=1e-4,
-            use_vm=False,
-        )
-
-    batch_size = 5
-    n = 2
-    D = 7
-
-    for axis in list(range(1, n)) + [-2]:
-        for inverse, onesided in [(0, 0), (0, 1), (1, 0), (None, None)]:
-            for n_fft in [D, D - 1, D + 1]:
-                for c in [1, 2]:
-                    input_shape = [batch_size] + n * [D] + [c]
-                    output_shape = [batch_size] + n * [D] + [2]
-                    if onesided == 1:
-                        output_shape[axis] = output_shape[axis] // 2 + 1
-                    verify_dft(axis, inverse, onesided, n_fft, input_shape, output_shape)
-
-
-@tvm.testing.parametrize_targets
-def test_sequence(target, dev):
-    """test_sequence"""
-
-    def verify_sequence_ops(tensor_shape, num_tensors, axis=0, position=0, new_axis=None):
-        tensor_shape = list(tensor_shape)
-        tensor_values = []
-        for i in range(num_tensors):
-            tensor_values.append(np.random.uniform(size=tensor_shape).astype("float32"))
-
-        # Create an input for each tensor.
-        input_tensor_names = []
-        for i in range(num_tensors):
-            name = f"input_tensor_{i}"
-            input_tensor_names.append(name)
-
-        # Test creating a tensor sequence.
-        construct_node = helper.make_node(
-            "SequenceConstruct",
-            inputs=input_tensor_names,
-            outputs=["sequence"],
-        )
-
-        position_node = make_constant_node("position", TensorProto.INT32, (), [position])
-
-        # Test sequence insertion.
-        insert_node = helper.make_node(
-            "SequenceInsert",
-            inputs=["sequence", input_tensor_names[0], "position"],
-            outputs=["inserted_sequence"],
-        )
-
-        # Test sequence erase.
-        erase_node = helper.make_node(
-            "SequenceErase",
-            inputs=["inserted_sequence", "position"],
-            outputs=["erased_sequence"],
-        )
-
-        # Test sequence concatenation.
-        concat_node = helper.make_node(
-            "ConcatFromSequence",
-            inputs=["erased_sequence"],
-            outputs=["concat_sequence"],
-            axis=axis,
-        )
-
-        # Test splitting a tensor into a sequence.
-        split_node = helper.make_node(
-            "SplitToSequence", inputs=["concat_sequence"], outputs=["split_sequence"], axis=axis
-        )
-
-        # Test tensor extraction from sequence
-        at_node = helper.make_node(
-            "SequenceAt", inputs=["split_sequence", "position"], outputs=["output"]
-        )
-
-        # Test sequence length
-        length_node = helper.make_node(
-            "SequenceLength", inputs=["split_sequence"], outputs=["output_2"]
-        )
-
-        if new_axis is not None:
-            new_axis_attr = helper.make_attribute("new_axis", new_axis)
-            concat_node.attribute.append(new_axis_attr)
-
-        # Create input and output tensors.
-        graph_inputs = []
-        for name in input_tensor_names:
-            input_tensor = helper.make_tensor_value_info(name, TensorProto.FLOAT, tensor_shape)
-            graph_inputs.append(input_tensor)
-
-        # Construct output tensor.
-        output_shape = tensor_shape
-        if new_axis is not None:
-            output_shape.insert(axis, 1)
-            output_shape[axis] = num_tensors + 1
-        else:
-            output_shape[axis] = (num_tensors + 1) * output_shape[axis]
-        graph_outputs = [
-            helper.make_tensor_value_info("output", TensorProto.FLOAT, output_shape),
-            helper.make_tensor_value_info("output_2", TensorProto.INT64, []),
-        ]
-
-        graph_nodes = [
-            position_node,
-            construct_node,
-            insert_node,
-            erase_node,
-            concat_node,
-            split_node,
-            at_node,
-            length_node,
-        ]
-
-        graph = helper.make_graph(
-            graph_nodes,
-            "Sequence_test",
-            inputs=graph_inputs,
-            outputs=graph_outputs,
-        )
-        model = helper.make_model(
-            graph,
-            producer_name="Sequence_test",
-        )
-
-        verify_with_ort_with_inputs(model, tensor_values, target=target, dev=dev)
-
-    verify_sequence_ops((10, 3), 2)
-    verify_sequence_ops((3, 3, 3, 3), 4, position=3)
-    verify_sequence_ops((3, 3, 3, 3), 4, axis=2)
-    verify_sequence_ops((3, 3, 3, 3), 4, axis=2, new_axis=1)
-
-
-@tvm.testing.parametrize_targets
-def test_empty_sequence(target, dev):
-    """test_empty_sequence"""
-
-    # Test creating an empty tensor sequence.
-    empty_node = helper.make_node(
-        "SequenceEmpty",
-        inputs=[],
-        outputs=["empty_sequence"],
-    )
-
-    length_node = helper.make_node("SequenceLength", inputs=["empty_sequence"], outputs=["output"])
-
-    graph_outputs = [helper.make_tensor_value_info("output", TensorProto.INT64, [])]
-
-    graph_nodes = [empty_node, length_node]
-
-    graph = helper.make_graph(
-        graph_nodes,
-        "Sequence_empty_test",
-        inputs=[],
-        outputs=graph_outputs,
-    )
-
-    model = helper.make_model(
-        graph,
-        producer_name="Sequence_empty_test",
-    )
-
-    verify_with_ort_with_inputs(model, [], target=target, dev=dev)
-
-
-def test_exporting_node_renamed_model():
-    """test exproting model when export_node_renamed_model is set"""
-
-    a_name, a_shape = "a", (4, 3)
-    b_name, b_shape = "b", (3, 4)
-    out_name, out_shape = "out", [a_shape[0], b_shape[1]]
-    temp_dir = utils.tempdir().path
-
-    # model definition
-    mul_node = helper.make_node("MatMul", [a_name, b_name], [out_name])
-    graph = helper.make_graph(
-        [mul_node],
-        "matmul_test",
-        inputs=[
-            helper.make_tensor_value_info(a_name, TensorProto.FLOAT, a_shape),
-            helper.make_tensor_value_info(b_name, TensorProto.FLOAT, b_shape),
-        ],
-        outputs=[helper.make_tensor_value_info(out_name, TensorProto.FLOAT, out_shape)],
-    )
-    model = helper.make_model(graph, producer_name="matmul_test")
-
-    # get frontend model
-    shape_dict = {a_name: a_shape, b_name: b_shape}
-    _, _ = relay.frontend.from_onnx(model, shape_dict, export_node_renamed_model_path=temp_dir)
-
-    exported_model_name = os.listdir(temp_dir)[0]
-    assert "tvm_exported_model_" in exported_model_name
-
-    exported_model = onnx.load(os.path.join(temp_dir, exported_model_name))
-    assert exported_model.graph.node[0].name == "MatMul_0"
-
-
-class TestSetSpan:
-    """test structural equal between translated / hand-crafted relay IR with span tagged."""
-
-    def _verify(self, res_fptr, golden_fptr):
-        with tvm.testing.enable_span_filling():
-            with_span = res_fptr()
-        with tvm.testing.disable_span_filling():
-            without_span = res_fptr()
-        tvm.ir.assert_structural_equal(with_span, without_span)
-        _verify_structural_equal_with_span(with_span, golden_fptr())
-
-    def test_conv2d_bias_add_span(self):
-        padding = [0, 0, 0, 0]
-        k_shape = [7, 7]
-        y_shape, y_name = [1, 6, 10, 10], "y"
-        x_shape, x_name = [1, 3, 10, 10], "x"
-        b_shape, b_name = [6], "b"
-        b_val = np.random.random(b_shape).astype(np.float32)
-        w_shape, w_name = [6, 3, 7, 7], "w"
-        w_val = np.random.random(w_shape).astype(np.float32)
-        group, strides, dilations = 1, [1, 1], [1, 1]
-        conv_name = "conv2d"
-
-        def _res():
-            # model definition
-            node = helper.make_node(
-                "Conv",
-                inputs=[x_name, w_name, b_name],
-                outputs=[y_name],
-                kernel_shape=k_shape,
-                strides=strides,
-                dilations=dilations,
-                group=group,
-                pads=padding,
-                name=conv_name,
-            )
-            graph = helper.make_graph(
-                [node],
-                "conv_test",
-                inputs=[helper.make_tensor_value_info(x_name, TensorProto.FLOAT, x_shape)],
-                outputs=[helper.make_tensor_value_info(y_name, TensorProto.FLOAT, y_shape)],
-                initializer=[
-                    helper.make_tensor(
-                        w_name,
-                        TensorProto.FLOAT,
-                        dims=w_shape,
-                        vals=w_val.flatten(),
-                    ),
-                    helper.make_tensor(
-                        b_name,
-                        TensorProto.FLOAT,
-                        dims=b_shape,
-                        vals=b_val.flatten(),
-                    ),
-                ],
-            )
-            model = helper.make_model(graph, producer_name="conv_test")
-
-            # get frontend model
-            shape_dict = {x_name: x_shape}
-            mod, _ = relay.frontend.from_onnx(model, shape_dict)
-            return mod["main"]
-
-        def _golden():
-            conv_si = conv_name
-            x = relay.var(
-                x_name,
-                shape=tuple(x_shape),
-                span=_create_span(f"{conv_si}.{x_name}"),
-            )
-            conv_weight = relay.const(
-                w_val,
-                span=_create_span(f"{conv_si}.{w_name}"),
-            )
-            conv_bias = relay.const(
-                b_val,
-                span=_create_span(f"{conv_si}.{b_name}"),
-            )
-            conv_out = _set_span(
-                relay.nn.conv2d(
-                    x,
-                    conv_weight,
-                    padding=[0] * 4,
-                    channels=y_shape[1],
-                    kernel_size=k_shape,
-                ),
-                conv_si,
-            )
-            bias_out = _set_span(relay.nn.bias_add(conv_out, conv_bias), conv_si)
-            return infer_type(relay.Function([x], bias_out))
-
-        self._verify(_res, _golden)
-
-    def test_batchnorm_span(self):
-        input_name, in_shape = "x", [1, 16, 10, 10]
-        bn_name = "bn"
-        output_name = "y"
-        scale_name = "scale"
-        bias_name = "b"
-        mean_name = "mean"
-        var_name = "var"
-
-        def _res():
-            # model definition
-            batchnorm = onnx.helper.make_node(
-                "BatchNormalization",
-                inputs=[input_name, scale_name, bias_name, mean_name, var_name],
-                outputs=[output_name],
-                name=bn_name,
-            )
-            graph = helper.make_graph(
-                [batchnorm],
-                "batchnorm_test",
-                inputs=[
-                    helper.make_tensor_value_info(input_name, TensorProto.FLOAT, in_shape),
-                    helper.make_tensor_value_info(scale_name, TensorProto.FLOAT, [in_shape[1]]),
-                    helper.make_tensor_value_info(bias_name, TensorProto.FLOAT, [in_shape[1]]),
-                    helper.make_tensor_value_info(mean_name, TensorProto.FLOAT, [in_shape[1]]),
-                    helper.make_tensor_value_info(var_name, TensorProto.FLOAT, [in_shape[1]]),
-                ],
-                outputs=[helper.make_tensor_value_info(output_name, TensorProto.FLOAT, in_shape)],
-            )
-            model = helper.make_model(graph, producer_name="batchnorm_test")
-
-            # get frontend model
-            shape_dict = {input_name: in_shape}
-            mod, _ = relay.frontend.from_onnx(model, shape_dict)
-            return mod["main"]
-
-        def _golden():
-            bn_si = bn_name
-            x = relay.var(
-                input_name,
-                shape=tuple(in_shape),
-                span=_create_span(f"{bn_si}.{input_name}"),
-            )
-            bn_scale = relay.var(
-                scale_name,
-                shape=(in_shape[1],),
-                span=_create_span(f"{bn_si}.{scale_name}"),
-            )
-            bn_bias = relay.var(
-                bias_name,
-                shape=(in_shape[1],),
-                span=_create_span(f"{bn_si}.{bias_name}"),
-            )
-            bn_rm = relay.var(
-                mean_name,
-                shape=(in_shape[1],),
-                span=_create_span(f"{bn_si}.{mean_name}"),
-            )
-            bn_rv = relay.var(
-                var_name,
-                shape=(in_shape[1],),
-                span=_create_span(f"{bn_si}.{var_name}"),
-            )
-            bn_out = _set_span(
-                relay.nn.batch_norm(x, bn_scale, bn_bias, bn_rm, bn_rv),
-                bn_si,
-            )
-            bn_tuple_get_item = _set_span(relay.TupleGetItem(bn_out.tuple_value, 0), bn_si)
-            return infer_type(
-                relay.Function([x, bn_scale, bn_bias, bn_rm, bn_rv], bn_tuple_get_item)
-            )
-
-        self._verify(_res, _golden)
-
-    def test_reshape_span(self):
-        input_shape = [2, 1, 10, 1, 10]
-        new_shape = [2, 1, 10, 10]
-        input_name = "in"
-        output_name = "out"
-        ref_name = "ref_in"
-        const_name = "const"
-        reshape_name = "reshape"
-
-        def _res():
-            # model definition
-            ref_array = np.array(new_shape)
-            ref_node = helper.make_node(
-                "Constant",
-                inputs=[],
-                outputs=[ref_name],
-                value=helper.make_tensor(
-                    name="const_tensor",
-                    data_type=TensorProto.INT32,
-                    dims=ref_array.shape,
-                    vals=ref_array.flatten().astype(int),
-                ),
-                name=const_name,
-            )
-            reshape_node = helper.make_node(
-                "Reshape",
-                [input_name, ref_name],
-                [output_name],
-                name=reshape_name,
-            )
-            graph = helper.make_graph(
-                [ref_node, reshape_node],
-                "reshape_test",
-                inputs=[helper.make_tensor_value_info(input_name, TensorProto.FLOAT, input_shape)],
-                outputs=[helper.make_tensor_value_info(output_name, TensorProto.FLOAT, new_shape)],
-            )
-            model = helper.make_model(graph, producer_name="reshape_test")
-
-            # get frontend model
-            shape_dict = {input_name: input_shape}
-            mod, _ = relay.frontend.from_onnx(model, shape_dict)
-            return mod["main"]
-
-        def _golden():
-            reshape_si = reshape_name
-            x = relay.var(
-                input_name,
-                shape=tuple(input_shape),
-                span=_create_span(f"{reshape_si}.{input_name}"),
-            )
-            reshape_out = _set_span(
-                relay.reshape(x, newshape=new_shape),
-                reshape_si,
-            )
-            return infer_type(relay.Function([x], reshape_out))
-
-        self._verify(_res, _golden)
-
-    def test_matmul_span(self):
-        a_name, a_shape = "a", (4, 3)
-        b_name, b_shape = "b", (3, 4)
-        out_name, out_shape = "out", [a_shape[0], b_shape[1]]
-        matmul_name = "matmul"
-
-        def _res():
-            # model definition
-            mul_node = helper.make_node("MatMul", [a_name, b_name], [out_name], name=matmul_name)
-            graph = helper.make_graph(
-                [mul_node],
-                "matmul_test",
-                inputs=[
-                    helper.make_tensor_value_info(a_name, TensorProto.FLOAT, a_shape),
-                    helper.make_tensor_value_info(b_name, TensorProto.FLOAT, b_shape),
-                ],
-                outputs=[helper.make_tensor_value_info(out_name, TensorProto.FLOAT, out_shape)],
-            )
-            model = helper.make_model(graph, producer_name="matmul_test")
-
-            # get frontend model
-            shape_dict = {a_name: a_shape, b_name: b_shape}
-            mod, _ = relay.frontend.from_onnx(model, shape_dict)
-            return mod["main"]
-
-        def _golden():
-            matmul_si = matmul_name
-            a = relay.var(
-                a_name,
-                shape=tuple(a_shape),
-                span=_create_span(f"{matmul_si}.{a_name}"),
-            )
-            b = relay.var(
-                b_name,
-                shape=tuple(b_shape),
-                span=_create_span(f"{matmul_si}.{b_name}"),
-            )
-            b_t = _set_span(relay.transpose(b, axes=[1, 0]), matmul_si)
-            matmul_out = _set_span(
-                relay.nn.dense(a, b_t, out_dtype="float32"),
-                matmul_si,
-            )
-            return infer_type(relay.Function([a, b], matmul_out))
-
-        self._verify(_res, _golden)
-
-
-@tvm.testing.parametrize_targets
-def test_pad_constant_value(target, dev):
-    """test_pad_constant_value"""
-
-    def verify_pad_constant_value(constant_value):
-        tensor_shape = [1, 2, 257, 126]
-        tensor_values = [np.random.uniform(size=tensor_shape).astype("float32")]
-        graph_inputs = [helper.make_tensor_value_info("input", TensorProto.FLOAT, tensor_shape)]
-        graph_outputs = [helper.make_tensor_value_info("output", TensorProto.FLOAT, None)]
-        pads = helper.make_tensor("pads", TensorProto.INT64, [8], [0, 0, 0, 2, 0, 0, 0, 0])
-        pad_node = helper.make_node(
-            "Pad", ["input", "pads", constant_value], ["output"], mode="constant"
-        )
-        graph_nodes = [pad_node]
-        graph = helper.make_graph(
-            graph_nodes,
-            "test_pad_constant_value",
-            inputs=graph_inputs,
-            outputs=graph_outputs,
-            initializer=[pads],
-        )
-        model = helper.make_model(
-            graph,
-            producer_name="test_pad_constant_value",
-        )
-        verify_with_ort_with_inputs(model, tensor_values, target=target, dev=dev)
-
-    verify_pad_constant_value("")
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/paddlepaddle/test_forward.py b/tests/python/frontend/paddlepaddle/test_forward.py
deleted file mode 100755
index 6b8e90545c83..000000000000
--- a/tests/python/frontend/paddlepaddle/test_forward.py
+++ /dev/null
@@ -1,2566 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import os
-from pathlib import Path
-import shutil
-
-import numpy as np
-import tvm
-import tvm.testing
-import tvm.topi.testing
-from tvm import relay
-from tvm.contrib import graph_executor
-import pytest
-
-import paddle
-
-paddle.disable_signal_handler()
-import paddle.nn as nn
-
-PADDLE_TEST_DATA_ROOT_PATH = Path(Path("~").expanduser(), ".tvm_test_data", "paddle")
-PADDLE_TEST_DATA_ROOT_PATH.mkdir(parents=True, exist_ok=True)
-cached_program = list()
-
-
-def assert_shapes_match(tru, est):
-    if tru.shape != est.shape:
-        msg = "Output shapes {} and {} don't match"
-        raise AssertionError(msg.format(tru.shape, est.shape))
-
-
-def get_paddle_model(func, input_spec):
-    global PADDLE_TEST_DATA_ROOT_PATH
-    global cached_program
-    model_path = Path(PADDLE_TEST_DATA_ROOT_PATH, "model")
-
-    paddle.jit.save(func, str(model_path), input_spec=input_spec)
-    baseline_model = paddle.jit.load(str(model_path))
-    if len(cached_program) >= 4:
-        cached_program = list()
-    cached_program.append(baseline_model._get_program_holder())
-
-    shutil.rmtree(str(PADDLE_TEST_DATA_ROOT_PATH))
-    return baseline_model
-
-
-def verify_model(func, input_data, use_vm=False, rtol=1e-5, atol=1e-5):
-    if not (isinstance(input_data, (tuple, list))):
-        input_data = [input_data]
-
-    input_spec = []
-    input_names = []
-    input_shape_dict = {}
-    compiled_input = {}
-    for idx, data in enumerate(input_data):
-        input_name = "input{}".format(idx)
-        input_spec.append(
-            paddle.static.InputSpec(dtype=data.dtype, shape=data.shape, name=input_name)
-        )
-        input_names.append(input_name)
-        input_shape_dict[input_name] = data.shape
-        if isinstance(data, np.ndarray):
-            compiled_input[input_name] = data
-        else:
-            compiled_input[input_name] = data.numpy()
-
-    baseline_model = get_paddle_model(func, input_spec)
-    baseline_outputs = baseline_model(*[input[:] for input in input_data])
-
-    # get paddle outputs
-    if isinstance(baseline_outputs, (tuple, list)):
-        baseline_outputs = tuple(out.numpy() for out in baseline_outputs)
-    else:
-        baseline_outputs = (baseline_outputs.numpy(),)
-
-    mod, params = relay.frontend.from_paddle(baseline_model, input_shape_dict)
-    compiled_names = []
-    for arg in mod["main"].params:
-        assert arg.name_hint in input_names or arg.name_hint in params
-        if arg.name_hint in input_names:
-            compiled_names.append(arg.name_hint)
-
-    if use_vm:
-        tvm_vm_input = []
-        for idx, data in enumerate(input_data):
-            if isinstance(data, np.ndarray):
-                tvm_vm_input.append(data)
-            else:
-                tvm_vm_input.append(data.numpy())
-        for target, dev in tvm.testing.enabled_targets():
-            result = relay.create_executor("vm", mod=mod, device=dev, target=target).evaluate()(
-                *tvm_vm_input, **params
-            )
-            tvm_vm_output = []
-            if isinstance(result, tvm.runtime.NDArray):
-                tvm_vm_output = result.numpy()
-            else:
-                tvm_vm_output = [r.numpy() for r in result]
-            if not isinstance(tvm_vm_output, list):
-                tvm_vm_output = [tvm_vm_output]
-
-            for i, baseline_output in enumerate(baseline_outputs):
-                assert_shapes_match(baseline_output, tvm_vm_output[i])
-                tvm.testing.assert_allclose(baseline_output, tvm_vm_output[i], rtol=rtol, atol=atol)
-    else:
-        with tvm.transform.PassContext(opt_level=3):
-            for target, dev in tvm.testing.enabled_targets():
-                lib = relay.build(mod, target=target, params=params)
-                gmod = graph_executor.GraphModule(lib["default"](dev))
-                for name in compiled_names:
-                    gmod.set_input(name, compiled_input[name])
-                gmod.run()
-
-                for i, baseline_output in enumerate(baseline_outputs):
-                    compiled_output = gmod.get_output(i).numpy()
-
-                    assert_shapes_match(baseline_output, compiled_output)
-                    tvm.testing.assert_allclose(
-                        baseline_output, compiled_output, rtol=rtol, atol=atol
-                    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_add_subtract():
-    input_shape = [10]
-
-    @paddle.jit.to_static
-    def add_subtract(inputs):
-        return paddle.subtract(paddle.add(inputs, inputs), inputs)
-
-    @paddle.jit.to_static
-    def add_subtract2(inputs):
-        return inputs + 1 - 2
-
-    @paddle.jit.to_static
-    def add_subtract3(inputs1, inputs2):
-        ones = paddle.ones([10], dtype="float32")
-        return inputs1 + ones - inputs2
-
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(add_subtract, input_data)
-    verify_model(add_subtract2, input_data)
-    input_data2 = paddle.rand(input_shape, dtype="float32")
-    verify_model(add_subtract3, [input_data, input_data2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_addmm():
-    class Addmm(nn.Layer):
-        def __init__(self, alpha=1.0, beta=1.0):
-            super(Addmm, self).__init__()
-            self.alpha = alpha
-            self.beta = beta
-
-        @paddle.jit.to_static
-        def forward(self, inputs, x, y):
-            return paddle.addmm(inputs, x, y, self.alpha, self.beta)
-
-    input_shapes = [[10, 10], [1, 1], [7, 1]]
-    x_shapes = [[10, 3], [5, 6], [7, 7]]
-    y_shapes = [[3, 10], [6, 2], [7, 3]]
-    input_shapes = [[10, 10]]
-    x_shapes = [[10, 3]]
-    y_shapes = [[3, 10]]
-
-    for i in range(len(input_shapes)):
-        input_data = paddle.rand(input_shapes[i], dtype="float32")
-        x_data = paddle.rand(x_shapes[i], dtype="float32")
-        y_data = paddle.rand(y_shapes[i], dtype="float32")
-        verify_model(Addmm(), input_data=[input_data, x_data, y_data])
-        verify_model(Addmm(0.5, 0.3), input_data=[input_data, x_data, y_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_arg_max_min():
-    class ArgMax(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.argmax(inputs)
-
-    class ArgMax1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return inputs.argmax(axis=1)
-
-    class ArgMax2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return inputs.argmax(axis=1, keepdim=False)
-
-    class ArgMax3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return inputs.argmax(axis=2, keepdim=True)
-
-    class ArgMin(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.argmin(inputs)
-
-    class ArgMin1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return inputs.argmin(axis=1)
-
-    class ArgMin2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return inputs.argmax(axis=1, keepdim=False)
-
-    class ArgMin3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return inputs.argmin(axis=2, keepdim=True)
-
-    input_shapes = [[256], [5, 28], [10, 5, 4], [1, 3, 8, 8]]
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(ArgMax(), input_data=input_data)
-        verify_model(ArgMin(), input_data=input_data)
-    for input_shape in input_shapes[1:]:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(ArgMax1(), input_data=input_data)
-        verify_model(ArgMax2(), input_data=input_data)
-        verify_model(ArgMin1(), input_data=input_data)
-        verify_model(ArgMin2(), input_data=input_data)
-    for input_shape in input_shapes[2:]:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(ArgMax3(), input_data=input_data)
-        verify_model(ArgMin3(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_argsort():
-    class ArgSort1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.argsort(inputs)
-
-    class ArgSort2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.argsort(inputs, axis=0, descending=True)
-
-    class ArgSort3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.argsort(inputs, axis=-1, descending=True)
-
-    input_shapes = [[256], [10, 20], [10, 5, 3], [1, 3, 5, 5]]
-    for input_shape in input_shapes:
-        # Avoid duplicate elements in the array which will bring
-        # different results with different sort algorithms
-        np.random.seed(13)
-        np_data = np.random.choice(range(-5000, 5000), np.prod(input_shape), replace=False)
-        input_data = paddle.to_tensor(np_data.reshape(input_shape).astype("int64"))
-        verify_model(ArgSort1(), [input_data])
-        verify_model(ArgSort2(), [input_data])
-        verify_model(ArgSort3(), [input_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_assign():
-    @paddle.jit.to_static
-    def assign(inputs):
-        return paddle.assign(inputs)
-
-    @paddle.jit.to_static
-    def assign_value(inputs):
-        x = paddle.to_tensor(np.array([3]).astype("float32"))
-        return inputs + x
-
-    input_shape = [2, 3]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(
-        assign,
-        [
-            input_data,
-        ],
-    )
-    input_data2 = np.random.randint(100, size=input_shape)
-    verify_model(
-        assign,
-        [
-            input_data2,
-        ],
-    )
-    verify_model(assign_value, [input_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_batch_norm():
-    class BatchNorm1D(nn.Layer):
-        def __init__(self):
-            super(BatchNorm1D, self).__init__()
-            self.batch_norm = nn.BatchNorm1D(2)
-
-        @paddle.jit.to_static
-        def forward(self, input_data):
-            return self.batch_norm(input_data)
-
-    class BatchNorm2D(nn.Layer):
-        def __init__(self):
-            super(BatchNorm2D, self).__init__()
-            self.batch_norm = nn.BatchNorm2D(2)
-
-        @paddle.jit.to_static
-        def forward(self, input_data):
-            return self.batch_norm(input_data)
-
-    class BatchNorm3D(nn.Layer):
-        def __init__(self):
-            super(BatchNorm3D, self).__init__()
-            self.batch_norm = nn.BatchNorm3D(2)
-
-        @paddle.jit.to_static
-        def forward(self, input_data):
-            return self.batch_norm(input_data)
-
-    input_data = paddle.rand((2, 2, 3), dtype="float32")
-    verify_model(BatchNorm1D(), input_data=input_data)
-    input_data = paddle.rand((2, 2, 2, 3), dtype="float32")
-    verify_model(BatchNorm2D(), input_data=input_data)
-    input_data = paddle.rand((2, 2, 2, 2, 3), dtype="float32")
-    verify_model(BatchNorm3D(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_bmm():
-    class Bmm(nn.Layer):
-        def __init__(self):
-            super(Bmm, self).__init__()
-
-        @paddle.jit.to_static
-        def forward(self, x, y):
-            return paddle.bmm(x, y)
-
-    x_shapes = [[10, 3, 4], [5, 6, 2], [1, 7, 7]]
-    y_shapes = [[10, 4, 5], [5, 2, 7], [1, 7, 3]]
-    for i in range(len(x_shapes)):
-        x_data = paddle.rand(x_shapes[i], dtype="float32")
-        y_data = paddle.rand(y_shapes[i], dtype="float32")
-        verify_model(Bmm(), input_data=[x_data, y_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_cast():
-    @paddle.jit.to_static
-    def cast1(inputs, dtype="uint8"):
-        return paddle.cast(inputs, dtype)
-
-    @paddle.jit.to_static
-    def cast2(inputs, dtype="int64"):
-        return inputs.cast(dtype)
-
-    input_shape = [2, 3]
-    input_data = paddle.rand(input_shape, dtype="float32") * 100
-    verify_model(
-        cast1,
-        [
-            input_data,
-        ],
-    )
-    verify_model(
-        cast2,
-        [
-            input_data,
-        ],
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_check_tensor():
-    class IsFinite(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.cast(paddle.isfinite(inputs), "int32")
-
-    class IsNan(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.cast(paddle.isnan(inputs), "int32")
-
-    class IsInf(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.cast(paddle.isinf(inputs), "int32")
-
-    input_shapes = [[32], [8, 32], [2, 5, 20], [2, 3, 8, 8], [2, 2, 3, 6, 6]]
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(IsFinite(), input_data=input_data)
-        verify_model(IsNan(), input_data=input_data)
-        verify_model(IsInf(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_clip():
-    class Clip1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.clip(inputs, min=0.3, max=0.55)
-
-    class Clip2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs, max_value):
-            return paddle.clip(inputs, max=max_value)
-
-    class Clip3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs, min_value):
-            return paddle.clip(inputs, min=min_value)
-
-    class Clip4(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs, min_value, max_value):
-            return paddle.clip(inputs, min=min_value, max=max_value)
-
-    input_data = paddle.rand((2, 2, 2, 3), dtype="float32")
-    max_value = paddle.to_tensor([0.55])
-    min_value = paddle.to_tensor([0.3])
-    verify_model(Clip1(), input_data)
-    verify_model(Clip2(), [input_data, max_value])
-    verify_model(Clip3(), [input_data, min_value])
-    verify_model(Clip4(), [input_data, min_value, max_value])
-
-
-@tvm.testing.uses_gpu
-def test_forward_concat_unsqueeze():
-    @paddle.jit.to_static
-    def concat_unsqueeze1(inputs):
-        return paddle.concat([inputs[:, 0].unsqueeze(1), inputs[:, 1].unsqueeze(1)], axis=1)
-
-    @paddle.jit.to_static
-    def concat_unsqueeze2(inputs):
-        a = (inputs[:, :, 0] + 2) * 7
-        b = (inputs[:, :, 1] + 3) * 11
-        c = (inputs[:, :, 2] + 5) * 13
-        return paddle.concat([paddle.unsqueeze(t, axis=2) for t in [a, b, c]], axis=2)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(concat_unsqueeze1, input_data=input_data)
-    verify_model(concat_unsqueeze2, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_cumsum():
-    @paddle.jit.to_static
-    def cusum1(inputs):
-        return paddle.cumsum(inputs)
-
-    @paddle.jit.to_static
-    def cusum2(inputs):
-        return paddle.cumsum(inputs, axis=0)
-
-    @paddle.jit.to_static
-    def cusum3(inputs):
-        return paddle.cumsum(inputs, axis=1)
-
-    input_data = paddle.randint(0, 100, (10, 10), dtype=paddle.int32)
-    verify_model(cusum1, [input_data])
-    verify_model(cusum1, [input_data.astype(paddle.int64)])
-    verify_model(
-        cusum2,
-        [
-            input_data,
-        ],
-    )
-    verify_model(
-        cusum3,
-        [
-            input_data,
-        ],
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_conv():
-    class Conv2D1(nn.Layer):
-        def __init__(self, stride=1, padding=0, dilation=1, groups=1, padding_mode="zeros"):
-            super(Conv2D1, self).__init__()
-            self.conv = nn.Conv2D(
-                3,
-                6,
-                3,
-                stride=stride,
-                padding=padding,
-                dilation=dilation,
-                groups=groups,
-                padding_mode=padding_mode,
-            )
-            self.softmax = nn.Softmax()
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.softmax(self.conv(inputs))
-
-    class Conv2D2(nn.Layer):
-        def __init__(
-            self,
-            stride=1,
-            padding=0,
-            dilation=1,
-            groups=1,
-            padding_mode="zeros",
-            data_format="NCHW",
-        ):
-            super(Conv2D2, self).__init__()
-            self.conv = nn.Conv2D(
-                3,
-                6,
-                3,
-                stride=stride,
-                padding=padding,
-                dilation=dilation,
-                groups=groups,
-                padding_mode=padding_mode,
-                data_format=data_format,
-            )
-            self.softmax = nn.Softmax()
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.softmax(self.conv(inputs))
-
-    input_shapes = [[1, 3, 10, 10], [1, 3, 12, 12]]
-
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(Conv2D1(), input_data=input_data)
-        verify_model(Conv2D1(stride=2, padding="VALID", dilation=3), input_data=input_data)
-        verify_model(Conv2D1(stride=2, padding="SAME", dilation=3), input_data=input_data)
-        verify_model(
-            Conv2D1(stride=2, padding=3, dilation=3, padding_mode="replicate"),
-            input_data=input_data,
-        )
-        verify_model(Conv2D1(stride=2, padding="SAME", dilation=2, groups=3), input_data=input_data)
-        verify_model(
-            Conv2D2(stride=2, padding="SAME", dilation=2, groups=3, data_format="NCHW"),
-            input_data=input_data,
-        )
-
-
-@tvm.testing.uses_gpu
-def test_forward_conv_transpose():
-    class Conv2DTranspose(nn.Layer):
-        def __init__(self, stride=1, padding=0, dilation=1, groups=1, padding_mode="zeros"):
-            super(Conv2DTranspose, self).__init__()
-            self.conv = nn.Conv2DTranspose(
-                6,
-                3,
-                3,
-                stride=stride,
-                padding=padding,
-                dilation=dilation,
-                groups=groups,
-            )
-            self.softmax = nn.Softmax()
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.softmax(self.conv(inputs))
-
-    input_shapes = [[1, 6, 10, 10], [2, 6, 8, 8]]
-
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(Conv2DTranspose(), input_data=input_data)
-        verify_model(Conv2DTranspose(stride=2, padding="VALID"), input_data=input_data)
-        verify_model(Conv2DTranspose(stride=2, padding="SAME", dilation=1), input_data=input_data)
-        verify_model(Conv2DTranspose(stride=2, padding=3), input_data=input_data)
-        verify_model(Conv2DTranspose(stride=3, padding="SAME", groups=1), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_conv3d():
-    class Conv3D(nn.Layer):
-        def __init__(self, stride=1, padding=0, dilation=1, groups=1, padding_mode="zeros"):
-            super(Conv3D, self).__init__()
-            self.conv = nn.Conv3D(
-                3,
-                6,
-                3,
-                stride=stride,
-                padding=padding,
-                dilation=dilation,
-                groups=groups,
-                padding_mode=padding_mode,
-            )
-            self.softmax = nn.Softmax()
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.softmax(self.conv(inputs))
-
-    class Conv3D2(nn.Layer):
-        def __init__(
-            self,
-            stride=1,
-            padding=0,
-            dilation=1,
-            groups=1,
-            padding_mode="zeros",
-            data_format="NCDHW",
-        ):
-            super(Conv3D2, self).__init__()
-            self.conv = nn.Conv3D(
-                3,
-                6,
-                3,
-                stride=stride,
-                padding=padding,
-                dilation=dilation,
-                groups=groups,
-                padding_mode=padding_mode,
-                data_format=data_format,
-            )
-            self.softmax = nn.Softmax()
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.softmax(self.conv(inputs))
-
-    input_shapes = [[1, 3, 10, 10, 10], [1, 3, 12, 12, 12]]
-
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(Conv3D(), input_data=input_data)
-        verify_model(Conv3D(stride=2, padding="VALID", dilation=3), input_data=input_data)
-        verify_model(Conv3D(stride=2, padding="SAME", dilation=3), input_data=input_data)
-        verify_model(
-            Conv3D(stride=2, padding=(3, 3, 4, 4, 2, 2), dilation=3),
-            input_data=input_data,
-        )
-        verify_model(
-            Conv3D(stride=2, padding=3, dilation=3, padding_mode="reflect"),
-            input_data=input_data,
-        )
-        verify_model(
-            Conv3D(stride=2, padding=3, dilation=3, padding_mode="replicate"),
-            input_data=input_data,
-        )
-        verify_model(Conv3D(stride=2, padding="SAME", dilation=2, groups=3), input_data=input_data)
-        verify_model(
-            Conv3D2(stride=2, padding="SAME", dilation=2, groups=3, data_format="NCDHW"),
-            input_data=input_data,
-        )
-
-
-@tvm.testing.uses_gpu
-def test_forward_dot():
-    class Dot(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, x, y):
-            return paddle.dot(x, y)
-
-    input_shapes = [[128], [8, 24]]
-    for input_shape in input_shapes:
-        x_data = paddle.rand(input_shape, dtype="float32")
-        y_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(Dot(), input_data=[x_data, y_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_dropout():
-    @paddle.jit.to_static
-    def dropout(inputs):
-        return nn.functional.dropout(inputs)
-
-    @paddle.jit.to_static
-    def dropout1(inputs):
-        return nn.functional.dropout(inputs, 0.1)
-
-    @paddle.jit.to_static
-    def dropout2(inputs):
-        return nn.functional.dropout(inputs, 0.1, mode="downscale_in_infer")
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(dropout, input_data=input_data[0, 0])
-    verify_model(dropout, input_data=input_data)
-    verify_model(dropout1, input_data=input_data[0, 0])
-    verify_model(dropout1, input_data=input_data)
-    verify_model(dropout2, input_data=input_data[0, 0])
-    verify_model(dropout2, input_data=input_data)
-
-
-def test_forward_elemwise():
-    class ElemwiseAPI(nn.Layer):
-        def __init__(self, api_name):
-            super(ElemwiseAPI, self).__init__()
-            self.api_name_ = api_name
-            for candidate in (paddle, paddle.nn.functional):
-                self.func = getattr(candidate, api_name, None)
-                if self.func:
-                    break
-
-        @paddle.jit.to_static
-        def forward(self, input1, input2):
-            y = self.func(input1, input2)
-            if "equal" in self.api_name_ or "than" in self.api_name_:
-                # for compare operation, cast boolean result to int32
-                y = paddle.cast(y, "int32")
-            return y
-
-    api_list = [
-        "equal",
-        "floor_divide",
-        "greater_equal",
-        "greater_than",
-        "less_equal",
-        "less_than",
-        "maximum",
-        "minimum",
-        "pow",
-    ]
-    x_shapes = [[128], [8, 20], [4, 20, 3], [2, 3, 8, 8], [2, 3, 3, 9, 9]]
-    y_shapes = [[1], [8, 20], [4, 1, 1], [2, 3, 8, 8], [2, 3, 3, 9, 1]]
-    for x_shape, y_shape in zip(x_shapes, y_shapes):
-        x_data = paddle.randint(1, 10, x_shape, dtype="int32")
-        y_data = paddle.randint(1, 10, y_shape, dtype="int32")
-        for api_name in api_list:
-            if api_name == "pow":
-                # only support float for pow
-                x_data = x_data.astype("float32")
-                y_data = y_data.astype("float32")
-            verify_model(ElemwiseAPI(api_name), [x_data, y_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_expand():
-    @paddle.jit.to_static
-    def expand1(inputs):
-        return paddle.expand(inputs, shape=[2, 128])
-
-    @paddle.jit.to_static
-    def expand2(inputs):
-        return paddle.expand(inputs, shape=[2, 1, 4, 16])
-
-    @paddle.jit.to_static
-    def expand3(inputs):
-        return paddle.expand(inputs, shape=[2, 1, 3, 7, 7])
-
-    @paddle.jit.to_static
-    def expand4(inputs):
-        shape = paddle.to_tensor(np.array([2, 128]).astype("int32"))
-        return paddle.expand(inputs, shape=shape)
-
-    @paddle.jit.to_static
-    def expand5(inputs):
-        shape = paddle.to_tensor(np.array([2, 1, 4, 16]).astype("int32"))
-        return paddle.expand(inputs, shape=shape)
-
-    @paddle.jit.to_static
-    def expand6(inputs):
-        shape = paddle.to_tensor(np.array([2, 1, 3, 7, 7]).astype("int32"))
-        return paddle.expand(inputs, shape=shape)
-
-    data = paddle.rand([128], dtype="float32")
-    verify_model(expand1, input_data=[data])
-    verify_model(expand4, input_data=[data])
-    data = paddle.rand([4, 16], dtype="float32")
-    verify_model(expand2, input_data=[data])
-    verify_model(expand5, input_data=[data])
-    data = paddle.rand([1, 3, 7, 7], dtype="float32")
-    verify_model(expand3, input_data=[data])
-    verify_model(expand6, input_data=[data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_expand_as():
-    class ExpandAs(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, x, y):
-            z = paddle.expand_as(x, y)
-            z += y
-            return z
-
-    x_shapes = [[1], [8, 128], [8, 1, 1], [2, 3, 229, 229], [2, 3, 3, 224, 1]]
-    y_shapes = [[128], [8, 128], [8, 200, 300], [2, 3, 229, 229], [2, 3, 3, 224, 224]]
-    for x_shape, y_shape in zip(x_shapes, y_shapes):
-        x_data = paddle.rand(x_shape, dtype="float32")
-        y_data = paddle.rand(y_shape, dtype="float32")
-        verify_model(ExpandAs(), [x_data, y_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_fill_zeros_like():
-    class FilZeroLike(nn.Layer):
-        def __init__(self, dtype=None):
-            super(FilZeroLike, self).__init__()
-            self.dtype = dtype
-
-        @paddle.jit.to_static
-        def forward(self, x):
-            return paddle.zeros_like(x, dtype=self.dtype)
-
-    input_shape = [2, 3, 5]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(FilZeroLike("float32"), input_data=input_data)
-    verify_model(FilZeroLike("int32"), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_flatten():
-    class Flatten(nn.Layer):
-        def __init__(self, start_axis=0, stop_axis=-1):
-            super(Flatten, self).__init__()
-            self.start_axis = start_axis
-            self.stop_axis = stop_axis
-
-        @paddle.jit.to_static
-        def forward(self, x):
-            return paddle.flatten(x, start_axis=self.start_axis, stop_axis=self.stop_axis)
-
-    input_data = paddle.rand([2, 3, 4, 5, 2], dtype="float32")
-    verify_model(Flatten(), input_data=input_data)
-    verify_model(Flatten(2), input_data=input_data)
-    verify_model(Flatten(2, -2), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_flip():
-    class Flip(nn.Layer):
-        def __init__(self, axis):
-            super(Flip, self).__init__()
-            self.axis = axis
-
-        @paddle.jit.to_static
-        def forward(self, x):
-            return paddle.flip(x, axis=self.axis)
-
-    input_data = paddle.rand([2, 3, 4], dtype="float32")
-    verify_model(Flip(0), input_data)
-    verify_model(Flip(-1), input_data)
-    verify_model(Flip([0, 1]), input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_gather():
-    class Gather(nn.Layer):
-        def __init__(self, axis=None):
-            super(Gather, self).__init__()
-            self.axis = axis
-
-        @paddle.jit.to_static
-        def forward(self, x, index):
-            return paddle.gather(x, index, axis=self.axis)
-
-    x_shapes = [[20, 10], [10, 10, 8]]
-    index = paddle.to_tensor(np.array([1, 3, 5]).astype("int64"))
-    for x_shape in x_shapes:
-        x_data = paddle.rand(x_shape, dtype="float32")
-        verify_model(Gather(), [x_data, index])
-        verify_model(Gather(axis=0), [x_data, index])
-        verify_model(Gather(axis=1), [x_data, index])
-
-
-@tvm.testing.uses_gpu
-def test_forward_gather_nd():
-    class GatherNd(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, x, index):
-            return paddle.gather_nd(x, index)
-
-    x_shapes = [[20], [8, 8], [4, 5, 6], [3, 4, 3, 5]]
-    y_shapes = [[2, 1], [2], [1, 2, 3], [3]]
-    for x_shape, y_shape in zip(x_shapes, y_shapes):
-        x_data = paddle.rand(x_shape, dtype="float32")
-        y_data = paddle.randint(low=0, high=3, shape=y_shape, dtype="int64")
-        verify_model(GatherNd(), [x_data, y_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_group_norm():
-    class GroupNorm(nn.Layer):
-        def __init__(self, channels, groups):
-            super(GroupNorm, self).__init__()
-            self.group_norm = paddle.nn.GroupNorm(num_channels=channels, num_groups=groups)
-
-        def forward(self, inputs):
-            return self.group_norm(inputs)
-
-    input_shapes = [[1, 4, 6, 6], [2, 2, 4, 7], [2, 8, 1, 1]]
-    for input_shape in input_shapes:
-        num_channels = input_shape[1]
-        input_data = paddle.uniform(input_shape)
-        verify_model(GroupNorm(num_channels, 1), input_data, rtol=1e-4, atol=1e-4)
-        verify_model(GroupNorm(num_channels, 2), input_data, rtol=1e-4, atol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_forward_grid_sampler():
-    class GridSampler(nn.Layer):
-        def __init__(self, mode="bilinear", padding_mode="zeros", align_corners=True):
-            super(GridSampler, self).__init__()
-            self.mode = mode
-            self.padding_mode = padding_mode
-            self.align_corners = align_corners
-
-        def forward(self, x, grid):
-            return paddle.nn.functional.grid_sample(
-                x,
-                grid,
-                mode=self.mode,
-                padding_mode=self.padding_mode,
-                align_corners=self.align_corners,
-            )
-
-    x_2D = paddle.rand(shape=[4, 4, 8, 8], dtype="float32")
-    grid_2D = paddle.rand(shape=[4, 8, 8, 2], dtype="float32")
-    verify_model(GridSampler(mode="nearest"), input_data=[x_2D, grid_2D])
-    verify_model(GridSampler(padding_mode="reflection"), input_data=[x_2D, grid_2D])
-    verify_model(GridSampler(padding_mode="border"), input_data=[x_2D, grid_2D])
-    verify_model(GridSampler(align_corners=False), input_data=[x_2D, grid_2D])
-
-    x_3D = paddle.rand(shape=[4, 4, 4, 4, 4], dtype="float32")
-    grid_3D = paddle.rand(shape=[4, 8, 8, 8, 3], dtype="float32")
-    verify_model(GridSampler(mode="nearest"), input_data=[x_3D, grid_3D])
-    verify_model(GridSampler(padding_mode="reflection"), input_data=[x_3D, grid_3D])
-    verify_model(GridSampler(padding_mode="border"), input_data=[x_3D, grid_3D])
-    verify_model(GridSampler(align_corners=False), input_data=[x_3D, grid_3D])
-
-
-@tvm.testing.uses_gpu
-def test_forward_scatter():
-    class Scatter(nn.Layer):
-        def __init__(self, overwrite=True):
-            super(Scatter, self).__init__()
-            self.overwrite = overwrite
-
-        @paddle.jit.to_static
-        def forward(self, x, index, updates):
-            return paddle.scatter(x, index, updates, overwrite=self.overwrite)
-
-    x_shapes = [[10], [4, 5], [6, 4, 5], [4, 5, 6, 4]]
-    index_shapes = [[10], [4], [6], [4]]
-    for x_shape, index_shape in zip(x_shapes, index_shapes):
-        x_data = paddle.rand(x_shape, dtype="float32")
-        updates = paddle.rand(x_shape, dtype="float32") + 1.0
-        index = paddle.randint(low=0, high=3, shape=index_shape)
-        verify_model(Scatter(), [x_data, index, updates])
-        verify_model(Scatter(False), [x_data, index, updates])
-
-
-def test_forward_scatter_nd():
-    @paddle.jit.to_static
-    def scatter_nd(index, updates):
-        shape = [3, 5, 9, 10]
-        return paddle.scatter_nd(index, updates, shape)
-
-    @paddle.jit.to_static
-    def scatter_nd_add(x, index, updates):
-        return paddle.scatter_nd_add(x, index, updates)
-
-    index_data = np.array([[1, 1], [0, 1], [1, 3]]).astype(np.int64)
-    index = paddle.to_tensor(index_data)
-    updates = paddle.rand(shape=[3, 9, 10], dtype="float32")
-    verify_model(scatter_nd, [index, updates])
-    x = paddle.rand(shape=[3, 5, 4, 9, 10], dtype="float32")
-    updates = paddle.rand(shape=[3, 2, 9, 10], dtype="float32")
-    index = paddle.randint(0, 3, shape=[3, 2, 3])
-    verify_model(scatter_nd_add, [x, index, updates])
-
-
-@tvm.testing.uses_gpu
-def test_forward_shape_full():
-    @paddle.jit.to_static
-    def full1(inputs):
-        return paddle.full(paddle.shape(inputs), 3.14)
-
-    @paddle.jit.to_static
-    def full2(inputs):
-        return paddle.full(paddle.shape(inputs), 1.0, dtype=inputs.dtype)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(full1, input_data=[input_data])
-    verify_model(full2, input_data=[input_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_split():
-    class Split(nn.Layer):
-        def __init__(
-            self, axis=None, num_or_sections=None, axis_is_tensor=False, num_is_tensor=False
-        ):
-            super(Split, self).__init__()
-            self.axis = axis
-            self.num_or_sections = num_or_sections
-            self.axis_is_tensor = axis_is_tensor
-            self.num_is_tensor = num_is_tensor
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            axis = self.axis
-            if self.axis_is_tensor:
-                axis = paddle.to_tensor(axis, dtype="int32")
-            num_or_sections = self.num_or_sections
-            if self.num_is_tensor:
-                new_num_or_sections = []
-                for i in num_or_sections:
-                    if isinstance(i, list):
-                        i = paddle.to_tensor(i, dtype="int32")
-                    new_num_or_sections.append(i)
-                num_or_sections = new_num_or_sections
-            return paddle.split(inputs, num_or_sections=num_or_sections, axis=axis)
-
-    input_shape = [3, 6, 2]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(Split(axis=1, num_or_sections=3), input_data=input_data)
-    verify_model(
-        Split(axis=[1], num_or_sections=[2, 3, 1], axis_is_tensor=True), input_data=input_data
-    )
-    verify_model(
-        Split(axis=1, num_or_sections=[2, -1, [3]], num_is_tensor=True), input_data=input_data
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_squeeze():
-    class Squeeze(nn.Layer):
-        def __init__(self, axis=None):
-            super(Squeeze, self).__init__()
-            self.axis = axis
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.squeeze(inputs, axis=self.axis)
-
-    input_shapes = [[1, 1, 3, 1, 5], [5, 1, 6]]
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(Squeeze(axis=None), input_data=input_data)
-        verify_model(Squeeze(axis=1), input_data=input_data)
-    input_data = paddle.rand([1], dtype="float32")
-    verify_model(Squeeze(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_ones_like():
-    @paddle.jit.to_static
-    def ones_like1(inputs):
-        return paddle.ones_like(inputs)
-
-    @paddle.jit.to_static
-    def ones_like2(inputs):
-        return paddle.ones_like(inputs, dtype="int32")
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(ones_like1, input_data=input_data)
-    verify_model(ones_like2, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_gelu():
-    @paddle.jit.to_static
-    def gelu(inputs):
-        return nn.functional.gelu(inputs)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(gelu, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_hard_sigmoid():
-    @paddle.jit.to_static
-    def hard_sigmoid(inputs):
-        return nn.functional.hardsigmoid(inputs)
-
-    def hard_sigmoid1(inputs):
-        return nn.functional.hardsigmoid(inputs, offset=0.6)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(hard_sigmoid, input_data=input_data)
-    verify_model(hard_sigmoid1, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_hard_swish():
-    @paddle.jit.to_static
-    def hard_swish(inputs):
-        return nn.functional.hardswish(inputs)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(hard_swish, input_data=input_data)
-
-
-def test_forward_instance_norm():
-    class InstanceNorm(nn.Layer):
-        def __init__(self, num_features, epsilon=1e-05):
-            super(InstanceNorm, self).__init__()
-            self.instance_norm = paddle.nn.InstanceNorm2D(
-                num_features=num_features, epsilon=epsilon
-            )
-
-        def forward(self, inputs):
-            return self.instance_norm(inputs)
-
-    input_shapes = [[2, 2, 2, 3], [1, 3, 5, 5]]
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(InstanceNorm(input_shape[1]), input_data)
-        verify_model(InstanceNorm(input_shape[1], 1e-03), input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_interpolate():
-    class Interpolate(nn.Layer):
-        def __init__(
-            self,
-            mode="nearest",
-            align_corners=False,
-            align_mode=0,
-            data_format="NCHW",
-            use_scale=False,
-            use_list=False,
-            use_const=False,
-            use_scaler=False,
-        ):
-            super(Interpolate, self).__init__()
-            self.mode = mode
-            self.align_corners = align_corners
-            self.align_mode = align_mode
-            self.data_format = data_format
-            self.use_scale = use_scale
-            self.use_list = use_list
-            self.use_const = use_const
-            self.use_scaler = use_scaler
-
-        @paddle.jit.to_static
-        def forward(self, x):
-            size = np.array([15, 19]).astype("int32")
-            scale = np.array([2.0, 1.0]).astype("float32")
-            if not self.use_list and not self.use_const:
-                size = paddle.to_tensor(size)
-                scale = paddle.to_tensor(scale)
-            elif not self.use_const:
-                size0 = paddle.to_tensor(size[0:1])
-                size = [size0, int(size[1])]
-            elif not self.use_scaler:
-                size = size.tolist()
-                scale = scale.tolist()
-            else:
-                size = list(size)
-                h, w = paddle.rand(size).shape  # add decrease_axis
-                size = [h, w]
-            if not self.use_scale:
-                return paddle.nn.functional.interpolate(
-                    x,
-                    size=size,
-                    mode=self.mode,
-                    align_corners=self.align_corners,
-                    align_mode=self.align_mode,
-                    data_format=self.data_format,
-                )
-            else:
-                return paddle.nn.functional.interpolate(
-                    x,
-                    scale_factor=scale,
-                    mode=self.mode,
-                    align_corners=self.align_corners,
-                    align_mode=self.align_mode,
-                    data_format=self.data_format,
-                )
-
-    input_data = paddle.rand([1, 2, 8, 12]).astype("float32")
-    verify_model(Interpolate(), input_data)
-    verify_model(Interpolate(use_list=True), input_data)
-    verify_model(Interpolate(use_scale=True, use_const=True), input_data)
-    verify_model(Interpolate(use_const=True, use_scaler=True), input_data)
-    verify_model(Interpolate("bilinear", use_scale=True), input_data)
-    verify_model(Interpolate("bilinear", use_scale=True, align_corners=True), input_data)
-    verify_model(
-        Interpolate(
-            "bilinear",
-            use_scale=True,
-            align_corners=True,
-            align_mode=1,
-            data_format="NHWC",
-            use_const=True,
-        ),
-        input_data,
-    )
-    verify_model(
-        Interpolate("bicubic", use_scale=True, align_corners=True, align_mode=1), input_data
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_layer_norm():
-    @paddle.jit.to_static
-    def layer_norm(inputs, weight, bias):
-        return nn.functional.layer_norm(inputs, inputs.shape[-1], weight=weight, bias=bias)
-
-    class LayerNorm(nn.Layer):
-        def __init__(self):
-            super(LayerNorm, self).__init__()
-            data_shape = [10]
-            self.layer_norm = nn.LayerNorm(data_shape)
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.layer_norm(inputs)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    weight = paddle.rand([10], dtype="float32")
-    bias = paddle.rand([10], dtype="float32")
-    verify_model(layer_norm, input_data=[input_data, weight, bias])
-    verify_model(LayerNorm(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_leaky_relu():
-    @paddle.jit.to_static
-    def leaky_relu(inputs):
-        return nn.functional.leaky_relu(inputs)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(leaky_relu, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_logical_api():
-    class LogicalAPI(nn.Layer):
-        def __init__(self, api_name):
-            super(LogicalAPI, self).__init__()
-            for candidate in (paddle, paddle.nn.functional):
-                self.func = getattr(candidate, api_name, None)
-                if self.func:
-                    break
-
-        @paddle.jit.to_static
-        def forward(self, x, y):
-            z = self.func(x, y)
-            return paddle.cast(z, "int32")
-
-    x_shapes = [[128], [8, 20], [4, 20, 3], [2, 3, 8, 8], [2, 3, 3, 9, 9]]
-    y_shapes = [[1], [8, 20], [4, 1, 1], [2, 3, 8, 8], [2, 3, 3, 9, 1]]
-    for x_shape, y_shape in zip(x_shapes, y_shapes):
-        x_data = paddle.randint(0, 2, x_shape).astype("bool")
-        y_data = paddle.randint(0, 2, y_shape).astype("bool")
-        verify_model(LogicalAPI("logical_and"), [x_data, y_data])
-        verify_model(LogicalAPI("logical_or"), [x_data, y_data])
-        verify_model(LogicalAPI("logical_xor"), [x_data, y_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_logical_not():
-    class LogicalNot(nn.Layer):
-        def __init__(self):
-            super(LogicalNot, self).__init__()
-
-        @paddle.jit.to_static
-        def forward(self, x):
-            return paddle.logical_not(x).astype("int32")
-
-    input_shapes = [[128], [8, 20], [4, 20, 3], [2, 3, 8, 8], [2, 3, 3, 9, 9]]
-    for input_shape in input_shapes:
-        input_data = paddle.randint(-2, 2, input_shape).astype("bool")
-        verify_model(LogicalNot(), input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_look_up():
-    @paddle.jit.to_static
-    def look_up(inputs, weight):
-        return nn.functional.embedding(inputs, weight)
-
-    class LookUp(nn.Layer):
-        def __init__(self):
-            super(LookUp, self).__init__()
-            self.embedding = paddle.nn.Embedding(10, 4, sparse=True)
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.embedding(inputs)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.randint(0, 10, input_shape, dtype="int32")
-    weight = paddle.rand([10, 4], dtype="float32")
-    verify_model(look_up, input_data=[input_data, weight])
-    verify_model(LookUp(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_multiply():
-    @paddle.jit.to_static
-    def multiply1(inputs):
-        return inputs * inputs
-
-    @paddle.jit.to_static
-    def multiply2(inputs):
-        return inputs * 1.0 / 2.0
-
-    @paddle.jit.to_static
-    def multiply3(inputs, inputs2):
-        ones = paddle.ones([10], dtype="float32")
-        return inputs * ones / inputs2
-
-    input_shape = [10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(multiply1, input_data=input_data)
-    verify_model(multiply2, input_data=input_data)
-    input_data2 = paddle.rand(input_shape, dtype="float32")
-    verify_model(multiply3, input_data=[input_data, input_data2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_matmul():
-    class MatMul1(nn.Layer):
-        def forward(self, input1, input2):
-            return paddle.matmul(input1, input2)
-
-    # matrix x vector
-    input_data1 = paddle.randn((3, 4), dtype="float32")
-    input_data2 = paddle.randn((4,), dtype="float32")
-    verify_model(MatMul1(), input_data=[input_data1, input_data2])
-
-    # matrix x matrix
-    input_data1 = paddle.randn((5, 4), dtype="float32")
-    input_data2 = paddle.randn((4, 5), dtype="float32")
-    verify_model(MatMul1(), input_data=[input_data1, input_data2])
-
-    # batched matrix x batched matrix
-    input_data1 = paddle.randn((10, 3, 4), dtype="float32")
-    input_data2 = paddle.randn((10, 4, 5), dtype="float32")
-    verify_model(MatMul1(), input_data=[input_data1, input_data2])
-
-    # batched matrix x broadcasted matrix
-    input_data1 = paddle.randn((10, 3, 4), dtype="float32")
-    input_data2 = paddle.randn((4, 5), dtype="float32")
-    verify_model(MatMul1(), input_data=[input_data1, input_data2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_pool2d():
-    class Pool2D1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.avg_pool2d(inputs, kernel_size=2, stride=2, padding=0)
-
-    class Pool2D2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.adaptive_avg_pool2d(inputs, output_size=[3, 3])
-
-    class Pool2D3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.avg_pool2d(
-                inputs,
-                kernel_size=3,
-                stride=1,
-                padding=[1, 1],
-                exclusive=False,
-                divisor_override=2.5,
-            )
-
-    input_shapes = [[1, 2, 8, 8], [1, 3, 10, 10]]
-    for input_shape in input_shapes:
-        input_data = paddle.uniform(shape=input_shape, dtype="float32", min=-1, max=1)
-        verify_model(Pool2D1(), input_data=input_data)
-        verify_model(Pool2D2(), input_data=input_data)
-        verify_model(Pool2D3(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_pad1d():
-    class Pad1D(nn.Layer):
-        def __init__(self, padding=0, mode="constant", value=0.0, data_format="NCL"):
-            super(Pad1D, self).__init__()
-            self.pad1d = paddle.nn.Pad1D(padding, mode=mode, value=value, data_format=data_format)
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.pad1d(inputs)
-
-    input_shapes = [[1, 2, 5], [2, 5, 9]]
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(Pad1D(padding=2), input_data=input_data)
-        verify_model(Pad1D(padding=[1, 2], data_format="NLC"), input_data=input_data)
-        verify_model(Pad1D(padding=[0, 2], value=0.3), input_data=input_data)
-        verify_model(Pad1D(padding=[2, 2], mode="reflect"), input_data=input_data)
-        verify_model(Pad1D(padding=3, mode="replicate"), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_pad2d():
-    class Pad2D(nn.Layer):
-        def __init__(self, padding=0, mode="constant", value=0.0, data_format="NCHW"):
-            super(Pad2D, self).__init__()
-            self.pad2d = paddle.nn.Pad2D(padding, mode=mode, value=value, data_format=data_format)
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.pad2d(inputs)
-
-    input_shapes = [[1, 2, 5, 5], [2, 2, 5, 9]]
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(Pad2D(padding=2), input_data=input_data)
-        verify_model(Pad2D(padding=[1, 2, 0, 2], data_format="NHWC"), input_data=input_data)
-        verify_model(Pad2D(padding=[1, 2, 0, 2], value=0.3), input_data=input_data)
-        verify_model(Pad2D(padding=[1, 2, 0, 2], mode="reflect"), input_data=input_data)
-        verify_model(Pad2D(padding=3, mode="replicate"), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_pad3d():
-    class Pad3D(nn.Layer):
-        def __init__(self, padding=0, mode="constant", value=0.0, data_format="NCDHW"):
-            super(Pad3D, self).__init__()
-            self.pad3d = paddle.nn.Pad3D(padding, mode=mode, value=value, data_format=data_format)
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.pad3d(inputs)
-
-    input_shapes = [[1, 2, 2, 5, 5], [1, 2, 2, 5, 9]]
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(Pad3D(padding=2), input_data=input_data)
-        verify_model(Pad3D(padding=[1, 2, 0, 2, 1, 1], data_format="NDHWC"), input_data=input_data)
-        verify_model(Pad3D(padding=[1, 2, 0, 2, 1, 1], value=0.3), input_data=input_data)
-        verify_model(Pad3D(padding=[1, 2, 0, 2, 1, 1], mode="reflect"), input_data=input_data)
-        verify_model(Pad3D(padding=3, mode="replicate"), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_transpose():
-    class Transpose(nn.Layer):
-        def __init__(self, perm):
-            super(Transpose, self).__init__()
-            self.perm = perm
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            inputs = inputs * 2
-            return paddle.transpose(inputs, perm=self.perm)
-
-    input_data = paddle.rand([1, 3, 5, 4, 3], dtype="float32")
-    verify_model(Transpose([0, 1, 2, 3, 4]), input_data=input_data)
-    verify_model(Transpose([4, 3, 2, 0, 1]), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_reduce():
-    class Reduce(nn.Layer):
-        def __init__(self, op_name, axis=None, keepdim=False):
-            super(Reduce, self).__init__()
-            self.op_name = op_name
-            self.axis = axis
-            self.keepdim = keepdim
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            result = getattr(paddle, self.op_name)(inputs, axis=self.axis, keepdim=self.keepdim)
-            result = result.astype("float32")
-            return result
-
-    input_shapes = [[1, 2, 2, 5, 5], [2, 3, 4], [4, 20], [2, 3, 30, 30]]
-    for input_shape in input_shapes:
-        input_data = paddle.uniform(min=-3, max=3, shape=input_shape, dtype="float32")
-        verify_model(Reduce("all"), input_data=input_data.astype("bool"))
-        verify_model(Reduce("any", 1), input_data=input_data.astype("bool"))
-        verify_model(Reduce("max", 0, True), input_data=input_data)
-        verify_model(Reduce("min", 1, True), input_data=input_data)
-        verify_model(Reduce("prod", 0), input_data=input_data)
-        verify_model(Reduce("sum", 0, True), input_data=input_data)
-        verify_model(Reduce("mean", -1, True), input_data=input_data)
-        # logsumexp only supports tensor with rank less than 5
-        if len(input_shape) < 5:
-            verify_model(Reduce("logsumexp", -1, True), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_reshape():
-    @paddle.jit.to_static
-    def reshape1(inputs, x):
-        new_shape = paddle.shape(x)
-        return paddle.reshape(inputs, new_shape)
-
-    @paddle.jit.to_static
-    def reshape2(inputs):
-        return inputs.reshape([-1])
-
-    @paddle.jit.to_static
-    def reshape3(inputs):
-        data_shape = inputs.shape
-        return inputs.reshape([data_shape[0] * data_shape[1], data_shape[2]])
-
-    @paddle.jit.to_static
-    def reshape4(inputs, x):
-        new_shape = paddle.shape(x)
-        return paddle.reshape(inputs, [new_shape[2], 2, -1])
-
-    input_shape = [2, 1, 10, 1, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    input_data2 = paddle.randn([2, 1, 10, 10])
-    verify_model(reshape1, input_data=[input_data, input_data2])
-    verify_model(reshape2, input_data=input_data)
-    verify_model(reshape3, input_data=paddle.randn((2, 3, 4)))
-    verify_model(reshape4, input_data=[input_data, input_data2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_scale():
-    @paddle.jit.to_static
-    def scale1(inputs):
-        return paddle.scale(inputs, scale=2.0, bias=1.0)
-
-    @paddle.jit.to_static
-    def scale2(inputs):
-        return paddle.scale(inputs, scale=3, bias=2.1, act="gelu")
-
-    input_data = paddle.randn(shape=[2, 3], dtype="float32")
-    verify_model(
-        scale1,
-        input_data=[
-            input_data,
-        ],
-    )
-    verify_model(scale2, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_slice():
-    @paddle.jit.to_static
-    def slice1(inputs):
-        return inputs[:, :, :, :3]
-
-    @paddle.jit.to_static
-    def slice2(inputs):
-        return inputs[0, :, :-3, :]
-
-    @paddle.jit.to_static
-    def slice3(inputs):
-        return inputs[0::2, 0::2] + inputs[1::2, 1::2]
-
-    @paddle.jit.to_static
-    def slice4(inputs):
-        x0 = paddle.to_tensor([2]) - paddle.to_tensor([1])
-        x1 = paddle.to_tensor([3]) + paddle.to_tensor([1])
-        return inputs[:, x0:, 1:x1, :]
-
-    @paddle.jit.to_static
-    def slice5(inputs):
-        b, c, h, w = inputs  # add decrease_axis
-        return h
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(
-        slice1,
-        input_data=[
-            input_data,
-        ],
-    )
-    verify_model(slice2, input_data=input_data)
-    verify_model(slice3, input_data=paddle.randn((4, 4)))
-    verify_model(slice4, input_data=input_data)
-    # verify_model(slice5, input_data=paddle.randn((4,)))
-
-
-@tvm.testing.uses_gpu
-def test_forward_unique():
-    class Unique(nn.Layer):
-        def __init__(
-            self,
-            return_index=False,
-            return_inverse=False,
-            return_counts=False,
-            axis=None,
-            dtype="int64",
-        ):
-            super(Unique, self).__init__()
-            self.return_index = return_index
-            self.return_inverse = return_inverse
-            self.return_counts = return_counts
-            self.axis = None
-            self.dtype = dtype
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            result = paddle.unique(
-                inputs,
-                return_inverse=self.return_inverse,
-                return_counts=self.return_counts,
-                axis=self.axis,
-                dtype=self.dtype,
-            )
-            return result
-
-    input_shape = [2, 3, 5]
-    input_data = paddle.rand(input_shape)
-    verify_model(Unique(), input_data=input_data)
-    verify_model(Unique(return_index=True), input_data=input_data)
-    verify_model(Unique(return_index=True, return_inverse=True), input_data=input_data)
-    verify_model(
-        Unique(return_index=True, return_inverse=True, return_counts=True), input_data=input_data
-    )
-
-
-@tvm.testing.uses_gpu
-def run_math_api(func):
-    api_name = func.__name__.split("_")[-1]
-    print("func_name:", api_name)
-
-    class MathAPI(nn.Layer):
-        def __init__(self, api_name):
-            super(MathAPI, self).__init__()
-            for candidate in (paddle, paddle.nn.functional):
-                self.func = getattr(candidate, api_name, None)
-                if self.func:
-                    break
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.func(inputs)
-
-    input_shapes = [[128], [2, 100], [10, 2, 5], [7, 3, 4, 1]]
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        if api_name in ["log", "log2", "log10", "reciprocal", "sqrt", "rsqrt"]:
-            # avoid illegal input, all elements should be positive
-            input_data = paddle.uniform(input_shape, min=0.01, max=0.99)
-        verify_model(MathAPI(api_name), input_data=input_data)
-
-
-@run_math_api
-def test_forward_abs():
-    pass
-
-
-@run_math_api
-def test_forward_acos():
-    pass
-
-
-@run_math_api
-def test_forward_abs():
-    pass
-
-
-@run_math_api
-def test_forward_atan():
-    pass
-
-
-@run_math_api
-def test_forward_ceil():
-    pass
-
-
-@run_math_api
-def test_forward_cos():
-    pass
-
-
-@run_math_api
-def test_forward_cosh():
-    pass
-
-
-@run_math_api
-def test_forward_elu():
-    pass
-
-
-@run_math_api
-def test_forward_erf():
-    pass
-
-
-@run_math_api
-def test_forward_exp():
-    pass
-
-
-@run_math_api
-def test_forward_floor():
-    pass
-
-
-@run_math_api
-def test_forward_hardshrink():
-    pass
-
-
-@run_math_api
-def test_forward_hardtanh():
-    pass
-
-
-@run_math_api
-def test_forward_log_sigmoid():
-    pass
-
-
-@run_math_api
-def test_forward_log_softmax():
-    pass
-
-
-@run_math_api
-def test_forward_log():
-    pass
-
-
-@run_math_api
-def test_forward_log2():
-    pass
-
-
-@run_math_api
-def test_forward_log10():
-    pass
-
-
-@run_math_api
-def test_forward_log1p():
-    pass
-
-
-@run_math_api
-def test_forward_reciprocal():
-    pass
-
-
-@run_math_api
-def test_forward_relu():
-    pass
-
-
-@run_math_api
-def test_forward_round():
-    pass
-
-
-@run_math_api
-def test_forward_rsqrt():
-    pass
-
-
-@run_math_api
-def test_forward_selu():
-    pass
-
-
-@run_math_api
-def test_forward_sigmoid():
-    pass
-
-
-@run_math_api
-def test_forward_sign():
-    pass
-
-
-@run_math_api
-def test_forward_sin():
-    pass
-
-
-@tvm.testing.uses_gpu
-def test_forward_softplus():
-    @paddle.jit.to_static
-    def Softplus1(input):
-        return paddle.nn.functional.softplus(input, beta=1.0, threshold=20.0)
-
-    @paddle.jit.to_static
-    def Softplus2(input):
-        return paddle.nn.functional.softplus(input, beta=6.0, threshold=20.0)
-
-    @paddle.jit.to_static
-    def Softplus3(input):
-        return paddle.nn.functional.softplus(input, beta=1.0, threshold=10.0)
-
-    x = paddle.to_tensor([-8.0, -12.0, 1.0, 18.0, 25.0])
-    verify_model(Softplus1, x)
-    verify_model(Softplus2, x)
-    verify_model(Softplus3, x)
-
-    input_shapes = [[10], [2, 3], [5, 10, 11], [3, 4, 5, 6]]
-    for input_shape in input_shapes:
-        input_data = paddle.randn(shape=input_shape, dtype="float32")
-        verify_model(Softplus1, input_data=input_data)
-        verify_model(Softplus2, input_data=input_data)
-        verify_model(Softplus3, input_data=input_data)
-
-
-@run_math_api
-def test_forward_sqrt():
-    pass
-
-
-@run_math_api
-def test_forward_square():
-    pass
-
-
-@run_math_api
-def test_forward_sin():
-    pass
-
-
-@run_math_api
-def test_forward_softsign():
-    pass
-
-
-@run_math_api
-def test_forward_sqrt():
-    pass
-
-
-@run_math_api
-def test_forward_square():
-    pass
-
-
-@run_math_api
-def test_forward_swish():
-    pass
-
-
-@run_math_api
-def test_forward_tan():
-    pass
-
-
-@run_math_api
-def test_forward_tanh():
-    pass
-
-
-@tvm.testing.uses_gpu
-def test_forward_meshgrid():
-    @paddle.jit.to_static
-    def t(x, y, z):
-        return paddle.meshgrid(x, y, z)
-
-    x = paddle.randint(low=0, high=100, shape=[2])
-    y = paddle.randint(low=0, high=100, shape=[3])
-    z = paddle.randint(low=0, high=100, shape=[5])
-    verify_model(t, [x, y, z])
-
-
-@tvm.testing.uses_gpu
-def test_forward_mv():
-    class Mv(nn.Layer):
-        def forward(self, input1, input2):
-            return paddle.mv(input1, input2)
-
-    # matrix x vector
-    input_data1 = paddle.randn((3, 4), dtype="float32")
-    input_data2 = paddle.randn((4,), dtype="float32")
-    verify_model(Mv(), input_data=[input_data1, input_data2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_pixel_shuffle():
-    class PixelShuffle(nn.Layer):
-        def __init__(self, upscale_factor, data_format="NCHW"):
-            super(PixelShuffle, self).__init__()
-            self.pixel_shuffle = paddle.nn.PixelShuffle(upscale_factor, data_format)
-
-        @paddle.jit.to_static
-        def forward(self, x):
-            return self.pixel_shuffle(x)
-
-    input_shapes = [[1, 4, 3, 3], [2, 8, 2, 5]]
-    for input_shape in input_shapes:
-        x = paddle.rand(input_shape, dtype="float32")
-        verify_model(PixelShuffle(2), x)
-
-    input_shapes = [[1, 3, 3, 4], [2, 2, 5, 8]]
-    for input_shape in input_shapes:
-        x = paddle.rand(input_shape, dtype="float32")
-        verify_model(PixelShuffle(2, data_format="NHWC"), x)
-
-
-@tvm.testing.uses_gpu
-def test_forward_prelu():
-    class PRelu(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, x, w):
-            return paddle.nn.functional.prelu(x, w)
-
-    x = paddle.normal(shape=[4, 3, 5, 5])
-    w = paddle.to_tensor(
-        np.array(
-            [
-                0.25,
-            ]
-        ).astype("float32")
-    )
-    verify_model(PRelu(), [x, w])
-    w2 = paddle.to_tensor(np.array([0.25, 0.5, 0.8]).astype("float32"))
-    verify_model(PRelu(), [x, w2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_arange():
-    @paddle.jit.to_static
-    def arange(inputs):
-        return paddle.arange(paddle.shape(inputs)[0], 9, 2.0)
-
-    @paddle.jit.to_static
-    def arange1(inputs):
-        return inputs + paddle.arange(0, 10.0, 8, dtype="float32")
-
-    input_shape = [2, 2]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(arange, input_data)
-    verify_model(arange1, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_rnn():
-    class RNN(nn.Layer):
-        def __init__(self, api_name, input_size, hidden_size, num_layers, direction="forward"):
-            super(RNN, self).__init__()
-            rnn_func = getattr(paddle.nn, api_name, None)
-            self.rnn = rnn_func(input_size, hidden_size, num_layers, direction=direction)
-
-        @paddle.jit.to_static
-        def forward(self, inputs, prev_h):
-            y, h = self.rnn(inputs, prev_h)
-            return y
-
-    input_size, hidden_size, num_layers = 8, 16, 2
-    input_shape = [4, 5, 8]
-    input_data = paddle.rand(input_shape, dtype="float32")
-
-    for api_name in ("SimpleRNN", "GRU"):
-        prev_h = paddle.rand([4, 4, 16], dtype="float32")
-        verify_model(
-            RNN(api_name, input_size, hidden_size, num_layers, direction="bidirectional"),
-            input_data=[input_data, prev_h],
-        )
-        prev_h = paddle.rand([2, 4, 16], dtype="float32")
-        verify_model(
-            RNN(api_name, input_size, hidden_size, num_layers), input_data=[input_data, prev_h]
-        )
-
-
-@tvm.testing.uses_gpu
-def test_forward_topk():
-    @paddle.jit.to_static
-    def topk1(inputs):
-        return paddle.topk(inputs, k=1)
-
-    @paddle.jit.to_static
-    def topk2(inputs):
-        k = paddle.to_tensor([1], dtype=paddle.int32)
-        return paddle.topk(inputs, k=k)
-
-    @paddle.jit.to_static
-    def topk3(inputs):
-        return paddle.topk(inputs, k=1, largest=False)
-
-    @paddle.jit.to_static
-    def topk4(inputs):
-        return paddle.topk(inputs, k=2, sorted=True)
-
-    @paddle.jit.to_static
-    def topk5(inputs):
-        return paddle.topk(inputs, k=2, sorted=False)
-
-    @paddle.jit.to_static
-    def topk6(inputs):
-        return paddle.topk(inputs, k=1, axis=0)
-
-    # paddle.fluid.layers.topk
-    @paddle.jit.to_static
-    def topk7(inputs):
-        return paddle.fluid.layers.topk(inputs, k=1)
-
-    @paddle.jit.to_static
-    def topk8(inputs):
-        return paddle.fluid.layers.topk(inputs, k=2)
-
-    input_data = paddle.to_tensor([[1, 4, 5, 7], [3, 6, 2, 5]], dtype=paddle.int32)
-    input_data_fp32 = paddle.to_tensor([[1, 4, 5, 7], [3, 6, 2, 5]], dtype=paddle.float32)
-    verify_model(topk1, input_data=input_data)
-    # verify_model(topk2, input_data=input_data)
-    verify_model(topk3, input_data=input_data)
-    verify_model(topk4, input_data=input_data)
-    verify_model(topk5, input_data=input_data)
-    verify_model(topk6, input_data=input_data)
-    verify_model(topk7, input_data=input_data_fp32)
-    verify_model(topk8, input_data=input_data_fp32)
-
-
-@tvm.testing.uses_gpu
-def test_forward_tanhshrink():
-    @paddle.jit.to_static
-    def tanhshrink(inputs):
-        return paddle.nn.functional.tanhshrink(inputs)
-
-    input_data = paddle.randn(shape=[2, 3], dtype="float32")
-    verify_model(tanhshrink, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_one_hot_v2():
-    @paddle.jit.to_static
-    def one_hot_v2_1(inputs):
-        return nn.functional.one_hot(inputs, num_classes=4)
-
-    input_data = paddle.to_tensor([1, 1, 3, 0], dtype=paddle.int32)
-    verify_model(one_hot_v2_1, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_norm():
-    @paddle.jit.to_static
-    def norm_1(inputs):
-        return paddle.fluid.layers.l2_normalize(inputs, -1, 1e-12)
-
-    def norm_2(inputs):
-        return paddle.fluid.layers.l2_normalize(inputs, 1, 1e-12)
-
-    input_data = paddle.to_tensor(
-        [[[1, 2], [3, 1], [4, 5]], [[3, 1], [3, 5], [2, 4]]], dtype=paddle.float32
-    )
-    verify_model(norm_1, input_data=input_data)
-    verify_model(norm_2, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_where_index():
-    @paddle.jit.to_static
-    def where_index_1(inputs):
-        return paddle.nonzero(inputs)
-
-    input_data = paddle.to_tensor([[1.0, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 3.0]])
-    verify_model(where_index_1, input_data=input_data, use_vm=True)
-
-
-@tvm.testing.uses_gpu
-def test_forward_take_along_axis():
-    @paddle.jit.to_static
-    def take_along_axis_1(inputs, index):
-        return paddle.take_along_axis(inputs, index, 0)
-
-    input_data = paddle.to_tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
-    index = paddle.to_tensor([[0]])
-    verify_model(take_along_axis_1, input_data=[input_data, index])
-
-
-@tvm.testing.uses_gpu
-def test_forward_stack():
-    class Stack1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, input0, input1, input2):
-            return paddle.stack([input0, input1, input2], axis=-1)
-
-    class Stack2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, input0, input1, input2):
-            return paddle.stack([input0, input1, input2], axis=1)
-
-    class Stack3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, input0, input1, input2):
-            return paddle.stack([input0, input1, input2], axis=2)
-
-    input_shapes = [[2, 3], [5, 10, 11], [3, 4, 5, 6]]
-    for input_shape in input_shapes:
-        input_data_0 = paddle.randn(shape=input_shape, dtype="float32")
-        input_data_1 = paddle.randn(shape=input_shape, dtype="float32")
-        input_data_2 = paddle.randn(shape=input_shape, dtype="float32")
-        verify_model(Stack1(), [input_data_0, input_data_1, input_data_2])
-        verify_model(Stack2(), [input_data_0, input_data_1, input_data_2])
-        verify_model(Stack3(), [input_data_0, input_data_1, input_data_2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_unstack():
-    class UnStack1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.unstack(inputs, axis=-1)
-
-    class UnStack2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.unstack(inputs, axis=1)
-
-    class UnStack3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.unstack(inputs, axis=0)
-
-    input_shapes = [[2, 3], [5, 10, 11], [3, 4, 5, 6], [1, 3, 4, 1, 1]]
-    for input_shape in input_shapes:
-        input_data = paddle.randn(shape=input_shape, dtype="float32")
-        verify_model(UnStack1(), input_data)
-        verify_model(UnStack2(), input_data)
-        verify_model(UnStack3(), input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_silu():
-    class Silu(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.silu(inputs)
-
-    input_shapes = [[10], [2, 3], [5, 10, 11], [3, 4, 5, 6]]
-    for input_shape in input_shapes:
-        input_data = paddle.randn(shape=input_shape, dtype="float32")
-        verify_model(Silu(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_softshrink():
-    @paddle.jit.to_static
-    def Softshrink1(input):
-        return nn.functional.softshrink(input, threshold=0.0)
-
-    @paddle.jit.to_static
-    def Softshrink2(input):
-        return nn.functional.softshrink(input, threshold=0.5)
-
-    @paddle.jit.to_static
-    def Softshrink3(input):
-        return nn.functional.softshrink(input, threshold=1.0)
-
-    x = paddle.to_tensor([-0.9, -0.2, 0.1, 0.8])
-    verify_model(Softshrink2, x)
-
-    input_shapes = [[10], [2, 3], [5, 10, 11], [3, 4, 5, 6]]
-    for input_shape in input_shapes:
-        input_data = paddle.randn(shape=input_shape, dtype="float32")
-        verify_model(Softshrink1, input_data=input_data)
-        verify_model(Softshrink2, input_data=input_data)
-        verify_model(Softshrink3, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_where():
-    @paddle.jit.to_static
-    def where1(x, y):
-        return paddle.where(x > 1, x, y)
-
-    @paddle.jit.to_static
-    def where2(x, y):
-        return paddle.where(x > y, x, y)
-
-    x = paddle.to_tensor([0.9383, 0.1983, 3.2, 1.2])
-    y = paddle.to_tensor([1.0, 1.0, 1.0, 1.0])
-    verify_model(where1, [x, y])
-
-    input_shapes = [[10], [2, 3], [5, 10, 11], [3, 4, 5, 6]]
-    for input_shape in input_shapes:
-        x = paddle.randn(shape=input_shape, dtype="float32")
-        y = paddle.randn(shape=input_shape, dtype="float32")
-        verify_model(where1, [x, y])
-        verify_model(where2, [x, y])
-
-
-@tvm.testing.uses_gpu
-def test_forward_tile():
-    class Tile1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.tile(inputs, repeat_times=[10])
-
-    class Tile2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.tile(inputs, repeat_times=[2, 3])
-
-    class Tile3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.tile(inputs, repeat_times=[1, 2, 3])
-
-    class Tile4(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.tile(inputs, repeat_times=[2, 3, 4, 1, 5])
-
-    class Tile5(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            reps = paddle.to_tensor([3, 2])
-            reps = paddle.cast(reps, "int32")
-            return paddle.tile(inputs, repeat_times=reps)
-
-    class Tile6(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            rep_0 = paddle.to_tensor([3])
-            rep_1 = paddle.to_tensor([2])
-            rep_0 = paddle.cast(rep_0, "int32")
-            rep_1 = paddle.cast(rep_1, "int32")
-            return paddle.tile(inputs, repeat_times=[rep_0, rep_1])
-
-    input_shapes = [
-        [10],
-        [2, 3],
-        [3, 4, 5],
-        [5, 3, 1, 4],
-        [1, 3, 1, 6, 7],
-    ]
-    for input_shape in input_shapes:
-        input_data = paddle.randn(shape=input_shape, dtype="float32")
-        verify_model(Tile1(), input_data=input_data)
-        verify_model(Tile2(), input_data=input_data)
-        verify_model(Tile3(), input_data=input_data)
-        verify_model(Tile4(), input_data=input_data)
-        verify_model(Tile5(), input_data=input_data)
-        verify_model(Tile6(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_mish():
-    class Mish(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.mish(inputs)
-
-    input_shapes = [[10], [2, 3], [5, 10, 11], [3, 4, 5, 6]]
-    if paddle.version.full_version >= "2.4.2":
-        for input_shape in input_shapes:
-            input_data = paddle.randn(shape=input_shape, dtype="float32")
-            verify_model(Mish(), input_data=input_data)
-            input_data += 20.0
-            verify_model(Mish(), input_data=input_data)
-
-        input_data = paddle.to_tensor([-5.0, 0.0, 5.0, 23.1, 20.0])
-        verify_model(Mish(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_thresholded_relu():
-    class ThresholdedRelu1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.thresholded_relu(inputs)
-
-    class ThresholdedRelu2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.thresholded_relu(inputs, threshold=0.5)
-
-    input_shapes = [[10], [2, 3], [5, 10, 11], [3, 4, 5, 6]]
-    for input_shape in input_shapes:
-        input_data = paddle.randn(shape=input_shape, dtype="float32")
-        verify_model(ThresholdedRelu1(), input_data=input_data)
-        verify_model(ThresholdedRelu2(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_index_select():
-    class IndexSelect1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, x, index):
-            return paddle.index_select(x, index, axis=0)
-
-    class IndexSelect2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, x, index):
-            return paddle.index_select(x, index, axis=-1)
-
-    input_shapes = [[10], [2, 3], [5, 10, 11], [3, 4, 5, 6]]
-    for input_shape in input_shapes:
-        input_data = paddle.randn(shape=input_shape, dtype="float32")
-        index = paddle.to_tensor([0, 1, 1], dtype="int32")
-        verify_model(IndexSelect1(), input_data=[input_data, index])
-        verify_model(IndexSelect2(), input_data=[input_data, index])
-
-
-@tvm.testing.uses_gpu
-def test_forward_eye():
-    class Eye1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.eye(3, 5, dtype="int32"), paddle.eye(3, 5, dtype="float32"), inputs
-
-    class Eye2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.eye(5, 3, dtype="int64"), paddle.eye(5, 3, dtype="float64"), inputs
-
-    class Eye3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.eye(0, 3, dtype="int64"), paddle.eye(0, 0, dtype="float64"), inputs
-
-    class Eye4(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.eye(4, None, dtype="int64"), paddle.eye(4, None, dtype="float64"), inputs
-
-    x = paddle.to_tensor([1], dtype="float32")
-    verify_model(Eye1(), input_data=[x])
-    verify_model(Eye2(), input_data=[x])
-    verify_model(Eye3(), input_data=[x])
-    verify_model(Eye4(), input_data=[x])
-
-
-@tvm.testing.uses_gpu
-def test_forward_linspace():
-    class Linspace1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            out1 = paddle.linspace(0.5, 7, 1, "int32")
-            out2 = paddle.linspace(1.3, 7.1, 5, "float32")
-            out3 = paddle.linspace(1, 1000000000, 10, "int64")
-            out4 = paddle.linspace(1, 7.1, 5, "float64")
-            return out1, out2, out3, out4, inputs
-
-    class Linspace2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            start = paddle.to_tensor([-2.5])
-            stop = paddle.to_tensor([31.6])
-            num = paddle.to_tensor([13])
-            start = paddle.cast(start, "float32")
-            stop = paddle.cast(stop, "float32")
-            num = paddle.cast(num, "int32")
-            out1 = paddle.linspace(start, stop, num, "int32")
-            out2 = paddle.linspace(start, stop, num, "float32")
-            out3 = paddle.linspace(start, stop, num, "int64")
-            out4 = paddle.linspace(start, stop, num, "float64")
-            return out1, out2, out3, out4, inputs
-
-    class Linspace3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, start, stop, num):
-            out1 = paddle.linspace(start, stop, num, "int32")
-            out2 = paddle.linspace(start, stop, num, "float32")
-            out3 = paddle.linspace(start, stop, num, "int64")
-            out4 = paddle.linspace(start, stop, num, "float32")
-            return out1
-
-    start = paddle.to_tensor([1.3])
-    stop = paddle.to_tensor([5.1])
-    num = paddle.to_tensor([3])
-    start = paddle.cast(start, "float32")
-    stop = paddle.cast(stop, "float32")
-    num = paddle.cast(num, "int32")
-    x = paddle.to_tensor([1], dtype="float32")
-    verify_model(Linspace1(), input_data=[x])
-    verify_model(Linspace2(), input_data=[x])
-    verify_model(Linspace3(), input_data=[start, stop, num], use_vm=True)
-    num = paddle.to_tensor([1])
-    num = paddle.cast(num, "int32")
-    verify_model(Linspace3(), input_data=[start, stop, num], use_vm=True)
-
-
-@tvm.testing.uses_gpu
-def test_forward_dist():
-    class Dist(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, x, y):
-            l0_norm = paddle.dist(x, y, 0)
-            l2_norm = paddle.dist(x, y, 2)
-            float_norm = paddle.dist(x, y, 1.3)
-            inf_norm = paddle.dist(x, y, float("inf"))
-            ninf_norm = paddle.dist(x, y, float("-inf"))
-            return l0_norm, l2_norm, float_norm, inf_norm, ninf_norm
-
-    x = paddle.to_tensor([[3, 3], [3, 3]], dtype="float32")
-    y = paddle.to_tensor([[1, 2], [3, 4]], dtype="float32")
-    w = paddle.to_tensor([[1, 2]], dtype="float32")
-    v = paddle.to_tensor([[2.1]], dtype="float32")
-    verify_model(Dist(), input_data=[x, y])
-    verify_model(Dist(), input_data=[x, w])
-    verify_model(Dist(), input_data=[w, v])
-    verify_model(Dist(), input_data=[y, v])
-
-
-@tvm.testing.uses_gpu
-def test_forward_p_norm():
-    class PNorm(nn.Layer):
-        def __init__(self, axis, keepdim, p=1):
-            super(PNorm, self).__init__()
-            self.p = p
-            self.axis = axis
-            self.keepdim = keepdim
-
-        @paddle.jit.to_static
-        def forward(self, input_data):
-            return paddle.norm(input_data, p=self.p, axis=self.axis, keepdim=self.keepdim)
-
-    input_data = paddle.rand((2, 2, 3), dtype="float32")
-    verify_model(PNorm(axis=0, keepdim=True), input_data=input_data)
-    verify_model(PNorm(axis=0, keepdim=False), input_data=input_data)
-    verify_model(PNorm(axis=1, keepdim=True, p=1.5), input_data=input_data)
-    verify_model(PNorm(axis=-1, keepdim=True, p=3.4), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_roi_align():
-    class RoiAlign(nn.Layer):
-        def __init__(self, spatial_scale=1.0, sampling_ratio=-1, aligned=False):
-            super(RoiAlign, self).__init__()
-            self.spatial_scale = spatial_scale
-            self.sampling_ratio = sampling_ratio
-            self.aligned = aligned
-
-        @paddle.jit.to_static
-        def forward(self, input_data, rois, rois_num):
-            return paddle.vision.ops.roi_align(
-                input_data, rois, rois_num, 3, self.spatial_scale, self.sampling_ratio, self.aligned
-            )
-
-    input_data = paddle.rand((1, 128, 32, 32), dtype="float32")
-    boxes = paddle.rand([3, 4])
-    boxes[:, 2] += boxes[:, 0] + 3
-    boxes[:, 3] += boxes[:, 1] + 4
-    boxes_num = paddle.to_tensor([3]).astype("int32")
-    verify_model(RoiAlign(), input_data=[input_data, boxes, boxes_num])
-    verify_model(RoiAlign(aligned=True), input_data=[input_data, boxes, boxes_num])
-    verify_model(
-        RoiAlign(spatial_scale=2.0, aligned=True), input_data=[input_data, boxes, boxes_num]
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_softmax_with_cross_entropy():
-    class SoftmaxWithCrossEntropy(nn.Layer):
-        def __init__(self, soft_label=False, ignore_index=-100, return_softmax=False, axis=-1):
-            super(SoftmaxWithCrossEntropy, self).__init__()
-            self.soft_label = soft_label
-            self.ignore_index = ignore_index
-            self.return_softmax = return_softmax
-            self.axis = axis
-
-        @paddle.jit.to_static
-        def forward(self, input_data, label):
-            return paddle.nn.functional.softmax_with_cross_entropy(
-                input_data,
-                label,
-                soft_label=self.soft_label,
-                ignore_index=self.ignore_index,
-                return_softmax=self.return_softmax,
-                axis=self.axis,
-            )
-
-    input_data = paddle.rand([5, 3], dtype="float32")
-    label = paddle.randint(0, 2, [5, 1])
-    verify_model(SoftmaxWithCrossEntropy(), input_data=[input_data, label])
-    verify_model(SoftmaxWithCrossEntropy(return_softmax=True), input_data=[input_data, label])
-    verify_model(
-        SoftmaxWithCrossEntropy(return_softmax=True, ignore_index=1), input_data=[input_data, label]
-    )
-    input_data = paddle.rand([5, 4, 3], dtype="float32")
-    label = paddle.randint(0, 2, [5, 1, 3])
-    verify_model(SoftmaxWithCrossEntropy(axis=1), input_data=[input_data, label])
-    label = paddle.randint(0, 2, [5, 4, 3]).astype("float32")
-    verify_model(SoftmaxWithCrossEntropy(soft_label=True), input_data=[input_data, label])
-    verify_model(SoftmaxWithCrossEntropy(soft_label=True, axis=0), input_data=[input_data, label])
-
-
-@tvm.testing.uses_gpu
-def test_forward_pool3d():
-    class Pool3D1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.avg_pool3d(inputs, kernel_size=2, stride=2, padding=0)
-
-    class Pool3D2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.adaptive_avg_pool3d(inputs, output_size=[3, 3, 3])
-
-    class Pool3D3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.avg_pool3d(
-                inputs,
-                kernel_size=3,
-                stride=1,
-                padding=[1, 1, 1],
-                exclusive=False,
-                divisor_override=2.5,
-            )
-
-    class Pool3D4(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.avg_pool3d(
-                inputs,
-                kernel_size=2,
-                stride=1,
-                padding=[[0, 0], [0, 0], [1, 1], [1, 1], [1, 1]],
-                ceil_mode=True,
-                data_format="NCDHW",
-            )
-
-    class Pool3D5(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.avg_pool3d(
-                inputs,
-                kernel_size=2,
-                stride=1,
-                padding=[[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]],
-                ceil_mode=True,
-                data_format="NDHWC",
-            )
-
-    input_shapes = [[1, 2, 2, 8, 8], [1, 2, 3, 10, 10]]  # [N, C, D, H, W]
-    for input_shape in input_shapes:
-        input_data = paddle.uniform(shape=input_shape, dtype="float32", min=-1, max=1)
-        verify_model(Pool3D1(), input_data=input_data)
-        verify_model(Pool3D2(), input_data=input_data)
-        verify_model(Pool3D3(), input_data=input_data)
-        verify_model(Pool3D4(), input_data=input_data)
-        verify_model(Pool3D5(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_set_value():
-    class SetValue(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs, update_input):
-            x = inputs + 1
-            x[3:] = 3
-            x[1:] = 3.0
-            x[2:] = update_input
-            x[0] = 1
-            x[-3:-2] = 1
-            x[0][0] = 5
-            return x
-
-    input_shapes = [[5, 2], [10, 3], [10, 3, 3]]
-    for input_shape in input_shapes:
-        input_data = paddle.uniform(shape=input_shape, dtype="float32", min=-1, max=1)
-        update_shape = input_shape.copy()
-        update_shape[0] = input_shape[0] - 2
-        update_input = paddle.uniform(shape=update_shape, dtype="float32", min=-1, max=1)
-        verify_model(SetValue(), input_data=[input_data, update_input])
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/pytorch/qnn_test.py b/tests/python/frontend/pytorch/qnn_test.py
deleted file mode 100644
index 1cc1a46cea6b..000000000000
--- a/tests/python/frontend/pytorch/qnn_test.py
+++ /dev/null
@@ -1,803 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-""" Tests on quantized torch model conversion """
-import os
-
-import numpy as np
-import torch
-import tvm
-import tvm.testing
-from PIL import Image
-from torch import nn
-from torch.quantization import (
-    DeQuantStub,
-    QuantStub,
-    QuantWrapper,
-    fuse_modules,
-    get_default_qat_qconfig,
-    prepare_qat,
-)
-from tvm import relay
-from tvm.contrib.download import download_testdata
-from tvm.relay.frontend.pytorch_utils import is_version_greater_than
-from tvm.relay.op.contrib.register import get_pattern_table, register_pattern_table
-
-
-def torch_version_check():
-    from packaging import version
-
-    return version.parse(torch.__version__) > version.parse("1.4.0")
-
-
-def get_tvm_runtime(script_module, input_name, ishape, keep_quantized_weight=False, target="llvm"):
-    input_shapes = [(input_name, ishape)]
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_pytorch(
-            script_module, input_shapes, keep_quantized_weight=keep_quantized_weight
-        )
-    with tvm.testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_pytorch(
-            script_module, input_shapes, keep_quantized_weight=keep_quantized_weight
-        )
-    tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-
-    if keep_quantized_weight:
-        for p in params.values():
-            assert p.dtype in ["int8", "int32"]
-
-    with tvm.transform.PassContext(opt_level=3):
-        # test on only cpu for now, torch cannot run quant models on cuda
-        # also not to make CI too slow
-        lib = relay.build(mod, target=target, params=params)
-
-    runtime = tvm.contrib.graph_executor.GraphModule(lib["default"](tvm.device(target, 0)))
-    return runtime
-
-
-def get_qconfig(per_channel):
-    from torch.quantization.observer import (
-        MovingAverageMinMaxObserver,
-        default_weight_observer,
-    )
-
-    if per_channel:
-        return torch.quantization.get_default_qconfig("fbgemm")
-    else:
-        act = MovingAverageMinMaxObserver.with_args(reduce_range=False)
-        return torch.quantization.QConfig(activation=act, weight=default_weight_observer)
-
-
-def quantize_model(model, inp, per_channel=False):
-    model.fuse_model()
-    model.qconfig = get_qconfig(per_channel)
-    torch.quantization.prepare(model, inplace=True)
-    model(inp)
-    torch.quantization.convert(model, inplace=True)
-
-
-class ConvBn(nn.Module):
-    def __init__(self, with_relu=False):
-        super().__init__()
-        layers = [nn.Conv2d(3, 32, 3, bias=True), nn.BatchNorm2d(32)]
-        if with_relu:
-            layers.append(nn.ReLU())
-        self.conv = nn.Sequential(*layers)
-        self.quant_wrap = QuantWrapper(self.conv)
-        self.with_relu = with_relu
-
-    def forward(self, x):
-        return self.quant_wrap(x)
-
-    def fuse_model(self):
-        indices = ["0", "1"]
-        if self.with_relu:
-            indices.append("2")
-        fuse_modules(self.conv, indices, inplace=True)
-
-
-class ConvTranspose(nn.Module):
-    def __init__(self):
-        super().__init__()
-        layers = [nn.ConvTranspose2d(3, 32, 3, bias=True)]
-        self.conv = nn.Sequential(*layers)
-        self.quant_wrap = QuantWrapper(self.conv)
-
-    def forward(self, x):
-        return self.quant_wrap(x)
-
-    def fuse_model(self):
-        pass
-
-
-class Linear(nn.Module):
-    def __init__(self, with_relu=False):
-        super().__init__()
-        layers = [nn.Linear(16, 32)]
-        if with_relu:
-            layers.append(nn.ReLU())
-        self.fc = nn.Sequential(*layers)
-        self.quant_wrap = QuantWrapper(self.fc)
-        self.with_relu = with_relu
-
-    def forward(self, x):
-        return self.quant_wrap(x)
-
-    def fuse_model(self):
-        if self.with_relu:
-            fuse_modules(self.fc, ["0", "1"], inplace=True)
-
-
-class ReLU(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.relu = QuantWrapper(nn.ReLU())
-
-    def forward(self, x):
-        return self.relu(x)
-
-    def fuse_model(self):
-        pass
-
-
-class LeakyReLU(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.leaky_relu = QuantWrapper(nn.LeakyReLU())
-
-    def forward(self, x):
-        return self.leaky_relu(x)
-
-    def fuse_model(self):
-        pass
-
-
-# Mobilenet V3 related modules
-class Hsigmoid(nn.Module):
-    def __init__(self, add_stub=False):
-        super().__init__()
-        self.quant = QuantStub()
-        self.dequant = DeQuantStub()
-        self.add_stub = add_stub
-        self.hsigmoid = nn.Hardsigmoid()
-
-    def forward(self, x):
-        if self.add_stub:
-            x = self.quant(x)
-        x = self.hsigmoid(x)
-        if self.add_stub:
-            x = self.dequant(x)
-        return x
-
-    def fuse_model(self):
-        pass
-
-
-class Hswish(nn.Module):
-    def __init__(self, add_stub=False):
-        super().__init__()
-        self.hswish = QuantWrapper(nn.Hardswish())
-
-    def forward(self, x):
-        return self.hswish(x)
-
-    def fuse_model(self):
-        pass
-
-
-class SqueezeExcite(nn.Module):
-    def __init__(self, channel, reduction=4, add_stub=False):
-        super(SqueezeExcite, self).__init__()
-        self.avg_pool = nn.AdaptiveAvgPool2d(1)
-        self.fc = nn.Sequential(
-            nn.Linear(channel, channel // reduction, bias=False),
-            nn.ReLU(inplace=True),
-            nn.Linear(channel // reduction, channel, bias=False),
-            Hsigmoid(add_stub=False),
-        )
-        self.fmul = nn.quantized.FloatFunctional()
-        self.quant = QuantStub()
-        self.dequant = DeQuantStub()
-        self.add_stub = add_stub
-
-    def forward(self, x):
-        b, c, _, _ = x.size()
-        if self.add_stub:
-            x = self.quant(x)
-        y = self.avg_pool(x).view(b, c)
-        y = self.fc(y).view(b, c, 1, 1)
-        out = self.fmul.mul(x, y.expand_as(x))
-        if self.add_stub:
-            return self.dequant(out)
-        else:
-            return out
-
-    def fuse_model(self):
-        fuse_modules(self.fc, ["0", "1"], inplace=True)
-
-
-# test on quantized::mul_scalar with negative scale
-class MulScalarNegative(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.float_op = nn.quantized.FloatFunctional()
-        self.quant = QuantStub()
-        self.dequant = DeQuantStub()
-
-    def forward(self, x):
-        x = self.quant(x)
-        mul = self.float_op.mul_scalar(x, -0.3)
-        return self.dequant(mul)
-
-    def fuse_model(self):
-        pass
-
-
-class UpsamplingBilinear(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.quant = QuantStub()
-        self.dequant = DeQuantStub()
-
-    def forward(self, x):
-        x = self.quant(x)
-        upsample = nn.functional.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True)
-        return self.dequant(upsample)
-
-    def fuse_model(self):
-        pass
-
-
-class AvgPool2d(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.pool = QuantWrapper(nn.AvgPool2d(kernel_size=2))
-
-    def forward(self, x):
-        return self.pool(x)
-
-    def fuse_model(self):
-        pass
-
-
-class AdaptiveAvgPool2d(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.pool = QuantWrapper(nn.AdaptiveAvgPool2d((1, 1)))
-
-    def forward(self, x):
-        return self.pool(x)
-
-    def fuse_model(self):
-        pass
-
-
-def test_quantized_modules():
-    imagenet_ishape = (1, 3, 224, 224)
-
-    qmodules = [
-        ("relu", imagenet_ishape, ReLU(), False),
-        ("upsample bilinear", (1, 3, 64, 64), UpsamplingBilinear(), False),
-        ("avgpool", imagenet_ishape, AvgPool2d(), False),
-    ]
-
-    for per_channel in [False, True]:
-        if per_channel:
-            postfix = ", per_channel"
-        else:
-            postfix = ""
-
-        qmodules += [
-            ("conv_bn" + postfix, imagenet_ishape, ConvBn(), per_channel),
-            ("conv_bn_relu" + postfix, imagenet_ishape, ConvBn(with_relu=True), per_channel),
-            ("linear" + postfix, (16, 16), Linear(), per_channel),
-            ("linear_relu" + postfix, (16, 16), Linear(with_relu=True), per_channel),
-            ("conv_transpose", imagenet_ishape, ConvTranspose(), False),
-            ("hsigmoid", imagenet_ishape, Hsigmoid(add_stub=True), False),
-            ("hswish", imagenet_ishape, Hswish(), False),
-            ("semodule", (1, 16, 64, 64), SqueezeExcite(16, add_stub=True), False),
-            ("semodule, per_channel", (1, 16, 64, 64), SqueezeExcite(16, add_stub=True), True),
-            ("mul_scalar negative", imagenet_ishape, MulScalarNegative(), False),
-            ("leaky_relu", imagenet_ishape, LeakyReLU(), False),
-        ]
-
-    for (module_name, ishape, raw_module, per_channel) in qmodules:
-        raw_module.eval()
-        inp = torch.rand(ishape)
-
-        # quantized conv_transpose2d is supported only with qnnpack engine before torch v1.8.0.
-        if module_name == "conv_transpose" and not is_version_greater_than("1.7.1"):
-            prev_engine = torch.backends.quantized.engine
-            torch.backends.quantized.engine = "qnnpack"
-            quantize_model(raw_module, inp, per_channel=per_channel)
-            torch.backends.quantized.engine = prev_engine
-        else:
-            quantize_model(raw_module, inp, per_channel=per_channel)
-
-        script_module = torch.jit.trace(raw_module, inp).eval()
-
-        with torch.no_grad():
-            pt_result = script_module(inp.clone()).numpy()
-
-        input_name = "input"
-        runtime = get_tvm_runtime(script_module, input_name, ishape)
-        runtime.set_input(input_name, inp.numpy().copy())
-        runtime.run()
-        tvm_result = runtime.get_output(0).numpy()
-
-        max_abs_diff = np.max(np.abs(tvm_result - pt_result))
-        mean_abs_diff = np.mean(np.abs(tvm_result - pt_result))
-        num_identical = np.sum(tvm_result == pt_result)
-        match_ratio = num_identical / float(np.prod(tvm_result.shape))
-
-        print(module_name, max_abs_diff, mean_abs_diff, match_ratio)
-
-        if "linear" in module_name and tvm.get_global_func("tvm.contrib.cublas.matmul", True):
-            runtime = get_tvm_runtime(script_module, input_name, ishape, target="cuda -libs=cublas")
-            runtime.set_input(input_name, inp.numpy().copy())
-            runtime.run()
-            cublas_result = runtime.get_output(0).numpy()
-            # It is generally safe to enable this assertion, but disabled for CI
-            # tvm.testing.assert_allclose(cublas_result, pt_result, atol=1e-5, rtol=1e-5)
-            print(np.max(np.abs(cublas_result - pt_result)))
-
-        # sample outputs
-        """
-        relu 0.0039215684 2.6052087e-08 0.9999933567176871
-        leaky_relu 0.0 0.0 1.0
-        upsample bilinear 0.0 0.0 1.0
-        conv_bn 0.22062653 0.011478779 0.6909348115006899
-        conv_bn_relu 0.3700896 0.010921672 0.7489366477964451
-        linear 0.15987062 0.009231662 0.794921875
-        linear_relu 0.14180502 0.0053220326 0.8828125
-        conv_transpose 0.0033792555 4.4658788e-07 0.9998678439971806
-        conv_bn, per_channel 0.01654929 2.9486866e-06 0.9998218235127019
-        conv_bn_relu, per_channel 0.009089053 1.4926576e-06 0.9998357732732732
-        linear, per_channel 0.0 0.0 1.0
-        linear_relu, per_channel 0.0 0.0 1.0
-        hsigmoid 0.002614379 0.00020525524 0.9214896896258503
-        hswish 0.0026143193 1.7367661e-08 0.9999933567176871
-        hswish, per_channel 0.0 0.0 1.0
-        semodule, per_channel 0.0039885044 0.0008620687 0.7838592529296875
-        mul_scalar negative 0.0011764616 7.815566e-09 0.9999933567176871
-        """
-
-        # we cannot make any guarantee on how close the raw output is to torch
-        # tvm.testing.assert_allclose(tvm_result, pt_result, rtol=1e-1, atol=1e-1)
-
-
-def test_quantized_imagenet():
-    def get_transform():
-        import torchvision.transforms as transforms
-
-        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-        return transforms.Compose(
-            [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize]
-        )
-
-    def get_real_image(im_height, im_width):
-        repo_base = "https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/"
-        img_name = "elephant-299.jpg"
-        image_url = os.path.join(repo_base, img_name)
-        img_path = download_testdata(image_url, img_name, module="data")
-        return Image.open(img_path).resize((im_height, im_width))
-
-    def get_imagenet_input():
-        im = get_real_image(224, 224)
-        preprocess = get_transform()
-        pt_tensor = preprocess(im)
-        return np.expand_dims(pt_tensor.numpy(), 0)
-
-    from torchvision.models.quantization import googlenet as qgooglenet
-    from torchvision.models.quantization import inception as qinception
-    from torchvision.models.quantization import mobilenet as qmobilenet
-    from torchvision.models.quantization import (
-        mobilenet_v3_large as qmobilenet_v3_large,
-    )
-    from torchvision.models.quantization import resnet as qresnet
-
-    per_channel = True
-    qmodels = [
-        ("resnet18", qresnet.resnet18(pretrained=True), per_channel),
-        ("mobilenet_v2", qmobilenet.mobilenet_v2(pretrained=True), per_channel),
-        ("inception_v3", qinception.inception_v3(pretrained=True), per_channel),
-        # tracing quantized googlenet broken as of v1.6
-        # ("googlenet", qgooglenet(pretrained=True), per_channel),
-        # As of v1.10, quantized mobilenet v3 has a weird segfault issue
-        # during make_conv_packed_param
-        # See https://ci.tlcpack.ai/blue/organizations/jenkins/tvm/detail/ci-docker-staging/192
-        # ("mobilenet_v3_large", qmobilenet_v3_large(pretrained=True, quantize=True).eval(), True)
-    ]
-
-    results = []
-
-    for (model_name, raw_model, per_channel) in qmodels:
-        raw_model.eval()
-
-        if per_channel:
-            model_name += ", per channel quantization"
-        else:
-            model_name += ", per tensor quantization"
-
-        inp = get_imagenet_input()
-        pt_inp = torch.from_numpy(inp)
-
-        if "mobilenet_v3_large" not in model_name:
-            # mv3 was qat-ed, quantize=True option above makes it already quantized
-            quantize_model(raw_model, pt_inp, per_channel=per_channel)
-
-        script_module = torch.jit.trace(raw_model, pt_inp).eval()
-
-        with torch.no_grad():
-            pt_result = script_module(pt_inp).numpy()
-
-        input_name = "image"
-        runtime = get_tvm_runtime(script_module, input_name, (1, 3, 224, 224))
-        runtime.set_input(input_name, inp)
-        runtime.run()
-
-        tvm_result = runtime.get_output(0).numpy()
-
-        results.append((model_name, pt_result[0], tvm_result[0]))
-
-    for (model_name, pt_result, tvm_result) in results:
-        max_abs_diff = np.max(np.abs(tvm_result - pt_result))
-        mean_abs_diff = np.mean(np.abs(tvm_result - pt_result))
-        num_identical = np.sum(tvm_result == pt_result)
-        pt_top3_labels = np.argsort(pt_result)[::-1][:3]
-        tvm_top3_labels = np.argsort(tvm_result)[::-1][:3]
-
-        print("\nModel name: %s" % model_name)
-        print("PyTorch top3 label:", pt_top3_labels)
-        print("TVM top3 label:", tvm_top3_labels)
-        print("max abs diff:", max_abs_diff)
-        print("mean abs_diff:", mean_abs_diff)
-        print("%d in 1000 raw outputs identical." % num_identical)
-
-        assert set(pt_top3_labels) == set(tvm_top3_labels)
-
-        # sample outputs
-        """
-        Model name: resnet18, per tensor quantization
-        PyTorch top3 label: [386 101 385]
-        TVM top3 label: [386 101 385]
-        max abs diff: 0.65681696
-        mean abs_diff: 0.14055882
-        236 in 1000 raw outputs identical.
-
-        Model name: mobilenet_v2, per tensor quantization
-        PyTorch top3 label: [101 386 385]
-        TVM top3 label: [101 386 385]
-        max abs diff: 2.1262953
-        mean abs_diff: 0.41025686
-        101 in 1000 raw outputs identical.
-
-        Model name: inception_v3, per tensor quantization
-        PyTorch top3 label: [101 386 385]
-        TVM top3 label: [101 386 385]
-        max abs diff: 0.9994669
-        mean abs_diff: 0.098697364
-        272 in 1000 raw outputs identical.
-
-        Model name: googlenet, per tensor quantization
-        PyTorch top3 label: [101 386 385]
-        TVM top3 label: [101 386 385]
-        max abs diff: 0.28248847
-        mean abs_diff: 0.0634469
-        274 in 1000 raw outputs identical.
-
-        Model name: resnet18, per channel quantization
-        PyTorch top3 label: [101 386 385]
-        TVM top3 label: [101 386 385]
-        max abs diff: 0.65908074
-        mean abs_diff: 0.1274223
-        469 in 1000 raw outputs identical.
-
-        Model name: mobilenet_v2, per channel quantization
-        PyTorch top3 label: [101 386 385]
-        TVM top3 label: [101 386 385]
-        max abs diff: 0.71120834
-        mean abs_diff: 0.15883648
-        423 in 1000 raw outputs identical.
-
-        Model name: inception_v3, per channel quantization
-        PyTorch top3 label: [386 101 385]
-        TVM top3 label: [386 101 385]
-        max abs diff: 1.3372154
-        mean abs_diff: 0.1225224
-        401 in 1000 raw outputs identical.
-
-        Model name: googlenet, per channel quantization
-        PyTorch top3 label: [101 386 385]
-        TVM top3 label: [101 386 385]
-        max abs diff: 0.34015465
-        mean abs_diff: 0.054197952
-        558 in 1000 raw outputs identical.
-        """
-
-
-def test_serialized_modules():
-    ishape = (1, 16, 64, 64)
-    raw_module = AdaptiveAvgPool2d().eval()
-    inp = torch.rand(ishape)
-
-    quantize_model(raw_module, inp)
-    script_module = torch.jit.trace(raw_module, inp).eval()
-
-    fname = "tmp.pt"
-    torch.jit.save(script_module, fname)
-    loaded = torch.jit.load(fname)
-    os.remove(fname)
-
-    with torch.no_grad():
-        pt_result = loaded(inp.clone()).numpy()
-
-    input_name = "input"
-    runtime = get_tvm_runtime(loaded, input_name, ishape)
-    runtime.set_input(input_name, inp.numpy().copy())
-    runtime.run()
-    tvm_result = runtime.get_output(0).numpy()
-
-    # with 0.5ish results, 1e-2 is relative accuracy close to 2**-6.
-    # for simple layers like here this should be achievable
-    # with 8 bit quantization
-    # we only require 90% match just to be sure
-    num_identical = np.sum(np.abs(tvm_result - pt_result) < 1e-2)
-    match_ratio = num_identical / float(np.prod(tvm_result.shape))
-    assert match_ratio > 0.90
-
-
-def test_quantize_dynamic():
-    # A wrapper is required for quantize_dynamic to work correctly
-    class LinearWrapper(nn.Module):
-        def __init__(self, in_dim, hidden_dim):
-            super().__init__()
-            self.linear = nn.Linear(in_dim, hidden_dim)
-
-        def forward(self, inp):
-            return self.linear(inp)
-
-    torch.manual_seed(0)
-    mod = LinearWrapper(16, 32)
-
-    for qconfig in [
-        torch.quantization.per_channel_dynamic_qconfig,
-        torch.quantization.default_dynamic_qconfig,
-    ]:
-        for ishape in [(16, 16), (10, 16, 16)]:
-            qspec = {nn.Linear: qconfig}
-            qmod = torch.quantization.quantize_dynamic(mod, qconfig_spec=qspec, dtype=torch.qint8)
-
-            inp = torch.randn(*ishape)
-            script_module = torch.jit.trace(qmod, inp).eval()
-
-            with torch.no_grad():
-                pt_result = script_module(inp.clone()).numpy()
-
-            input_name = "input"
-            runtime = get_tvm_runtime(script_module, "input", inp.shape)
-            runtime.set_input(input_name, inp.numpy().copy())
-            runtime.run()
-            tvm_result = runtime.get_output(0).numpy()
-
-            # Only compare with the PyTorch result for version v1.6 or newer
-            # Have seen a strange accuracy problem from PyTorch 1.4 and 1.5
-            # Even with the manual random seed set, the same PyTorch
-            # version can outputs slightly different results depending on an environment.
-            # Outputs from v1.6 seem reliable. TVM's outputs are always the same
-            if is_version_greater_than("1.5.1"):
-                tvm.testing.assert_allclose(tvm_result, pt_result, rtol=1e-4, atol=1e-4)
-
-
-def make_qnn_add_pattern():
-    from tvm.relay.dataflow_pattern import is_op, wildcard
-
-    lhs = wildcard()
-    rhs = wildcard()
-    lhs_scale = wildcard()
-    lhs_zero_point = wildcard()
-    rhs_scale = wildcard()
-    rhs_zero_point = wildcard()
-    output_scale = wildcard()
-    output_zero_point = wildcard()
-    qadd = is_op("qnn.add")(
-        lhs,
-        rhs,
-        lhs_scale,
-        lhs_zero_point,
-        rhs_scale,
-        rhs_zero_point,
-        output_scale,
-        output_zero_point,
-    )
-    return qadd.optional(is_op("clip"))
-
-
-@register_pattern_table("test_table")
-def pattern_table():
-    return [
-        ("qnn_add", make_qnn_add_pattern()),
-    ]
-
-
-def run_qnn_mergecomposite(script_module, input_name, ishape):
-    input_shapes = [(input_name, ishape)]
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_pytorch(script_module, input_shapes)
-    with tvm.testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_pytorch(script_module, input_shapes)
-    tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-    pattern_table = get_pattern_table("test_table")
-    with tvm.transform.PassContext(opt_level=3):
-        pass_list = [
-            tvm.relay.transform.SimplifyInference(),
-            tvm.relay.transform.MergeComposite(pattern_table),
-        ]
-        composite_partition = tvm.transform.Sequential(pass_list)
-        partitioned = composite_partition(mod)
-
-
-def test_qnn_mergecomposite():
-    from torchvision.models.quantization import resnet as qresnet
-
-    model = qresnet.resnet18(pretrained=True)
-    model.eval()
-
-    inp = torch.zeros((1, 3, 224, 224))
-    model.fuse_model()
-    model.qconfig = torch.quantization.get_default_qconfig("fbgemm")
-    torch.quantization.prepare(model, inplace=True)
-    model(inp)
-    torch.quantization.convert(model, inplace=True)
-    script_module = torch.jit.trace(model, inp).eval()
-
-    input_name = "image"
-    run_qnn_mergecomposite(script_module, input_name, inp.shape)
-
-
-def test_keep_quantized_weight():
-    qmodules = []
-
-    for per_channel in [False, True]:
-        qmodules += [
-            ((1, 3, 224, 224), ConvBn(), per_channel),
-            ((16, 16), Linear(), per_channel),
-        ]
-
-    for (ishape, raw_module, per_channel) in qmodules:
-        raw_module.eval()
-        inp = torch.rand(ishape)
-
-        quantize_model(raw_module, inp, per_channel=per_channel)
-        script_module = torch.jit.trace(raw_module, inp).eval()
-
-        input_name = "input"
-
-        runtime = get_tvm_runtime(script_module, input_name, ishape, keep_quantized_weight=False)
-        runtime.set_input(input_name, inp.numpy().copy())
-        runtime.run()
-        tvm_result = runtime.get_output(0).numpy()
-
-        runtime_int8_weight = get_tvm_runtime(
-            script_module, input_name, ishape, keep_quantized_weight=True
-        )
-        runtime_int8_weight.set_input(input_name, inp.numpy().copy())
-        runtime_int8_weight.run()
-        tvm_result_int8_weight = runtime_int8_weight.get_output(0).numpy()
-
-        tvm.testing.assert_allclose(tvm_result, tvm_result_int8_weight)
-
-
-def test_tuple_lowered():
-    # See the following discuss thread for details
-    # https://discuss.tvm.apache.org/t/bug-frontend-pytorch-relay-ir-is-inconsistent-with-that-of-the-original-model/12010
-
-    class ConvBnRelu(nn.Module):
-        def __init__(self, inp, oup, kernel_size=3, stride=1, padding=1, bias=True, groups=1):
-            super(ConvBnRelu, self).__init__()
-            if groups > 1:
-                self.conv = nn.Conv2d(
-                    inp, inp, kernel_size, stride, padding, bias=bias, groups=groups
-                )
-                self.bn = nn.BatchNorm2d(inp)
-            else:
-                self.conv = nn.Conv2d(
-                    inp, oup, kernel_size, stride, padding, bias=bias, groups=groups
-                )
-                self.bn = nn.BatchNorm2d(oup)
-            self.relu = nn.ReLU(inplace=True)
-
-        def forward(self, inputs):
-            x = self.conv(inputs)
-            x = self.bn(x)
-            x = self.relu(x)
-            return x
-
-    def conv_bn(inp, oup, stride=1, width_multiplier=1):
-        return ConvBnRelu(inp, oup, kernel_size=3, stride=stride, padding=1, bias=False)
-
-    def conv_dw(inp, oup, stride, width_multiplier=1, padding=1):
-        dw_block = nn.Sequential()
-        depth_wise = ConvBnRelu(
-            inp, oup, kernel_size=3, stride=stride, padding=padding, bias=False, groups=inp
-        )
-        point_wise = ConvBnRelu(inp, oup, kernel_size=1, stride=1, padding=0, bias=False)
-
-        dw_block.add_module("depth_wise", depth_wise)
-        dw_block.add_module("point_wise", point_wise)
-
-        return dw_block
-
-    class Backbone(nn.Module):
-        def __init__(self, width_multiplier=1):
-            super(Backbone, self).__init__()
-            self.width_multiplier = width_multiplier
-            self.conv1 = conv_bn(3, 16, 2, self.width_multiplier)
-            self.conv2 = conv_dw(16, 32, 1, self.width_multiplier)
-
-        def forward(self, inputs):
-            x1 = self.conv1(inputs)
-            x2 = self.conv2(x1)
-            return [x1, x2]
-
-    class QuantizableBackbone(nn.Module):
-        def __init__(self, inputsize=(128, 128)):
-            super(QuantizableBackbone, self).__init__()
-            self.quant = QuantStub()
-            self.dequant = DeQuantStub()
-            self.backbone = Backbone()
-
-        def fuse_model(self):
-            fuse_modules_qat = getattr(torch.ao.quantization, "fuse_modules_qat", fuse_modules)
-            for idx, m in enumerate(self.modules()):
-                if type(m) == ConvBnRelu:
-                    fuse_modules_qat(m, ["conv", "bn", "relu"], inplace=True)
-
-        def forward(self, input):
-            input = self.quant(input)
-            y0, y1 = self.backbone(input)
-            y0 = self.dequant(y0)
-            y1 = self.dequant(y1)
-            return y0, y1
-
-    fp32_input = torch.randn(1, 3, 128, 128)
-    model = QuantizableBackbone()
-    model.train()
-    model.fuse_model()
-    model.qconfig = get_default_qat_qconfig("qnnpack")
-
-    prepare_qat(model, inplace=True)
-
-    model.eval()
-    model(fp32_input)
-
-    model_int8 = torch.quantization.convert(model, inplace=True)
-    script_module = torch.jit.trace(model_int8, fp32_input).eval()
-
-    input_infos = [("input", (fp32_input.shape, "float32"))]
-    with tvm.testing.disable_span_filling():
-        mod, _ = relay.frontend.from_pytorch(script_module, input_infos)
-    with tvm.testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_pytorch(script_module, input_infos)
-    tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-    output = mod["main"].body
-
-    assert isinstance(output, relay.Tuple) and len(output) == 2
-    dq1, dq2 = output
-    assert dq1.op.name == "qnn.dequantize" and dq2.op.name == "qnn.dequantize"
-    scale1 = dq1.args[1].data.numpy().item()
-    scale2 = dq2.args[1].data.numpy().item()
-    assert scale1 != scale2
diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py
deleted file mode 100644
index 9f8fac93061c..000000000000
--- a/tests/python/frontend/pytorch/test_forward.py
+++ /dev/null
@@ -1,5884 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument, missing-function-docstring
-"""Unit tests for various models and operators"""
-import os
-import platform
-import sys
-
-from packaging import version as package_version
-
-import pytest
-import numpy as np
-
-import torch
-from torch.nn import Module
-from torch.nn import functional as F
-import torchvision
-
-import tvm
-import tvm.testing
-from tvm import relay
-from tvm.contrib import graph_executor
-from tvm.contrib.nvcc import have_fp16
-from tvm.contrib import cudnn, utils
-from relay.utils.tag_span import _create_span, _set_span, _verify_structural_equal_with_span
-
-sys.setrecursionlimit(10000)
-if torch.cuda.is_available():
-    torch.backends.cuda.matmul.allow_tf32 = False
-    torch.backends.cudnn.allow_tf32 = False
-
-
-def list_ops(expr):
-    """list_ops"""
-
-    class OpLister(tvm.relay.ExprVisitor):
-        """OpLister inherits from ExprVisitor"""
-
-        def visit_op(self, op):
-            if op not in self.node_set:
-                self.node_list.append(op)
-            return super().visit_op(op)
-
-        def list_nodes(self, expr):
-            self.node_set = {}
-            self.node_list = []
-            self.visit(expr)
-            return self.node_list
-
-    return OpLister().list_nodes(expr)
-
-
-def assert_shapes_match(tru, est):
-    """Verfiy whether the shapes are equal"""
-    if tru.shape != est.shape:
-        msg = "Output shapes {} and {} don't match"
-        raise AssertionError(msg.format(tru.shape, est.shape))
-
-
-def load_torchvision(model_name):
-    """Given a model name, returns a Torchvision model in eval mode as well
-    as an example input."""
-    with torch.no_grad():
-        if model_name.startswith("inception"):
-            height = width = 299
-            mean = [0.5, 0.5, 0.5]
-            std = [0.5, 0.5, 0.5]
-        else:
-            height = width = 224
-            mean = [0.485, 0.456, 0.406]
-            std = [0.229, 0.224, 0.225]
-        input_shape = [1, 3, height, width]
-        input_data = torch.randn(input_shape).float()
-        for channel in range(3):
-            input_data[:, channel] -= mean[channel]
-            input_data[:, channel] /= std[channel]
-
-        if model_name.startswith("googlenet"):
-            model = getattr(torchvision.models, model_name)(pretrained=True, aux_logits=True)
-        else:
-            model = getattr(torchvision.models, model_name)(pretrained=True)
-        model = model.float().eval()
-        return model, [input_data]
-
-
-def load_pretrainedmodels(model_name):
-    """Given a model name, returns a pretrainedmodels.pytorch model in eval
-    mode as well as an example input."""
-    # pylint: disable=import-outside-toplevel
-    import pretrainedmodels  # https://github.com/Cadene/pretrained-models.pytorch
-
-    model = getattr(pretrainedmodels, model_name)().float().eval()
-    input_shape = [1, *model.input_size]
-    input_data = torch.rand(input_shape).float() * 256
-    for channel in range(3):
-        input_data[:, channel] -= model.mean[channel]
-        input_data[:, channel] /= model.std[channel]
-    return model, [input_data]
-
-
-def load_model(model_name):
-    """Given a model name, returns a model as well as an example input."""
-    if hasattr(torchvision.models, model_name):
-        return load_torchvision(model_name)
-    # pylint: disable=import-outside-toplevel
-    try:
-        import pretrainedmodels
-
-        if hasattr(pretrainedmodels, model_name):
-            return load_pretrainedmodels(model_name)
-    except ModuleNotFoundError as e:
-        raise ModuleNotFoundError("Please install pretrainedmodels.pytorch") from e
-    raise RuntimeError("Model not supported")
-
-
-def verify_model(
-    model_name,
-    input_data=None,
-    custom_convert_map=None,
-    rtol=1e-5,
-    atol=1e-5,
-    expected_ops=None,
-    kind="graph",
-    check_correctness=True,
-    cpu_only=False,
-    validate_structural_equal=True,
-):
-    """Assert that the output of a compiled model matches with that of its
-    baseline."""
-    input_data = [] if input_data is None else input_data
-    custom_convert_map = custom_convert_map or {}
-    expected_ops = expected_ops or []
-    if isinstance(model_name, str):
-        baseline_model, baseline_input = load_model(model_name)
-    elif isinstance(input_data, list):
-        baseline_model = model_name
-        baseline_input = input_data
-    elif isinstance(input_data, torch.Tensor) or not input_data.shape:
-        baseline_model = model_name
-        baseline_input = [input_data]
-    else:
-        assert False, "Unexpected input format"
-    if torch.cuda.is_available():
-        if isinstance(baseline_model, torch.nn.Module):
-            baseline_model = baseline_model.cuda()
-        baseline_input = [inp.cuda() for inp in baseline_input]
-
-    with torch.no_grad():
-        baseline_outputs = baseline_model(*[input.clone() for input in baseline_input])
-
-    if isinstance(baseline_outputs, tuple):
-        baseline_outputs = tuple(out.cpu().numpy() for out in baseline_outputs)
-    else:
-        baseline_outputs = (baseline_outputs.cpu().numpy(),)
-
-    trace = torch.jit.trace(baseline_model, [input.clone() for input in baseline_input])
-    if isinstance(baseline_model, torch.nn.Module):
-        trace = trace.float().eval()
-
-        if torch.cuda.is_available():
-            trace = trace.cuda()
-        else:
-            trace = trace.cpu()
-
-    input_names = [f"input{idx}" for idx, _ in enumerate(baseline_input)]
-    input_shapes = list(zip(input_names, [inp.shape for inp in baseline_input]))
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_pytorch(trace, input_shapes, custom_convert_map)
-    if validate_structural_equal:
-        with tvm.testing.enable_span_filling():
-            mod_with_span, _ = relay.frontend.from_pytorch(trace, input_shapes, custom_convert_map)
-        tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-
-    for arg in mod["main"].params[: len(input_names)]:
-        assert arg.name_hint in input_names
-    compiled_input = dict(zip(input_names, [inp.clone().cpu().numpy() for inp in baseline_input]))
-
-    targets = ["llvm"]
-    if not cpu_only:
-        targets.append("cuda")
-
-    with tvm.transform.PassContext(opt_level=3):
-        for target in targets:
-            if not tvm.runtime.enabled(target):
-                continue
-            dev = tvm.device(target, 0)
-            exe = relay.create_executor(
-                kind, mod=mod, params=params, device=dev, target=target
-            ).evaluate()
-            result = exe(**compiled_input)
-            if not isinstance(result, list):
-                result = [result]
-
-            for i, baseline_output in enumerate(baseline_outputs):
-                output = result[i].numpy()
-
-                assert_shapes_match(baseline_output, output)
-                if check_correctness:
-                    tvm.testing.assert_allclose(baseline_output, output, rtol=rtol, atol=atol)
-
-    if expected_ops:
-
-        def visit(op):
-            if isinstance(op, tvm.ir.op.Op):
-                if op.name in expected_ops:
-                    expected_ops.remove(op.name)
-
-        tvm.relay.analysis.post_order_visit(mod["main"].body, visit)
-
-        if expected_ops:
-            msg = "TVM Relay do not contain expected ops {}"
-            raise AssertionError(msg.format(expected_ops))
-
-    del model_name
-    del baseline_model
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-
-
-def verify_model_with_input(
-    test_func,
-    input_data,
-    *,
-    input_dict=None,
-    custom_convert_map=None,
-    rtol=1e-5,
-    atol=1e-5,
-    assert_shape_only=False,
-    validate_structural_equal=True,
-):
-    """Generic function to generate and compare Pytorch and TVM output"""
-    input_dict = input_dict or {}
-    custom_convert_map = custom_convert_map or {}
-    baseline_outputs = test_func(*input_data)
-    trace = torch.jit.trace(test_func, [input.clone() for input in input_data])
-    input_names = [f"input{idx}" for idx, _ in enumerate(input_data)]
-    input_shapes = list(zip(input_names, [inp.shape for inp in input_data]))
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_pytorch(trace, input_shapes, custom_convert_map)
-    if validate_structural_equal:
-        with tvm.testing.enable_span_filling():
-            mod_with_span, _ = relay.frontend.from_pytorch(trace, input_shapes, custom_convert_map)
-        tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-
-    with tvm.transform.PassContext(opt_level=3):
-        for target in ["llvm", "cuda"]:
-            if not tvm.runtime.enabled(target):
-                continue
-            dev = tvm.device(target, 0)
-            lib = relay.build(mod, target=target, params=params)
-            relay_model = graph_executor.GraphModule(lib["default"](dev))
-            for name, value in input_dict.items():
-                relay_model.set_input(name, value)
-            relay_model.run()
-
-            compiled_output = relay_model.get_output(0).numpy()
-            assert_shapes_match(baseline_outputs, compiled_output)
-            if assert_shape_only is False:
-                tvm.testing.assert_allclose(baseline_outputs, compiled_output, rtol=rtol, atol=atol)
-
-
-def gen_ir_module(model, inputs, use_parser_friendly_name=False):
-    """Helper function to generate IRModule with meaningful source information"""
-
-    trace = torch.jit.trace(model, inputs)
-    input_names = ["input{}".format(idx) for idx, _ in enumerate(inputs)]
-    input_shapes = list(zip(input_names, [inp.shape for inp in inputs]))
-    mod, _ = relay.frontend.from_pytorch(
-        trace,
-        input_shapes,
-        use_parser_friendly_name=use_parser_friendly_name,
-    )
-    return mod
-
-
-# Single operator tests
-@tvm.testing.uses_gpu
-def test_forward_pixel_shuffle():
-    """test_forward_pixel_shuffle"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 144, 16, 16]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.PixelShuffle(2).float().eval(), input_data=input_data)
-    verify_model(torch.nn.PixelShuffle(3).float().eval(), input_data=input_data)
-    verify_model(torch.nn.PixelShuffle(4).float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_add():
-    """test_forward_add"""
-    torch.set_grad_enabled(False)
-    input_shape = [10]
-
-    class Add1(Module):
-        def forward(self, *args):
-            return args[0] + args[0]
-
-    class Add2(Module):
-        def forward(self, *args):
-            return args[0] + 1
-
-    class Add3(Module):
-        def forward(self, *args):
-            ones = torch.ones(input_shape, dtype=torch.float)
-            if torch.cuda.is_available():
-                ones = ones.cuda()
-            return args[0] + ones
-
-    class Add4(Module):
-        def forward(self, *args):
-            ones = torch.ones([], dtype=torch.float)
-            if torch.cuda.is_available():
-                ones = ones.cuda()
-            return args[0] + ones
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Add1().float().eval(), input_data=input_data)
-    verify_model(Add2().float().eval(), input_data=input_data)
-    verify_model(Add3().float().eval(), input_data=input_data)
-    verify_model(Add4().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_subtract():
-    """test_forward_subtract"""
-    torch.set_grad_enabled(False)
-    input_shape = [10]
-
-    class Subtract1(Module):
-        def forward(self, *args):
-            return args[0] - args[0]
-
-    class Subtract2(Module):
-        def forward(self, *args):
-            return args[0] - 1
-
-    class Subtract3(Module):
-        def forward(self, *args):
-            ones = torch.ones(input_shape)
-            if torch.cuda.is_available():
-                ones = ones.cuda()
-            return args[0] - ones
-
-    class Subtract4(Module):
-        def forward(self, *args):
-            ones = torch.ones([])
-            if torch.cuda.is_available():
-                ones = ones.cuda()
-            return args[0] - ones
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Subtract1().float().eval(), input_data=input_data)
-    verify_model(Subtract2().float().eval(), input_data=input_data)
-    verify_model(Subtract3().float().eval(), input_data=input_data)
-    verify_model(Subtract4().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_multiply():
-    """test_forward_multiply"""
-    torch.set_grad_enabled(False)
-    input_shape = [10]
-
-    class Multiply1(Module):
-        def forward(self, *args):
-            return args[0] * args[0]
-
-    class Multiply2(Module):
-        def forward(self, *args):
-            return args[0] * 1.0
-
-    class Multiply3(Module):
-        def forward(self, *args):
-            ones = torch.ones(input_shape)
-            if torch.cuda.is_available():
-                ones = ones.cuda()
-            return args[0] * ones
-
-    class Multiply4(Module):
-        def forward(self, *args):
-            ones = torch.ones([])
-            if torch.cuda.is_available():
-                ones = ones.cuda()
-            return args[0] * ones
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Multiply1().float().eval(), input_data=input_data)
-    verify_model(Multiply2().float().eval(), input_data=input_data)
-    verify_model(Multiply3().float().eval(), input_data=input_data)
-    verify_model(Multiply4().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_min_max():
-    """test_min_max"""
-
-    class Max(Module):
-        def forward(self, inp):
-            return torch.max(inp)
-
-    class Min(Module):
-        def forward(self, inp):
-            return torch.min(inp)
-
-    class Max2(Module):
-        def forward(self, inp):
-            out, _ = torch.max(inp, 1, keepdim=True)
-            return out
-
-    class Min2(Module):
-        def forward(self, inp):
-            out, _ = torch.min(inp, 0, keepdim=False)
-            return out
-
-    class Max3(Module):
-        def forward(self, lhs, rhs):
-            return torch.max(lhs, rhs)
-
-    class Min3(Module):
-        def forward(self, lhs, rhs):
-            return torch.min(lhs, rhs)
-
-    class Max4(Module):
-        def forward(self, inp):
-            out = torch.amax(inp, (1, 2), keepdim=True)
-            return out
-
-    class Min4(Module):
-        def forward(self, inp):
-            out = torch.amin(inp, (0, 3), keepdim=False)
-            return out
-
-    input_data = [torch.rand((10, 10, 10, 10)), torch.rand((10, 10, 10, 10))]
-
-    verify_model(Max(), input_data=input_data[0])
-    verify_model(Min(), input_data=input_data[0])
-    verify_model(Max2(), input_data=input_data[0])
-    verify_model(Min2(), input_data=input_data[0])
-    verify_model(Max3(), input_data=input_data)
-    verify_model(Min3(), input_data=input_data)
-    verify_model(Max4(), input_data=input_data[0])
-    verify_model(Min4(), input_data=input_data[0])
-
-
-@tvm.testing.uses_gpu
-def test_minimum_maximum():
-    """test_minimum_maximum"""
-
-    class Maximum(Module):
-        def forward(self, lhs, rhs):
-            return torch.maximum(lhs, rhs)
-
-    class Minimum(Module):
-        def forward(self, lhs, rhs):
-            return torch.minimum(lhs, rhs)
-
-    input_data = [torch.rand((10, 10, 10, 10)), torch.rand((10, 10, 10, 10))]
-
-    verify_model(Maximum(), input_data=input_data)
-    verify_model(Minimum(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_reciprocal():
-    """test_forward_reciprocal"""
-    torch.set_grad_enabled(False)
-    input_shape = [2, 1, 10, 1, 10]
-
-    class Reciprocal1(Module):
-        def forward(self, *args):
-            return args[0].reciprocal()
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Reciprocal1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_repeat():
-    """test_forward_repeat"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3]
-
-    class Repeat1(Module):
-        def forward(self, *args):
-            return args[0].repeat(1, 1)
-
-    class Repeat2(Module):
-        def forward(self, *args):
-            return args[0].repeat(4, 2)
-
-    class Repeat3(Module):
-        def forward(self, *args):
-            return args[0].repeat(4, 2, 1)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Repeat1().float().eval(), input_data=input_data)
-    verify_model(Repeat2().float().eval(), input_data=input_data)
-    verify_model(Repeat3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_repeat_interleave():
-    """test_forward_repeat_interleave"""
-    torch.set_grad_enabled(False)
-    input_shape = [2, 2, 3]
-
-    class RepeatInterleave1(Module):
-        def forward(self, *args):
-            return args[0].repeat_interleave(2)
-
-    class RepeatInterleave2(Module):
-        def forward(self, *args):
-            return args[0].repeat_interleave(3, dim=0)
-
-    class RepeatInterleave3(Module):
-        def forward(self, *args):
-            return args[0].repeat_interleave(2, dim=1)
-
-    class RepeatInterleave4(Module):
-        def forward(self, *args):
-            return args[0].repeat_interleave(4, dim=2)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(RepeatInterleave1().float().eval(), input_data=input_data)
-    verify_model(RepeatInterleave2().float().eval(), input_data=input_data)
-    verify_model(RepeatInterleave3().float().eval(), input_data=input_data)
-    verify_model(RepeatInterleave4().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_unsqueeze():
-    """test_forward_unsqueeze"""
-    torch.set_grad_enabled(False)
-    input_shape = [10, 10]
-
-    class Unsqueeze1(Module):
-        def forward(self, *args):
-            return args[0].unsqueeze(2)
-
-    class Unsqueeze2(Module):
-        def forward(self, *args):
-            _ = args[0].unsqueeze_(2)
-            # Check whether operations after inplace unsqueeze works as expected
-            y = args[0].squeeze(2)
-            return torch.add(y, y)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Unsqueeze1().float().eval(), input_data=input_data)
-    verify_model(Unsqueeze2().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_squeeze():
-    """test_forward_squeeze"""
-    torch.set_grad_enabled(False)
-    input_shape = [2, 1, 10, 1, 10]
-
-    class Squeeze1(Module):
-        def forward(self, *args):
-            return args[0].squeeze()
-
-    class Squeeze2(Module):
-        def forward(self, *args):
-            return args[0].squeeze(1)
-
-    class Squeeze3(Module):
-        def forward(self, *args):
-            return args[0].squeeze((1, 3))
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Squeeze1().float().eval(), input_data=input_data)
-    verify_model(Squeeze2().float().eval(), input_data=input_data)
-    if package_version.parse(torch.__version__) >= package_version.parse("2.0.0"):
-        verify_model(Squeeze3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_arange():
-    """test_forward_arange"""
-    torch.set_grad_enabled(False)
-
-    class Arange1(Module):
-        def forward(self, *args):
-            return torch.arange(5)
-
-    class Arange2(Module):
-        def forward(self, *args):
-            return torch.arange(2.5)
-
-    class Arange3(Module):
-        def forward(self, *args):
-            return torch.arange(1, 4)
-
-    class Arange4(Module):
-        def forward(self, *args):
-            return torch.arange(1, 2.5, 0.5)
-
-    class Arange5(Module):
-        def forward(self, *args):
-            return torch.arange(1, 2, 1, dtype=torch.int32)
-
-    class Arange6(Module):
-        def forward(self, *args):
-            return torch.arange(start=1, end=6, step=2)
-
-    class Arange7(Module):
-        def forward(self, *args):
-            return torch.arange(1, 4, dtype=torch.float32)
-
-    class Arange8(Module):
-        def forward(self, *args):
-            return torch.arange(1, 2, 1, dtype=torch.int16)
-
-    class Arange9(Module):
-        def forward(self, *args):
-            end = torch.add(torch.tensor(4), 1)
-            return torch.arange(end) + torch.ones((5,), dtype=torch.int64)
-
-    class Arange10(Module):
-        def forward(self, *args):
-            end = torch.add(torch.tensor(4.0), torch.tensor(1.0))
-            return torch.arange(end) + torch.ones((5,), dtype=torch.float)
-
-    class Arange11(Module):
-        def forward(self, *args):
-            start = torch.add(torch.tensor(1), 1)
-            end = torch.add(torch.tensor(4), 1)
-            step = torch.add(torch.tensor(2), 1)
-            out = torch.arange(start, end, step)
-            return out + torch.ones((3,), dtype=torch.int64)
-
-    class Arange12(Module):
-        def forward(self, *args):
-            start = torch.add(torch.tensor(1), 1)
-            end = torch.add(torch.tensor(4), 1)
-            step = torch.add(torch.tensor(2.5), torch.tensor(4.1))
-            out = torch.arange(start, end, step)
-            return out + torch.ones((3,), dtype=torch.float)
-
-    verify_model(Arange1().float().eval())
-    verify_model(Arange2().float().eval())
-    verify_model(Arange3().float().eval())
-    verify_model(Arange4().float().eval())
-    verify_model(Arange5().float().eval())
-    verify_model(Arange6().float().eval())
-    verify_model(Arange7().float().eval())
-    verify_model(Arange8().float().eval())
-    verify_model(Arange9().float().eval())
-    verify_model(Arange10().float().eval())
-    verify_model(Arange11().float().eval())
-    verify_model(Arange12().float().eval())
-
-
-@tvm.testing.uses_gpu
-def test_forward_mesh_grid():
-    """test_forward_mesh_grid"""
-    torch.set_grad_enabled(False)
-
-    class MeshGrid1(Module):
-        def forward(self, *args):
-            x = torch.tensor([1, 2, 3])
-            y = torch.tensor([4, 5, 6])
-            grid_x, grid_y = torch.meshgrid([x, y])
-            return grid_x, grid_y
-
-    class MeshGrid2(Module):
-        def forward(self, *args):
-            x = torch.tensor([1, 2, 3], dtype=torch.float32)
-            y = torch.add(torch.tensor(5, dtype=torch.float32), 1)
-            grid_x, grid_y = torch.meshgrid([x, y])
-            return grid_x, grid_y
-
-    verify_model(MeshGrid1().float().eval())
-    verify_model(MeshGrid2().float().eval())
-
-
-@tvm.testing.uses_gpu
-def test_forward_abs():
-    """test_forward_abs"""
-    torch.set_grad_enabled(False)
-    input_shape = [2, 1, 10, 1, 10]
-
-    class Abs1(Module):
-        def forward(self, *args):
-            return args[0].abs()
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Abs1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_concatenate():
-    """test_forward_concatenate"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Concatenate1(Module):
-        def forward(self, *args):
-            return torch.cat([args[0][:, 0].unsqueeze(1), args[0][:, 1].unsqueeze(1)], 1)
-
-    class Concatenate2(Module):
-        def forward(self, *args):
-            a = (args[0][:, :, 0] + 2) * 7
-            b = (args[0][:, :, 1] + 3) * 11
-            c = (args[0][:, :, 2] + 5) * 13
-            return torch.cat([t.unsqueeze(2) for t in [a, b, c]], 2)
-
-    class Concatenate3(Module):
-        """
-        torch.concat is preserved as aten::concat only when in a nested module.
-        (In the most cases, It is converted to aten::cat instead of aten::concat.)
-        """
-
-        def __init__(self):
-            super().__init__()
-
-            class _Concatenate(Module):
-                def forward(self, *args):
-                    a = (args[0][:, :, 0] + 2) * 7
-                    b = (args[0][:, :, 1] + 3) * 11
-                    c = (args[0][:, :, 2] + 5) * 13
-                    return torch.concat([t.unsqueeze(2) for t in [a, b, c]], 2)
-
-            self.mod = _Concatenate()
-
-        def forward(self, *args):
-            return self.mod(*args)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Concatenate1().float().eval(), input_data=input_data)
-    verify_model(Concatenate2().float().eval(), input_data=input_data)
-    verify_model(Concatenate3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_relu():
-    """test_forward_relu"""
-    torch.set_grad_enabled(False)
-    input_shape = [10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.ReLU().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_relu6():
-    """test_forward_relu6"""
-    torch.set_grad_enabled(False)
-    input_shape = [10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.ReLU6().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_prelu():
-    """test_forward_prelu"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.PReLU(num_parameters=3).eval(), input_data=input_data)
-    # Test when input channel > 1 and num parameters = 1
-    verify_model(torch.nn.PReLU(num_parameters=1).eval(), input_data=input_data)
-    # Test when input dims < 2
-    verify_model(torch.nn.PReLU(num_parameters=1).eval(), input_data=torch.randn(2))
-
-
-@tvm.testing.uses_gpu
-def test_forward_leakyrelu():
-    """test_forward_leakyrelu"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.LeakyReLU().eval(), input_data=input_data)
-    verify_model(torch.nn.LeakyReLU(negative_slope=0.05).eval(), input_data=input_data)
-    verify_model(torch.nn.LeakyReLU(negative_slope=1.0, inplace=True).eval(), input_data=input_data)
-    verify_model(
-        torch.nn.LeakyReLU(negative_slope=1.25, inplace=True).eval(), input_data=input_data
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_elu():
-    """test_forward_elu"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.randn(input_shape).float()
-    verify_model(torch.nn.ELU().eval(), input_data=input_data)
-    verify_model(torch.nn.ELU(alpha=0.3).eval(), input_data=input_data)
-    verify_model(torch.nn.ELU(alpha=1.0).eval(), input_data=input_data)
-    verify_model(torch.nn.ELU(alpha=1.3).eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_celu():
-    """test_forward_celu"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.CELU().eval(), input_data=input_data)
-    verify_model(torch.nn.CELU(alpha=0.3).eval(), input_data=input_data)
-    verify_model(torch.nn.CELU(alpha=1.0).eval(), input_data=input_data)
-    verify_model(torch.nn.CELU(alpha=1.3).eval(), input_data=input_data)
-    input_data = torch.tensor([-1.0, 2.0], dtype=torch.float32)
-    verify_model(torch.nn.CELU().eval(), input_data=input_data)
-
-    input_shape = [2, 0, 1]
-    input_data = torch.rand(input_shape).float()
-    with pytest.raises(RuntimeError):
-        verify_model(torch.nn.CELU().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_gelu():
-    """test_forward_gelu"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.GELU().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_selu():
-    """test_forward_selu"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.SELU().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_silu():
-    """test_forward_silu"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.SiLU().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_glu():
-    """test_forward_glu"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.GLU().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_softplus():
-    """test_forward_softplus"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.Softplus().eval(), input_data=input_data)
-    verify_model(torch.nn.Softplus(beta=1.5, threshold=20).eval(), input_data=input_data)
-    verify_model(torch.nn.Softplus(beta=5, threshold=10).eval(), input_data=input_data)
-    verify_model(torch.nn.Softplus(beta=5, threshold=1).eval(), input_data=input_data)
-    verify_model(torch.nn.Softplus(beta=1, threshold=2).eval(), input_data=input_data)
-    verify_model(torch.nn.Softplus(beta=1, threshold=-1).eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_softsign():
-    """test_forward_softsign"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.Softsign().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_log_sigmoid():
-    """test_forward_log_sigmoid"""
-    torch.set_grad_enabled(False)
-    input_shape = [10, 10]
-    input_data = torch.rand(input_shape).float()
-    input_data_overflow = torch.tensor([-300.0, -100.0]).float()
-    verify_model(torch.nn.LogSigmoid().eval(), input_data=input_data)
-    verify_model(torch.nn.LogSigmoid().eval(), input_data=input_data_overflow)
-
-
-@tvm.testing.uses_gpu
-def test_forward_adaptive_avgpool():
-    """test_forward_adaptive_avgpool"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.AdaptiveAvgPool2d([1, 1]).eval(), input_data=input_data)
-    verify_model(torch.nn.AdaptiveAvgPool2d([10, 10]).eval(), input_data=input_data)
-
-    input_data = torch.rand([1, 3, 10]).float()
-    verify_model(torch.nn.AdaptiveAvgPool1d([1]).eval(), input_data=input_data)
-    verify_model(torch.nn.AdaptiveAvgPool1d([5]).eval(), input_data=input_data)
-
-    input_data = torch.rand([1, 3, 5, 6]).float()
-    verify_model(torch.nn.AdaptiveAvgPool2d([3, None]).eval(), input_data=input_data)
-    input_data = torch.rand([1, 1, 3, 5, 6]).float()
-    verify_model(torch.nn.AdaptiveAvgPool3d([3, None, None]).eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_adaptive_maxpool():
-    """test_forward_adaptive_maxpool"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.AdaptiveMaxPool2d([1, 1]).eval(), input_data=input_data)
-    verify_model(torch.nn.AdaptiveMaxPool2d([10, 10]).eval(), input_data=input_data)
-
-    input_data = torch.rand([1, 3, 10]).float()
-    verify_model(torch.nn.AdaptiveMaxPool1d([1]).eval(), input_data=input_data)
-    verify_model(torch.nn.AdaptiveMaxPool1d([5]).eval(), input_data=input_data)
-
-    input_data = torch.rand([1, 3, 5, 6]).float()
-    verify_model(torch.nn.AdaptiveMaxPool2d([3, None]).eval(), input_data=input_data)
-    input_data = torch.rand([1, 1, 3, 5, 6]).float()
-    verify_model(torch.nn.AdaptiveMaxPool3d([3, None, None]).eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_maxpool2d():
-    """test_forward_maxpool2d"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-
-    verify_model(torch.nn.MaxPool2d(kernel_size=[1, 1]).eval(), input_data)
-    verify_model(torch.nn.MaxPool2d(kernel_size=[2, 2], dilation=[2, 3]).eval(), input_data)
-    verify_model(torch.nn.MaxPool2d(kernel_size=[10, 10]).eval(), input_data)
-    verify_model(torch.nn.MaxPool2d(kernel_size=[4, 4], padding=2, stride=2).eval(), input_data)
-
-    # A functional variant (default strides = None case)
-    class MaxPool2D(Module):
-        def forward(self, *args):
-            return torch.nn.functional.max_pool2d(args[0], kernel_size=[10, 10])
-
-    verify_model(MaxPool2D(), input_data=input_data)
-
-    class MaxPool2DWithIndices(Module):
-        def __init__(self):
-            super().__init__()
-            self.pool = torch.nn.MaxPool2d(kernel_size=[1, 1], return_indices=True)
-
-        def forward(self, *args):
-            output, _ = self.pool(args[0])
-            return output
-
-    class MaxPool2DWithIntStrides(Module):
-        def forward(self, *args):
-            # Makes kernel_size and strides a Relay expr to test converting back to int
-            x_shape = args[0].shape
-            # kernel_size = [torch.tensor(x_shape[1]).int(), torch.tensor(x_shape[1]).int()]
-            strides = [torch.tensor(x_shape[0]).int(), torch.tensor(x_shape[0]).int()]
-            return torch.nn.functional.max_pool2d(args[0], kernel_size=[4, 4], stride=strides)
-
-    verify_model(MaxPool2DWithIndices().float().eval(), input_data=input_data)
-    verify_model(MaxPool2DWithIntStrides().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_maxpool1d():
-    """test_forward_maxpool1d"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10]
-    input_data = torch.rand(input_shape).float()
-
-    verify_model(torch.nn.MaxPool1d(kernel_size=1).eval(), input_data)
-    verify_model(torch.nn.MaxPool1d(kernel_size=2, dilation=[1]).eval(), input_data)
-    verify_model(torch.nn.MaxPool1d(kernel_size=10).eval(), input_data)
-    verify_model(torch.nn.MaxPool1d(kernel_size=4, padding=2, stride=2).eval(), input_data)
-
-    # A functional variant (default strides = None case)
-    class MaxPool1D(Module):
-        def forward(self, *args):
-            return torch.nn.functional.max_pool1d(args[0], kernel_size=10)
-
-    verify_model(MaxPool1D(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_maxpool3d():
-    """test_forward_maxpool3d"""
-    torch.set_grad_enabled(False)
-    for input_shape in [(1, 3, 10, 10, 10), (3, 10, 10, 10)]:
-        input_data = torch.rand(input_shape).float()
-
-        verify_model(torch.nn.MaxPool3d(kernel_size=[1, 1, 1]).eval(), input_data)
-        verify_model(
-            torch.nn.MaxPool3d(kernel_size=[2, 2, 2], dilation=[1, 2, 3]).eval(), input_data
-        )
-        verify_model(torch.nn.MaxPool3d(kernel_size=[10, 10, 10]).eval(), input_data)
-        verify_model(
-            torch.nn.MaxPool3d(kernel_size=[4, 4, 4], padding=2, stride=2).eval(), input_data
-        )
-
-    # A functional variant (default strides = None case)
-    class MaxPool3D(Module):
-        def forward(self, *args):
-            return torch.nn.functional.max_pool3d(args[0], kernel_size=[10, 10, 10])
-
-    verify_model(MaxPool3D(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_split():
-    """test_forward_split"""
-    torch.set_grad_enabled(False)
-    input_shape = [4, 10]
-
-    class Split(Module):
-        def __init__(self, split_size_or_sections, dim):
-            super().__init__()
-            self.split_size_or_sections = split_size_or_sections
-            self.dim = dim
-
-        def forward(self, *args):
-            return torch.split(args[0], self.split_size_or_sections, self.dim)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Split(2, 0).float().eval(), input_data=input_data)
-    verify_model(Split(3, 1).float().eval(), input_data=input_data)
-    verify_model(Split(4, 1).float().eval(), input_data=input_data)
-    verify_model(Split([2, 3, 5], 1).float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_tensor_split():
-    """test_forward_tensor_split"""
-    torch.set_grad_enabled(False)
-    input_shape = [4, 10]
-
-    class Tensor_Split(Module):
-        def __init__(self, split_size_or_sections, dim):
-            super().__init__()
-            self.split_size_or_sections = split_size_or_sections
-            self.dim = dim
-
-        def forward(self, *args):
-            return torch.tensor_split(args[0], self.split_size_or_sections, self.dim)
-
-    # tensor_split was introduced when torch > 1.7.1
-    if package_version.parse(torch.__version__) > package_version.parse("1.7.1"):
-        input_data = torch.rand(input_shape).float()
-        verify_model(Tensor_Split(2, 0).float().eval(), input_data=input_data)
-        verify_model(Tensor_Split(torch.tensor(3), 1).float().eval(), input_data=input_data)
-        verify_model(Tensor_Split([2, 3, 5], 1).float().eval(), input_data=input_data)
-        verify_model(Tensor_Split((2, 3, 5), 1).float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_avgpool1d():
-    """test_forward_avgpool1d"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10]
-
-    class AvgPool1D2(Module):
-        def forward(self, *args):
-            return torch.nn.functional.avg_pool1d(args[0], kernel_size=[10])
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.AvgPool1d(kernel_size=[10]).eval(), input_data=input_data)
-    verify_model(AvgPool1D2().float().eval(), input_data=input_data)
-    verify_model(
-        torch.nn.AvgPool1d(kernel_size=[5], stride=2, padding=2).eval(), input_data=input_data
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_avgpool2d():
-    """test_forward_avgpool2d"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class AvgPool2D2(Module):
-        def forward(self, *args):
-            return torch.nn.functional.avg_pool2d(args[0], kernel_size=[10, 10])
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.AvgPool2d(kernel_size=[10, 10]).eval(), input_data=input_data)
-    verify_model(AvgPool2D2().float().eval(), input_data=input_data)
-    verify_model(
-        torch.nn.AvgPool2d(kernel_size=5, stride=2, padding=2).eval(), input_data=input_data
-    )
-
-    input_shape = [1, 1, 1, 9]
-    input_data = torch.rand(input_shape).float()
-    verify_model(
-        torch.nn.AvgPool2d(
-            kernel_size=[1, 2], stride=[1, 2], ceil_mode=True, count_include_pad=True
-        ).eval(),
-        input_data=input_data,
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_avgpool3d():
-    """test_forward_avgpool3d"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10, 10]
-
-    class AvgPool3D1(Module):
-        def forward(self, *args):
-            return torch.nn.functional.avg_pool3d(args[0], kernel_size=[10, 10, 10])
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.AvgPool3d(kernel_size=[10, 10, 10]).eval(), input_data=input_data)
-    verify_model(AvgPool3D1().float().eval(), input_data=input_data)
-    verify_model(
-        torch.nn.AvgPool3d(kernel_size=5, stride=2, padding=2).eval(), input_data=input_data
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_hardtanh():
-    """test_forward_hardtanh"""
-    torch.set_grad_enabled(False)
-    input_shape = [10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.Hardtanh().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_conv():
-    """test_forward_conv"""
-    torch.set_grad_enabled(False)
-    conv1d_input_shape = [1, 3, 10]
-    conv2d_input_shape = [1, 3, 10, 10]
-
-    class Conv2D1(Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv2d(3, 6, 7, bias=True)
-            self.softmax = torch.nn.Softmax()
-
-        def forward(self, *args):
-            return self.softmax(self.conv(args[0]))
-
-    class Conv2D2(Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv2d(3, 6, 7, bias=False)
-            self.softmax = torch.nn.Softmax()
-
-        def forward(self, *args):
-            return self.softmax(self.conv(args[0]))
-
-    class Conv2D3(Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv2d(3, 6, 7, groups=3, bias=False)
-            self.softmax = torch.nn.Softmax()
-
-        def forward(self, *args):
-            return self.softmax(self.conv(args[0]))
-
-    class Conv1D1(Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv1d(3, 6, 7)
-            self.softmax = torch.nn.Softmax()
-
-        def forward(self, *args):
-            return self.softmax(self.conv(args[0]))
-
-    class Conv1D2(Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv1d(3, 6, 7, bias=False)
-            self.softmax = torch.nn.Softmax()
-
-        def forward(self, *args):
-            return self.softmax(self.conv(args[0]))
-
-    class Conv1D3(Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv1d(3, 6, 7, groups=3, bias=False)
-            self.softmax = torch.nn.Softmax()
-
-        def forward(self, *args):
-            return self.softmax(self.conv(args[0]))
-
-    conv2d_input_data = torch.rand(conv2d_input_shape).float()
-    verify_model(Conv2D1().float().eval(), input_data=conv2d_input_data)
-    verify_model(Conv2D2().float().eval(), input_data=conv2d_input_data)
-    # depth wise conv with channel mult 2
-    verify_model(Conv2D3().float().eval(), input_data=conv2d_input_data)
-    # group conv
-    verify_model(
-        torch.nn.Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), groups=2).eval(),
-        input_data=torch.randn((1, 8, 16, 16)),
-    )
-
-    conv1d_input_data = torch.rand(conv1d_input_shape).float()
-    verify_model(Conv1D1().float().eval(), input_data=conv1d_input_data)
-    verify_model(Conv1D2().float().eval(), input_data=conv1d_input_data)
-    verify_model(Conv1D3().float().eval(), input_data=conv1d_input_data)
-
-
-@tvm.testing.uses_gpu
-@pytest.mark.parametrize("in_channels", [3], ids=lambda x: "in_channels=" + str(x))
-@pytest.mark.parametrize("out_channels", [5], ids=lambda x: "out_channels=" + str(x))
-@pytest.mark.parametrize("kernel_size", [3], ids=lambda x: "kernel_size=" + str(x))
-@pytest.mark.parametrize("output_padding", [0, 1, 2], ids=lambda x: "output_padding=" + str(x))
-@pytest.mark.parametrize("groups", [1], ids=lambda x: "groups=" + str(x))
-@pytest.mark.parametrize("bias", [True, False], ids=lambda x: "bias=" + str(x))
-def test_forward_conv_transpose(
-    in_channels, out_channels, kernel_size, output_padding, bias, groups
-):
-    """test_forward_conv_transpose"""
-    # Note we do not test with groups  > 1 because that is not supported
-    # in tvm for conv transpose operations
-
-    # Output padding must be smaller than either stride or dilation so we
-    # opt to make the stride 1 + output padding
-    stride = output_padding + 1
-
-    # Conv 3D Transpose Tests
-    conv3d_input_shape = [1, in_channels, 16, 16, 16]
-    conv3d_input_data = torch.rand(conv3d_input_shape).float()
-    conv3d_transpose = torch.nn.ConvTranspose3d(
-        in_channels=in_channels,
-        out_channels=out_channels,
-        kernel_size=kernel_size,
-        stride=stride,
-        output_padding=output_padding,
-        groups=groups,
-        bias=bias,
-    ).eval()
-    verify_model(conv3d_transpose, conv3d_input_data)
-
-    # Conv 2D Transpose Tests
-    conv2d_input_shape = [1, in_channels, 128, 256]
-    conv2d_input_data = torch.rand(conv2d_input_shape).float()
-    conv2d_transpose = torch.nn.ConvTranspose2d(
-        in_channels=in_channels,
-        out_channels=out_channels,
-        kernel_size=kernel_size,
-        stride=stride,
-        output_padding=output_padding,
-        groups=groups,
-        bias=bias,
-    ).eval()
-    verify_model(conv2d_transpose, conv2d_input_data)
-
-    # # Conv 1D Transpose Tests
-    conv1d_input_shape = [1, in_channels, 10]
-    conv1d_input_data = torch.rand(conv1d_input_shape).float()
-    conv1d_transpose = torch.nn.ConvTranspose1d(
-        in_channels=in_channels,
-        out_channels=out_channels,
-        kernel_size=kernel_size,
-        stride=stride,
-        output_padding=output_padding,
-        groups=groups,
-        bias=bias,
-    ).eval()
-    verify_model(conv1d_transpose, conv1d_input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_conv2d_transpose_group():
-    """test_forward_conv2d_transpose_group"""
-    # https://github.com/apache/tvm/issues/10223
-
-    class ModulatedConvTranspose2D(torch.nn.Module):
-        """ModulatedConvTranspose2D module"""
-
-        def forward(self, x, w, s):
-            """forward"""
-            B, C, H, W = x.shape
-            I, O, KH, KW = w.shape
-
-            # weight is different for each input in batch (this is why we want grouped conv
-            # transpose)
-            w = w.unsqueeze(0) * s.reshape(B, 1, 1, 1, 1)
-            w = w.reshape(B * I, O, KH, KW)
-            x = x.reshape(1, B * C, H, W)
-            x = torch.nn.functional.conv_transpose2d(
-                x, w, stride=(2, 2), padding=(1, 1), output_padding=(1, 1), groups=B
-            )
-            return x.reshape(B, O, H * 2, W * 2)
-
-    b, c, h, w, k = 4, 512, 8, 16, 3
-    inputs = torch.rand(b, c, h, w)
-    weights = torch.rand(c, c // 2, k, k)
-    styles = torch.rand(b)
-
-    # cuda not supported for group > 1 conv2d_transpose
-    targets = ["llvm"]
-
-    if cudnn.exists():
-        targets.append("cuda -libs=cudnn")
-
-    verify_trace_model(ModulatedConvTranspose2D().eval(), [inputs, weights, styles], targets)
-
-
-def test_forward_deform_conv():
-    """test_forward_deform_conv"""
-    torch.set_grad_enabled(False)
-
-    def test_run(
-        batch_size,
-        in_channels,
-        out_channels,
-        in_height,
-        in_width,
-        out_height,
-        out_width,
-        offset_groups,
-        kh,
-        kw,
-        groups,
-    ):
-        input_shape = [batch_size, in_channels, in_height, in_width]
-        offset_shape = [batch_size, 2 * offset_groups * kh * kw, out_height, out_width]
-        weight_shape = [out_channels, in_channels // groups, kh, kw]
-        input_data = torch.rand(input_shape)
-        offset_data = torch.rand(offset_shape)
-        weight_data = torch.rand(weight_shape)
-
-        class DeformConv2D(Module):
-            def forward(self, *args):
-                return torchvision.ops.deform_conv2d(args[0], args[1], args[2])
-
-        verify_model(
-            DeformConv2D().float().eval(),
-            input_data=[input_data, offset_data, weight_data],
-            rtol=1e-4,
-            atol=1e-4,
-        )
-
-    batch_size = 4
-    in_channels, out_channels = 4, 6
-    in_height, in_width = 10, 10
-    out_height, out_width = 8, 8
-    offset_groups = 2
-    kh, kw = 3, 3
-    groups = 1
-
-    test_run(
-        batch_size,
-        in_channels,
-        out_channels,
-        in_height,
-        in_width,
-        out_height,
-        out_width,
-        offset_groups,
-        kh,
-        kw,
-        groups,
-    )
-
-    batch_size = 5
-    in_channels, out_channels = 4, 6
-    in_height, in_width = 10, 10
-    out_height, out_width = 8, 8
-    offset_groups = 1
-    kh, kw = 3, 3
-    groups = 1
-
-    test_run(
-        batch_size,
-        in_channels,
-        out_channels,
-        in_height,
-        in_width,
-        out_height,
-        out_width,
-        offset_groups,
-        kh,
-        kw,
-        groups,
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_threshold():
-    """test_forward_threshold"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.Threshold(0, 0).float().eval(), input_data=input_data)
-    input_data = torch.tensor([[-1.0, 2.0]], dtype=torch.float32)
-    verify_model(torch.nn.Threshold(1, 1).float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_contiguous():
-    """test_forward_contiguous"""
-    torch.set_grad_enabled(False)
-    input_shape = [10]
-
-    class Contiguous1(Module):
-        def forward(self, *args):
-            return args[0].contiguous()
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Contiguous1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_batchnorm():
-    """test_forward_batchnorm"""
-
-    def init_weight(m):
-        torch.nn.init.normal_(m.weight, 0, 0.01)
-        torch.nn.init.normal_(m.bias)
-
-    inp_2d = torch.rand((1, 16, 10, 10))
-    inp_3d = torch.rand((1, 16, 10, 10, 10))
-
-    class BatchNorm(Module):
-        def __init__(self, weight, bias):
-            super().__init__()
-            self.weight = weight
-            self.bias = bias
-
-        def forward(self, *args):
-            return torch.nn.functional.batch_norm(
-                args[0],
-                running_mean=torch.zeros(args[0].shape[1]),
-                running_var=torch.ones(args[0].shape[1]),
-                weight=self.weight,
-                bias=self.bias,
-            )
-
-    for bn, inp in [(torch.nn.BatchNorm2d(16), inp_2d), (torch.nn.BatchNorm3d(16), inp_3d)]:
-        init_weight(bn.eval())
-        verify_model(bn.eval(), input_data=inp)
-        verify_model(BatchNorm(bn.weight, None).eval(), input_data=inp)
-        verify_model(BatchNorm(bn.weight, bn.bias).eval(), input_data=inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_instancenorm():
-    """test_forward_instancenorm"""
-    inp_2d = torch.rand((1, 16, 10, 10))
-    inp_3d = torch.rand((1, 16, 10, 10, 10))
-
-    for ins_norm, inp in [
-        (torch.nn.InstanceNorm2d(16), inp_2d),
-        (torch.nn.InstanceNorm3d(16), inp_3d),
-        (torch.nn.InstanceNorm2d(16, track_running_stats=True), inp_2d),
-        (torch.nn.InstanceNorm3d(16, track_running_stats=True), inp_3d),
-    ]:
-        verify_model(ins_norm.eval(), input_data=inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_layernorm():
-    """test_forward_layernorm"""
-
-    def init_weight(m):
-        torch.nn.init.normal_(m.weight, 0, 0.01)
-        torch.nn.init.normal_(m.bias, 0.02)
-
-    inp_2d = torch.rand((1, 16, 10, 10))
-    inp_3d = torch.rand((1, 16, 10, 10, 10))
-    for ln, inp in [(torch.nn.LayerNorm(10), inp_2d), (torch.nn.LayerNorm(10), inp_3d)]:
-        init_weight(ln.eval())
-        verify_model(ln.eval(), input_data=inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_groupnorm():
-    """test_forward_groupnorm"""
-    input_shape = [10, 6, 5, 5]
-    input_data = torch.rand(input_shape).float()
-
-    # Separate 6 channels into 3 groups
-    verify_model(torch.nn.GroupNorm(3, 6).eval(), input_data=input_data)
-
-    # Put all 6 channels into a single group (equivalent with LayerNorm)
-    verify_model(torch.nn.GroupNorm(1, 6).eval(), input_data=input_data)
-
-    # Separate 6 channels into 6 groups (equivalent with InstanceNorm)
-    verify_model(torch.nn.GroupNorm(6, 6).eval(), input_data=input_data)
-
-    input_shape = [1, 10, 4, 7]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.GroupNorm(1, 10).eval(), input_data=input_data)
-    verify_model(torch.nn.GroupNorm(2, 10).eval(), input_data=input_data)
-    verify_model(torch.nn.GroupNorm(5, 10).eval(), input_data=input_data)
-    verify_model(torch.nn.GroupNorm(10, 10).eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_reshape():
-    """test_forward_reshape"""
-    torch.set_grad_enabled(False)
-    input_shape = [2, 1, 10, 1, 10]
-    new_shape = [2, 1, 10, 10]
-
-    class Reshape1(Module):
-        def forward(self, *args):
-            return args[0].reshape(new_shape)
-
-    class Reshape2(Module):
-        def forward(self, *args):
-            return args[0].reshape([-1])
-
-    class Reshape3(torch.nn.Module):
-        def forward(self, x):
-            x_shape = x.shape
-            return x.reshape((x_shape[0] * x_shape[1], x_shape[2]))
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Reshape1(), input_data=input_data)
-    verify_model(Reshape2(), input_data=input_data)
-    verify_model(Reshape3(), input_data=torch.randn(2, 3, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_reshape_as():
-    """test_forward_reshape_as"""
-
-    def test_func(input_tensor, other_tensor):
-        return input_tensor.reshape_as(other_tensor)
-
-    input_data = [torch.rand([2, 1, 10, 1, 10]), torch.rand([2, 1, 10, 10])]
-
-    verify_model_with_input(test_func, input_data, input_dict={"input0": input_data[0]})
-
-
-@tvm.testing.uses_gpu
-def test_flatten():
-    """test_flatten"""
-
-    def _test_flatten(start_dim, end_dim):
-        return lambda inp: torch.flatten(inp, start_dim, end_dim)
-
-    inp = torch.rand((3, 5, 2, 2))
-
-    # [3, 5, 2, 2] -> [60]
-    verify_model(_test_flatten(0, -1), inp)
-    verify_model(_test_flatten(0, 3), inp)
-    verify_model(_test_flatten(-4, 3), inp)
-    verify_model(_test_flatten(-4, -1), inp)
-
-    # [3, 5, 2, 2] -> [3, 5, 2, 2]
-    verify_model(_test_flatten(3, -1), inp)
-    verify_model(_test_flatten(-1, -1), inp)
-    verify_model(_test_flatten(0, -4), inp)
-    verify_model(_test_flatten(-4, -4), inp)
-
-    # [3, 5, 2, 2] -> [3, 10, 2]
-    verify_model(_test_flatten(1, 2), inp)
-    verify_model(_test_flatten(1, -2), inp)
-    verify_model(_test_flatten(-3, 2), inp)
-    verify_model(_test_flatten(-3, -2), inp)
-
-
-@tvm.testing.uses_gpu
-def test_unflatten():
-    """test_unflatten"""
-
-    def _test_unflatten(dim, unflattened_size):
-        return lambda inp: torch.unflatten(inp, dim, unflattened_size)
-
-    inp = torch.rand(60)
-
-    # [60] -> [3, 5, 2, 2]
-    verify_model(_test_unflatten(0, (3, 5, 2, 2)), inp)
-    verify_model(_test_unflatten(0, (-1, 5, 2, 2)), inp)
-    verify_model(_test_unflatten(0, (3, -1, 2, 2)), inp)
-    verify_model(_test_unflatten(0, (3, 5, -1, 2)), inp)
-    verify_model(_test_unflatten(0, (3, 5, 2, -1)), inp)
-    verify_model(_test_unflatten(-1, (3, 5, 2, 2)), inp)
-    verify_model(_test_unflatten(-1, (-1, 5, 2, 2)), inp)
-    verify_model(_test_unflatten(-1, (3, -1, 2, 2)), inp)
-    verify_model(_test_unflatten(-1, (3, 5, -1, 2)), inp)
-    verify_model(_test_unflatten(-1, (3, 5, 2, -1)), inp)
-
-    inp = torch.rand(3, 4, 1)
-
-    # [3, 4, 1] -> [3, 2, 2, 1]
-    verify_model(_test_unflatten(1, (2, 2)), inp)
-    verify_model(_test_unflatten(1, (-1, 2)), inp)
-
-    inp = torch.rand(5, 12, 3)
-
-    # [5, 12, 3] -> [5, 2, 2, 3, 1, 1, 3]
-    verify_model(_test_unflatten(1, (2, 2, 3, 1, 1)), inp)
-    verify_model(_test_unflatten(-2, (2, 2, 3, 1, 1)), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_transpose():
-    """test_forward_transpose"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Transpose1(Module):
-        def forward(self, *args):
-            return args[0].transpose(2, 3)
-
-    class Transpose2(Module):
-        def forward(self, *args):
-            return args[0].transpose(-2, -1)
-
-    class Transpose3(Module):
-        def forward(self, *args):
-            return args[0].permute(0, 2, 3, 1)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Transpose1().float().eval(), input_data=input_data)
-    verify_model(Transpose2().float().eval(), input_data=input_data)
-    verify_model(Transpose3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_numpy_T():
-    """test_forward_numpy_T"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    def test_fn(x):
-        return x.T
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(test_fn, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_size():
-    """test_forward_size"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3]
-
-    class Size1(Module):
-        def forward(self, *args):
-            return float(args[0].size(0)) * args[0]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Size1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_type_as():
-    """test_type_as"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3]
-
-    def _create_module(dtype):
-        class TypeAs(Module):
-            def forward(self, *args):
-                expected_type_tensor = torch.zeros(1, 3, dtype=dtype)
-                return args[0].type_as(expected_type_tensor)
-
-        return TypeAs()
-
-    input_data = torch.randn(input_shape).float()
-    verify_model(_create_module(torch.float64), input_data=input_data)
-    verify_model(_create_module(torch.float32), input_data=input_data)
-    verify_model(_create_module(torch.int64), input_data=input_data)
-    verify_model(_create_module(torch.int32), input_data=input_data)
-    verify_model(_create_module(torch.int16), input_data=input_data)
-    verify_model(_create_module(torch.int8), input_data=input_data)
-
-    if torch.cuda.is_available():
-        check_fp16 = False
-        try:
-            # Only check half precision on supported hardwares.
-            if have_fp16(tvm.cuda(0).compute_version):
-                check_fp16 = True
-        # pylint: disable=broad-except
-        except Exception:
-            # If GPU is not enabled in TVM, skip the fp16 test.
-            pass
-
-        # Temporary disable fp16 test
-        check_fp16 = False
-
-        if check_fp16:
-            verify_model(_create_module(torch.float16), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_view():
-    """test_forward_view"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class View1(Module):
-        def forward(self, *args):
-            return args[0].view((1, 3 * 10 * 10))
-
-    class View2(Module):
-        def forward(self, *args):
-            return args[0].view(args[0].shape[0], -1)
-
-    class View3(Module):
-        def forward(self, *args):
-            d1 = torch.tensor(3) * torch.tensor(10) * torch.tensor(10)
-            return args[0].view(args[0].shape[0], d1)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(View1().float().eval(), input_data=input_data)
-    verify_model(View2().float().eval(), input_data=input_data)
-    verify_model(View3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_view_as():
-    """test_forward_view_as"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10]
-
-    class ViewAs1(Module):
-        def forward(self, *args):
-            t1 = torch.ones((1 * 3 * 10))
-            return args[0].view_as(t1)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(ViewAs1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_select():
-    """test_forward_select"""
-    torch.set_grad_enabled(False)
-    input_shape = [5, 3, 10, 10]
-
-    class Select1(Module):
-        def forward(self, *args):
-            return args[0].select(1, 1)
-
-    class IndexedSelect(Module):
-        def __init__(self, inp, dim):
-            super().__init__()
-            self.inp = inp
-            self.dim = dim
-            if torch.cuda.is_available():
-                self.inp = self.inp.cuda()
-
-        def forward(self, index):
-            return torch.index_select(self.inp, self.dim, index)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Select1().float().eval(), input_data=input_data)
-
-    # test negative indexing
-    verify_model(lambda x: x[-1], input_data=input_data)
-
-    x = torch.randn(3, 4)
-    indices = torch.tensor([0, 2])
-    verify_model(IndexedSelect(x, 0).eval(), input_data=indices)
-    verify_model(IndexedSelect(x, 1).eval(), input_data=indices)
-
-
-@tvm.testing.uses_gpu
-def test_forward_clone():
-    """test_forward_clone"""
-    torch.set_grad_enabled(False)
-    input_shape = [10]
-
-    class Clone1(Module):
-        def forward(self, *args):
-            return args[0].clone()
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Clone1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_gather():
-    """test_forward_gather"""
-    torch.set_grad_enabled(False)
-
-    class Gather1(Module):
-        def forward(self, *args):
-            return torch.gather(args[0], 0, args[1])
-
-    class Gather2(Module):
-        def forward(self, *args):
-            return torch.gather(args[0], 1, args[1])
-
-    class Gather3(Module):
-        def forward(self, *args):
-            return torch.gather(args[0], 2, args[1])
-
-    input_data = torch.rand((4,)).float()
-    index = torch.tensor([1])
-    verify_model(Gather1().float().eval(), input_data=[input_data, index])
-
-    input_data = torch.rand((2, 2)).float()
-    index = torch.tensor([[1, 0], [0, 1]])
-    verify_model(Gather1().float().eval(), input_data=[input_data, index])
-
-    input_data = torch.tensor([[1, 2], [3, 4]])
-    index = torch.tensor([[0, 0], [1, 0]])
-    verify_model(Gather2().float().eval(), input_data=[input_data, index])
-
-    input_data = torch.rand((2, 2)).float()
-    index = torch.tensor([[1, 0], [0, 1]])
-    verify_model(Gather2().float().eval(), input_data=[input_data, index])
-
-    input_data = torch.rand((3, 3, 3)).float()
-    index = torch.tensor(
-        [
-            [[1, 0, 0], [1, 0, 1], [0, 1, 1]],
-            [[1, 1, 1], [1, 2, 1], [1, 0, 1]],
-            [[1, 2, 1], [1, 2, 1], [1, 2, 1]],
-        ]
-    )
-    verify_model(Gather3().float().eval(), input_data=[input_data, index])
-
-
-@tvm.testing.uses_gpu
-def test_forward_logsoftmax():
-    """test_forward_logsoftmax"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class LogSoftmax1(Module):
-        def forward(self, *args):
-            return torch.nn.LogSoftmax(dim=1)(args[0][0, 0])
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(LogSoftmax1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_norm():
-    """test_forward_norm"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Norm1(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float("inf"), dim=None, keepdim=False)
-
-    class Norm2(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float("-inf"), dim=None, keepdim=False)
-
-    class Norm3(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float("-inf"), dim=None, keepdim=True)
-
-    class Norm4(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float("inf"), dim=(1, 2), keepdim=False)
-
-    class Norm5(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float("inf"), dim=(1), keepdim=True)
-
-    class Norm6(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float(0.5), dim=(1), keepdim=True)
-
-    class Norm7(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float(1), dim=None, keepdim=False)
-
-    class Norm8(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float(2.0), dim=(1), keepdim=True)
-
-    class Norm9(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float(-0.5), dim=(1, 2), keepdim=True)
-
-    class Norm10(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float(-2), dim=(1), keepdim=False)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Norm1().float().eval(), input_data=input_data)
-    verify_model(Norm2().float().eval(), input_data=input_data)
-    verify_model(Norm3().float().eval(), input_data=input_data)
-    verify_model(Norm4().float().eval(), input_data=input_data)
-    verify_model(Norm5().float().eval(), input_data=input_data)
-    verify_model(Norm6().float().eval(), input_data=input_data)
-    verify_model(Norm7().float().eval(), input_data=input_data)
-    verify_model(Norm8().float().eval(), input_data=input_data)
-    verify_model(Norm9().float().eval(), input_data=input_data)
-    verify_model(Norm10().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_frobenius_norm():
-    """test_forward_frobenius_norm"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class FroNorm1(Module):
-        def forward(self, *args):
-            return torch.norm(args[0])
-
-    class FroNorm2(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p="fro", dim=None, keepdim=True)
-
-    class FroNorm3(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p="fro", dim=(1), keepdim=True)
-
-    class FroNorm4(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], dim=None, keepdim=False)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(FroNorm1().float().eval(), input_data=input_data)
-    verify_model(FroNorm2().float().eval(), input_data=input_data)
-    verify_model(FroNorm3().float().eval(), input_data=input_data)
-    verify_model(FroNorm4().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_sigmoid():
-    """test_forward_sigmoid"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.Sigmoid().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_dense():
-    """test_forward_dense"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Dense1(Module):
-        def __init__(self):
-            super().__init__()
-            self.linear = torch.nn.Linear(10, 7, bias=True)
-
-        def forward(self, *args):
-            return self.linear(args[0][0, 0])
-
-    class Dense2(Module):
-        def __init__(self):
-            super().__init__()
-            self.linear = torch.nn.Linear(10, 7, bias=False)
-
-        def forward(self, *args):
-            return self.linear(args[0][0, 0])
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Dense1().float().eval(), input_data=input_data)
-    verify_model(Dense2().float().eval(), input_data=input_data)
-
-    trace = torch.jit.trace(Dense1(), [input_data])
-    mod, _ = relay.frontend.from_pytorch(
-        trace,
-        [("input", input_shape)],
-    )
-    assert not any(list(op.name == "multiply" for op in list_ops(mod["main"])))
-
-
-@tvm.testing.uses_gpu
-def test_forward_linear():
-    """test_forward_linear"""
-    torch.set_grad_enabled(False)
-
-    class Linear(Module):
-        def forward(self, inputs, weight, bias):
-            return F.linear(inputs, weight, bias)
-
-    class LinearNoBias(Module):
-        def forward(self, inputs, weight):
-            return F.linear(inputs, weight)
-
-    class LinearNested(Module):
-        def forward(self, x, y, z):
-            return F.linear(x, F.linear(y, z))
-
-    input1d = torch.rand([2]).float()
-    input2d = torch.rand([2, 2]).float()
-    input3d = torch.rand([4, 3, 2]).float()
-    weight1d = torch.rand([2]).float()
-    weight2d = torch.rand([2, 2]).float()
-    weight3x2 = torch.rand([3, 2]).float()
-    bias0d = torch.rand([]).float()
-    bias1d = torch.rand([2]).float()
-    bias2d = torch.rand([2, 2]).float()
-    # 2D input, 2D weight, 1D bias
-    verify_model(Linear(), input_data=[input2d, weight2d, bias1d])
-    # 2D input, 2D weight, 2D bias
-    verify_model(Linear(), input_data=[input2d, weight2d, bias2d])
-    # 2D input, 2D weight, no bias
-    verify_model(LinearNoBias(), input_data=[input2d, weight2d])
-    verify_model(LinearNoBias(), input_data=[input2d, weight3x2])
-    # 2D input, 1D weight, 1D bias is not supported by torch.linear()
-    # 2D input, 1D weight, no bias
-    verify_model(LinearNoBias(), input_data=[input2d, weight1d])
-    # 3D input, 2D weight, no bias
-    verify_model(LinearNoBias(), input_data=[input3d, weight3x2])
-    # 3D input, 2D weight, 1D bias
-    verify_model(Linear(), input_data=[input3d, weight2d, bias1d])
-
-    verify_model(LinearNested(), input_data=[torch.randn(10, 10) for _ in range(3)])
-
-    # 1D input, 2D weight, 1D bias
-    verify_model(Linear(), input_data=[input1d, weight2d, bias1d])
-    # 1D input, 2D weight, no bias
-    verify_model(LinearNoBias(), input_data=[input1d, weight2d])
-    # 1D input, 1D weight, scalar bias
-    verify_model(Linear(), input_data=[input1d, weight1d, bias0d])
-    # 1D input, 1D weight, no bias
-    verify_model(LinearNoBias(), input_data=[input1d, weight1d])
-
-
-@tvm.testing.uses_gpu
-def test_forward_dropout():
-    """test_forward_dropout"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.Dropout(p=0.5).eval(), input_data=input_data[0, 0])
-    verify_model(torch.nn.Dropout2d(p=0.5).eval(), input_data=input_data[0])
-    verify_model(torch.nn.Dropout3d(p=0.5).eval(), input_data=input_data)
-    verify_model(torch.nn.AlphaDropout(p=0.5).eval(), input_data=input_data[0, 0])
-
-
-@tvm.testing.uses_gpu
-def test_forward_slice():
-    """test_forward_slice"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Slice1(Module):
-        def forward(self, *args):
-            return args[0][:, :, :, :3]
-
-    class Slice2(Module):
-        def forward(self, *args):
-            return args[0][0, :, :-3, :]
-
-    class Slice3(Module):
-        def forward(self, *args):
-            x0 = torch.tensor(2) - torch.tensor(1)
-            x1 = torch.tensor(3) + torch.tensor(1)
-            return args[0][:, x0:, 1:x1, :]
-
-    class SliceWithStride(torch.nn.Module):
-        def forward(self, x):
-            return x[..., 0::2] + x[..., 1::2]
-
-    class SliceWithStride2(torch.nn.Module):
-        def forward(self, x):
-            return x[0::2, 0::2] + x[1::2, 1::2]
-
-    class DynamicLengthSlice(torch.nn.Module):
-        def forward(self, values, length):
-            return values[0:length]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Slice1(), input_data=input_data)
-    verify_model(Slice2(), input_data=input_data)
-    verify_model(Slice3(), input_data=input_data)
-    verify_model(SliceWithStride(), input_data=torch.randn(1, 4))
-    verify_model(SliceWithStride2(), input_data=torch.randn(4, 4))
-
-    inp = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
-    slice_len = torch.tensor(2)
-    targets = ["llvm", "cuda"]
-    verify_trace_model(DynamicLengthSlice(), [inp, slice_len], targets)
-
-
-@tvm.testing.uses_gpu
-def test_forward_narrow():
-    """test_forward_narrow"""
-    torch.set_grad_enabled(False)
-    input_shape = [3, 3]
-
-    class Narrow1(Module):
-        def forward(self, *args):
-            return torch.narrow(args[0], 0, 0, 2)
-
-    class Narrow2(Module):
-        def forward(self, *args):
-            return torch.narrow(args[0], 1, 1, 2)
-
-    class Narrow3(Module):
-        def forward(self, *args):
-            begin = torch.tensor(2) - torch.tensor(1)
-            length = torch.tensor(1) * torch.tensor(2)
-            return torch.narrow(args[0], 1, begin, length)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Narrow1(), input_data=input_data)
-    verify_model(Narrow2(), input_data=input_data)
-    verify_model(Narrow3(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_mean():
-    """test_forward_mean"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Mean1(Module):
-        def forward(self, *args):
-            return args[0].mean(2)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Mean1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_expand():
-    """test_forward_expand"""
-    torch.set_grad_enabled(False)
-
-    class Expand1(Module):
-        def forward(self, *args):
-            return args[0].expand((3, -1, -1, -1))
-
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(Expand1().float().eval(), input_data=input_data)
-
-    class Expand2(Module):
-        def forward(self, *args):
-            return args[0].expand((3, 3, 3, 1))
-
-    input_shape = [3, 1]
-    input_data = torch.rand(input_shape).float()
-    verify_model(Expand2().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_broadcast_tensors():
-    """test_forward_broadcast_tensors"""
-    torch.set_grad_enabled(False)
-
-    class BroadCastTensors1(Module):
-        def forward(self, x, y):
-            return torch.broadcast_tensors(x, y)
-
-    x = torch.arange(3).view(1, 1, 3)
-    y = torch.arange(2).view(1, 2, 1)
-    verify_model(BroadCastTensors1().float().eval(), input_data=[x, y])
-
-    class BroadCastTensors2(Module):
-        def forward(self, x, y, z):
-            return torch.broadcast_tensors(x, y, z)
-
-    x = torch.arange(3).view(1, 1, 3)
-    y = torch.arange(2).view(1, 2, 1)
-    z = torch.arange(4).view(4, 1, 1)
-    verify_model(BroadCastTensors2().float().eval(), input_data=[x, y, z])
-
-
-@tvm.testing.uses_gpu
-def test_forward_broadcast_to():
-    """test_forward_broadcast_to"""
-    torch.set_grad_enabled(False)
-
-    class BroadCastTo1(Module):
-        def forward(self, x):
-            return torch.broadcast_to(x, (3, 3))
-
-    x = torch.tensor([1, 2, 3])
-    verify_model(BroadCastTo1().float().eval(), input_data=[x])
-
-    class BroadCastTo2(Module):
-        def __init__(self):
-            super().__init__()
-            self.y = torch.tensor(1)
-            self.z = torch.tensor(2)
-
-        def forward(self, x):
-            return torch.broadcast_to(x, (self.y + self.z, 3))
-
-    x = torch.tensor([1, 2, 3])
-    verify_model(BroadCastTo2().float().eval(), input_data=[x])
-
-
-@tvm.testing.uses_gpu
-def test_forward_pow():
-    """test_forward_pow"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Pow1(Module):
-        def forward(self, *args):
-            return args[0] ** 2
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Pow1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_chunk():
-    """test_forward_chunk"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 14, 14]
-
-    class Chunk1(Module):
-        def forward(self, *args):
-            chunks = args[0].chunk(7, 2)
-            return torch.cat(chunks, 2)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Chunk1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_upsample():
-    """test_upsample"""
-
-    class Upsample(Module):
-        def __init__(self, size=None, scale=None, mode="nearest", align_corners=None):
-            super().__init__()
-            self.size = size
-            self.scale = scale
-            self.mode = mode
-            self.align_corners = align_corners
-
-        def forward(self, x):
-            return torch.nn.functional.interpolate(
-                x,
-                size=self.size,
-                scale_factor=self.scale,
-                mode=self.mode,
-                align_corners=self.align_corners,
-            )
-
-    inp = torch.rand((1, 3, 32, 32))
-    verify_model(Upsample(size=(64, 64), mode="nearest"), inp)
-    verify_model(Upsample(scale=2, mode="nearest"), inp)
-    verify_model(Upsample(size=(50, 50), mode="nearest"), inp)
-    verify_model(Upsample(size=(64, 64), mode="bilinear", align_corners=True), inp)
-    verify_model(Upsample(scale=2, mode="bilinear", align_corners=True), inp)
-    verify_model(Upsample(size=(50, 50), mode="bilinear", align_corners=True), inp)
-    verify_model(Upsample(size=(64, 64), mode="bicubic", align_corners=True), inp)
-    verify_model(Upsample(scale=2, mode="bicubic", align_corners=True), inp)
-    verify_model(Upsample(size=(50, 50), mode="bicubic", align_corners=True), inp)
-
-
-@tvm.testing.uses_gpu
-def test_to():
-    """test for aten::to(...)"""
-
-    class ToCPU(Module):
-        def forward(self, x):
-            return x.to("cpu")
-
-    class ToFloat(Module):
-        def forward(self, x):
-            return x.float()
-
-    class ToInt(Module):
-        def forward(self, x):
-            return x.int()
-
-    class ToLong(Module):
-        def forward(self, x):
-            return x.long()
-
-    class ToDouble(Module):
-        def forward(self, x):
-            return x.double()
-
-    class ToFloat16(Module):
-        def forward(self, x):
-            return x.to(torch.float16)
-
-    verify_model(ToCPU().eval(), torch.rand((1, 3, 32, 32)))
-    verify_model(ToFloat().eval(), torch.zeros((1, 3, 32, 32), dtype=torch.int))
-    verify_model(ToFloat().eval(), torch.tensor(2, dtype=torch.int))
-    verify_model(ToInt().eval(), torch.zeros((1, 3, 32, 32)))
-    verify_model(ToInt().eval(), torch.tensor(0.8))
-    verify_model(ToLong().eval(), torch.tensor(0.8))
-    verify_model(ToDouble().eval(), torch.tensor(0.8))
-    verify_model(ToFloat16().eval(), torch.tensor(2, dtype=torch.float32))
-    verify_model(ToFloat16().eval(), torch.zeros((1, 3, 32, 32), dtype=torch.int))
-
-
-@tvm.testing.uses_gpu
-def test_adaptive_pool3d():
-    """test_adaptive_pool3d"""
-    for ishape in [(1, 32, 16, 16, 16), (1, 32, 9, 15, 15), (1, 32, 13, 7, 7)]:
-        inp = torch.rand(ishape)
-        verify_model(torch.nn.AdaptiveMaxPool3d((1, 1, 1)).eval(), inp)
-        verify_model(torch.nn.AdaptiveMaxPool3d((2, 2, 2)).eval(), inp)
-        verify_model(torch.nn.AdaptiveAvgPool3d((1, 1, 1)).eval(), inp)
-        verify_model(torch.nn.AdaptiveAvgPool3d((2, 2, 2)).eval(), inp)
-        verify_model(torch.nn.AdaptiveAvgPool3d((4, 8, 8)).eval(), inp)
-        verify_model(torch.nn.AdaptiveMaxPool3d((7, 8, 9)).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_functional_pad():
-    """test_forward_functional_pad"""
-    torch.set_grad_enabled(False)
-    pad = (0, 0)
-
-    class Pad1(Module):
-        def forward(self, *args):
-            return torch.nn.functional.pad(args[0], pad, "constant", 0)
-
-    input_data = torch.rand((3, 3, 4, 2))
-    pad = (1, 1)
-    verify_model(Pad1().float().eval(), input_data=input_data)
-
-    pad = (1, 1, 2, 2)
-    verify_model(Pad1().float().eval(), input_data=input_data)
-
-    pad = (0, 1, 2, 1, 3, 3)
-    verify_model(Pad1().float().eval(), input_data=input_data)
-
-    class Pad2(Module):
-        def forward(self, *args):
-            return torch.nn.functional.pad(args[0], pad, "constant", 1)
-
-    input_data = torch.rand((3, 3, 4, 2))
-    pad = (1, 1)
-    verify_model(Pad2().float().eval(), input_data=input_data)
-
-    pad = (1, 1, 2, 2)
-    verify_model(Pad2().float().eval(), input_data=input_data)
-
-    pad = (0, 1, 2, 1, 3, 3)
-    verify_model(Pad2().float().eval(), input_data=input_data)
-
-    class Pad3(Module):
-        def forward(self, *args):
-            return torch.nn.functional.pad(args[0], pad, "constant", 1.0)
-
-    input_data = torch.rand((3, 3, 4, 2))
-    pad = (1, 1)
-    verify_model(Pad3().float().eval(), input_data=input_data)
-
-    pad = (1, 1, 2, 2)
-    verify_model(Pad3().float().eval(), input_data=input_data)
-
-    pad = (0, 1, 2, 1, 3, 3)
-    verify_model(Pad3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_zero_pad2d():
-    """test_forward_zero_pad2d"""
-    inp = torch.rand((1, 1, 3, 3))
-    verify_model(torch.nn.ZeroPad2d(2).eval(), inp)
-    verify_model(torch.nn.ZeroPad2d((1, 1, 2, 0)).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_constant_pad1d():
-    """test_forward_constant_pad1d"""
-    inp = torch.rand((1, 2, 4))
-    verify_model(torch.nn.ConstantPad1d(2, 3.5).eval(), inp)
-
-    inp = torch.rand((1, 2, 3))
-    verify_model(torch.nn.ConstantPad1d((3, 1), 3.5).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_constant_pad2d():
-    """test_forward_constant_pad2d"""
-    inp = torch.rand((1, 2, 2, 2))
-    verify_model(torch.nn.ConstantPad2d(2, 3.5).eval(), inp)
-    verify_model(torch.nn.ConstantPad2d((3, 0, 2, 1), 3.5).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_constant_pad3d():
-    """test_forward_constant_pad3d"""
-    inp = torch.rand((1, 3, 2, 2, 2))
-    verify_model(torch.nn.ConstantPad3d(3, 3.5).eval(), inp)
-    verify_model(torch.nn.ConstantPad3d((3, 4, 5, 6, 0, 1), 3.5).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_reflection_pad1d():
-    """test_forward_reflection_pad1d"""
-    inp = torch.rand((1, 2, 4))
-    verify_model(torch.nn.ReflectionPad1d(2).eval(), inp)
-    verify_model(torch.nn.ReflectionPad1d((3, 1)).eval(), inp)
-
-    inp = torch.rand((2, 4, 5))
-    verify_model(torch.nn.ReflectionPad1d((2, 3)).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_reflection_pad2d():
-    """test_forward_reflection_pad2d"""
-    inp = torch.rand((1, 1, 3, 3))
-    verify_model(torch.nn.ReflectionPad2d(2).eval(), inp)
-    verify_model(torch.nn.ReflectionPad2d((1, 1, 2, 0)).eval(), inp)
-
-    inp = torch.rand((2, 4, 5, 6))
-    verify_model(torch.nn.ReflectionPad2d((1, 3, 2, 4)).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_replication_pad1d():
-    """test_forward_replication_pad1d"""
-    inp = torch.rand((1, 2, 4))
-    verify_model(torch.nn.ReplicationPad1d(2).eval(), inp)
-    verify_model(torch.nn.ReplicationPad1d((3, 1)).eval(), inp)
-
-    inp = torch.rand((2, 4, 5))
-    verify_model(torch.nn.ReplicationPad1d((2, 3)).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_replication_pad2d():
-    """test_forward_replication_pad2d"""
-    inp = torch.rand((1, 1, 3, 3))
-    verify_model(torch.nn.ReplicationPad2d(2).eval(), inp)
-    verify_model(torch.nn.ReplicationPad2d((1, 1, 2, 0)).eval(), inp)
-
-    inp = torch.rand((2, 4, 5, 6))
-    verify_model(torch.nn.ReplicationPad2d((1, 3, 2, 4)).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_replication_pad3d():
-    """test_forward_replication_pad3d"""
-    inp = torch.rand((1, 1, 3, 3, 3))
-    verify_model(torch.nn.ReplicationPad3d(3).eval(), inp)
-    verify_model(torch.nn.ReplicationPad3d((1, 1, 2, 2, 1, 1)).eval(), inp)
-
-    inp = torch.rand((7, 5, 4, 5, 6))
-    verify_model(torch.nn.ReplicationPad3d((2, 3, 2, 5, 1, 4)).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_upsample3d():
-    """test_forward_upsample3d"""
-    inp = torch.arange(1, 9, dtype=torch.float32).view(1, 1, 2, 2, 2)
-    verify_model(torch.nn.Upsample(scale_factor=2, mode="nearest").eval(), inp)
-    verify_model(torch.nn.Upsample(scale_factor=2, mode="trilinear").eval(), inp)
-    verify_model(
-        torch.nn.Upsample(scale_factor=2, mode="trilinear", align_corners=True).eval(), inp
-    )
-
-
-def test_forward_nms():
-    """dynamic Non-Maximum Suppression"""
-    torch.set_grad_enabled(False)
-
-    class NonMaxSupression(Module):
-        def __init__(self, iou_thres):
-            super().__init__()
-            self.iou_threshold = iou_thres
-
-        def forward(self, *args):
-            return torchvision.ops.nms(args[0], args[1], self.iou_threshold)
-
-    # Generate random input data
-    def _gen_rand_inputs(num_boxes):
-        box_len = 4
-        boxes = torch.rand(num_boxes, box_len, dtype=torch.float) * 0.5
-        boxes[:, 2] += boxes[:, 0]
-        boxes[:, 3] += boxes[:, 1]
-        scores = np.linspace(0, 1, num=num_boxes).astype("float32")
-        np.random.shuffle(scores)
-        return boxes, torch.from_numpy(scores)
-
-    targets = ["llvm", "cuda"]
-
-    for num_boxes, iou_thres in [(10, 0.3), (100, 0.5), (500, 0.9)]:
-        in_boxes, in_scores = _gen_rand_inputs(num_boxes)
-        verify_trace_model(NonMaxSupression(iou_thres), [in_boxes, in_scores], targets)
-
-
-def test_forward_roi_align():
-    """ROI align"""
-    torch.set_grad_enabled(False)
-
-    class ROIAlign(Module):
-        def __init__(self, output_sizes, spatial_scale=1.0, sampling_ratio=-1):
-            super().__init__()
-            self.spatial_scale = spatial_scale
-            self.sampling_ratio = sampling_ratio
-            self.output_sizes = output_sizes
-
-        def forward(self, *args):
-            return torchvision.ops.roi_align(
-                args[0],
-                args[1],
-                self.output_sizes,
-                self.spatial_scale,
-                self.sampling_ratio,
-            )
-
-    in_data = torch.Tensor(np.random.uniform(size=(1, 8, 100, 100)))
-    in_boxes = torch.Tensor(np.random.uniform(0.0, 100.0, size=(35, 4)))
-    in_batch = torch.zeros((35, 1), dtype=torch.float)
-    in_boxes = torch.cat([in_batch, in_boxes], dim=1)
-
-    verify_model(ROIAlign(7), [in_data, in_boxes])
-    verify_model(ROIAlign((10, 10), 0.7, 5), [in_data, in_boxes])
-    verify_model(ROIAlign(15, 0.9, 3), [in_data, in_boxes])
-
-
-@tvm.testing.uses_gpu
-def test_conv3d():
-    """test_conv3d"""
-    for ishape in [(1, 32, 16, 16, 16), (1, 32, 9, 15, 15), (1, 32, 13, 7, 7)]:
-        inp = torch.rand(ishape)
-        verify_model(torch.nn.Conv3d(32, 16, (3, 3, 3), padding=(1, 1, 1)).eval(), inp)
-        verify_model(torch.nn.Conv3d(32, 16, (5, 5, 5), padding=(2, 2, 2)).eval(), inp)
-        verify_model(torch.nn.Conv3d(32, 16, kernel_size=1).eval(), inp)
-        # downsample
-        verify_model(torch.nn.Conv3d(32, 16, kernel_size=1, stride=2).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_conv3d_transpose():
-    """test_conv3d_transpose"""
-    for ishape in [(1, 8, 10, 5, 10), (1, 8, 5, 8, 8), (1, 8, 13, 7, 7)]:
-        inp = torch.rand(ishape)
-        verify_model(
-            torch.nn.ConvTranspose3d(
-                in_channels=8, out_channels=33, kernel_size=3, stride=2
-            ).eval(),
-            inp,
-        )
-        verify_model(
-            torch.nn.ConvTranspose3d(
-                in_channels=8,
-                out_channels=20,
-                kernel_size=(3, 5, 2),
-                stride=(2, 1, 1),
-                padding=(0, 4, 2),
-            ).eval(),
-            inp,
-        )
-        verify_model(
-            torch.nn.ConvTranspose3d(in_channels=8, out_channels=20, kernel_size=1).eval(), inp
-        )
-        verify_model(
-            torch.nn.ConvTranspose3d(in_channels=8, out_channels=5, kernel_size=1, stride=2).eval(),
-            inp,
-        )
-
-
-# Model tests
-@tvm.testing.uses_gpu
-def test_resnet18():
-    """test_resnet18"""
-    torch.set_grad_enabled(False)
-    verify_model("resnet18", atol=1e-4, rtol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_squeezenet1_0():
-    """test_squeezenet1_0"""
-    torch.set_grad_enabled(False)
-    verify_model("squeezenet1_0", atol=1e-4, rtol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_squeezenet1_1():
-    """test_squeezenet1_1"""
-    torch.set_grad_enabled(False)
-    verify_model("squeezenet1_1", atol=1e-4, rtol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_densenet121():
-    """test_densenet121"""
-    torch.set_grad_enabled(False)
-    verify_model("densenet121", atol=1e-4, rtol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_inception_v3():
-    """test_inception_v3"""
-    torch.set_grad_enabled(False)
-    verify_model("inception_v3", atol=1e-4, rtol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_googlenet():
-    """test_googlenet"""
-    torch.set_grad_enabled(False)
-    verify_model("googlenet", atol=1e-4, rtol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_mnasnet0_5():
-    """test_mnasnet0_5"""
-    torch.set_grad_enabled(False)
-    verify_model("mnasnet0_5", atol=1e-4, rtol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_mobilenet_v2():
-    """test_mobilenet_v2"""
-    torch.set_grad_enabled(False)
-    verify_model("mobilenet_v2", atol=1e-4, rtol=1e-4)
-
-
-# pylint: disable=pointless-string-statement
-"""
-#TODO: Fix VGG and AlexNet issues (probably due to pooling)
-@tvm.testing.uses_gpu
-def test_alexnet():
-    torch.set_grad_enabled(False)
-    verify_model("alexnet")
-
-@tvm.testing.uses_gpu
-def test_vgg11():
-    torch.set_grad_enabled(False)
-    verify_model("vgg11")
-
-@tvm.testing.uses_gpu
-def test_vgg11_bn():
-    torch.set_grad_enabled(False)
-    verify_model("vgg11_bn")
-"""
-
-
-@tvm.testing.uses_gpu
-def test_custom_conversion_map():
-    """test_custom_conversion_map"""
-
-    def get_roi_align():
-        pool_size = 5
-        n_channels = 2 * (pool_size**2)
-        x = torch.rand(2, n_channels, 10, 10)
-        rois = torch.tensor(
-            [
-                [0, 0, 0, 9, 9],  # format is (xyxy)
-                [0, 0, 5, 4, 9],
-                [0, 5, 5, 9, 9],
-                [1, 0, 0, 9, 9],
-            ],
-            dtype=torch.float,
-        )
-        roi_align = torchvision.ops.RoIAlign(pool_size, spatial_scale=1, sampling_ratio=-1)
-        return roi_align.eval(), [x, rois]
-
-    def convert_roi_align():
-        def _impl(inputs, input_types):
-            spatial_scale = inputs[2]
-            pooled_size = (inputs[3], inputs[4])
-            sampling_ratio = inputs[5]
-            return relay.op.vision.roi_align(
-                inputs[0], inputs[1], pooled_size, spatial_scale, sampling_ratio
-            )
-
-        return _impl
-
-    custom_map = {"torchvision::roi_align": convert_roi_align()}
-    model, inputs = get_roi_align()
-
-    verify_model(model, inputs, custom_map)
-
-
-@tvm.testing.uses_gpu
-def test_segmentation_models():
-    """test_segmentation_models"""
-
-    class SegmentationModelWrapper(Module):
-        def __init__(self, model):
-            super().__init__()
-            self.model = model
-
-        def forward(self, inp):
-            out = self.model(inp)
-            return out["out"]
-
-    fcn = torchvision.models.segmentation.fcn_resnet101(pretrained=True)
-    deeplab = torchvision.models.segmentation.deeplabv3_resnet101(pretrained=True)
-
-    inp = [torch.rand((1, 3, 300, 300), dtype=torch.float)]
-
-    verify_model(SegmentationModelWrapper(fcn.eval()), inp, atol=1e-4, rtol=1e-4)
-    verify_model(SegmentationModelWrapper(deeplab.eval()), inp, atol=1e-4, rtol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_3d_models():
-    """test_3d_models"""
-    input_shape = (1, 3, 4, 56, 56)
-    resnet3d = torchvision.models.video.r3d_18(pretrained=True).eval()
-    verify_model(resnet3d, [torch.rand(input_shape)], atol=1e-4, rtol=1e-4)
-
-
-def _get_default_vm_targets():
-    """Get default vm targets"""
-    return ["llvm", "cuda"]
-
-
-def verify_script_model(pt_model, ishapes, targets, idtype=None):
-    """verify_script_model"""
-    script_module = torch.jit.script(pt_model)
-
-    verify_model_vm(script_module, ishapes, idtype=idtype, targets=targets)
-
-
-def verify_trace_model(pt_model, idata, targets):
-    """verify_trace_model"""
-    traced_model = torch.jit.trace(pt_model, idata)
-    ishapes = [data.shape for data in idata]
-    verify_model_vm(traced_model, ishapes, idata=idata, targets=targets)
-
-
-def convert_pt_to_tvm_type(idtype):
-    """Accepts a pytorch dtype and returns string TVM dtype."""
-    # TVM does not support PyTorch complex dtypes
-    if idtype == torch.float64:
-        curr_dtype = "float64"
-    elif idtype == torch.float32:
-        curr_dtype = "float32"
-    elif idtype == torch.float16:
-        curr_dtype = "float16"
-    elif idtype == torch.bfloat16:
-        curr_dtype = "bfloat16"
-    elif idtype == torch.int64:
-        curr_dtype = "int64"
-    elif idtype == torch.int32:
-        curr_dtype = "int32"
-    elif idtype == torch.int16:
-        curr_dtype = "int16"
-    elif idtype == torch.int8:
-        curr_dtype = "int8"
-    elif idtype == torch.uint8:
-        curr_dtype = "uint8"
-    elif idtype == torch.bool:
-        curr_dtype = "bool"
-    else:
-        raise NotImplementedError(f"Unsupported dtype: {idtype}")
-    return curr_dtype
-
-
-def verify_model_vm(input_model, ishapes, idtype=None, idata=None, targets=None):
-    """verify_model_vm"""
-    targets = targets or ["llvm"]
-    if not idtype:
-        idtype = torch.float
-
-    input_names = [f"i{idx}" for idx, _ in enumerate(ishapes)]
-    tvm_dtype = convert_pt_to_tvm_type(idtype)
-    input_dtypes = [tvm_dtype] * len(input_names)
-    input_shapes = list(zip(input_names, list(zip(ishapes, input_dtypes))))
-
-    if idata:
-        input_data = idata
-    # If no input_data provided, generate random data of specified dtype
-    else:
-        if idtype == torch.bool:
-            input_data = [
-                torch.Tensor.bool(torch.randint(low=0, high=2, size=shape)) for shape in ishapes
-            ]
-        # Torch dtype can be float, complex, int, or Bool. Complex not supported,
-        # so if not float or Bool, dtype must be int!
-        elif not idtype.is_floating_point:
-            input_data = [
-                torch.randint(low=0, high=10, size=shape, dtype=idtype) for shape in ishapes
-            ]
-        else:
-            input_data = [torch.randn(shape, dtype=idtype) for shape in ishapes]
-
-    # Compile via VM
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_pytorch(input_model, input_shapes)
-    with tvm.testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_pytorch(input_model, input_shapes)
-    tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-
-    for tgt in targets:
-        if not tvm.testing.device_enabled(tgt):
-            continue
-        print("Running on target", tgt)
-
-        dev = tvm.device(tgt, 0)
-
-        evaluator = relay.create_executor("vm", mod=mod, device=dev, target=tgt).evaluate()
-
-        # Inference
-        for name, inp in zip(input_names, input_data):
-            params[name] = inp.numpy()
-        vm_res = evaluator(**params)
-
-        # Baseline result
-        with torch.no_grad():
-            pt_result = input_model(*input_data)
-
-        # Verify the accuracy
-        if isinstance(pt_result, tuple):
-            # handle multiple outputs
-            for i, pt_result in enumerate(pt_result):
-                tvm_res = vm_res[i].numpy()
-                tvm.testing.assert_allclose(tvm_res, pt_result.numpy(), rtol=1e-5, atol=1e-5)
-        elif not isinstance(pt_result, torch.Tensor):
-            tvm_res = vm_res.numpy().item()
-            assert pt_result == tvm_res
-        else:
-            tvm.testing.assert_allclose(vm_res.numpy(), pt_result.numpy(), rtol=1e-5, atol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_control_flow():
-    """test_control_flow"""
-
-    class SimpleIf(torch.nn.Module):
-        """SimpleIf module"""
-
-        def __init__(self, N, M):
-            super().__init__()
-            self.weight = torch.nn.Parameter(torch.rand(N, M))
-
-        def forward(self, inp):
-            if inp.sum() > 0.0:
-                output = self.weight + inp
-            else:
-                output = self.weight - inp
-            return output
-
-    class NestedIf(torch.nn.Module):
-        """NestedIf module"""
-
-        def __init__(self, N, M):
-            super().__init__()
-            self.weight = torch.nn.Parameter(torch.rand(N, M))
-
-        def forward(self, inp):
-            """forward"""
-            if inp.sum() > 0.0:
-                if inp.mean() > 0.0:
-                    output = self.weight + inp
-                else:
-                    output = self.weight - inp
-            else:
-                if inp.mean() >= 0.0:
-                    output = self.weight * inp
-                else:
-                    output = self.weight / inp
-
-            return output
-
-    class ScalarLoop(torch.nn.Module):
-        """ScalarLoop module"""
-
-        def forward(self, inp):
-            """forward"""
-            a = 0
-            for i in range(inp.size(0)):
-                b = i * i
-                b = b + 1
-                a += b
-            if a != 0:
-                a += 1
-            else:
-                a += 2
-            return a
-
-    class SimpleLoop(torch.nn.Module):
-        def forward(self, inp):
-            a = inp
-            for _ in range(inp.size(0)):
-                b = a * 2.0
-                c = a + b
-                a += c
-            return a
-
-    class LoopWithIf(torch.nn.Module):
-        """LoopWithIf module"""
-
-        def forward(self, inp):
-            a = inp
-            for _ in range(inp.size(0)):
-                b = a * 2.0
-                b = a + b
-                if b.sum() > 0.0:
-                    a += b
-                else:
-                    a -= b
-            return a
-
-    class NestedLoop(torch.nn.Module):
-        def forward(self, inp):
-            a = inp
-            for i in range(inp.size(0)):
-                b = a * float(i)
-                for j in range(inp.size(1)):
-                    a += b * float(j)
-            return a
-
-    class SimpleScalarWhileLoop(torch.nn.Module):
-        """SimpleScalarWhileLoop module"""
-
-        def forward(self, inp):
-            """forward"""
-            a = 1
-            i = 0
-            while i <= inp.size(0):
-                a += i
-                i += 2
-            i = 0
-            # also test constant init cond
-            while i < 10:
-                a += i
-                i += 3
-            return a
-
-    class SimpleWhileLoop(torch.nn.Module):
-        def forward(self, inp):
-            a = inp
-            i = 0
-            while i < inp.size(0):
-                a += a * float(i) * 2.0
-                i += 1
-            return a
-
-    models = [
-        SimpleIf(10, 20),
-        NestedIf(10, 20),
-        ScalarLoop(),
-        SimpleLoop(),
-        LoopWithIf(),
-        SimpleScalarWhileLoop(),
-        SimpleWhileLoop(),
-        NestedLoop(),
-    ]
-
-    for pt_model in models:
-        verify_script_model(pt_model.eval(), [(10, 20)], _get_default_vm_targets())
-
-
-@tvm.testing.uses_gpu
-def test_simple_rnn():
-    """test_simple_rnn"""
-    # The mixed tracing and scripting example from
-    # https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html#mixing-scripting-and-tracing
-    class DecisionGate(torch.nn.Module):
-        def forward(self, x):
-            if x.sum() > 0:
-                return x
-            else:
-                return -x
-
-    class Cell(torch.nn.Module):
-        def __init__(self, dg):
-            super().__init__()
-            self.dg = dg
-            self.linear = torch.nn.Linear(4, 4)
-
-        def forward(self, x, h):
-            new_h = torch.tanh(self.dg(self.linear(x)) + h)
-            return new_h, new_h
-
-    class RNNLoop(torch.nn.Module):
-        """Pytorch RNNLoop module"""
-
-        def __init__(self):
-            super().__init__()
-            x = torch.rand(10, 4, dtype=torch.float)
-            h = torch.rand(10, 4, dtype=torch.float)
-            self.cell = torch.jit.trace(Cell(DecisionGate()), (x, h))
-
-        def forward(self, xs):
-            h = torch.zeros(10, 4, dtype=torch.float)
-            y = torch.zeros(10, 4, dtype=torch.float)
-            for i in range(xs.size(0)):
-                y, h = self.cell(xs[i], h)
-            return y
-
-    verify_script_model(RNNLoop().eval(), [(10, 10, 4)], _get_default_vm_targets())
-
-
-@tvm.testing.uses_gpu
-def test_forward_reduce_sum():
-    """test_forward_reduce_sum"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class ReduceSum1(Module):
-        def forward(self, *args):
-            return args[0].sum(1)
-
-    class ReduceSum2(Module):
-        def forward(self, *args):
-            return args[0].sum(dim=1, keepdim=False)
-
-    class ReduceSum3(Module):
-        def forward(self, *args):
-            return args[0].sum(dim=2, keepdim=True)
-
-    class ReduceSum4(Module):
-        def forward(self, *args):
-            return args[0].sum(dim=(2, 3), keepdim=True)
-
-    class ReduceSum5(Module):
-        def forward(self, *args):
-            return args[0].sum(dim=(2, 3), keepdim=False)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(ReduceSum1().float().eval(), input_data=input_data)
-    verify_model(ReduceSum2().float().eval(), input_data=input_data)
-    verify_model(ReduceSum3().float().eval(), input_data=input_data)
-    verify_model(ReduceSum4().float().eval(), input_data=input_data)
-    verify_model(ReduceSum5().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_reduce_prod():
-    """test_forward_reduce_prod"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class ReduceProd1(Module):
-        def forward(self, *args):
-            return args[0].prod(1)
-
-    class ReduceProd2(Module):
-        def forward(self, *args):
-            return args[0].prod(dim=1, keepdim=False)
-
-    class ReduceProd3(Module):
-        def forward(self, *args):
-            return args[0].prod(dim=2, keepdim=True)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(ReduceProd1().float().eval(), input_data=input_data)
-    verify_model(ReduceProd2().float().eval(), input_data=input_data)
-    verify_model(ReduceProd3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_argmin():
-    """test_forward_argmin"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class ArgMin1(Module):
-        def forward(self, *args):
-            return args[0].argmin(1)
-
-    class ArgMin2(Module):
-        def forward(self, *args):
-            return args[0].argmin(dim=1, keepdim=False)
-
-    class ArgMin3(Module):
-        def forward(self, *args):
-            return args[0].argmin(dim=2, keepdim=True)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(ArgMin1().float().eval(), input_data=input_data)
-    verify_model(ArgMin2().float().eval(), input_data=input_data)
-    verify_model(ArgMin3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_argmax():
-    """test_forward_argmax"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class ArgMax1(Module):
-        def forward(self, *args):
-            return args[0].argmax(1)
-
-    class ArgMax2(Module):
-        def forward(self, *args):
-            return args[0].argmax(dim=1, keepdim=False)
-
-    class ArgMax3(Module):
-        def forward(self, *args):
-            return args[0].argmax(dim=2, keepdim=True)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(ArgMax1().float().eval(), input_data=input_data)
-    verify_model(ArgMax2().float().eval(), input_data=input_data)
-    verify_model(ArgMax3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_std():
-    """test_forward_std"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Std1(Module):
-        def forward(self, *args):
-            return args[0].std(1, unbiased=False)
-
-    class Std2(Module):
-        def forward(self, *args):
-            return args[0].std(dim=1, keepdim=False, unbiased=False)
-
-    class Std3(Module):
-        def forward(self, *args):
-            return args[0].std(dim=2, keepdim=True, unbiased=False)
-
-    class Std4(Module):
-        def forward(self, *args):
-            return args[0].std(dim=(2, 3), keepdim=True, unbiased=False)
-
-    class Std5(Module):
-        def forward(self, *args):
-            return args[0].std(dim=(2, 3), keepdim=False, unbiased=False)
-
-    class Std6(Module):
-        def forward(self, *args):
-            return args[0].std(unbiased=False)
-
-    class Std7(Module):
-        def forward(self, *args):
-            return args[0].std(dim=1, keepdim=False, unbiased=True)
-
-    class Std8(Module):
-        def forward(self, *args):
-            return args[0].std(dim=(2, 3), keepdim=True, unbiased=True)
-
-    class Std9(Module):
-        def forward(self, *args):
-            return args[0].std(unbiased=True)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Std1().float().eval(), input_data=input_data)
-    verify_model(Std2().float().eval(), input_data=input_data)
-    verify_model(Std3().float().eval(), input_data=input_data)
-    verify_model(Std4().float().eval(), input_data=input_data)
-    verify_model(Std5().float().eval(), input_data=input_data)
-    verify_model(Std6().float().eval(), input_data=input_data)
-    verify_model(Std7().float().eval(), input_data=input_data)
-    verify_model(Std8().float().eval(), input_data=input_data)
-    verify_model(Std9().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_var_mean():
-    """test_forward_var_mean"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class VarMean1(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], 1, unbiased=False)
-
-    class VarMean2(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], dim=1, keepdim=False, unbiased=False)
-
-    class VarMean3(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], dim=2, keepdim=True, unbiased=False)
-
-    class VarMean4(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], dim=(2, 3), keepdim=True, unbiased=False)
-
-    class VarMean5(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], dim=(2, 3), keepdim=False, unbiased=False)
-
-    class VarMean6(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], unbiased=False)
-
-    class VarMean7(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], dim=1, keepdim=False, unbiased=True)
-
-    class VarMean8(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], dim=(2, 3), keepdim=True, unbiased=True)
-
-    class VarMean9(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], unbiased=True)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(VarMean1().float().eval(), input_data=input_data)
-    verify_model(VarMean2().float().eval(), input_data=input_data)
-    verify_model(VarMean3().float().eval(), input_data=input_data)
-    verify_model(VarMean4().float().eval(), input_data=input_data)
-    verify_model(VarMean5().float().eval(), input_data=input_data)
-    verify_model(VarMean6().float().eval(), input_data=input_data)
-    verify_model(VarMean7().float().eval(), input_data=input_data)
-    verify_model(VarMean8().float().eval(), input_data=input_data)
-    verify_model(VarMean9().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_variance():
-    """test_forward_variance"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Variance1(Module):
-        def forward(self, *args):
-            return args[0].var(1, unbiased=False)
-
-    class Variance2(Module):
-        def forward(self, *args):
-            return args[0].var(dim=1, keepdim=False, unbiased=False)
-
-    class Variance3(Module):
-        def forward(self, *args):
-            return args[0].var(dim=2, keepdim=True, unbiased=False)
-
-    class Variance4(Module):
-        def forward(self, *args):
-            return args[0].var(dim=(2, 3), keepdim=True, unbiased=False)
-
-    class Variance5(Module):
-        def forward(self, *args):
-            return args[0].var(dim=(2, 3), keepdim=False, unbiased=False)
-
-    class Variance6(Module):
-        def forward(self, *args):
-            return args[0].var(unbiased=False)
-
-    class Variance7(Module):
-        def forward(self, *args):
-            return args[0].var(dim=1, keepdim=False, unbiased=True)
-
-    class Variance8(Module):
-        def forward(self, *args):
-            return args[0].var(dim=(2, 3), keepdim=True, unbiased=True)
-
-    class Variance9(Module):
-        def forward(self, *args):
-            return args[0].var(unbiased=True)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Variance1().float().eval(), input_data=input_data)
-    verify_model(Variance2().float().eval(), input_data=input_data)
-    verify_model(Variance3().float().eval(), input_data=input_data)
-    verify_model(Variance4().float().eval(), input_data=input_data)
-    verify_model(Variance5().float().eval(), input_data=input_data)
-    verify_model(Variance6().float().eval(), input_data=input_data)
-    verify_model(Variance7().float().eval(), input_data=input_data)
-    verify_model(Variance8().float().eval(), input_data=input_data)
-    verify_model(Variance9().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_rsub():
-    """test_forward_rsub"""
-    torch.set_grad_enabled(False)
-
-    class Rsub1(Module):
-        def forward(self, *args):
-            return torch.rsub(args[0], args[1])
-
-    class Rsub2(Module):
-        def forward(self, *args):
-            return torch.rsub(args[0], args[1], alpha=0.5)
-
-    d1 = torch.rand([1, 3]).float()
-    d2 = torch.rand([1, 3]).float()
-    d3 = torch.rand([1, 3]).int()
-    verify_model(Rsub1().float().eval(), input_data=[d1, d2])
-    verify_model(Rsub1().float().eval(), input_data=[d1, d3])
-    verify_model(Rsub2().float().eval(), input_data=[d1, d2])
-    verify_model(Rsub2().float().eval(), input_data=[d1, d3])
-
-    d1 = torch.rand([1, 3]).half()
-    d2 = torch.rand([1, 3]).half()
-    verify_model(Rsub1().half().eval(), input_data=[d1, d2])
-    verify_model(Rsub1().half().eval(), input_data=[d1, d3])
-    verify_model(Rsub2().half().eval(), input_data=[d1, d2])
-    verify_model(Rsub2().half().eval(), input_data=[d1, d3])
-
-
-@tvm.testing.uses_gpu
-def test_forward_embedding():
-    """test_forward_embedding"""
-    torch.set_grad_enabled(False)
-
-    input_data = torch.randint(0, 10, [2, 4]).long()
-    verify_model(torch.nn.Embedding(10, 3).float().eval(), input_data=input_data)
-
-    input_data = torch.randint(0, 4, [2, 3, 4]).long()
-    verify_model(torch.nn.Embedding(4, 5, sparse=False).float().eval(), input_data=input_data)
-
-    input_data = torch.randint(0, 4, [2, 3, 4]).long()
-    verify_model(torch.nn.Embedding(4, 5, sparse=True).float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_onehot():
-    """test_forward_onehot"""
-    torch.set_grad_enabled(False)
-
-    class OneHot1(Module):
-        def forward(self, *args):
-            return torch.nn.functional.one_hot(args[0], num_classes=3)
-
-    class OneHot2(Module):
-        def forward(self, *args):
-            return torch.nn.functional.one_hot(args[0], num_classes=5)
-
-    input_data = torch.arange(0, 5) % 3
-    verify_model(OneHot1().float().eval(), input_data=input_data)
-
-    input_data = torch.arange(0, 5) % 4
-    verify_model(OneHot2().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_isfinite():
-    """test_forward_isfinite"""
-    torch.set_grad_enabled(False)
-
-    class IsFinite1(Module):
-        def forward(self, *args):
-            return torch.isfinite(args[0])
-
-    input_data = torch.tensor([1, float("inf"), 2, float("-inf"), float("nan")]).float()
-    verify_model(IsFinite1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_isnan():
-    """test_forward_isnan"""
-    torch.set_grad_enabled(False)
-
-    class IsNan1(Module):
-        def forward(self, *args):
-            return torch.isnan(args[0])
-
-    input_data = torch.tensor([1, float("inf"), 2, float("-inf"), float("nan")]).float()
-    verify_model(IsNan1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_isinf():
-    """test_forward_isinf"""
-    torch.set_grad_enabled(False)
-
-    class IsInf1(Module):
-        def forward(self, *args):
-            return torch.isinf(args[0])
-
-    input_data = torch.tensor([1, float("inf"), 2, float("-inf"), float("nan")]).float()
-    verify_model(IsInf1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_clamp():
-    """test_forward_clamp"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Clamp1(Module):
-        def forward(self, *args):
-            return torch.clamp(args[0], min=-0.5, max=0.5)
-
-    class Clamp2(Module):
-        def forward(self, *args):
-            return torch.clamp(args[0], min=-0.3)
-
-    class Clamp3(Module):
-        def forward(self, *args):
-            return torch.clamp(args[0], max=1.0)
-
-    class Clamp_MinExpr_MaxConstant(Module):
-        def forward(self, *args):
-            h, w = args[0].shape[2:]
-            amin = h / 100.0
-            return torch.clamp(args[0], min=amin, max=w)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Clamp1().float().eval(), input_data=input_data)
-    verify_model(Clamp2().float().eval(), input_data=input_data)
-    verify_model(Clamp3().float().eval(), input_data=input_data)
-    verify_model(Clamp_MinExpr_MaxConstant().float().eval(), input_data=input_data)
-
-    verify_model(lambda inp: torch.clamp_min(inp, 0.5), input_data)
-    inp_uint8 = torch.randint(low=0, high=256, size=(100, 100), dtype=torch.uint8)
-    verify_model(lambda inp: torch.clamp_max(inp, 125), inp_uint8)
-
-
-@tvm.testing.uses_gpu
-def test_forward_clamp_():
-    """test_forward_clamp_"""
-    torch.set_grad_enabled(False)
-
-    class ClampInPlace(Module):
-        def __init__(self, i_min, i_max):
-            super().__init__()
-            self.min = i_min
-            self.max = i_max
-
-        def forward(self, *args):
-            return torch.clamp_(args[0], self.min, self.max)
-
-    for ishape, i_min, i_max in (([4, 8], 0.1, 0.9), ([7, 6], 0.2, 0.5)):
-        input_data = torch.rand(ishape).float()
-        verify_model(ClampInPlace(i_min, i_max).float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_ones():
-    """test_forward_ones"""
-    torch.set_grad_enabled(False)
-
-    class Ones1(Module):
-        def forward(self, *args):
-            return torch.ones(2, 3)
-
-    verify_model(Ones1().float().eval(), input_data=[])
-
-
-@tvm.testing.uses_gpu
-def test_forward_ones_like():
-    """test_forward_ones_like"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class OnesLike1(Module):
-        def forward(self, *args):
-            return torch.ones_like(args[0])
-
-    class OnesLike2(Module):
-        def forward(self, *args):
-            return torch.ones_like(args[0], dtype=torch.int8)
-
-    class OnesLike3(Module):
-        def forward(self, *args):
-            return torch.ones_like(args[0], dtype=torch.float)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(OnesLike1().float().eval(), input_data=input_data)
-    verify_model(OnesLike2().float().eval(), input_data=input_data)
-    verify_model(OnesLike3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_new_ones():
-    """test_forward_new_ones"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    def test_func(input_tensor):
-        return input_tensor.new_ones([3, 10, 10])
-
-    verify_model_with_input(test_func, [torch.rand(input_shape).float()])
-
-
-@tvm.testing.uses_gpu
-def test_forward_zeros():
-    """test_forward_zeros"""
-    torch.set_grad_enabled(False)
-
-    class Zeros1(Module):
-        def forward(self, *args):
-            return torch.zeros(2, 3)
-
-    verify_model(Zeros1().float().eval(), input_data=[])
-
-
-def test_forward_zero_():
-    def test_func(x):
-        return x.zero_()
-
-    verify_model_with_input(test_func, [torch.rand([1, 3, 10, 10]).float()])
-
-
-@tvm.testing.uses_gpu
-def test_forward_zeros_like():
-    """test_forward_zeros_like"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class ZerosLike1(Module):
-        def forward(self, *args):
-            return torch.zeros_like(args[0])
-
-    class ZerosLike2(Module):
-        def forward(self, *args):
-            return torch.zeros_like(args[0], dtype=torch.int32)
-
-    class ZerosLike3(Module):
-        def forward(self, *args):
-            return torch.zeros_like(args[0], dtype=torch.float)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(ZerosLike1().float().eval(), input_data=input_data)
-    verify_model(ZerosLike2().float().eval(), input_data=input_data)
-    verify_model(ZerosLike3().float().eval(), input_data=input_data)
-
-
-def test_forward_new_zeros():
-    def test_func(x):
-        return x.new_zeros((2, 3))
-
-    verify_model_with_input(test_func, [torch.rand([1, 3, 10, 10]).float()])
-
-
-@tvm.testing.uses_gpu
-def test_forward_full():
-    """test_forward_full"""
-    torch.set_grad_enabled(False)
-
-    class Full1(Module):
-        def forward(self, *args):
-            return torch.full((2, 3), 3.14)
-
-    class Full2(Module):
-        def forward(self, *args):
-            return torch.full((1, 2, 3), 1.0, dtype=torch.int32)
-
-    verify_model(Full1().float().eval(), input_data=[])
-    verify_model(Full2().float().eval(), input_data=[])
-
-
-@tvm.testing.uses_gpu
-def test_forward_adaptive_max_pool1d():
-    """test_forward_adaptive_max_pool1d"""
-    torch.set_grad_enabled(False)
-    input_data = [torch.randn([2, 2, 4], dtype=torch.float32)]
-    m = torch.nn.AdaptiveMaxPool1d(3)
-
-    verify_model(m.float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_instance_norm():
-    """test_forward_instance_norm"""
-
-    class instance_norm(Module):
-        def forward(self, *args):
-            return torch.nn.functional.instance_norm(args[0], use_input_stats=True)
-
-    m = instance_norm().float().eval()
-    input_data = torch.randn([1, 1, 1, 2], dtype=torch.float64)
-
-    verify_model(m.float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_full_like():
-    """test_forward_full_like"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class FullLike1(Module):
-        def forward(self, *args):
-            return torch.full_like(args[0], 3.14)
-
-    class FullLike2(Module):
-        def forward(self, *args):
-            return torch.full_like(args[0], 22.22, dtype=torch.int32)
-
-    class FullLike3(Module):
-        def forward(self, *args):
-            return torch.full_like(args[0], 1.4, dtype=torch.float)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(FullLike1().float().eval(), input_data=input_data)
-    verify_model(FullLike2().float().eval(), input_data=input_data)
-    verify_model(FullLike3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_new_full():
-    """test_forward_new_full"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    def test_func(input_tensor):
-        return input_tensor.new_full([2, 3], 1)
-
-    verify_model_with_input(test_func, [torch.rand(input_shape).float()])
-
-
-def test_forward_fill_():
-    def test_func(x):
-        return x.fill_(3)
-
-    verify_model_with_input(test_func, [torch.rand([1, 3, 10, 10]).float()])
-
-
-def test_forward_fill_with_div():
-    """test_forward_fill_with_div"""
-
-    def test_func(x):
-        y = torch.div(torch.tensor(6.0), torch.tensor(2.0))
-        return x.fill_(y)
-
-    verify_model_with_input(test_func, [torch.rand([1, 3, 10, 10]).float()])
-
-
-@tvm.testing.uses_gpu
-def test_forward_linspace():
-    """test_forward_linspace"""
-    torch.set_grad_enabled(False)
-
-    class Linspace1(Module):
-        def forward(self, *args):
-            return torch.linspace(5, 10, steps=100)
-
-    class Linspace2(Module):
-        def forward(self, *args):
-            return torch.linspace(-10, 10, steps=5)
-
-    class Linspace3(Module):
-        def forward(self, *args):
-            return torch.linspace(start=-10, end=10, steps=5)
-
-    class Linspace4(Module):
-        def forward(self, *args):
-            return torch.linspace(start=-10, end=10, steps=1)
-
-    class Linspace5(Module):
-        def forward(self, *args):
-            return torch.linspace(1, 2, 1, dtype=torch.int32)
-
-    class Linspace6(Module):
-        def forward(self, *args):
-            return torch.linspace(start=1, end=6, steps=2)
-
-    class Linspace7(Module):
-        def forward(self, *args):
-            return torch.linspace(1, 4, steps=100, dtype=torch.float32)
-
-    class Linspace8(Module):
-        def forward(self, *args):
-            return torch.linspace(1, 2, 1, dtype=torch.int16)
-
-    class Linspace9(Module):
-        def forward(self, *args):
-            return torch.linspace(0, 8, 10)
-
-    verify_model(Linspace1().float().eval())
-    verify_model(Linspace2().float().eval())
-    verify_model(Linspace3().float().eval())
-    verify_model(Linspace4().float().eval())
-    verify_model(Linspace5().float().eval())
-    verify_model(Linspace6().float().eval())
-    verify_model(Linspace7().float().eval())
-    verify_model(Linspace8().float().eval())
-    verify_model(Linspace9().float().eval())
-
-
-@tvm.testing.uses_gpu
-def test_forward_take():
-    """test_forward_take"""
-    torch.set_grad_enabled(False)
-
-    class Take1(Module):
-        def forward(self, *args):
-            indices = torch.tensor([[0, 0], [1, 0]])
-            if torch.cuda.is_available():
-                indices = indices.cuda()
-            return torch.take(args[0], indices)
-
-    class Take2(Module):
-        def forward(self, *args):
-            return torch.take(args[0], args[1])
-
-    input_data = torch.tensor([[1, 2], [3, 4]])
-    verify_model(Take1().float().eval(), input_data=input_data)
-    indices = torch.tensor([[0, 0], [1, 0]])
-    verify_model(Take2().float().eval(), input_data=[input_data, indices])
-    indices = torch.tensor([0, -1])
-    verify_model(Take2().float().eval(), input_data=[input_data, indices])
-
-
-@tvm.testing.uses_gpu
-def test_forward_topk():
-    """test_forward_topk"""
-    torch.set_grad_enabled(False)
-
-    class Topk1(Module):
-        def forward(self, *args):
-            return torch.topk(args[0], k=3)
-
-    class Topk2(Module):
-        def forward(self, *args):
-            return torch.topk(args[0], k=3, dim=-2)
-
-    class Topk3(Module):
-        def forward(self, *args):
-            return torch.topk(args[0], k=3, dim=3)
-
-    class Topk4(Module):
-        def forward(self, *args):
-            return torch.topk(args[0], k=3, largest=True)
-
-    class Topk5(Module):
-        def forward(self, *args):
-            return torch.topk(args[0], k=3, largest=False)
-
-    class Topk6(Module):
-        def forward(self, *args):
-            return torch.topk(args[0], k=3, sorted=True)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(Topk1().float().eval(), input_data=input_data)
-    verify_model(Topk2().float().eval(), input_data=input_data)
-    verify_model(Topk3().float().eval(), input_data=input_data)
-    verify_model(Topk4().float().eval(), input_data=input_data)
-    verify_model(Topk5().float().eval(), input_data=input_data)
-    verify_model(Topk6().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_logical_not():
-    """test_forward_logical_not"""
-    torch.set_grad_enabled(False)
-
-    class LogicalNot1(Module):
-        def forward(self, *args):
-            return torch.logical_not(args[0])
-
-    input_data = torch.tensor([True, False])
-    verify_model(LogicalNot1().float().eval(), input_data=input_data)
-
-    input_data = torch.tensor([0, 1, -10], dtype=torch.int8)
-    verify_model(LogicalNot1().float().eval(), input_data=input_data)
-
-    input_data = torch.tensor([0.0, 1.5, -10.0], dtype=torch.double)
-    verify_model(LogicalNot1().float().eval(), input_data=input_data)
-
-    input_data = torch.tensor([0.0, 1.0, -10.0], dtype=torch.int32)
-    verify_model(LogicalNot1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_bitwise_not():
-    """test_forward_bitwise_not"""
-    torch.set_grad_enabled(False)
-
-    class BitwiseNot1(Module):
-        def forward(self, *args):
-            return torch.bitwise_not(args[0])
-
-    input_data = torch.tensor([0, 1, -10], dtype=torch.int8)
-    verify_model(BitwiseNot1().float().eval(), input_data=input_data)
-
-    input_data = torch.tensor([0.0, 1.0, -10.0], dtype=torch.int32)
-    verify_model(BitwiseNot1().float().eval(), input_data=input_data)
-
-    input_data = torch.tensor([True, False])
-    verify_model(BitwiseNot1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_bitwise_xor():
-    """test_forward_bitwise_xor"""
-    torch.set_grad_enabled(False)
-
-    class BitwiseXor1(Module):
-        def forward(self, *args):
-            return torch.bitwise_xor(args[0], args[1])
-
-    class BitwiseXor2(Module):
-        def forward(self, *args):
-            rhs = torch.tensor([1, 0, 3], dtype=torch.int8)
-            if torch.cuda.is_available():
-                rhs = rhs.cuda()
-            return torch.bitwise_xor(args[0], rhs)
-
-    lhs = torch.tensor([-1, -2, 3], dtype=torch.int8)
-    rhs = torch.tensor([1, 0, 3], dtype=torch.int8)
-    verify_model(BitwiseXor1().float().eval(), input_data=[lhs, rhs])
-
-    lhs = torch.tensor([True, True, False])
-    rhs = torch.tensor([False, True, False])
-    verify_model(BitwiseXor1().float().eval(), input_data=[lhs, rhs])
-
-    lhs = torch.tensor([-1, -2, 3], dtype=torch.int8)
-    verify_model(BitwiseXor2().float().eval(), input_data=[lhs])
-
-
-def test_forward_bitwise_and():
-    """test_forward_bitwise_and"""
-    torch.set_grad_enabled(False)
-
-    class BitwiseAnd1(Module):
-        def forward(self, *args):
-            return torch.bitwise_and(args[0], args[1])
-
-    class BitwiseAnd2(Module):
-        def forward(self, *args):
-            rhs = torch.tensor([1, 0, 3], dtype=torch.int8)
-            if torch.cuda.is_available():
-                rhs = rhs.cuda()
-            return torch.bitwise_and(args[0], rhs)
-
-    lhs = torch.tensor([-1, -2, 3], dtype=torch.int8)
-    rhs = torch.tensor([1, 0, 3], dtype=torch.int8)
-    verify_model(BitwiseAnd1().float().eval(), input_data=[lhs, rhs])
-
-    lhs = torch.tensor([True, True, False])
-    rhs = torch.tensor([False, True, False])
-    verify_model(BitwiseAnd1().float().eval(), input_data=[lhs, rhs])
-
-    lhs = torch.tensor([-1, -2, 3], dtype=torch.int8)
-    verify_model(BitwiseAnd2().float().eval(), input_data=[lhs])
-
-
-@tvm.testing.uses_gpu
-def test_forward_logical_xor():
-    """test_forward_logical_xor"""
-    torch.set_grad_enabled(False)
-
-    class LogicalXor1(Module):
-        def forward(self, *args):
-            return torch.logical_xor(args[0], args[1])
-
-    class LogicalXor2(Module):
-        def forward(self, *args):
-            rhs = torch.tensor([1, 0, 3], dtype=torch.int8)
-            if torch.cuda.is_available():
-                rhs = rhs.cuda()
-            return torch.logical_xor(args[0], rhs)
-
-    lhs = torch.tensor([-1, -2, 3], dtype=torch.int8)
-    rhs = torch.tensor([1, 0, 3], dtype=torch.int8)
-    verify_model(LogicalXor1().float().eval(), input_data=[lhs, rhs])
-
-    lhs = torch.tensor([True, True, False])
-    rhs = torch.tensor([False, True, False])
-    verify_model(LogicalXor1().float().eval(), input_data=[lhs, rhs])
-
-    lhs = torch.tensor([-1, -2, 3], dtype=torch.int8)
-    verify_model(LogicalXor2().float().eval(), input_data=[lhs])
-
-
-@tvm.testing.uses_gpu
-def test_forward_unary():
-    """test_forward_unary"""
-    torch.set_grad_enabled(False)
-
-    class Sqrt1(Module):
-        def forward(self, *args):
-            return torch.sqrt(args[0])
-
-    class RSqrt1(Module):
-        def forward(self, *args):
-            return torch.rsqrt(args[0])
-
-    class Ceil1(Module):
-        def forward(self, *args):
-            return torch.ceil(args[0])
-
-    class Floor1(Module):
-        def forward(self, *args):
-            return torch.floor(args[0])
-
-    class Round1(Module):
-        def forward(self, *args):
-            return torch.round(args[0])
-
-    class Cos1(Module):
-        def forward(self, *args):
-            return torch.cos(args[0])
-
-    class Sin1(Module):
-        def forward(self, *args):
-            return torch.sin(args[0])
-
-    class Tan1(Module):
-        def forward(self, *args):
-            return torch.tan(args[0])
-
-    class Tanh1(Module):
-        def forward(self, *args):
-            return torch.tanh(args[0])
-
-    class Acos1(Module):
-        def forward(self, *args):
-            return torch.acos(args[0])
-
-    class Asin1(Module):
-        def forward(self, *args):
-            return torch.asin(args[0])
-
-    class Atan1(Module):
-        def forward(self, *args):
-            return torch.atan(args[0])
-
-    class Log1(Module):
-        def forward(self, *args):
-            return torch.log(args[0])
-
-    class Exp1(Module):
-        def forward(self, *args):
-            return torch.exp(args[0])
-
-    class Erf1(Module):
-        def forward(self, *args):
-            return torch.erf(args[0])
-
-    class Trunc1(Module):
-        def forward(self, *args):
-            return torch.trunc(args[0])
-
-    class Sign1(Module):
-        def forward(self, *args):
-            return torch.sign(args[0])
-
-    class Neg1(Module):
-        def forward(self, *args):
-            return torch.neg(args[0])
-
-    class Sinh1(Module):
-        def forward(self, *args):
-            return torch.sinh(args[0])
-
-    class Cosh1(Module):
-        def forward(self, *args):
-            return torch.cosh(args[0])
-
-    class Log2_1(Module):
-        def forward(self, *args):
-            return torch.log2(args[0])
-
-    class Log10_1(Module):
-        def forward(self, *args):
-            return torch.log10(args[0])
-
-    class Log1p_1(Module):
-        def forward(self, *args):
-            return torch.log1p(args[0])
-
-    class Square(Module):
-        def forward(self, *args):
-            return torch.square(args[0])
-
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(Square().float().eval(), input_data=input_data)
-    verify_model(Sqrt1().float().eval(), input_data=input_data)
-    verify_model(RSqrt1().float().eval(), input_data=input_data)
-    verify_model(Ceil1().float().eval(), input_data=input_data)
-    verify_model(Floor1().float().eval(), input_data=input_data)
-    verify_model(Round1().float().eval(), input_data=input_data)
-    verify_model(Cos1().float().eval(), input_data=input_data)
-    verify_model(Cosh1().float().eval(), input_data=input_data)
-    verify_model(Sin1().float().eval(), input_data=input_data)
-    verify_model(Sinh1().float().eval(), input_data=input_data)
-    verify_model(Tan1().float().eval(), input_data=input_data)
-    verify_model(Tanh1().float().eval(), input_data=input_data)
-    verify_model(Acos1().float().eval(), input_data=input_data)
-    verify_model(Asin1().float().eval(), input_data=input_data)
-    verify_model(Atan1().float().eval(), input_data=input_data)
-    verify_model(Log1().float().eval(), input_data=input_data)
-    verify_model(Log2_1().float().eval(), input_data=input_data)
-    verify_model(Log10_1().float().eval(), input_data=input_data)
-    verify_model(Log1p_1().float().eval(), input_data=input_data)
-    verify_model(Exp1().float().eval(), input_data=input_data)
-    verify_model(Erf1().float().eval(), input_data=input_data)
-    verify_model(Trunc1().float().eval(), input_data=input_data)
-    verify_model(Sign1().float().eval(), input_data=input_data)
-    verify_model(Neg1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_tril():
-    """test_forward_tril"""
-    torch.set_grad_enabled(False)
-
-    def test_func(input_data):
-        return torch.tril(input_data)
-
-    input_data = torch.rand([3, 3]).float()
-    verify_model(test_func, input_data=input_data)
-    input_data = torch.rand([1, 3, 10, 10]).float()
-    verify_model(test_func, input_data=input_data)
-
-    def test_func1(input_data):
-        return torch.tril(input_data, 1)
-
-    input_data = torch.rand([3, 3]).float()
-    verify_model(test_func1, input_data=input_data)
-    input_data = torch.rand([1, 3, 10, 10]).float()
-    verify_model(test_func1, input_data=input_data)
-
-    def test_func2(input_data):
-        return torch.tril(input_data, -1)
-
-    input_data = torch.rand([3, 3]).float()
-    verify_model(test_func2, input_data=input_data)
-    input_data = torch.rand([1, 3, 10, 10]).float()
-    verify_model(test_func2, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_triu():
-    """test_forward_triu"""
-    torch.set_grad_enabled(False)
-
-    def test_func(input_data):
-        return torch.triu(input_data)
-
-    input_data = torch.rand([3, 3]).float()
-    verify_model(test_func, input_data=input_data)
-    input_data = torch.rand([1, 3, 10, 10]).float()
-    verify_model(test_func, input_data=input_data)
-
-    def test_func1(input_data):
-        return torch.triu(input_data, 1)
-
-    input_data = torch.rand([3, 3]).float()
-    verify_model(test_func1, input_data=input_data)
-    input_data = torch.rand([1, 3, 10, 10]).float()
-    verify_model(test_func1, input_data=input_data)
-
-    def test_func2(input_data):
-        return torch.triu(input_data, -1)
-
-    input_data = torch.rand([3, 3]).float()
-    verify_model(test_func2, input_data=input_data)
-    input_data = torch.rand([1, 3, 10, 10]).float()
-    verify_model(test_func2, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_where():
-    """test_forward_where"""
-    torch.set_grad_enabled(False)
-
-    class Where1(Module):
-        def forward(self, *args):
-            y = torch.ones([3, 2])
-            if torch.cuda.is_available():
-                y = y.cuda()
-            return torch.where(args[0] > 0, args[0], y)
-
-    class Where2(Module):
-        def forward(self, *args):
-            return torch.where(args[0] > 0, args[0], args[1])
-
-    class Where3(Module):
-        def forward(self, *args):
-            return torch.where(args[0])[0]
-
-    x = torch.rand([3, 2]).float()
-    verify_model(Where1(), input_data=[x])
-    y = torch.rand([3, 2])
-    verify_model(Where2(), input_data=[x, y])
-
-    # a single argument variant, equivalent to torch.nonzero(..., as_tuple=True)
-    inp = torch.rand([10])
-    inp[3:8] = 0
-    verify_trace_model(Where3(), [inp], ["llvm"])
-
-
-@tvm.testing.uses_gpu
-def test_forward_addcdiv():
-    """test_forward_addcdiv"""
-    torch.set_grad_enabled(False)
-
-    class Addcdiv1(Module):
-        def forward(self, *args):
-            t1 = torch.ones([3, 1])
-            t2 = torch.ones([1, 3])
-            if torch.cuda.is_available():
-                t1 = t1.cuda()
-                t2 = t2.cuda()
-            return torch.addcdiv(args[0], 0.1, t1, t2)
-
-    class Addcdiv2(Module):
-        def forward(self, *args):
-            return torch.addcdiv(args[0], 0.5, args[1], args[2])
-
-    input_data = torch.rand([1, 3]).float()
-    verify_model(Addcdiv1().float().eval(), input_data=input_data)
-    t1 = torch.rand([3, 1]).float()
-    t2 = torch.rand([1, 3]).float()
-    verify_model(Addcdiv2().float().eval(), input_data=[input_data, t1, t2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_addcmul():
-    """test_forward_addcmul"""
-    torch.set_grad_enabled(False)
-
-    class Addcmul1(Module):
-        def forward(self, *args):
-            t1 = torch.ones([3, 1])
-            t2 = torch.ones([1, 3])
-            if torch.cuda.is_available():
-                t1 = t1.cuda()
-                t2 = t2.cuda()
-            return torch.addcmul(args[0], 0.1, t1, t2)
-
-    class Addcmul2(Module):
-        def forward(self, *args):
-            return torch.addcmul(args[0], 0.5, args[1], args[2])
-
-    input_data = torch.rand([1, 3]).float()
-    verify_model(Addcmul1().float().eval(), input_data=input_data)
-    t1 = torch.rand([3, 1]).float()
-    t2 = torch.rand([1, 3]).float()
-    verify_model(Addcmul2().float().eval(), input_data=[input_data, t1, t2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_true_divide():
-    """test_forward_true_divide"""
-    if package_version.parse(torch.__version__) < package_version.parse("1.5.0"):
-        return
-    torch.set_grad_enabled(False)
-
-    class TrueDivide(Module):
-        def forward(self, *args):
-            return torch.true_divide(args[0], args[1])
-
-    dividend = torch.rand([5, 3]).float()
-    # divisor could be either tensor or scalar
-    divisor_tensor = torch.rand([5, 3]).float() + 0.5
-    divisor_scalar = torch.tensor(1.0, dtype=torch.float32)
-    verify_model(
-        TrueDivide().float().eval(), input_data=[dividend, divisor_tensor], atol=1e-4, rtol=1e-4
-    )
-    verify_model(
-        TrueDivide().float().eval(), input_data=[dividend, divisor_scalar], atol=1e-4, rtol=1e-4
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_is_floating_point():
-    """test_forward_is_floating_point"""
-    torch.set_grad_enabled(False)
-
-    class IsFloatingPoint(Module):
-        def forward(self, arg):
-            # `torch.jit.trace` cannot accept something that outputs
-            # a Bool, so `torch.jit.script` will be used instead
-            return torch.is_floating_point(arg)
-
-    targets = _get_default_vm_targets()
-    verify_script_model(IsFloatingPoint(), [(1, 1)], targets, idtype=torch.float64)
-    verify_script_model(IsFloatingPoint(), [(1, 1)], targets, idtype=torch.float32)
-    verify_script_model(IsFloatingPoint(), [(1, 1)], targets, idtype=torch.float16)
-    # todo(dvisnty): Run the test for bfloat16 when full bfloat16 support is implemented
-    # verify_script_model(IsFloatingPoint(), [(1,1)], targets, idtype=torch.bfloat16)
-    verify_script_model(IsFloatingPoint(), [(1, 1)], targets, idtype=torch.int64)
-    verify_script_model(IsFloatingPoint(), [(1, 1)], targets, idtype=torch.int32)
-    verify_script_model(IsFloatingPoint(), [(1, 1)], targets, idtype=torch.int16)
-    verify_script_model(IsFloatingPoint(), [(1, 1)], targets, idtype=torch.int8)
-    verify_script_model(IsFloatingPoint(), [(1, 1)], targets, idtype=torch.uint8)
-
-
-@tvm.testing.uses_gpu
-def test_forward_traced_function():
-    """test_forward_traced_function"""
-
-    def fn(t1, t2):
-        return t1 + t2
-
-    tensor1 = torch.randn(3, 4)
-    tensor2 = torch.randn(3, 4)
-    verify_model(fn, input_data=[tensor1, tensor2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_dtypes():
-    """test_forward_dtypes"""
-
-    def fn(t1, t2):
-        return 2.5 * t1 + t2
-
-    for dt in [torch.int32, torch.int64, torch.double]:
-        tensor1 = torch.randn(3, 4).to(dtype=dt)
-        tensor2 = torch.randn(3, 4).to(dtype=dt)
-        verify_model(fn, input_data=[tensor1, tensor2])
-
-    class ModuleWithIntParameters(Module):
-        def __init__(self, arr):
-            super().__init__()
-            self.param = torch.nn.Parameter(torch.LongTensor(arr), requires_grad=False)
-
-        def forward(self, x):
-            return x.long() + self.param
-
-    shape = (10, 10)
-    param = torch.ones(shape, dtype=torch.long)
-    inp = torch.ones(shape, dtype=torch.int)
-    verify_model(ModuleWithIntParameters(param), input_data=inp)
-
-
-@tvm.testing.uses_gpu
-def test_weight_names():
-    tm = torch.jit.trace(torch.nn.Linear(3, 4), [torch.randn(2, 3)])
-    _, params = relay.frontend.from_pytorch(tm, [("input", (2, 3))])
-    keys = [key.split(".")[-1] for key in params.keys()]
-    assert set(keys) == set(n for n, p in tm.named_parameters())
-
-
-@tvm.testing.uses_gpu
-def test_duplicate_weight_use():
-    """test_duplicate_weight_use"""
-    # The test cases doesn't make any sense as a neural network,
-    # the issue popped up in shared input/output embeddings of bert,
-    # but this is quicker
-    class Test(Module):
-        def __init__(self):
-            super().__init__()
-            self.lin = torch.nn.Linear(5, 3)
-
-        def forward(self, x):
-            x = self.lin(x)
-            x = x @ self.lin.weight
-            return x
-
-    verify_model(Test(), input_data=[torch.randn(5, 5)])
-
-
-@tvm.testing.uses_gpu
-def test_forward_matmul():
-    """test_forward_matmul"""
-    torch.set_grad_enabled(False)
-
-    class MatMul1(Module):
-        def forward(self, *args):
-            return torch.matmul(args[0], args[1])
-
-    # vector x vector - 1D x 1D
-    tensor1 = torch.randn(4)
-    tensor2 = torch.randn(4)
-    verify_model(MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.dense"])
-
-    # vector x matrix - 1D x 2D
-    tensor1 = torch.randn(4)
-    tensor2 = torch.randn(4, 3)
-    verify_model(MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.dense"])
-
-    # vector x batched_matrix - 1D x ND
-    tensor1 = torch.randn(5)
-    tensor2 = torch.randn(2, 3, 5, 4)
-    verify_model(
-        MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.batch_matmul"]
-    )
-
-    # matrix x vector - 2D - 1D
-    tensor1 = torch.randn(3, 4)
-    tensor2 = torch.randn(4)
-    verify_model(MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.dense"])
-
-    # matrix x matrix - 2D x 2D
-    tensor1 = torch.randn(10, 4)
-    tensor2 = torch.randn(4, 10)
-    verify_model(MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.dense"])
-
-    # broadcasted matrix x batched matrix - 2D x ND
-    tensor1 = torch.randn(10, 4)
-    tensor2 = torch.randn(2, 3, 4, 5)
-    verify_model(
-        MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.batch_matmul"]
-    )
-
-    # batched matrix x vector - ND x 1D
-    tensor1 = torch.randn(2, 3, 4, 5)
-    tensor2 = torch.randn(5)
-    verify_model(
-        MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.batch_matmul"]
-    )
-
-    # batched matrix x broadcasted matrix - ND x 2D
-    tensor1 = torch.randn(10, 3, 4)
-    tensor2 = torch.randn(4, 5)
-    verify_model(
-        MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.batch_matmul"]
-    )
-
-    # batched matrix x batched matrix - ND x ND
-    tensor1 = torch.randn(2, 10, 3, 4)
-    tensor2 = torch.randn(2, 10, 4, 5)
-    verify_model(
-        MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.batch_matmul"]
-    )
-
-    # batched matrix x broadcasted matrix - ND x ND
-    tensor1 = torch.randn(2, 5, 3, 4)
-    tensor2 = torch.randn(2, 1, 4, 5)
-    verify_model(
-        MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.batch_matmul"]
-    )
-
-    # broadcasted matrix x batched matrix - ND x ND
-    tensor1 = torch.randn(2, 1, 5, 4)
-    tensor2 = torch.randn(2, 5, 4, 3)
-    verify_model(
-        MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.batch_matmul"]
-    )
-
-    # broadcasted matrix x broadcasted matrix - ND x ND
-    tensor1 = torch.randn(3, 2, 3, 1, 5, 4)
-    tensor2 = torch.randn(2, 1, 5, 4, 3)
-    verify_model(
-        MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.batch_matmul"]
-    )
-
-
-@pytest.mark.skip(reason="unsupported op aten::lift_fresh")
-def test_forward_index():
-    """test_forward_index"""
-    torch.set_grad_enabled(False)
-    input_shape = [3, 4, 5, 6]
-
-    class Index0(Module):
-        def forward(self, x):
-            return x[[0, 1], [0, 2], :2, 4]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Index0().eval(), input_data=input_data)
-
-    class Index1(Module):
-        def forward(self, x):
-            return x[[0], [1, 2, 3, 0], [3, 1, 2, 2], [4, 2, 1, 0]]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Index1().eval(), input_data=input_data)
-
-    class Index2(Module):
-        def forward(self, x):
-            return x[None, [2, 2]]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Index2().eval(), input_data=input_data)
-
-    class Index3(Module):
-        def forward(self, x):
-            return x[None, [0, 1, 2], 1, [2, 3, 4]]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Index3().eval(), input_data=input_data)
-
-    class Index4(Module):
-        def forward(self, x):
-            return x[None, [0, 0], None, np.array([[0], [1], [2]]), None]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Index4().eval(), input_data=input_data)
-
-    class Index5(Module):
-        def forward(self, x):
-            return x[None, None, [0, 0], np.array([[0], [1], [2]]), None]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Index5().eval(), input_data=input_data)
-
-    class Index6(Module):
-        def forward(self, x):
-            return x[None, 1, None, [1, 2, 3]]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Index6().eval(), input_data=input_data)
-
-    def test_fn_bool_mask():
-        return lambda data, mask: data[0, mask]
-
-    data = torch.tensor([[1, 2, 3], [4, 5, 6]])
-    mask = torch.tensor([True, True, False])
-
-    verify_trace_model(test_fn_bool_mask(), [data, mask], ["llvm", "cuda"])
-
-
-def test_logsumexp():
-    """test_logsumexp"""
-
-    class Logsumexp(Module):
-        def __init__(self, dim, keepdim=False):
-            super().__init__()
-            self.dim = dim
-            self.keepdim = keepdim
-
-        def forward(self, x):
-            return torch.logsumexp(x, self.dim, self.keepdim)
-
-    input_shape = (100, 100)
-    input_data = torch.rand(input_shape)
-
-    verify_model(Logsumexp(0), input_data=input_data)
-    verify_model(Logsumexp(0, keepdim=True), input_data=input_data)
-    # Also test on double
-    verify_model(Logsumexp(1, keepdim=True), input_data=input_data.double())
-
-
-def test_stack():
-    """test_stack"""
-
-    class Stack(torch.nn.Module):
-        def __init__(self, axis=0):
-            super().__init__()
-            self.axis = axis
-
-        def forward(self, x):
-            return torch.stack((x, x), dim=self.axis)
-
-    inp = torch.randn(8, 8, 8)
-    verify_model(Stack(), input_data=inp)
-    verify_model(Stack(axis=-1), input_data=inp)
-    verify_model(Stack(axis=3), input_data=inp)
-    verify_model(Stack(axis=-4), input_data=inp)
-
-
-def test_stack_dynamic():
-    """test_stack_dynamic"""
-
-    class Stack(torch.nn.Module):
-        def forward(self, x):
-            tensor_list = []
-            for i in range(x.size(0)):
-                # this is a workaround to avoid generating impure aten::append op
-                tensor_list += [x[i]]
-            # relay tensor array only supports stacking on the first axis
-            return torch.stack(tensor_list, dim=0)
-
-    verify_script_model(Stack(), [(8, 8, 8)], _get_default_vm_targets())
-
-
-def test_forward_unbind():
-    """test_forward_unbind"""
-
-    class Unbind(torch.nn.Module):
-        def __init__(self, axis=0):
-            super().__init__()
-            self.axis = axis
-
-        def forward(self, x):
-            return torch.unbind(x, self.axis)
-
-    inp = torch.randn(8, 8, 8)
-    verify_model(Unbind(0), input_data=inp)
-    verify_model(Unbind(1), input_data=inp)
-    verify_model(Unbind(2), input_data=inp)
-
-
-def test_forward_nonzero():
-    """test_forward_nonzero"""
-
-    class Nonzero(Module):
-        def __init__(self, as_tuple=False):
-            super().__init__()
-            self.as_tuple = as_tuple
-
-        def forward(self, data):
-            return torch.nonzero(data, as_tuple=self.as_tuple)
-
-    inp = torch.Tensor(np.array([[0, 1, 0], [2, 0, 9], [-1, -1, 0]]).astype("float32"))
-    verify_trace_model(Nonzero(), [inp], ["llvm"])
-    verify_trace_model(Nonzero(as_tuple=True), [inp], ["llvm"])
-
-
-def test_forward_scatter():
-    """test_forward_scatter"""
-    # integer cannot be traced
-    def test_fn_scatter(dim):
-        return lambda data, index, src: torch.scatter(data, dim=dim, index=index, src=src)
-
-    def test_fn_scatter_add(dim):
-        return lambda data, index, src: torch.scatter_add(data, dim=dim, index=index, src=src)
-
-    in_data = torch.zeros(3, 5)
-    in_index = torch.tensor([[0, 1, 2, 0, 0], [2, 0, 0, 1, 2]])
-    in_src = torch.rand(2, 5)
-
-    targets = ["llvm", "cuda"]
-    verify_trace_model(test_fn_scatter(0), [in_data, in_index, in_src], targets)
-    verify_trace_model(test_fn_scatter_add(0), [in_data, in_index, in_src], targets)
-
-    in_data = torch.zeros(2, 4)
-    in_index = torch.tensor([[2], [3]])
-    in_src = torch.rand(2, 1)
-
-    verify_trace_model(test_fn_scatter(1), [in_data, in_index, in_src], targets)
-    verify_trace_model(test_fn_scatter_add(1), [in_data, in_index, in_src], targets)
-
-    # Check empty indices
-    in_data = torch.zeros(2, 4)
-    in_index = torch.empty((0,))
-    in_src = torch.rand(2, 1)
-    verify_trace_model(test_fn_scatter(0), [in_data, in_index, in_src], targets)
-    verify_trace_model(test_fn_scatter_add(0), [in_data, in_index, in_src], targets)
-
-    # Check scalar source
-    # TODO(vvchernov): Scalar source is supported on TVM side, but torch failes with
-    # input Tuple(Tensor, Tensor, float). What does scalar mean for torch in this case?
-
-
-def test_forward_scatter_reduce():
-    """test_forward_scatter_reduce"""
-    # integer cannot be traced
-    def test_fn_scatter_reduce(dim, reduce):
-        return lambda data, index, src: torch.scatter_reduce(
-            data, dim=dim, index=index, src=src, reduce=reduce
-        )
-
-    in_data = torch.rand(3, 5) - 1
-    in_index = torch.tensor([[0, 1, 2, 0, 0], [2, 0, 0, 1, 2]])
-    in_src = torch.rand(2, 5) - 1
-
-    targets = ["llvm", "cuda"]
-    for reduce in ["sum", "prod", "amin", "amax", "mean"]:
-        verify_trace_model(test_fn_scatter_reduce(0, reduce), [in_data, in_index, in_src], targets)
-
-    in_data = torch.rand(2, 4) - 1
-    in_index = torch.tensor([[2], [3]])
-    in_src = torch.rand(2, 1) - 1
-
-    for reduce in ["sum", "prod", "amin", "amax", "mean"]:
-        verify_trace_model(test_fn_scatter_reduce(1, reduce), [in_data, in_index, in_src], targets)
-
-
-def test_forward_index_put():
-    """test_forward_index_put"""
-    # torch.index_put for 2D tensor and default accumulate (False)
-    def test_fn_index_put2():
-        return lambda data, xidx, yidx, values: torch.index_put(
-            data, indices=[xidx, yidx], values=values
-        )
-
-    # torch.index_put for 3D tensor and accumulate=True
-    def test_fn_index_put3a():
-        return lambda data, xidx, yidx, zidx, values: torch.index_put(
-            data, indices=[xidx, yidx, zidx], values=values, accumulate=True
-        )
-
-    shape = (3, 5)
-    in_data = torch.zeros(shape)
-    xidx = torch.tensor([0, 1, 2, 2])
-    yidx = torch.tensor([0, 1, 3, 4])
-    values = torch.tensor([2.0, 4.0, 7.0, 9.0])
-
-    targets = ["llvm", "cuda"]
-    verify_trace_model(test_fn_index_put2(), [in_data, xidx, yidx, values], targets)
-
-    shape = (3, 5, 3)
-    in_data = torch.zeros(shape)
-    xidx = torch.tensor([0, 1, 2, 2, 0])
-    yidx = torch.tensor([0, 1, 3, 4, 0])
-    zidx = torch.tensor([0, 1, 1, 2, 0])
-    values = torch.tensor([2.0, 4.0, 7.0, 9.0, 1.0])
-
-    verify_trace_model(test_fn_index_put3a(), [in_data, xidx, yidx, zidx, values], targets)
-
-
-def test_numel():
-    """test_numel"""
-
-    class Numel(Module):
-        def forward(self, data):
-            return torch.tensor(torch.numel(data))
-
-    targets = _get_default_vm_targets()
-    verify_script_model(Numel(), [(1,)], targets)
-    verify_script_model(Numel(), [(3, 5)], targets)
-    verify_script_model(Numel(), [(3, 5, 8)], targets)
-
-
-def test_empty():
-    """Test for aten::empty"""
-
-    def test_func():
-        return torch.empty([1, 3, 10, 10])
-
-    verify_model_with_input(test_func, [], assert_shape_only=True)
-
-
-def test_empty_like():
-    """Test for aten::empty_like"""
-
-    def test_func(data):
-        return torch.empty_like(data)
-
-    verify_model_with_input(test_func, [torch.rand([1, 3, 10, 10]).float()], assert_shape_only=True)
-
-
-@tvm.testing.uses_gpu
-def test_new_empty():
-    """test_forward_new_ones"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    def test_func(input_tensor):
-        return input_tensor.new_empty([3, 10, 10])
-
-    verify_model_with_input(test_func, [torch.rand(input_shape).float()], assert_shape_only=True)
-
-    def test_func1(input_tensor):
-        return input_tensor.new_empty([3, 10, 10], dtype=torch.int32)
-
-    verify_model_with_input(test_func1, [torch.rand(input_shape).float()], assert_shape_only=True)
-
-
-def test_randn():
-    """Test for aten::randn"""
-
-    def test_func():
-        return torch.randn([1, 3, 10, 10])
-
-    verify_model_with_input(test_func, [], assert_shape_only=True, validate_structural_equal=False)
-
-    def test_func1():
-        return torch.randn(1, 3, 10, 10)
-
-    verify_model_with_input(test_func1, [], assert_shape_only=True, validate_structural_equal=False)
-
-
-def test_forward_pretrained_bert_base_uncased():
-    ######################################################################
-    # This is an example how to run BERT models using TVM
-    # ---------------------------------------------------
-    """
-    Refer the bert example given in https://pypi.org/project/pytorch-pretrained-bert
-
-    # To get started, pretrained bert package needs to be installed as prerequisite.
-
-    .. code-block:: bash
-
-        # install bert package
-        pip install pytorch_pretrained_bert==0.6.2 --user
-    """
-    # pylint: disable=import-outside-toplevel
-    try:
-        from pytorch_pretrained_bert import BertForMaskedLM, BertTokenizer
-    except ImportError:
-        print("Torch pretrained bert package must be installed to run this script.")
-        return
-
-    ######################################################################
-    # Load the tokenizer and tokenize the input
-    # -----------------------------------------
-
-    # Load pre-trained model tokenizer (vocabulary)
-    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
-
-    # Tokenized input
-    text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-    tokenized_text = tokenizer.tokenize(text)
-
-    # Mask a token that we will try to predict back with `BertForMaskedLM`
-    masked_index = 8
-    tokenized_text[masked_index] = "[MASK]"
-    assert tokenized_text == [
-        "[CLS]",
-        "who",
-        "was",
-        "jim",
-        "henson",
-        "?",
-        "[SEP]",
-        "jim",
-        "[MASK]",
-        "was",
-        "a",
-        "puppet",
-        "##eer",
-        "[SEP]",
-    ]
-
-    # Convert token to vocabulary indices
-    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-    # Define sentence A and B indices associated to 1st and 2nd sentences (see paper)
-    segments_ids = [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]
-
-    # Convert inputs to PyTorch tensors
-    tokens_tensor = torch.tensor([indexed_tokens])
-    segments_tensors = torch.tensor([segments_ids])
-
-    ######################################################################
-    # Load a pretrained PyTorch model bert-base-uncased
-    # -------------------------------------------------
-
-    # Bert Model with a language modeling
-    model = BertForMaskedLM.from_pretrained("bert-base-uncased")
-    model.eval()
-
-    ######################################################################
-    # Predict all tokens with pytorch
-    # -------------------------------
-
-    with torch.no_grad():
-        torch_preds = model(tokens_tensor, segments_tensors)
-
-    ######################################################################
-    # Make TorchScripted model via jit trace
-    # --------------------------------------
-
-    scripted_model = torch.jit.trace(model, (tokens_tensor, segments_tensors)).eval()
-
-    ######################################################################
-    # Import the graph to Relay
-    # -------------------------
-    # Convert PyTorch graph to Relay graph. The input name can be arbitrary.
-
-    input_1 = "input_ids"
-    input_2 = "input.2"
-    shape_list = [(input_1, list(tokens_tensor.shape)), (input_2, list(segments_tensors.shape))]
-
-    mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
-
-    ######################################################################
-    # Compile the model with relay
-    # ----------------------------
-
-    target = "llvm"
-    with tvm.transform.PassContext(opt_level=3):
-        relay_graph, relay_lib, relay_params = relay.build(mod, target=target, params=params)
-
-    ######################################################################
-    # Execute on TVM
-    # --------------
-
-    dev = tvm.device(target, 0)
-    relay_model = graph_executor.create(relay_graph, relay_lib, dev)
-    relay_model.set_input(**relay_params)
-    relay_model.set_input(input_1, tokens_tensor)
-    relay_model.set_input(input_2, segments_tensors)
-    relay_model.run()
-    compiled_output = relay_model.get_output(0).numpy()
-
-    ######################################################################
-    # Validate the outputs
-    # --------------------
-    # Compare the torch and tvm outputs
-
-    tvm.testing.assert_allclose(torch_preds, compiled_output, rtol=1e-3, atol=1e-3)
-
-    ######################################################################
-    # Process the output
-    # ------------------
-    # Process the model output to token.
-
-    # Torch output to token
-    torch_pred_idx = torch.argmax(torch_preds[0, masked_index]).item()
-    torch_pred_token = tokenizer.convert_ids_to_tokens([torch_pred_idx])[0]
-
-    # TVM output to token
-    tvm_pred_idx = compiled_output[0, masked_index].argmax()
-    tvm_pred_token = tokenizer.convert_ids_to_tokens([tvm_pred_idx])[0]
-
-    assert torch_pred_idx == tvm_pred_idx
-    assert torch_pred_token == tvm_pred_token
-
-    # Print the outputs
-    print(f"Torch top-1 id: {torch_pred_idx}, token: {torch_pred_idx}")
-    print(f"TVM   top-1 id: {tvm_pred_idx}, token: {tvm_pred_token}")
-
-
-@pytest.mark.skipif(
-    platform.machine() == "aarch64",
-    reason="Currently failing on AArch64",
-)
-def test_convert_torch_script_with_input_types():
-    """test_convert_torch_script_with_input_types"""
-
-    def model_fn(x, y):
-        x = x.to(dtype=torch.int32)
-        y = x + y
-        return y
-
-    ishape = (4, 5)
-    input_x = torch.rand(ishape, dtype=torch.float32)
-    input_y = torch.randint(low=0, high=100, size=ishape, dtype=torch.int32)
-    inputs = [input_x, input_y]
-
-    verify_model(model_fn, input_data=inputs)
-
-
-def test_bincount():
-    """test_bincount"""
-
-    def test_fn(x, weights=None):
-        return torch.bincount(x, weights=weights)
-
-    inp = torch.randint(0, 100, (10000,), dtype=torch.int64)
-    weights = torch.linspace(0, 100, steps=10000)
-
-    targets = ["llvm", "cuda"]
-    verify_trace_model(test_fn, [inp], targets)
-    verify_trace_model(test_fn, [inp, weights], targets)
-
-
-def test_hard_swish():
-    """test_hard_swish"""
-    examples = [torch.rand(8).float(), torch.rand(8, 10).float(), torch.rand(1, 1, 10).float()]
-    for input_data in examples:
-        verify_model(torch.nn.Hardswish().eval(), input_data=input_data)
-        verify_model(torch.nn.Hardswish(inplace=True).eval(), input_data=input_data)
-
-
-def test_hard_sigmoid():
-    """test_hard_sigmoid"""
-    examples = [torch.rand(8).float(), torch.rand(8, 10).float(), torch.rand(1, 1, 10).float()]
-    for input_data in examples:
-        verify_model(torch.nn.Hardsigmoid().eval(), input_data=input_data)
-        verify_model(torch.nn.Hardsigmoid(inplace=True).eval(), input_data=input_data)
-
-
-def test_cumsum():
-    """test_cumsum"""
-
-    def test_fn(dim, dtype=None):
-        return lambda x: torch.cumsum(x, dim=dim, dtype=dtype)
-
-    inp = torch.randint(0, 100, (10000,), dtype=torch.int32)
-    verify_model(test_fn(0), [inp])
-    verify_model(test_fn(0), [inp.to(torch.int64)])
-    verify_model(test_fn(0, dtype=torch.int64), [inp.to(torch.int64)])
-
-    inp = torch.randn((100, 100), dtype=torch.float32)
-    verify_model(test_fn(dim=0, dtype=torch.float64), [inp])
-    verify_model(test_fn(dim=1), [inp])
-
-    inp = torch.randn((100, 100), dtype=torch.float32) > 0.5
-    verify_model(test_fn(dim=0, dtype=torch.int32), [inp])
-
-
-def test_masked_fill():
-    """test_transformer"""
-
-    def test_fn(x, mask):
-        return torch.masked_fill(x, mask, 0.0)
-
-    inp = torch.randn(100, 100)
-    verify_model(test_fn, [inp, inp > 0.5])
-    verify_model(test_fn, [inp.to(torch.float64), inp > 0.5])
-
-
-def test_transformer():
-    """test_transformer"""
-    model = torch.nn.Transformer(d_model=256, nhead=8, num_encoder_layers=6, num_decoder_layers=6)
-    model = model.eval()
-    src = torch.rand((10, 32, 256))
-    tgt = torch.rand((20, 32, 256))
-    verify_model(model.eval(), input_data=[src, tgt])
-
-
-def test_argsort():
-    """test_argsort"""
-
-    def test_fn(dim, descending):
-        return lambda x: torch.argsort(x, dim=dim, descending=descending)
-
-    inp = torch.randn(100)
-    verify_model(test_fn(0, True), [inp])
-    verify_model(test_fn(0, False), [inp])
-
-    inp = torch.randn(100, 100)
-    verify_model(test_fn(0, True), [inp])
-    verify_model(test_fn(0, False), [inp])
-    verify_model(test_fn(1, True), [inp])
-    verify_model(test_fn(1, False), [inp])
-
-
-def test_sort():
-    """test_sort"""
-
-    def test_fn(dim, descending):
-        return lambda x: torch.sort(x, dim=dim, descending=descending)
-
-    inp = torch.randn(100)
-    verify_model(test_fn(0, True), [inp])
-    verify_model(test_fn(-1, False), [inp])
-
-    inp = torch.randn(100, 100)
-    verify_model(test_fn(0, True), [inp])
-    verify_model(test_fn(-2, False), [inp])
-    verify_model(test_fn(1, True), [inp])
-    verify_model(test_fn(-1, False), [inp])
-
-
-def test_logical_and():
-    """test_logical_and"""
-
-    def test_fn(x, y):
-        return torch.logical_and(x, y)
-
-    a = torch.tensor([0, 1, 10, 0], dtype=torch.int8)
-    b = torch.tensor([4, 0, 1, 0], dtype=torch.int8)
-    verify_model(test_fn, [a, b])
-
-    a = torch.tensor([True, False, True])
-    b = torch.tensor([True, False, False])
-    verify_model(test_fn, [a, b])
-
-
-def test_logical_or():
-    """test_logical_or"""
-
-    def test_fn(x, y):
-        return torch.logical_or(x, y)
-
-    a = torch.tensor([0, 1, 10, 0], dtype=torch.int8)
-    b = torch.tensor([4, 0, 1, 0], dtype=torch.int8)
-    verify_model(test_fn, [a, b])
-
-    a = torch.tensor([True, False, True])
-    b = torch.tensor([True, False, False])
-    verify_model(test_fn, [a, b])
-
-
-def test_masked_select():
-    """test_masked_select"""
-
-    def test_fn(x, mask):
-        return torch.masked_select(x, mask)
-
-    for shape in [(10,), (3, 4), (16, 32, 64)]:
-        x = torch.randn(*shape)
-        mask = x.ge(0.5)
-        verify_trace_model(test_fn, [x, mask], ["llvm", "cuda"])
-
-
-def test_unique():
-    """test_unique"""
-
-    def test_fn(is_sorted, return_inverse, return_counts):
-        return lambda x: torch.unique(x, is_sorted, return_inverse, return_counts)
-
-    in_data = torch.randint(0, 20, (10,), dtype=torch.int32)
-    targets = ["llvm", "cuda"]
-    verify_trace_model(test_fn(True, True, True), [in_data], targets)
-    verify_trace_model(test_fn(True, False, True), [in_data], targets)
-    verify_trace_model(test_fn(True, True, False), [in_data], targets)
-    verify_trace_model(test_fn(True, False, True), [in_data], targets)
-    in_data = torch.randint(0, 20, (20,), dtype=torch.int64)
-    verify_trace_model(test_fn(True, True, True), [in_data], targets)
-    verify_trace_model(test_fn(True, False, True), [in_data], targets)
-    verify_trace_model(test_fn(True, True, False), [in_data], targets)
-    verify_trace_model(test_fn(True, False, True), [in_data], targets)
-
-
-def test_forward_nll_loss():
-    """test_forward_nll_loss"""
-    torch.set_grad_enabled(False)
-    N, C = 10, 3
-    predictions = torch.rand((N, C)).float()
-    targets = torch.randint(0, 3, (N,))
-    weights = torch.tensor([1, 2, 3]).float()
-    verify_model(torch.nn.NLLLoss().eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.NLLLoss(weight=weights).eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.NLLLoss(ignore_index=1).eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.NLLLoss(reduction="sum").eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.NLLLoss(reduction="none").eval(), input_data=[predictions, targets])
-
-    # multidimension nll loss (aten::nll_loss2d)
-    d1, d2 = 2, 3
-    predictions = torch.rand((N, C, d1, d2)).float()
-    targets = torch.randint(0, 3, (N, d1, d2))
-    verify_model(torch.nn.NLLLoss().eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.NLLLoss(weight=weights).eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.NLLLoss(ignore_index=1).eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.NLLLoss(reduction="sum").eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.NLLLoss(reduction="none").eval(), input_data=[predictions, targets])
-
-
-def test_cross_entropy_loss():
-    """test_cross_entropy_loss"""
-    torch.set_grad_enabled(False)
-    N, C = 10, 3
-    # class indices
-    predictions = torch.rand((N, C)).float()
-    targets = torch.randint(0, 3, (N,))
-    weights = torch.tensor([1, 2, 3]).float()
-    verify_model(torch.nn.CrossEntropyLoss().eval(), input_data=[predictions, targets])
-    verify_model(
-        torch.nn.CrossEntropyLoss(weight=weights).eval(), input_data=[predictions, targets]
-    )
-
-    # class probabilities
-    predictions = torch.randn(N, C).float()
-    targets = torch.randn(N, C)
-    verify_model(torch.nn.CrossEntropyLoss().eval(), input_data=[predictions, targets])
-
-
-def test_forward_l1_loss():
-    """test_forward_l1_loss"""
-    torch.set_grad_enabled(False)
-    N, C = 10, 3
-    predictions = torch.rand((N, C)).float()
-    targets = torch.rand((N, C)).float()
-    verify_model(torch.nn.L1Loss().eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.L1Loss(reduction="sum").eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.L1Loss(reduction="none").eval(), input_data=[predictions, targets])
-
-    # multidimension l1 loss
-    d1, d2 = 2, 3
-    predictions = torch.rand((N, C, d1, d2)).float()
-    targets = torch.rand((N, C, d1, d2)).float()
-    verify_model(torch.nn.L1Loss().eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.L1Loss(reduction="sum").eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.L1Loss(reduction="none").eval(), input_data=[predictions, targets])
-
-
-def test_forward_mse_loss():
-    """test_forward_mse_loss"""
-    torch.set_grad_enabled(False)
-    N, C = 10, 3
-    predictions = torch.rand((N, C)).float()
-    targets = torch.rand((N, C)).float()
-    verify_model(torch.nn.MSELoss().eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.MSELoss(reduction="sum").eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.MSELoss(reduction="none").eval(), input_data=[predictions, targets])
-
-    # multidimension mse loss
-    d1, d2 = 2, 3
-    predictions = torch.rand((N, C, d1, d2)).float()
-    targets = torch.rand((N, C, d1, d2)).float()
-    verify_model(torch.nn.MSELoss().eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.MSELoss(reduction="sum").eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.MSELoss(reduction="none").eval(), input_data=[predictions, targets])
-
-
-@tvm.testing.uses_gpu
-def test_forward_flip():
-    """Test for aten::flip"""
-    torch.set_grad_enabled(False)
-
-    class Flip(Module):
-        def __init__(self, axis=0):
-            super().__init__()
-            self.axis = axis
-
-        def forward(self, x):
-            return x.flip(self.axis)
-
-    input_t = torch.randn(2, 3, 4)
-    verify_model(Flip(axis=[0]), input_data=input_t)
-    verify_model(Flip(axis=[1]), input_data=input_t)
-    verify_model(Flip(axis=[2]), input_data=input_t)
-    verify_model(Flip(axis=[-1]), input_data=input_t)
-    verify_model(Flip(axis=[0, 1]), input_data=input_t)
-
-
-def test_annotate_span():
-    """test_annotate_span"""
-    model = torchvision.models.resnet18().eval()
-    inp = torch.randn([1, 3, 224, 224])
-    trace = torch.jit.trace(model, inp).eval()
-    mod, _ = relay.frontend.from_pytorch(
-        trace, [("input", inp.shape)], use_parser_friendly_name=True
-    )
-    relay.transform.AnnotateSpans()(mod)
-
-
-@tvm.testing.uses_gpu
-def test_all_any():
-    """test_all_any"""
-
-    def test_fn(f, dim=None, keepdim=False):
-        return lambda x: f(x, dim=dim, keepdim=keepdim)
-
-    def test_fn_no_arg(f):
-        return lambda x: f(x)  # pylint: disable=unnecessary-lambda
-
-    for f in [torch.all, torch.any]:
-        verify_model(test_fn(f, 0), [torch.rand(1, 2).bool()])
-        verify_model(test_fn(f, 0), [torch.arange(0, 3).to(torch.uint8)])
-        verify_model(test_fn(f, 1), [torch.rand(4, 2).bool()])
-        verify_model(test_fn(f, 0, keepdim=True), [torch.rand(4, 2).bool()])
-        verify_model(test_fn_no_arg(f), [torch.rand(1, 2).bool()])
-        verify_model(test_fn_no_arg(f), [torch.arange(0, 3).to(torch.uint8)])
-
-
-@tvm.testing.uses_gpu
-def test_searchsorted():
-    """test_searchsorted"""
-
-    def test_fn(out_int32=False, right=False):
-        return lambda x, y: torch.searchsorted(x, y, out_int32=out_int32, right=right)
-
-    sorted_sequence = torch.tensor([[1, 3, 5, 7, 9], [2, 4, 6, 8, 10]])
-    values = torch.tensor([[3, 6, 9], [3, 6, 9]])
-    verify_model(test_fn(), [sorted_sequence, values])
-    verify_model(test_fn(out_int32=True), [sorted_sequence[0], values[0]])
-    verify_model(test_fn(right=True), [sorted_sequence, values])
-
-    sorted_sequence_1d = torch.tensor([1, 3, 5, 7, 9])
-    values = torch.tensor([[3, 6, 9], [4, 2, 7]])
-    verify_model(test_fn(), [sorted_sequence_1d, values])
-
-    verify_model(test_fn(), [sorted_sequence_1d, torch.tensor(6)])
-
-
-@tvm.testing.uses_gpu
-def test_bucketize():
-    """test_bucketize"""
-
-    def test_fn(out_int32=False, right=False):
-        return lambda x, y: torch.bucketize(x, y, out_int32=out_int32, right=right)
-
-    boundaries = torch.tensor([1, 3, 5, 7, 9])
-    values = torch.tensor([3, 6, 9])
-
-    verify_model(test_fn(), [values, boundaries])
-    verify_model(test_fn(out_int32=True, right=True), [values, boundaries])
-
-
-@tvm.testing.uses_gpu
-def test_roll():
-    """Test for aten::roll"""
-
-    def test_fn(shifts, dims):
-        return lambda x: torch.roll(x, shifts, dims)
-
-    x = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8]).view(4, 2)
-    verify_model(test_fn(1, 0), [x])
-    verify_model(test_fn(-1, 0), [x])
-    verify_model(test_fn(shifts=(2, 1), dims=(0, 1)), [x])
-
-
-@tvm.testing.uses_gpu
-def test_einsum():
-    """test_einsum"""
-
-    def test_fn(equation):
-        return lambda *x: torch.einsum(equation, *x)
-
-    x = torch.ones([2, 3])
-    y = torch.ones([3, 4])
-    z = torch.ones([4, 5])
-    verify_model(test_fn("ij,jk"), [x, y])
-    verify_model(test_fn("ij,jk,km->im"), [x, y, z])
-
-
-def test_stft():
-    """test_stft"""
-
-    def test_fn(n_fft, hop_length, win_length, center, pad_mode, normalized, onesided):
-        return lambda input, window=None: torch.stft(
-            input=input,
-            n_fft=n_fft,
-            hop_length=hop_length,
-            win_length=win_length,
-            window=window,
-            center=center,
-            pad_mode=pad_mode,
-            normalized=normalized,
-            onesided=onesided,
-            return_complex=False,
-        )
-
-    input_t = torch.rand([1, 12]).float()
-    window = torch.tensor([2, 3, 4], dtype=torch.int32)
-    targets = ["llvm", "cuda"]
-    verify_trace_model(test_fn(3, 3, 3, False, "constant", False, True), [input_t, window], targets)
-    verify_trace_model(test_fn(3, 3, 3, True, "constant", False, True), [input_t, window], targets)
-    verify_trace_model(test_fn(3, 3, 3, False, "reflect", False, True), [input_t, window], targets)
-    verify_trace_model(test_fn(3, 3, 3, True, "reflect", False, True), [input_t, window], targets)
-    verify_trace_model(test_fn(3, 3, 3, True, "reflect", True, True), [input_t, window], targets)
-    verify_trace_model(test_fn(3, 3, 3, True, "reflect", False, False), [input_t, window], targets)
-    input_t = torch.rand([2, 12]).float()
-    window = torch.tensor([2, 3, 4], dtype=torch.int32)
-    verify_trace_model(test_fn(3, 3, 3, False, "reflect", False, True), [input_t, window], targets)
-    window = torch.tensor([1, 3], dtype=torch.int32)
-    verify_trace_model(test_fn(2, 1, 2, False, "reflect", False, True), [input_t, window], targets)
-    verify_trace_model(test_fn(2, 1, 2, False, "reflect", False, True), [input_t], targets)
-
-
-@tvm.testing.uses_gpu
-def test_dot():
-    """Test for aten::dot"""
-
-    def test_fn(x):
-        return x.dot(x)
-
-    x = torch.randn([4])
-    verify_model(test_fn, [x])
-
-
-@tvm.testing.uses_gpu
-def test_mv():
-    """Test for aten::mv"""
-
-    def test_fn(m, v):
-        return m.mv(v)
-
-    verify_model(test_fn, [torch.randn(4, 4), torch.randn(4)])
-    verify_model(test_fn, [torch.randn(2, 2), torch.randn(2)])
-    verify_model(test_fn, [torch.randn(3, 8), torch.randn(8)])
-
-
-def test_grid_sample():
-    """test_grid_sample"""
-
-    class Grid_sample(Module):
-        def __init__(self, method, padding_mode, align_corners):
-            super().__init__()
-            self._method = method
-            self._padding_mode = padding_mode
-            self._align_corners = align_corners
-
-        def forward(self, x, y):
-            return torch.nn.functional.grid_sample(
-                input=x,
-                grid=y,
-                mode=self._method,
-                padding_mode=self._padding_mode,
-                align_corners=self._align_corners,
-            )
-
-    methods = ["nearest", "bilinear", "bicubic"]
-    padding_modes = ["zeros", "border", "reflection"]
-    align_corners = [True, False]
-
-    data_2D = torch.rand([4, 4, 8, 8]).float()
-    grid_2D = torch.rand([4, 16, 16, 2]).float()
-    # choosing smaller sizes to be testable on weaker GPUs
-    data_3D = torch.rand([4, 4, 4, 4, 4]).float()
-    grid_3D = torch.rand([4, 8, 8, 8, 3]).float()
-
-    for _method in methods:
-        # bicubic was introduced when pytorch > 1.7.1
-        torch_version = package_version.parse(torch.__version__)
-        if _method == "bicubic" and torch_version <= package_version.parse("1.7.1"):
-            continue
-        for _padding in padding_modes:
-            for _align in align_corners:
-                # ATTENTION:
-                #   "nearest" + "reflection" result may be different with pytorch on cpu device,
-                #   because pytorch's cpu result is different with gpu result,
-                #   and gpu result used here as baseline in tvm topi.image.grid_sample.
-                model = Grid_sample(_method, _padding, _align)
-                verify_model(model, input_data=[data_2D, grid_2D])
-
-                # 3D "bicubic"(tricubic) is not supported in pytorch
-                if _method != "bicubic":
-                    verify_model(model, input_data=[data_3D, grid_3D])
-
-
-def test_list_tuple():
-    """test compilation error for a Python list followed by a prim::TupleConstruct."""
-
-    class List_tuple(Module):
-        """List_tuple"""
-
-        def forward(self, x):
-            """forward"""
-            merged = []
-            mask_list = []
-            for i in range(3):
-                w0 = torch.sigmoid(x)
-                merged.append((w0, w0))
-                mask_list.append(x)
-
-            for i in range(3):
-                merged[i] = merged[i][0] + merged[i][1]
-            return mask_list[2], merged
-
-    x = torch.rand([4, 4, 16, 32]).float()
-    script_module = torch.jit.trace(List_tuple(), x, strict=False).eval()
-    relay.frontend.from_pytorch(script_module, [("x", x.shape)])
-
-
-# pylint: disable=unnecessary-dunder-call
-@tvm.testing.uses_gpu
-def test_binary_bitwise():
-    """Test for binary bitwise"""
-
-    def test_ior(x, y):
-        return x.__ior__(y)
-
-    def test_iand(x, y):
-        return x.__iand__(y)
-
-    def test_ixor(x, y):
-        return x.__ixor__(y)
-
-    x = torch.tensor([7, 49, 16, 1, 2, 3], dtype=torch.uint8)
-    y = torch.tensor([39, 128, 99, 228, 63, 17], dtype=torch.uint8)
-
-    for test_fn in [test_ior, test_iand, test_ixor]:
-        verify_model(test_fn, [x, y])
-
-
-@tvm.testing.uses_gpu
-def test_shift():
-    """Test for aten::__lshift__, aten::__rshift__"""
-
-    def test_lshift(x, y):
-        return x << y
-
-    def test_rshift(x, y):
-        return x >> y
-
-    x = torch.tensor([39, 128, 99, 228, 63, 17], dtype=torch.int32)
-    y = torch.tensor([3, 2, 7, 4, 5, 9], dtype=torch.int32)
-
-    for test_fn in [test_lshift, test_rshift]:
-        verify_model(test_fn, [x, y])
-
-
-@tvm.testing.uses_gpu
-def test_mod():
-    """Test for aten::fmod"""
-
-    def test_fmod(x, y):
-        return torch.fmod(x, y)
-
-    def test_remainder(x, y):
-        return torch.remainder(x, y)
-
-    for test_fn in [test_fmod, test_remainder]:
-        verify_model(test_fn, [torch.tensor([-3.0, -2, -1, 1, 2, 3]), torch.tensor(2)])
-        verify_model(test_fn, [torch.tensor([1, 2, 3, 4, 5]), torch.tensor(-1.5)])
-
-
-def test_softmax_fuse():
-    """test_softmax_fuse"""
-    # https://github.com/apache/tvm/issues/12001
-    class Model(torch.nn.Module):
-        """Pytorch model module"""
-
-        def __init__(self, nchwc_post_op=False) -> None:
-            super().__init__()
-            self.conv = torch.nn.Conv2d(3, 3, (1, 1), 1)
-            self.nchwc_post_op = nchwc_post_op
-
-        @torch.no_grad()
-        def forward(self, x):
-            """forward"""
-            t0a = self.conv(x)
-            t0b = torch.floor(x)
-            t2b = torch.softmax(t0a, dim=2)
-
-            if self.nchwc_post_op:
-                t3a = t0a - t0b
-                t4a = t2b - t0b
-                t6a = t3a + t4a
-                return t6a
-
-            return t2b + 1
-
-    sh = [3, 3, 10, 1]
-    inp = torch.ones(*sh, dtype=torch.float32)
-
-    for model in [Model(nchwc_post_op=False).eval(), Model(nchwc_post_op=True).eval()]:
-        output_torch = model(inp).numpy()
-
-        mod, params = relay.frontend.from_pytorch(torch.jit.trace(model, inp), [("inp0", sh)])
-
-        with tvm.transform.PassContext(opt_level=4):
-            out = (
-                relay.create_executor("graph", mod, params=params)
-                .evaluate()(inp0=inp.numpy())
-                .numpy()
-            )
-
-        tvm.testing.assert_allclose(out, output_torch, rtol=1e-5, atol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_lerp():
-    """test_lerp"""
-
-    def test_fn(x, y, w):
-        return torch.lerp(x, y, w)
-
-    input_shape = [16]
-    x = torch.rand(input_shape).float()
-    y = torch.rand(input_shape).float()
-    w = torch.rand(input_shape).float()
-
-    # weight can be tensor or scalar
-    verify_model(test_fn, [x, y, w])
-    verify_model(test_fn, [x, y, w[0]])
-
-
-def test_trilu():
-    def _test_trilu(op, diagonal):
-        return lambda inp: op(inp, diagonal)
-
-    for op in [torch.triu, torch.tril]:
-        verify_model(_test_trilu(op, 0), [torch.rand(size=[3, 3])])
-        verify_model(_test_trilu(op, 1), [torch.rand(size=[6, 6])])
-        verify_model(_test_trilu(op, -2), [torch.rand(size=[6, 6])])
-
-
-def test_multinomial():
-    """test_multinomial"""
-
-    def _test_multinomial(num_samples):
-        return lambda inp: torch.multinomial(inp, num_samples=num_samples, replacement=True)
-
-    # Dont check output since it's random. Instead we'll just make sure shapes are right.
-    verify_model(
-        _test_multinomial(2),
-        [torch.rand(size=[3]).float()],
-        cpu_only=True,
-        check_correctness=False,
-        validate_structural_equal=False,
-    )
-    verify_model(
-        _test_multinomial(1),
-        [torch.rand(size=[4, 5]).float()],
-        cpu_only=True,
-        check_correctness=False,
-        validate_structural_equal=False,
-    )
-
-
-def test_weight_norm():
-    """Test for atten::_weight_norm"""
-    in_channels = 32
-    out_channels = 64
-    input_data_conv = torch.rand((1, in_channels, 32, 32)).float()
-
-    conv_wn = torch.nn.utils.weight_norm(torch.nn.Conv2d(in_channels, out_channels, kernel_size=3))
-    verify_model(conv_wn.eval().float(), input_data_conv)
-
-    conv_wn_groups = torch.nn.utils.weight_norm(
-        torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, groups=2)
-    )
-    verify_model(conv_wn_groups.eval().float(), input_data_conv)
-
-    conv_wn = torch.nn.utils.weight_norm(
-        torch.nn.Conv2d(in_channels, out_channels, kernel_size=3), dim=1
-    )
-    verify_model(conv_wn.eval().float(), input_data_conv)
-
-    linear_wn = torch.nn.utils.weight_norm(torch.nn.Linear(in_channels, out_channels))
-    input_data_linear = torch.rand((128, in_channels)).float()
-    verify_model(linear_wn.eval().float(), input_data_linear)
-
-
-@tvm.testing.uses_gpu
-def test_addmm():
-    def test_fn(alpha, beta):
-        return lambda inp, batch1, batch2: torch.addmm(inp, batch1, batch2, beta=beta, alpha=alpha)
-
-    M = torch.randn(3, 5)
-    batch1 = torch.randn(3, 4)
-    batch2 = torch.randn(4, 5)
-
-    verify_model(test_fn(0.4, 0.8), [M, batch1, batch2])
-
-
-@tvm.testing.uses_gpu
-def test_baddbmm():
-    def test_fn(alpha, beta):
-        return lambda inp, batch1, batch2: torch.baddbmm(
-            inp, batch1, batch2, beta=beta, alpha=alpha
-        )
-
-    M = torch.randn(10, 3, 5)
-    batch1 = torch.randn(10, 3, 4)
-    batch2 = torch.randn(10, 4, 5)
-
-    verify_model(test_fn(0.5, 1.0), [M, batch1, batch2])
-
-
-def test_exporting_renamed_c_graph():
-    """test exproting model when export_renamed_model is set"""
-
-    # model definition
-    class Conv2D(Module):
-        def __init__(self):
-            super(Conv2D, self).__init__()
-            self.conv = torch.nn.Conv2d(3, 6, 3, bias=True)
-
-        def forward(self, *args):
-            return self.conv(args[0])
-
-    input_name, input_shape = "input", [1, 3, 10, 10]
-    shape_list = [(input_name, input_shape)]
-    temp_dir = utils.tempdir().path
-    script_module = torch.jit.trace(Conv2D(), [torch.rand(input_shape)])
-    _, _ = relay.frontend.from_pytorch(
-        script_module, shape_list, export_renamed_c_graph_path=temp_dir
-    )
-
-    exported_c_graph_name = os.listdir(temp_dir)[0]
-    assert "tvm_exported_c_graph_" in exported_c_graph_name
-
-    # make sure the renamed output variable presents in the restored _C.Graph
-    with open(f"{temp_dir}/{exported_c_graph_name}", "r") as f:
-        graph = f.read()
-        assert "%aten::_convolution_0" in graph
-
-
-def test_inplace_copy():
-    class SimpleInplaceCopy(torch.nn.Module):
-        def forward(self, x):
-            x[:5, 0, 5:] = x[:5, 0, 5:] + 1
-            return x
-
-    class NegativeSliceInplaceCopy(torch.nn.Module):
-        def forward(self, x):
-            x[5:-1, -1, :] = x[5:-1, -1, :] + 1
-            return x
-
-    class PartialDimensionInplaceCopy(torch.nn.Module):
-        def forward(self, x):
-            x[:5] = x[:5] + 1
-            x[0:5, ...] = x[0:5, ...] + 1
-            x[0:5, ..., -1] = x[0:5, ..., -1] + 1
-            return x
-
-    inputs = torch.randn(10, 10, 10)
-    verify_model(SimpleInplaceCopy(), [inputs])
-    inputs = torch.randn(10, 10, 10)
-    verify_model(NegativeSliceInplaceCopy(), [inputs])
-    inputs = torch.randn(10, 10, 10)
-    verify_model(PartialDimensionInplaceCopy(), [inputs])
-
-
-@tvm.testing.uses_gpu
-def test_swapaxes():
-    """test_swapaxes"""
-    torch.set_grad_enabled(False)
-    input_shape = [2, 3, 10, 5]
-
-    class Swapaxes1(Module):
-        def forward(self, *args):
-            return args[0].swapaxes(2, 3)
-
-    class Swapaxes2(Module):
-        def forward(self, *args):
-            return args[0].swapaxes(-2, -1)
-
-    class Swapaxes3(Module):
-        def forward(self, *args):
-            return args[0].swapaxes(1, 1)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Swapaxes1().float().eval(), input_data=input_data)
-    verify_model(Swapaxes2().float().eval(), input_data=input_data)
-    verify_model(Swapaxes3().float().eval(), input_data=input_data)
-
-
-def test_linalg_vector_norm():
-    """test_linalg_vector_norm"""
-    torch.set_grad_enabled(False)
-
-    def test_fn(order):
-        return lambda x: torch.linalg.vector_norm(x, ord=order)
-
-    input_shape = [3, 3]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(test_fn(order=2), input_data=input_data)
-    verify_model(test_fn(order=3.5), input_data=input_data)
-    verify_model(test_fn(order=np.inf), input_data=input_data)
-    verify_model(test_fn(order=-np.inf), input_data=input_data)
-    verify_model(test_fn(order=0), input_data=input_data)
-
-    # Also test on double
-    input_data = torch.rand(input_shape).double()
-    verify_model(test_fn(order=2), input_data=input_data)
-    verify_model(test_fn(order=3.5), input_data=input_data)
-    verify_model(test_fn(order=np.inf), input_data=input_data)
-    verify_model(test_fn(order=-np.inf), input_data=input_data)
-    verify_model(test_fn(order=0), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_scaled_dot_product_attention():
-    """test_scaled_dot_product_attention"""
-    torch.set_grad_enabled(False)
-
-    def test_fn(attn_mask=None, is_causal=False):
-        return lambda query, key, value: torch.nn.functional.scaled_dot_product_attention(
-            query, key, value, attn_mask=attn_mask, is_causal=is_causal
-        )
-
-    L, S, E, Ev = 5, 7, 11, 13
-    query_4d = torch.randn(2, 3, L, E)
-    query_3d = torch.randn(3, L, E)
-    key_4d = torch.randn(2, 3, S, E)
-    key_3d = torch.randn(3, S, E)
-    value_4d = torch.randn(2, 3, S, Ev)
-    value_3d = torch.randn(3, S, Ev)
-
-    verify_model(test_fn(), [query_4d, key_4d, value_4d])
-    verify_model(test_fn(), [query_4d, key_4d, value_3d])
-    verify_model(test_fn(), [query_4d, key_3d, value_4d])
-    verify_model(test_fn(), [query_4d, key_3d, value_3d])
-    verify_model(test_fn(), [query_3d, key_4d, value_4d])
-    verify_model(test_fn(), [query_3d, key_4d, value_3d])
-    verify_model(test_fn(), [query_3d, key_3d, value_4d])
-    verify_model(test_fn(), [query_3d, key_3d, value_3d])
-
-    verify_model(test_fn(is_causal=True), [query_4d, key_4d, value_4d])
-    verify_model(test_fn(is_causal=True), [query_4d, key_4d, value_3d])
-    verify_model(test_fn(is_causal=True), [query_4d, key_3d, value_4d])
-    verify_model(test_fn(is_causal=True), [query_4d, key_3d, value_3d])
-    verify_model(test_fn(is_causal=True), [query_3d, key_4d, value_4d])
-    verify_model(test_fn(is_causal=True), [query_3d, key_4d, value_3d])
-    verify_model(test_fn(is_causal=True), [query_3d, key_3d, value_4d])
-    verify_model(test_fn(is_causal=True), [query_3d, key_3d, value_3d])
-
-    # Test with explicit attn_mask
-    attn_mask = torch.ones((L, S), dtype=torch.bool).tril(diagonal=0)
-    if torch.cuda.is_available():
-        attn_mask = attn_mask.cuda()
-    verify_model(test_fn(attn_mask=attn_mask), [query_4d, key_4d, value_4d])
-    verify_model(test_fn(attn_mask=attn_mask), [query_4d, key_4d, value_3d])
-    verify_model(test_fn(attn_mask=attn_mask), [query_4d, key_3d, value_4d])
-    verify_model(test_fn(attn_mask=attn_mask), [query_4d, key_3d, value_3d])
-    verify_model(test_fn(attn_mask=attn_mask), [query_3d, key_4d, value_4d])
-    verify_model(test_fn(attn_mask=attn_mask), [query_3d, key_4d, value_3d])
-    verify_model(test_fn(attn_mask=attn_mask), [query_3d, key_3d, value_4d])
-    verify_model(test_fn(attn_mask=attn_mask), [query_3d, key_3d, value_3d])
-
-    # Test with float64
-    query_4d = torch.randn(2, 3, L, E, dtype=torch.float64)
-    query_3d = torch.randn(3, L, E, dtype=torch.float64)
-    key_4d = torch.randn(2, 3, S, E, dtype=torch.float64)
-    key_3d = torch.randn(3, S, E, dtype=torch.float64)
-    value_4d = torch.randn(2, 3, S, Ev, dtype=torch.float64)
-    value_3d = torch.randn(3, S, Ev, dtype=torch.float64)
-    verify_model(test_fn(), [query_4d, key_4d, value_4d])
-    verify_model(test_fn(), [query_4d, key_4d, value_3d])
-    verify_model(test_fn(), [query_4d, key_3d, value_4d])
-    verify_model(test_fn(), [query_4d, key_3d, value_3d])
-    verify_model(test_fn(), [query_3d, key_4d, value_4d])
-    verify_model(test_fn(), [query_3d, key_4d, value_3d])
-    verify_model(test_fn(), [query_3d, key_3d, value_4d])
-    verify_model(test_fn(), [query_3d, key_3d, value_3d])
-
-    # Test with larger tensors
-    L, S, E, Ev = 128, 128, 64, 64
-    query_4d = torch.randn(32, 8, L, E)
-    query_3d = torch.randn(8, L, E)
-    key_4d = torch.randn(32, 8, S, E)
-    key_3d = torch.randn(8, S, E)
-    value_4d = torch.randn(32, 8, S, Ev)
-    value_3d = torch.randn(8, S, Ev)
-    verify_model(test_fn(), [query_4d, key_4d, value_4d])
-    verify_model(test_fn(), [query_4d, key_4d, value_3d])
-    verify_model(test_fn(), [query_4d, key_3d, value_4d])
-    verify_model(test_fn(), [query_4d, key_3d, value_3d])
-    verify_model(test_fn(), [query_3d, key_4d, value_4d])
-    verify_model(test_fn(), [query_3d, key_4d, value_3d])
-    verify_model(test_fn(), [query_3d, key_3d, value_4d])
-    verify_model(test_fn(), [query_3d, key_3d, value_3d])
-
-
-def test_parameterlist():
-    """test_parameterlist"""
-    torch.set_grad_enabled(False)
-
-    class ParamListModel(torch.nn.Module):
-        def __init__(self, num_layer=2):
-            super().__init__()
-            self.biases = torch.nn.ParameterList([torch.randn(10)] * num_layer)
-            self.weights = torch.nn.ParameterList([torch.randn(10, 10)] * num_layer)
-
-        def forward(self, x):
-            for i in range(len(self.weights) - 1):
-                x = torch.addmm(self.biases[i], x, self.weights[i])
-            return torch.addmm(self.biases[-1], x, self.weights[-1])
-
-    input_data = torch.randn(20, 10)
-    verify_model(ParamListModel().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_tile():
-    """test_forward_repeat"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3]
-
-    class Tile1(Module):
-        def forward(self, *args):
-            return args[0].tile(1, 1)
-
-    class Tile2(Module):
-        def forward(self, *args):
-            return args[0].tile(4, 2)
-
-    class Tile3(Module):
-        def forward(self, *args):
-            return args[0].tile(4, 2, 1)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Tile1().float().eval(), input_data=input_data)
-    verify_model(Tile2().float().eval(), input_data=input_data)
-    verify_model(Tile3().float().eval(), input_data=input_data)
-
-
-class TestSetSpan:
-    """test structural equal between translated / hand-crafted relay IR with span tagged."""
-
-    def _verify(self, res_fptr, golden_fptr):
-        with tvm.testing.enable_span_filling():
-            with_span = res_fptr()
-        with tvm.testing.disable_span_filling():
-            without_span = res_fptr()
-        tvm.ir.assert_structural_equal(with_span, without_span)
-        _verify_structural_equal_with_span(with_span, golden_fptr())
-
-    def test_conv2d_bias_add(self):
-        ker_sz, in_chs, out_chs = 7, 3, 6
-        input_shape = [1, 3, 10, 10]
-
-        def _res():
-            # model definition
-            class Conv2D(Module):
-                def __init__(self):
-                    super(Conv2D, self).__init__()
-                    self.conv = torch.nn.Conv2d(in_chs, out_chs, ker_sz, bias=True)
-
-                def forward(self, *args):
-                    return self.conv(args[0])
-
-            # get frontend model
-            mod = gen_ir_module(Conv2D(), [torch.rand(input_shape)])
-            return mod["main"]
-
-        def _golden():
-            conv_si = "aten::_convolution_0"
-            input_name = "input0"
-            input_0 = relay.var(
-                input_name,
-                shape=tuple(input_shape),
-                span=_create_span(f"{conv_si}.{input_name}"),
-            )
-            weight_name = f"{conv_si}.weight"
-            conv_weight = relay.var(
-                weight_name,
-                shape=(out_chs, in_chs, ker_sz, ker_sz),
-                span=_create_span(weight_name),
-            )
-            bias_name = f"{conv_si}.bias"
-            conv_bias = relay.var(
-                bias_name,
-                shape=(out_chs,),
-                span=_create_span(bias_name),
-            )
-            conv_out = _set_span(
-                relay.nn.conv2d(
-                    input_0,
-                    conv_weight,
-                    padding=[0] * 4,
-                    channels=out_chs,
-                    kernel_size=[ker_sz] * 2,
-                ),
-                conv_si,
-            )
-            bias_out = _set_span(relay.nn.bias_add(conv_out, conv_bias), conv_si)
-            return relay.Function([input_0, conv_weight, conv_bias], bias_out)
-
-        self._verify(_res, _golden)
-
-    def test_batchnorm_span(self):
-        features = 16
-        input_shape = [1, 16, 10, 10]
-
-        def _res():
-            # model definition
-            bn_2d = torch.nn.BatchNorm2d(features)
-
-            # get frontend model
-            mod = gen_ir_module(bn_2d, [torch.rand(input_shape)])
-            return mod["main"]
-
-        def _golden():
-            bn_si = "aten::batch_norm_0"
-            input_name = "input0"
-            input_0 = relay.var(
-                input_name,
-                shape=tuple(input_shape),
-                span=_create_span(f"{bn_si}.{input_name}"),
-            )
-            weight_name = f"{bn_si}.weight"
-            bn_weight = relay.var(
-                weight_name,
-                shape=(features,),
-                span=_create_span(weight_name),
-            )
-            bias_name = f"{bn_si}.bias"
-            bn_bias = relay.var(
-                bias_name,
-                shape=(features,),
-                span=_create_span(bias_name),
-            )
-            rm_name = f"{bn_si}.running_mean"
-            bn_rm = relay.var(
-                rm_name,
-                shape=(features,),
-                span=_create_span(rm_name),
-            )
-            rv_name = f"{bn_si}.running_var"
-            bn_rv = relay.var(
-                rv_name,
-                shape=(features,),
-                span=_create_span(rv_name),
-            )
-            bn_out = _set_span(
-                relay.nn.batch_norm(input_0, bn_weight, bn_bias, bn_rm, bn_rv),
-                bn_si,
-            )
-            bn_tuple_get_item = _set_span(relay.TupleGetItem(bn_out.tuple_value, 0), bn_si)
-            return relay.Function([input_0, bn_weight, bn_bias, bn_rm, bn_rv], bn_tuple_get_item)
-
-        self._verify(_res, _golden)
-
-    def test_reshape_span(self):
-        input_shape = [2, 1, 10, 1, 10]
-        new_shape = [2, 1, 10, 10]
-
-        def _res():
-            # model definition
-            class Reshape(Module):
-                def forward(self, *args):
-                    return args[0].reshape(new_shape)
-
-            # get frontend model
-            mod = gen_ir_module(Reshape(), [torch.rand(input_shape)])
-            return mod["main"]
-
-        def _golden():
-            reshape_si = "aten::reshape_0"
-            input_name = "input0"
-            input_0 = relay.var(
-                input_name,
-                shape=tuple(input_shape),
-                span=_create_span(f"{reshape_si}.{input_name}"),
-            )
-            reshape_out = _set_span(
-                relay.reshape(input_0, newshape=new_shape),
-                reshape_si,
-            )
-            return relay.Function([input_0], reshape_out)
-
-        self._verify(_res, _golden)
-
-    def test_dense_bias_add(self):
-        in_f, out_f = 10, 7
-        input_shape = [in_f, in_f]
-
-        def _res():
-            # model definition
-            class Dense(Module):
-                def __init__(self):
-                    super(Dense, self).__init__()
-                    self.linear = torch.nn.Linear(in_f, out_f, bias=True)
-
-                def forward(self, *args):
-                    return self.linear(args[0])
-
-            # get frontend model
-            mod = gen_ir_module(Dense(), [torch.rand(input_shape)])
-            return mod["main"]
-
-        def _golden():
-            dense_si = "aten::linear_0"
-            input_name = "input0"
-            input_0 = relay.var(
-                input_name,
-                shape=tuple(input_shape),
-                span=_create_span(f"{dense_si}.{input_name}"),
-            )
-            weight_name = f"{dense_si}.weight"
-            dense_weight = relay.var(
-                weight_name,
-                shape=(out_f, in_f),
-                span=_create_span(weight_name),
-            )
-            bias_name = f"{dense_si}.bias"
-            dense_bias = relay.var(
-                bias_name,
-                shape=(out_f,),
-                span=_create_span(bias_name),
-            )
-            dense_out = _set_span(
-                relay.nn.dense(input_0, dense_weight),
-                dense_si,
-            )
-            bias_out = _set_span(
-                relay.nn.bias_add(dense_out, dense_bias, axis=-1),
-                dense_si,
-            )
-            return relay.Function([input_0, dense_weight, dense_bias], bias_out)
-
-        self._verify(_res, _golden)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/pytorch/test_fx_quant.py b/tests/python/frontend/pytorch/test_fx_quant.py
deleted file mode 100644
index 8ed6e1a74797..000000000000
--- a/tests/python/frontend/pytorch/test_fx_quant.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-""" Tests on fx-quantized torch model conversion """
-import torch
-import torchvision
-import pytest
-import numpy as np
-from torch.quantization import get_default_qconfig
-from torch.quantization.quantize_fx import prepare_fx, convert_fx
-from torchvision.models.efficientnet import efficientnet_b4
-from torchvision.models.resnet import resnet50
-from tvm import relay
-import tvm.testing
-
-
-def quantize(model, example_inputs):
-    qconfig = get_default_qconfig("fbgemm")
-    qconfig_dict = {"": qconfig}
-    return convert_fx(prepare_fx(model, qconfig_dict, example_inputs))
-
-
-def quantize_and_build(model, in_size):
-    inp = torch.rand(1, 3, in_size, in_size)
-    input_name = "inp"
-    qmodel = quantize(model, inp)
-
-    with torch.no_grad():
-        script_module = torch.jit.trace(qmodel, inp)
-        with tvm.testing.disable_span_filling():
-            mod, _ = relay.frontend.from_pytorch(script_module, [(input_name, inp.shape)])
-        with tvm.testing.enable_span_filling():
-            mod_with_span, _ = relay.frontend.from_pytorch(script_module, [(input_name, inp.shape)])
-        tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-        mod = relay.transform.InferType()(mod)
-
-        # Make sure that the model is quantized
-        assert "qnn.conv2d" in mod.astext(show_meta_data=False)
-
-        # Skip building since it is slow on CI
-        # relay.build(mod, params=params, target="llvm")
-
-
-@pytest.mark.skip(reason="unsupported op aten::linalg_vector_norm")
-def test_ssd_vgg():
-    class TraceWrapper(torch.nn.Module):
-        def __init__(self, model):
-            super().__init__()
-            self.model = model
-
-        def forward(self, inp):
-            features = self.model.backbone(inp)
-            features = list(features.values())
-            out = self.model.head(features)
-            return out["bbox_regression"], out["cls_logits"]
-
-    model_func = torchvision.models.detection.ssd300_vgg16
-    model = TraceWrapper(model_func(num_classes=50, pretrained_backbone=True)).eval()
-    quantize_and_build(model, 300)
-
-
-def test_deeplab_v3():
-    class TraceWrapper(torch.nn.Module):
-        def __init__(self, model):
-            super().__init__()
-            self.model = model
-
-        def forward(self, inp):
-            out = self.model(inp)
-            return out["out"]
-
-    deeplabv3 = torchvision.models.segmentation.deeplabv3_mobilenet_v3_large(pretrained=True)
-    model = TraceWrapper(deeplabv3.eval()).eval()
-    quantize_and_build(model, 300)
-
-
-@pytest.mark.skip(
-    reason="Model binary isn't uploaded to S3. See https://github.com/apache/tvm/pull/17397"
-)
-def test_imagenet():
-    for model_func in [resnet50, efficientnet_b4]:
-        quantize_and_build(model_func(pretrained=True).eval(), 224)
diff --git a/tests/python/frontend/pytorch/test_lstm.py b/tests/python/frontend/pytorch/test_lstm.py
deleted file mode 100644
index da4e1ae96e03..000000000000
--- a/tests/python/frontend/pytorch/test_lstm.py
+++ /dev/null
@@ -1,372 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-""" Tests on torch lstm model conversion """
-# originally from https://github.com/pytorch/pytorch/blob/master/benchmarks/fastrnns/custom_lstms.py
-# described in https://pytorch.org/blog/optimizing-cuda-rnn-with-torchscript/
-import numpy as np
-import torch
-import torch.nn as nn
-from torch.nn import Parameter
-import torch.jit as jit
-from typing import List, Tuple
-from torch import Tensor
-
-import tvm
-import tvm.testing
-from tvm import relay
-from tvm.relay.frontend.pytorch import from_pytorch
-from tvm.relay.prelude import Prelude
-from tvm.runtime.container import ADT, tuple_object
-
-
-class LayerNormLSTMCell(jit.ScriptModule):
-    def __init__(self, input_size, hidden_size):
-        super().__init__()
-        self.input_size = input_size
-        self.hidden_size = hidden_size
-        self.weight_ih = Parameter(torch.randn(4 * hidden_size, input_size))
-        self.weight_hh = Parameter(torch.randn(4 * hidden_size, hidden_size))
-
-        ln = nn.LayerNorm
-
-        self.layernorm_i = ln(4 * hidden_size)
-        self.layernorm_h = ln(4 * hidden_size)
-        self.layernorm_c = ln(hidden_size)
-
-    @jit.script_method
-    def forward(self, input, state):
-        # type: (Tensor, Tuple[Tensor, Tensor]) -> Tuple[Tensor, Tuple[Tensor, Tensor]]
-        hx, cx = state
-        igates = self.layernorm_i(torch.mm(input, self.weight_ih.t()))
-        hgates = self.layernorm_h(torch.mm(hx, self.weight_hh.t()))
-        gates = igates + hgates
-        ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
-
-        ingate = torch.sigmoid(ingate)
-        forgetgate = torch.sigmoid(forgetgate)
-        cellgate = torch.tanh(cellgate)
-        outgate = torch.sigmoid(outgate)
-
-        cy = self.layernorm_c((forgetgate * cx) + (ingate * cellgate))
-        hy = outgate * torch.tanh(cy)
-
-        return hy, (hy, cy)
-
-
-class LSTMLayer(jit.ScriptModule):
-    def __init__(self, cell, *cell_args):
-        super().__init__()
-        self.cell = cell(*cell_args)
-
-    @jit.script_method
-    def forward(self, input, state):
-        # type: (Tensor, Tuple[Tensor, Tensor]) -> Tuple[Tensor, Tuple[Tensor, Tensor]]
-        outputs = []
-        for i in range(input.size(0)):
-            out, state = self.cell(input[i], state)
-            outputs += [out]
-        return torch.stack(outputs), state
-
-
-class ReverseLSTMLayer(jit.ScriptModule):
-    def __init__(self, cell, *cell_args):
-        super(ReverseLSTMLayer, self).__init__()
-        self.cell = cell(*cell_args)
-
-    @jit.script_method
-    def forward(self, inputs, state):
-        # type: (Tensor, Tuple[Tensor, Tensor]) -> Tuple[Tensor, Tuple[Tensor, Tensor]]
-        outputs = jit.annotate(List[Tensor], [])
-        seq_len = inputs.size(0)
-        for i in range(seq_len):
-            out, state = self.cell(inputs[seq_len - i - 1], state)
-            # workaround for the lack of list rev support
-            outputs = [out] + outputs
-        return torch.stack(outputs), state
-
-
-class BidirLSTMLayer(jit.ScriptModule):
-    __constants__ = ["directions"]
-
-    def __init__(self, cell, *cell_args):
-        super(BidirLSTMLayer, self).__init__()
-        self.directions = nn.ModuleList(
-            [
-                LSTMLayer(cell, *cell_args),
-                ReverseLSTMLayer(cell, *cell_args),
-            ]
-        )
-
-    @jit.script_method
-    def forward(self, input, states):
-        # type: (Tensor, List[Tuple[Tensor, Tensor]]) -> Tuple[Tensor, List[Tuple[Tensor, Tensor]]]
-        # List[LSTMState]: [forward LSTMState, backward LSTMState]
-        outputs = jit.annotate(List[Tensor], [])
-        output_states = jit.annotate(List[Tuple[Tensor, Tensor]], [])
-        for (i, direction) in enumerate(self.directions):
-            state = states[i]
-            out, out_state = direction(input, state)
-            outputs += [out]
-            output_states += [out_state]
-        # tensor array concat assumes axis == 0 for now
-        # return torch.cat(outputs, -1), output_states
-        return torch.cat(outputs, 0), output_states
-
-
-def init_stacked_lstm(num_layers, layer, first_layer_args, other_layer_args):
-    layers = [layer(*first_layer_args)] + [layer(*other_layer_args) for _ in range(num_layers - 1)]
-    return nn.ModuleList(layers)
-
-
-class StackedLSTM(jit.ScriptModule):
-    __constants__ = ["layers"]  # Necessary for iterating through self.layers
-
-    def __init__(self, num_layers, layer, first_layer_args, other_layer_args):
-        super().__init__()
-        self.layers = init_stacked_lstm(num_layers, layer, first_layer_args, other_layer_args)
-
-    @jit.script_method
-    def forward(self, input, states):
-        # type: (Tensor, List[Tuple[Tensor, Tensor]]) -> Tuple[Tensor, List[Tuple[Tensor, Tensor]]]
-        # List[LSTMState]: One state per layer
-        output_states = jit.annotate(List[Tuple[Tensor, Tensor]], [])
-        output = input
-        for (i, rnn_layer) in enumerate(self.layers):
-            state = states[i]
-            output, out_state = rnn_layer(output, state)
-            output_states += [out_state]
-        return output, output_states
-
-
-class StackedBidirLSTM(jit.ScriptModule):
-    __constants__ = ["layers"]  # Necessary for iterating through self.layers
-
-    def __init__(self, num_layers, layer, first_layer_args, other_layer_args):
-        super(StackedBidirLSTM, self).__init__()
-        self.layers = init_stacked_lstm(num_layers, layer, first_layer_args, other_layer_args)
-
-    @jit.script_method
-    def forward(self, input, states):
-        # type: (Tensor, List[List[Tuple[Tensor, Tensor]]]) -> Tuple[Tensor, List[List[Tuple[Tensor, Tensor]]]]
-        # List[List[LSTMState]]: The outer list is for layers,
-        #                        inner list is for directions.
-        output_states = jit.annotate(List[List[Tuple[Tensor, Tensor]]], [])
-        output = input
-        for (i, rnn_layer) in enumerate(self.layers):
-            state = states[i]
-            output, out_state = rnn_layer(output, state)
-            output_states += [out_state]
-        return output, output_states
-
-
-def lstm(input_size, hidden_size):
-    return LSTMLayer(LayerNormLSTMCell, input_size, hidden_size)
-
-
-def stacked_lstm(input_size, hidden_size, num_layers):
-    return StackedLSTM(
-        num_layers,
-        LSTMLayer,
-        first_layer_args=[LayerNormLSTMCell, input_size, hidden_size],
-        other_layer_args=[LayerNormLSTMCell, hidden_size, hidden_size],
-    )
-
-
-def bidir_lstm(input_size, hidden_size):
-    return BidirLSTMLayer(LayerNormLSTMCell, input_size, hidden_size)
-
-
-def stacked_bidir_lstm(input_size, hidden_size, num_layers):
-    return StackedBidirLSTM(
-        num_layers,
-        BidirLSTMLayer,
-        first_layer_args=[LayerNormLSTMCell, input_size, hidden_size],
-        other_layer_args=[LayerNormLSTMCell, hidden_size, hidden_size],
-    )
-
-
-def vmobj_to_list(o, dtype="float32"):
-    if isinstance(o, tvm.nd.NDArray):
-        return [o]
-    elif isinstance(o, tvm.runtime.container.ADT):
-        result = []
-        for f in o:
-            result.extend(vmobj_to_list(f, dtype))
-        return result
-    else:
-        raise RuntimeError("Unknown object type: %s" % type(o))
-
-
-def assert_equal(tvm_result, torch_result):
-    if isinstance(torch_result, (tuple, list)):
-        assert isinstance(tvm_result, list)
-        for tvm_res, pt_res in zip(tvm_result, torch_result):
-            assert_equal(tvm_res, pt_res)
-    elif isinstance(torch_result, torch.Tensor):
-        tvm.testing.assert_allclose(tvm_result.numpy(), torch_result.numpy(), rtol=1e-4, atol=1e-4)
-
-
-def run_and_compare(mod, params, pt_result, target, device):
-    exec_res = relay.create_executor("vm", mod=mod, device=device, target=target).evaluate()(
-        **params
-    )
-
-    def flatten(nested):
-        res = []
-        for r in nested:
-            if isinstance(r, torch.Tensor):
-                res.append(r)
-            else:
-                res.extend(flatten(r))
-        return res
-
-    if isinstance(exec_res, tvm.runtime.container.ADT):
-        assert not isinstance(pt_result, torch.Tensor)
-        tvm_res = vmobj_to_list(exec_res)
-        torch_res = flatten(pt_result)
-    else:
-        tvm_res = exec_res
-        torch_res = pt_result
-
-    assert_equal(tvm_res, torch_res)
-
-
-def convert_list_to_vmobj(py_lst):
-    def wrap_nd_array(arr):
-        return tvm.nd.array(arr, device=tvm.cpu(0))
-
-    mod = tvm.IRModule()
-    prelude = Prelude(mod)
-    list, cons, nil = mod.get_type("List")
-    adt_lst = ADT(nil.tag, [])
-    for elem in reversed(py_lst):
-        if isinstance(elem, np.ndarray):
-            vmobj = wrap_nd_array(elem)
-        elif isinstance(elem, tuple):
-            vmobj = tuple_object([wrap_nd_array(e) for e in elem])
-        elif isinstance(elem, list):
-            vmobj = convert_list_to_vmobj(elem)
-        adt_lst = ADT(cons.tag, [vmobj, adt_lst])
-    return adt_lst
-
-
-@tvm.testing.uses_gpu
-def test_custom_lstm():
-    input_name = "input"
-    states_name = "states"
-    seq_len = 5
-    batch = 2
-    input_size = 3
-    hidden_size = 4
-    num_layers = 3
-    state_tensor_shape = (batch, hidden_size)
-
-    torch.manual_seed(1)
-
-    inp = torch.randn(seq_len, batch, input_size)
-
-    input_shapes = [
-        (input_name, (seq_len, batch, input_size)),
-        (states_name, (state_tensor_shape, state_tensor_shape)),
-    ]
-
-    input_shapes_stacked = [
-        (input_name, (seq_len, batch, input_size)),
-        (
-            states_name,
-            [(state_tensor_shape, state_tensor_shape), (state_tensor_shape, state_tensor_shape)],
-        ),
-    ]
-
-    input_shapes_stacked_bidir = [
-        (input_name, (seq_len, batch, input_size)),
-        (
-            states_name,
-            [
-                [(state_tensor_shape, state_tensor_shape) for _ in range(2)]
-                for _ in range(num_layers)
-            ],
-        ),
-    ]
-
-    states = [
-        (torch.randn(state_tensor_shape), torch.randn(state_tensor_shape))
-        for _ in range(num_layers)
-    ]
-
-    bidir_states = [
-        (torch.randn(state_tensor_shape), torch.randn(state_tensor_shape)) for _ in range(2)
-    ]
-
-    stacked_bidir_states = [
-        [(torch.randn(state_tensor_shape), torch.randn(state_tensor_shape)) for _ in range(2)]
-        for _ in range(num_layers)
-    ]
-
-    models = [
-        ("lstm", lstm(input_size, hidden_size).eval(), states[0], input_shapes),
-        (
-            "stacked",
-            stacked_lstm(input_size, hidden_size, num_layers).eval(),
-            states,
-            input_shapes_stacked,
-        ),
-        ("bidir", bidir_lstm(input_size, hidden_size).eval(), bidir_states, input_shapes_stacked),
-        # TODO(masahi): stacked bidir seems to have a rare accuracy issue
-        # (
-        #     "stacked_bidir",
-        #     stacked_bidir_lstm(input_size, hidden_size, num_layers).eval(),
-        #     stacked_bidir_states,
-        #     input_shapes_stacked_bidir,
-        # ),
-    ]
-
-    for (name, raw_model, states, input_shapes) in models:
-        script_module = torch.jit.script(raw_model)
-        with tvm.testing.disable_span_filling():
-            mod, params = from_pytorch(script_module, input_shapes)
-        with tvm.testing.enable_span_filling():
-            mod_with_span, _ = from_pytorch(script_module, input_shapes)
-        tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-
-        with torch.no_grad():
-            pt_result = raw_model(inp.clone(), states)
-
-        params[input_name] = inp.numpy()
-
-        if isinstance(states, tuple):
-            states_np = tuple(st.numpy() for st in states)
-        elif isinstance(states, list) and isinstance(states[0], torch.Tensor):
-            states_np = [st.numpy() for st in states]
-        elif isinstance(states, list) and isinstance(states[0], tuple):
-            states_np = [tuple(st.numpy() for st in states[i]) for i in range(len(states))]
-        elif isinstance(states, list) and isinstance(states[0], list):
-            states_np = [
-                [tuple(st.numpy() for st in states) for states in states[layer]]
-                for layer in range(num_layers)
-            ]
-        else:
-            assert False
-
-        if isinstance(states_np, list):
-            params[states_name] = convert_list_to_vmobj(states_np)
-        else:
-            params[states_name] = states_np
-
-        for tgt, dev in tvm.testing.enabled_targets():
-            print("Running %s on target %s" % (name, tgt))
-            run_and_compare(mod, params, pt_result, target=tgt, device=dev)
diff --git a/tests/python/frontend/pytorch/test_object_detection.py b/tests/python/frontend/pytorch/test_object_detection.py
deleted file mode 100644
index 9dd336f7e9d2..000000000000
--- a/tests/python/frontend/pytorch/test_object_detection.py
+++ /dev/null
@@ -1,167 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument
-"""Test torch vision fasterrcnn and maskrcnn models"""
-import numpy as np
-import cv2
-
-import torch
-import torchvision
-
-import tvm
-
-import tvm.testing
-from tvm import relay
-from tvm.runtime.vm import VirtualMachine
-from tvm.relay.frontend.pytorch_utils import (
-    rewrite_nms_to_batched_nms,
-    rewrite_batched_nms_with_max_out_size,
-    rewrite_scatter_to_gather,
-)
-from tvm.contrib.download import download
-
-in_size = 300
-
-
-def process_image(img):
-    img = cv2.imread(img).astype("float32")
-    img = cv2.resize(img, (in_size, in_size))
-    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-    img = torch.from_numpy(img / 255.0).permute(2, 0, 1).float()
-    img = torch.unsqueeze(img, axis=0)
-
-    return img
-
-
-def do_trace(model, inp, in_size=in_size):
-    model_trace = torch.jit.trace(model, inp)
-    model_trace.eval()
-    return model_trace
-
-
-def dict_to_tuple(out_dict):
-    if "masks" in out_dict.keys():
-        return out_dict["boxes"], out_dict["scores"], out_dict["labels"], out_dict["masks"]
-    return out_dict["boxes"], out_dict["scores"], out_dict["labels"]
-
-
-class TraceWrapper(torch.nn.Module):
-    def __init__(self, model):
-        super().__init__()
-        self.model = model
-
-    def forward(self, inp):
-        out = self.model(inp)
-        return dict_to_tuple(out[0])
-
-
-def generate_jit_model(index):
-    model_funcs = [
-        torchvision.models.detection.fasterrcnn_resnet50_fpn,
-        torchvision.models.detection.maskrcnn_resnet50_fpn,
-    ]
-
-    model_func = model_funcs[index]
-    model = TraceWrapper(model_func(pretrained=True, rpn_pre_nms_top_n_test=1000))
-
-    model.eval()
-    inp = torch.Tensor(np.random.uniform(0.0, 250.0, size=(1, 3, in_size, in_size)))
-
-    with torch.no_grad():
-        out = model(inp)
-
-        script_module = do_trace(model, inp)
-        script_out = script_module(inp)
-
-        assert len(out[0]) > 0 and len(script_out[0]) > 0
-        return script_module
-
-
-def test_detection_models():
-    img = "test_street_small.jpg"
-    img_url = (
-        "https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/detection/street_small.jpg"
-    )
-    download(img_url, img)
-
-    input_shape = (1, 3, in_size, in_size)
-
-    input_name = "input0"
-    shape_list = [(input_name, input_shape)]
-
-    scripted_model = generate_jit_model(1)
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
-    with tvm.testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_pytorch(scripted_model, shape_list)
-    tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-
-    data = process_image(img)
-    data_np = data.detach().numpy()
-
-    with torch.no_grad():
-        pt_res = scripted_model(data)
-
-    def compile_and_run_vm(mod, params, data_np, target):
-        with tvm.transform.PassContext(opt_level=3):
-            vm_exec = relay.vm.compile(mod, target=target, params=params)
-
-        dev = tvm.device(target, 0)
-        vm = VirtualMachine(vm_exec, dev)
-        vm.set_input("main", **{input_name: data_np})
-        return vm.run()
-
-    for target in ["llvm"]:
-        tvm_res = compile_and_run_vm(mod, params, data_np, target)
-
-        # Bounding boxes
-        tvm.testing.assert_allclose(
-            pt_res[0].cpu().numpy(), tvm_res[0].numpy(), rtol=1e-5, atol=1e-5
-        )
-        # Scores
-        tvm.testing.assert_allclose(
-            pt_res[1].cpu().numpy(), tvm_res[1].numpy(), rtol=1e-5, atol=1e-5
-        )
-        # Class ids
-        np.testing.assert_equal(pt_res[2].cpu().numpy(), tvm_res[2].numpy())
-
-        score_threshold = 0.9
-        print("Num boxes:", pt_res[0].cpu().numpy().shape[0])
-        print("Num valid boxes:", np.sum(pt_res[1].cpu().numpy() >= score_threshold))
-
-    before = mod["main"]
-    mod = rewrite_nms_to_batched_nms(mod)
-    after = mod["main"]
-    assert not tvm.ir.structural_equal(after, before)
-
-    # TODO(masahi): It seems this rewrite causes flaky segfaults on CI
-    # See https://github.com/apache/tvm/issues/7363
-    # before = mod["main"]
-    # mod = rewrite_batched_nms_with_max_out_size(mod)
-    # after = mod["main"]
-    # assert not tvm.ir.structural_equal(after, before)
-
-    before = mod["main"]
-    mod = rewrite_scatter_to_gather(mod, 4)  # num_scales is 4 for maskrcnn_resnet50_fpn
-    after = mod["main"]
-    assert not tvm.ir.structural_equal(after, before)
-
-    tvm_res_after_rewrite = compile_and_run_vm(mod, params, data_np, "llvm")
-
-    # Results should be equivalent after rewriting
-    for res1, res2 in zip(tvm_res, tvm_res_after_rewrite):
-        tvm.testing.assert_allclose(res1.numpy(), res2.numpy())
diff --git a/tests/python/frontend/pytorch/test_rnns.py b/tests/python/frontend/pytorch/test_rnns.py
deleted file mode 100644
index b43af58d69a3..000000000000
--- a/tests/python/frontend/pytorch/test_rnns.py
+++ /dev/null
@@ -1,521 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import torch
-import tvm
-import tvm.testing
-import onnx
-import io
-import sys
-
-from tvm import relay
-from tvm.contrib import graph_executor
-
-from torch import nn
-
-## LSTM parameters
-lstm_feature_size = 16
-lstm_hidden_size = 32
-lstm_projection_size = 20
-
-## GRU parameters
-gru_feature_size = 8
-gru_hidden_size = 16
-
-num_layers = 2
-seqs_length = 2
-batch_size = 2
-
-##RNN parameters
-rnn_feature_size = 8
-rnn_hidden_size = 16
-
-
-class RNN_Model(nn.Module):
-    """
-    It is base class for RNN layer classes.
-    It contains some common fields and methods for child classes.
-    """
-
-    def __init__(
-        self,
-    ):
-        super().__init__()
-
-        # model is defined in child class
-        self.model = None
-
-    def forward(self, input, hidden_init=None):
-        """
-        Computes the output tensor after input inference along RNN layer.
-
-        :param input: batch of data as a tensor of shape (seqs_length, batch_size, feature_size) or (batch_size, seqs_length, feature_size) if self.batch_first = True
-        :param hidden_init: initial hidden state(s) of the RNN as a tensor(s) of shape (num_layers, batch_size, hidden_size). Will default to a tensor of zeros if None.
-        :return: the output tensor of shape (batch_size, hidden_size)
-        """
-        if self.model is None:
-            raise NotImplementedError("self.model must be defined in subclasses!")
-        out, _ = self.model(input, hidden_init)
-
-        return out
-
-    def gen_rnd_weights(self):
-        """
-        Generate random weigths for the model
-        """
-        if self.model is None:
-            raise NotImplementedError("self.model must be defined in subclasses!")
-        with torch.no_grad():
-            for weight_group in self.model.all_weights:
-                for weight in weight_group:
-                    weight.data = torch.rand(weight.shape)
-
-    def get_dummy_inputs(self):
-        raise NotImplementedError("subclasses must override get_dummy_inputs()!")
-
-    def get_input_names(self):
-        raise NotImplementedError("subclasses must override get_input_names()!")
-
-    def get_shape_desc(self, frontend_type):
-        raise NotImplementedError("subclasses must override get_shape_desc(frontend_type)!")
-
-    def get_tvm_inputs(self, dtype):
-        raise NotImplementedError("subclasses must override get_tvm_inputs(dtype)!")
-
-
-class RNN_Model_Impl(RNN_Model):
-    def __init__(
-        self,
-        seq_len=seqs_length,
-        batch_size=batch_size,
-        feature_size=rnn_feature_size,
-        hidden_size=rnn_hidden_size,
-        batch_first=False,
-        layer_num=1,
-        bidirectional=False,
-        use_bias=True,
-        rnd_weights_init=False,
-        nonlinearity="tanh",
-        dropout=0.0,
-    ):
-        super().__init__()
-        # Shapes
-        self.shape = [seq_len, batch_size, feature_size]
-        if batch_first:
-            self.shape = [batch_size, seq_len, feature_size]
-        layers_num = 2 * layer_num if bidirectional else layer_num
-        self.h0_shape = [layers_num, batch_size, hidden_size]
-        # Dummy inputs
-        self.dummy_inputs = (torch.rand(self.shape), torch.zeros(self.h0_shape))
-
-        self.model = nn.RNN(
-            input_size=feature_size,
-            hidden_size=hidden_size,
-            num_layers=layer_num,
-            nonlinearity=nonlinearity,
-            bias=use_bias,
-            batch_first=batch_first,
-            dropout=dropout,
-            bidirectional=bidirectional,
-        )
-
-        if rnd_weights_init:
-            self.gen_rnd_weights()
-
-    def gen_rnd_weights(self):
-        super().gen_rnd_weights()
-
-    def get_dummy_inputs(self):
-        return self.dummy_inputs
-
-    def get_input_names(self):
-        return ["input", "h0"]
-
-    def get_shape_desc(self, frontend_type):
-        shape_desc = None
-        if frontend_type == "pt":  # PyTorch
-            shape_desc = [("input", self.shape)]
-        elif frontend_type == "onnx":  # ONNX
-            shape_desc = {
-                "input": self.shape,
-                "h0": self.h0_shape,
-            }
-        return shape_desc
-
-    def get_tvm_inputs(self, dtype):
-        return {
-            "input": tvm.nd.array(self.dummy_inputs[0].numpy().astype(dtype)),
-            "h0": tvm.nd.array(self.dummy_inputs[1].numpy().astype(dtype)),
-        }
-
-
-class GRU_Model(RNN_Model):
-    def __init__(
-        self,
-        seq_len=seqs_length,
-        batch_size=batch_size,
-        feature_size=gru_feature_size,
-        hidden_size=gru_hidden_size,
-        batch_first=False,
-        layer_num=1,
-        bidirectional=False,
-        use_bias=True,
-        rnd_weights_init=False,
-    ):
-        super().__init__()
-
-        # Shapes
-        self.shape = [seq_len, batch_size, feature_size]
-        if batch_first:
-            self.shape = [batch_size, seq_len, feature_size]
-        layers_num = 2 * layer_num if bidirectional else layer_num
-        self.h0_shape = [layers_num, batch_size, hidden_size]
-        # Dummy inputs
-        self.dummy_inputs = (torch.rand(self.shape), torch.zeros(self.h0_shape))
-
-        self.model = nn.GRU(
-            input_size=feature_size,
-            hidden_size=hidden_size,
-            num_layers=layer_num,
-            bidirectional=bidirectional,
-            batch_first=batch_first,
-            bias=use_bias,
-        )
-
-        if rnd_weights_init:
-            self.gen_rnd_weights()
-
-    def gen_rnd_weights(self):
-        """
-        Generate random weigths for the model with biases
-        For first uni- and bidirectional weights group:
-            Wi (3*hidden_size, feature_size)
-            Wh (3*hidden_size, hidden_size)
-            Bi (3*hidden_size)
-            Bh (3*hidden_size)
-        For other weights group:
-            Wi (3*hidden_size, hidden_size)
-            Wh (3*hidden_size, hidden_size)
-            Bi (3*hidden_size)
-            Bh (3*hidden_size)
-        For generation of random weigths for the model without biases the Bi and Bh weights are skipped
-        """
-        super().gen_rnd_weights()
-
-    def get_dummy_inputs(self):
-        return self.dummy_inputs
-
-    def get_input_names(self):
-        return ["input", "h0"]
-
-    def get_shape_desc(self, frontend_type):
-        shape_desc = None
-        if frontend_type == "pt":  # PyTorch
-            shape_desc = [("input", self.shape)]
-        elif frontend_type == "onnx":  # ONNX
-            shape_desc = {
-                "input": self.shape,
-                "h0": self.h0_shape,
-            }
-        return shape_desc
-
-    def get_tvm_inputs(self, dtype):
-        return {
-            "input": tvm.nd.array(self.dummy_inputs[0].numpy().astype(dtype)),
-            "h0": tvm.nd.array(self.dummy_inputs[1].numpy().astype(dtype)),
-        }
-
-
-def check_torch_version_for_proj_in_lstm():
-    """
-    proj_size parameter is supported in torch.nn.LSTM layer started from 1.8.0 torch version
-    """
-    me = False
-
-    version = torch.__version__
-    major, minor, micro = version.split(".")
-
-    if int(major) > 1:
-        me = True
-    elif int(major) == 1:
-        if int(minor) >= 8:
-            me = True
-
-    return me
-
-
-class LSTM_Model(RNN_Model):
-    def __init__(
-        self,
-        seq_len=seqs_length,
-        batch_size=batch_size,
-        feature_size=lstm_feature_size,
-        hidden_size=lstm_hidden_size,
-        batch_first=False,
-        layer_num=1,
-        bidirectional=False,
-        proj_size=0,
-        use_bias=True,
-        rnd_weights_init=False,
-    ):
-        super().__init__()
-
-        # Shapes
-        self.shape = [seq_len, batch_size, feature_size]
-        if batch_first:
-            self.shape = [batch_size, seq_len, feature_size]
-        layers_num = 2 * layer_num if bidirectional else layer_num
-        self.h0_shape = [layers_num, batch_size, hidden_size]
-        if proj_size > 0:
-            self.h0_shape = [layers_num, batch_size, proj_size]
-        self.c0_shape = [layers_num, batch_size, hidden_size]
-        # Dummy inputs
-        self.dummy_inputs = (
-            torch.rand(self.shape),
-            (torch.zeros(self.h0_shape), torch.zeros(self.c0_shape)),
-        )
-
-        if check_torch_version_for_proj_in_lstm():
-            self.model = nn.LSTM(
-                input_size=lstm_feature_size,
-                hidden_size=lstm_hidden_size,
-                num_layers=layer_num,
-                bidirectional=bidirectional,
-                proj_size=proj_size,
-                batch_first=batch_first,
-                bias=use_bias,
-            )
-        else:
-            if proj_size > 0:
-                print(
-                    "WARNING: projection is not supported for torch version less than 1.8.0! ",
-                    "LSTM was constructed without projection!",
-                )
-                # sys.exit()
-            self.model = nn.LSTM(
-                input_size=lstm_feature_size,
-                hidden_size=lstm_hidden_size,
-                num_layers=layer_num,
-                bidirectional=bidirectional,
-                batch_first=batch_first,
-                bias=use_bias,
-            )
-
-        if rnd_weights_init:
-            self.gen_rnd_weights()
-
-    def gen_rnd_weights(self):
-        """
-        Generate random weigths for the model with biases
-        Without projection:
-            For first weights group:
-                Wi (4*lstm_hidden_size, lstm_feature_size)
-                Wh (4*lstm_hidden_size, lstm_hidden_size)
-                Bi (4*lstm_hidden_size)
-                Bh (4*lstm_hidden_size)
-            For first bidirectional weights group:
-                Wi (4*lstm_hidden_size, lstm_feature_size)
-                Wh (4*lstm_hidden_size, lstm_hidden_size)
-                Bi (4*lstm_hidden_size)
-                Bh (4*lstm_hidden_size)
-            For other weights group:
-                Wi (4*lstm_hidden_size, lstm_hidden_size)
-                Wh (4*lstm_hidden_size, lstm_hidden_size)
-                Bi (4*lstm_hidden_size)
-                Bh (4*lstm_hidden_size)
-        With projection:
-            For first weights group:
-                Wi (4*lstm_hidden_size, lstm_feature_size)
-                Wh (4*lstm_hidden_size, proj_size)
-                Bi (4*lstm_hidden_size)
-                Bh (4*lstm_hidden_size)
-                P  (proj_size, lstm_hidden_size)
-            For first bidirectional weights group:
-                Wi (4*lstm_hidden_size, lstm_feature_size)
-                Wh (4*lstm_hidden_size, proj_size)
-                Bi (4*lstm_hidden_size)
-                Bh (4*lstm_hidden_size)
-                P  (proj_size, lstm_hidden_size)
-            For other weights group:
-                Wi (4*lstm_hidden_size, proj_size * num_directions)
-                Wh (4*lstm_hidden_size, proj_size)
-                Bi (4*lstm_hidden_size)
-                Bh (4*lstm_hidden_size)
-                P  (proj_size, lstm_hidden_size)
-        For generation of random weigths for the model without biases Bi and Bh are skipped
-        """
-        super().gen_rnd_weights()
-
-    def get_dummy_inputs(self):
-        return self.dummy_inputs
-
-    def get_input_names(self):
-        return ["input", "h0", "c0"]
-
-    def get_shape_desc(self, frontend_type):
-        shape_desc = None
-        if frontend_type == "pt":  # PyTorch
-            shape_desc = [("input", self.shape)]
-        elif frontend_type == "onnx":  # ONNX
-            shape_desc = {
-                "input": self.shape,
-                "h0": self.h0_shape,
-                "c0": self.c0_shape,
-            }
-        return shape_desc
-
-    def get_tvm_inputs(self, dtype):
-        return {
-            "input": tvm.nd.array(self.dummy_inputs[0].numpy().astype(dtype)),
-            "h0": tvm.nd.array(self.dummy_inputs[1][0].numpy().astype(dtype)),
-            "c0": tvm.nd.array(self.dummy_inputs[1][1].numpy().astype(dtype)),
-        }
-
-
-def compare(input, gold_data, rtol=1e-5, atol=1e-5):
-    tvm.testing.assert_allclose(input, gold_data, rtol=rtol, atol=atol)
-
-
-def check_rnn(rnn_type, rnn_mod, target=tvm.target.Target("llvm -mcpu=core-avx2"), dev=tvm.cpu(0)):
-    def get_model(
-        rnn_type,
-        rnn_mod,
-        args,
-    ):
-        # Fill args
-        if "b" in rnn_mod:
-            args["bidirectional"] = True
-        if "s" in rnn_mod:
-            args["layer_num"] = num_layers
-        if "tanh" in rnn_mod:
-            args["nonlinearity"] = "tanh"
-        if "relu" in rnn_mod:
-            args["nonlinearity"] = "relu"
-
-        if rnn_type == "GRU":
-            RNN_Model_selector = GRU_Model
-        elif rnn_type == "LSTM":
-            RNN_Model_selector = LSTM_Model
-            if "p" in rnn_mod:
-                args["proj_size"] = lstm_projection_size
-        elif rnn_type == "RNN":
-            RNN_Model_selector = RNN_Model_Impl
-
-        return RNN_Model_selector(**args)
-
-    def get_onnx_model(model):
-        onnx_io = io.BytesIO()
-        with torch.no_grad():
-            input_names = model.get_input_names()
-            inputs = model.get_dummy_inputs()
-
-            # default export (without dynamic input)
-            torch.onnx.export(model, inputs, onnx_io, input_names=input_names)
-
-        onnx_io.seek(0, 0)
-        return onnx.load_model(onnx_io)
-
-    model = None
-    dtype = "float32"
-    device = torch.device("cpu")
-    for batch_first in (True, False):
-        for use_bias in (True, False):
-            for rnd_weights in [True]:  # (True, False):
-                model_inputs = {
-                    "batch_first": batch_first,
-                    "use_bias": use_bias,
-                    "rnd_weights_init": rnd_weights,
-                }
-                model = get_model(rnn_type, rnn_mod, model_inputs)
-                model.to(device)
-                model.eval()
-
-                # Get golden output from original model
-                dummy_inputs = model.get_dummy_inputs()
-                golden_output = model.forward(dummy_inputs[0].to(device)).detach().cpu().numpy()
-
-                tvm_output = None
-                for format in ["pt"]:  # ["pt", "onnx"]:
-                    shape_desc = model.get_shape_desc(format)
-                    if format == "pt":
-                        # Use torch.jit.trace to generate a torch.jit.ScriptModule via tracing.
-                        traced_script_module = torch.jit.trace(model, dummy_inputs[0]).eval()
-
-                        # Import model to Relay
-                        with tvm.testing.disable_span_filling():
-                            mod, params = relay.frontend.from_pytorch(
-                                traced_script_module, shape_desc
-                            )
-                        with tvm.testing.enable_span_filling():
-                            mod_with_span, _ = relay.frontend.from_pytorch(
-                                traced_script_module, shape_desc
-                            )
-                        tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-                    elif format == "onnx":
-                        try:
-                            onnx_model = get_onnx_model(model)
-                        except:
-                            print(
-                                "WARNING: torch.onnx.export does not support conversion LSTM with projection "
-                                "from pytorch! TODO: waiting for the support and correct test after that."
-                            )
-                            continue
-
-                        # Import model to Relay
-                        with tvm.testing.disable_span_filling():
-                            mod, params = relay.frontend.from_onnx(onnx_model, shape_desc)
-                        with tvm.testing.enable_span_filling():
-                            mod_with_span, _ = relay.frontend.from_onnx(onnx_model, shape_desc)
-                        tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-
-                    # Model compilation by tvm
-                    with tvm.transform.PassContext(opt_level=3):
-                        lib = relay.build(mod, target=target, params=params)
-
-                    # Inference of the model with given input data
-                    m = graph_executor.GraphModule(lib["default"](dev))
-
-                    # Set inputs
-                    tvm_inputs = model.get_tvm_inputs(dtype)
-                    m.set_input(**tvm_inputs)
-                    # Execute
-                    m.run()
-                    # Get outputs (converted to numpy array)
-                    tvm_output = m.get_output(0).numpy()
-
-                    compare(tvm_output, golden_output)
-
-
-@tvm.testing.uses_gpu
-def test_rnns():
-    for target, dev in tvm.testing.enabled_targets():
-        # RNN types: GRU, LSTM
-        # GRU modifications: unidirectional, stacked, bidirectional, stacked bidirectional
-        for mod_type in ["uni", "s", "b", "sb"]:
-            check_rnn("GRU", mod_type, target, dev)
-        # LSTM modifications: unidirectional, stacked, bidirectional, stacked bidirectional,
-        # and all these types with projection ("p", "sp", "bp", "sbp")
-        # The latter are skiped for test acceleration
-        for mod_type in ["uni", "s", "b", "sb"]:
-            check_rnn("LSTM", mod_type, target, dev)
-
-        for mod_type in ["uni", "s", "b", "sb", "tanh", "relu"]:
-            check_rnn("RNN", mod_type, target, dev)
-
-
-if __name__ == "__main__":
-    test_rnns()
diff --git a/tests/python/frontend/pytorch/test_span_naming.py b/tests/python/frontend/pytorch/test_span_naming.py
deleted file mode 100644
index fb39ddf4f061..000000000000
--- a/tests/python/frontend/pytorch/test_span_naming.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, too-many-lines, len-as-condition, no-else-return, unused-variable, too-many-nested-blocks
-# pylint: disable=consider-iterating-dictionary, invalid-name, unused-argument, unused-variable, broad-except
-# pylint: disable=import-outside-toplevel, simplifiable-if-expression, cell-var-from-loop, unnecessary-lambda
-# pylint: disable=missing-function-docstring, redefined-builtin, use-implicit-booleaness-not-comparison
-"""Tests to ensure span names are correctly populated when importing Pytorch"""
-from torch import nn
-import torch
-import tvm
-
-
-class NestedConvModule(nn.Module):
-    """Module that performs Conv2d and relu activation"""
-
-    def __init__(self, in_channels, out_channels):
-        super().__init__()
-        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
-        self.relu = nn.ReLU()
-
-    def forward(self, x):
-        x = self.relu(self.conv(x))
-        return x
-
-
-class NestedFinalModule(nn.Module):
-    """Simple module that adds 2 inputs"""
-
-    def forward(self, x, y):
-        return x + y
-
-
-class SimpleTwoConvModule(nn.Module):
-    """
-    ML model that performs 2 convolutions and adds them together.
-    All operations are inside nested modules to make scope names interesting.
-    """
-
-    def __init__(self):
-        super().__init__()
-        # First convolutional module
-        self.image_block1 = NestedConvModule(in_channels=3, out_channels=64)
-        # Second convolutional module
-        self.image_block2 = NestedConvModule(in_channels=64, out_channels=64)
-        self.final_block = NestedFinalModule()
-
-    def forward(self, x):
-        # Forward pass through the first convolutional module
-        x1 = self.image_block1(x)
-        # Forward pass through the second convolutional module
-        x2 = self.image_block2(x1)
-        # Add the outputs of the two convolutional modules
-        return self.final_block(x1, x2)
-
-
-def test_pytorch_scope_based_span_names():
-    model = SimpleTwoConvModule()
-    sample_input = torch.zeros((1, 3, 64, 64), dtype=torch.float32)
-    with torch.no_grad():
-        traced_torch_model = torch.jit.trace(model, sample_input)
-    import_input = [("model_input", (1, 3, 64, 64))]
-    relay_model_ir, relay_model_params = tvm.relay.frontend.from_pytorch(
-        traced_torch_model, import_input, preserve_pytorch_scopes=True
-    )
-    # If specified, we are preserving the pytorch named spans
-    for block in [1, 2]:
-        for key in ["weight", "bias"]:
-            assert f"image_block{block}.conv.{key}" in relay_model_params.keys()
-    # Manually check all span names since asserting structural equality is not sufficient
-    current_call = relay_model_ir["main"].body
-    assert current_call.op.name == "add"
-    assert current_call.span is not None and current_call.span.source_name.name == "final_block"
-    current_call = current_call.args[1]
-    for block in [2, 1]:
-        assert current_call.op.name == "nn.relu"
-        assert (
-            current_call.span is not None
-            and current_call.span.source_name.name == f"image_block{block}.relu"
-        )
-        current_call = current_call.args[0]
-        assert current_call.op.name == "nn.bias_add"
-        assert (
-            current_call.span is not None
-            and current_call.span.source_name.name == f"image_block{block}.conv"
-        )
-        current_call = current_call.args[0]
-        assert current_call.op.name == "nn.conv2d"
-        assert (
-            current_call.span is not None
-            and current_call.span.source_name.name == f"image_block{block}.conv"
-        )
-        current_call = current_call.args[0]
diff --git a/tests/python/frontend/tensorflow/test_bn_dynamic.py b/tests/python/frontend/tensorflow/test_bn_dynamic.py
deleted file mode 100644
index 99d8f790028c..000000000000
--- a/tests/python/frontend/tensorflow/test_bn_dynamic.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-BatchNorm without given mean and variance given testcases
-====================
-This is a test script to test fused_batch_norm operators
-in TensorFlow frontend when mean and variance are not given.
-"""
-import tvm
-import tvm.testing
-import numpy as np
-
-try:
-    import tensorflow.compat.v1 as tf
-
-    tf.disable_v2_behavior()
-except ImportError:
-    import tensorflow as tf
-from tvm import relay
-from tensorflow.python.framework import graph_util
-
-
-def verify_fused_batch_norm(shape):
-    g = tf.Graph()
-    with g.as_default():
-        input_tensor = tf.placeholder(tf.float32, shape=shape, name="input")
-        alpha = tf.constant(
-            np.random.rand(
-                shape[-1],
-            ),
-            dtype=tf.float32,
-            name="alpha",
-        )
-        beta = tf.constant(
-            np.random.rand(
-                shape[-1],
-            ),
-            dtype=tf.float32,
-            name="beta",
-        )
-        bn = tf.nn.fused_batch_norm(x=input_tensor, offset=beta, scale=alpha, name="bn")
-        out = tf.identity(bn[0], name="output")
-    data = np.random.rand(*shape)
-    with tf.Session(graph=out.graph) as sess:
-        sess.run([tf.global_variables_initializer()])
-        tf_out = sess.run(out, feed_dict={input_tensor: data})
-        constant_graph = graph_util.convert_variables_to_constants(sess, sess.graph_def, ["output"])
-
-    for device in ["llvm"]:
-        dev = tvm.device(device, 0)
-        if not tvm.testing.device_enabled(device):
-            print("Skip because %s is not enabled" % device)
-            continue
-        with tvm.testing.disable_span_filling():
-            mod, params = relay.frontend.from_tensorflow(constant_graph, outputs=["output"])
-        with tvm.testing.enable_span_filling():
-            mod_with_span, _ = relay.frontend.from_tensorflow(constant_graph, outputs=["output"])
-        tvm.ir.assert_structural_equal(mod["main"], mod_with_span["main"])
-        with tvm.transform.PassContext(opt_level=3):
-            graph, lib, params = relay.build(mod, target=device, params=params)
-        from tvm.contrib import graph_executor
-
-        m = graph_executor.create(graph, lib, dev)
-        m.set_input(**params)
-        m.set_input("input", data)
-        m.run()
-        tvm_out = m.get_output(0)
-        tvm.testing.assert_allclose(
-            tvm_out.numpy(), tf_out.astype(tvm_out.dtype), atol=1e-3, rtol=1e-3
-        )
-
-
-def test_fused_batch_norm():
-    verify_fused_batch_norm(shape=(1, 12, 12, 32))
-    verify_fused_batch_norm(shape=(1, 24, 24, 64))
-    verify_fused_batch_norm(shape=(1, 64, 64, 128))
-    verify_fused_batch_norm(shape=(8, 12, 12, 32))
-    verify_fused_batch_norm(shape=(16, 12, 12, 32))
-    verify_fused_batch_norm(shape=(32, 12, 12, 32))
-
-
-if __name__ == "__main__":
-    test_fused_batch_norm()
diff --git a/tests/python/frontend/tensorflow/test_control_flow.py b/tests/python/frontend/tensorflow/test_control_flow.py
deleted file mode 100644
index 494deb46835f..000000000000
--- a/tests/python/frontend/tensorflow/test_control_flow.py
+++ /dev/null
@@ -1,473 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Unit tests for converting TensorFlow control flow op to Relay."""
-import pytest
-
-try:
-    import tensorflow.compat.v1 as tf
-
-    tf.disable_v2_behavior()
-except ImportError:
-    import tensorflow as tf
-from tensorflow.python.ops import control_flow_ops
-import numpy as np
-from tvm import nd, relay, ir, testing
-from tvm.relay.frontend.tensorflow import from_tensorflow
-
-
-def check_equal(graph, tf_out, input_map=None):
-    with testing.disable_span_filling():
-        mod, params = from_tensorflow(graph.as_graph_def(add_shapes=True))
-    with testing.enable_span_filling():
-        mod_with_span, _ = from_tensorflow(graph.as_graph_def(add_shapes=True))
-    assert ir.structural_equal(mod["main"], mod_with_span["main"])
-
-    if input_map is not None:
-        params.update(input_map)
-    relay_out = relay.create_executor("vm", mod=mod).evaluate()(**params)
-    if isinstance(relay_out, nd.NDArray):
-        np.testing.assert_allclose(tf_out, relay_out.numpy())
-    else:
-        if not isinstance(tf_out, (list, tuple)):
-            tf_out = [tf_out]
-        for x, y in zip(tf_out, [r.numpy() for r in relay_out]):
-            np.testing.assert_allclose(x, y)
-
-
-def test_vanilla_loop():
-    graph = tf.Graph()
-    with graph.as_default():
-        i = tf.constant(0, name="while/constant")
-
-        def c(i):
-            return tf.less(i, 10)
-
-        def b(i):
-            return tf.add(i, 1)
-
-        r = tf.while_loop(c, b, [i])
-
-        with tf.Session() as sess:
-            tf_out = sess.run(r)
-
-        check_equal(graph, tf_out)
-
-
-def test_callnode_loop_vars():
-    graph = tf.Graph()
-    with graph.as_default():
-        i = tf.add(tf.constant(0), 1)
-
-        def c(i):
-            return tf.less(i, 10)
-
-        def b(i):
-            return tf.add(i, 1)
-
-        r = tf.while_loop(c, b, [i])
-
-        with tf.Session() as sess:
-            tf_out = sess.run(r)
-
-        check_equal(graph, tf_out)
-
-
-def test_loop_2_vars():
-    graph = tf.Graph()
-    with graph.as_default():
-        i0 = tf.constant(0)
-        j0 = tf.ones([2, 2])
-
-        def c(i, j):
-            return i < 10
-
-        def b(i, j):
-            return [tf.add(i, 1), j]
-
-        i1, i2 = tf.while_loop(c, b, loop_vars=[i0, j0])
-        i1 += tf.constant(1337)
-
-        with tf.Session() as sess:
-            tf_out = sess.run(i1)
-
-    check_equal(graph, tf_out)
-
-
-def test_loop_3_vars():
-    graph = tf.Graph()
-    with graph.as_default():
-        i0 = tf.constant(1)
-        j0 = tf.constant(2)
-        k0 = tf.constant(4)
-
-        def c(i, j, k):
-            return i < 10
-
-        def b(i, j, k):
-            return [i + 1, j * k, k + i]
-
-        r = tf.while_loop(c, b, loop_vars=[i0, j0, k0])
-
-        with tf.Session() as sess:
-            tf_out = sess.run(r)
-
-    check_equal(graph, tf_out)
-
-
-def test_loop_conditions():
-    graph = tf.Graph()
-    with graph.as_default():
-        i = tf.constant(1)
-        j = tf.constant(1)
-        k = tf.constant(5)
-
-        def c(i, j, k):
-            return tf.equal(
-                tf.not_equal(tf.less(i + j, 10), tf.less(j * k, 100)), tf.greater_equal(k, i + j)
-            )
-
-        def b(i, j, k):
-            return [i + j, j + k, k + 1]
-
-        r = tf.while_loop(c, b, loop_vars=[i, j, k])
-        with tf.Session() as sess:
-            tf_out = sess.run(r)
-
-    check_equal(graph, tf_out)
-
-
-@pytest.mark.skip
-def test_loop_bodies():
-    graph = tf.Graph()
-    with graph.as_default():
-
-        def body(x):
-            a = tf.constant(np.array([[5, 6], [7, 8]]), dtype=tf.int32)
-            b = tf.constant(np.array([[1, 2], [3, 4]]), dtype=tf.int32)
-            c = a + b
-            return tf.nn.relu(x + c)
-
-        def condition(x):
-            return tf.reduce_sum(x) < 100
-
-        x = tf.constant(0, shape=[2, 2])
-        r = tf.while_loop(condition, body, [x])
-        with tf.Session() as sess:
-            tf_out = sess.run(r)
-
-    check_equal(graph, tf_out)
-
-
-def test_nested_loop():
-    graph = tf.Graph()
-    with graph.as_default():
-
-        def body(x):
-            def nest_body(c):
-                return tf.multiply(c, 2)
-
-            def cd(c):
-                return tf.less(c, 10)
-
-            c = tf.constant(2)
-            res = tf.while_loop(cd, nest_body, loop_vars=[c])
-            return tf.nn.relu(x + res)
-
-        def condition(x):
-            return tf.greater(x, 100)
-
-        x = tf.constant(3)
-        r = tf.while_loop(condition, body, loop_vars=[x])
-
-        with tf.Session() as sess:
-            tf_out = sess.run(r)
-
-    check_equal(graph, tf_out)
-
-
-def test_vanilla_cond():
-    graph = tf.Graph()
-    with graph.as_default():
-        i = tf.constant(1)
-        j = tf.constant(4)
-
-        def f1():
-            return tf.multiply(1, 17)
-
-        def f2():
-            return tf.add(4, 23)
-
-        r = tf.cond(tf.less(i, j), f1, f2)
-
-    with tf.Session(graph=graph) as sess:
-        tf_out = sess.run(r)
-
-    check_equal(graph, tf_out)
-
-
-def test_multiple_cond_vars():
-    graph = tf.Graph()
-    with graph.as_default():
-        x1 = tf.constant(7)
-        x2 = tf.constant(12)
-        z = tf.constant(20)
-        r = tf.cond(tf.less(tf.add(x1, x2), 10), lambda: tf.add(10, 2), lambda: tf.square(5))
-
-        with tf.Session() as sess:
-            tf_out = sess.run(r)
-
-    check_equal(graph, tf_out)
-
-
-def test_cond_fn_parameters():
-    graph = tf.Graph()
-    with graph.as_default():
-
-        def fn1(x, y):
-            return tf.multiply(5, 6)
-
-        def fn2(x, y):
-            return tf.add(3, 4)
-
-        i = tf.constant(1)
-        j = tf.constant(2)
-        k = tf.constant(3)
-        r = tf.cond(tf.less(i, j), lambda: fn1(i, k), lambda: fn2(j, k))
-
-        with tf.Session() as sess:
-            tf_out = sess.run(r, feed_dict={i: 1, j: 2, k: 3})
-
-    check_equal(graph, tf_out)
-
-
-def test_nested_cond():
-    graph = tf.Graph()
-    with graph.as_default():
-
-        def fn1(a, b):
-            def nest_fn1():
-                return tf.add(1, 2)
-
-            def nest_fn2():
-                return tf.subtract(10, 5)
-
-            res = tf.cond(tf.less(1, 2), nest_fn1, nest_fn2)
-            return tf.multiply(tf.add(87, res), 10)
-
-        def fn2(a, b):
-            return tf.add(10, 10)
-
-        x = tf.constant(5)
-        y = tf.constant(6)
-        z = tf.constant(7)
-        pred = tf.less(x, y)
-        r = tf.cond(pred, lambda: fn1(x, y), lambda: fn2(y, z))
-
-        with tf.Session() as sess:
-            tf_out = sess.run(r, feed_dict={x: 1, y: 2, z: 3, pred: True})
-
-    check_equal(graph, tf_out)
-
-
-def test_loop_in_cond():
-    graph = tf.Graph()
-    with graph.as_default():
-
-        def fn1(a, b):
-            i = tf.constant(0)
-
-            def cd(i):
-                return tf.less(i, 10)
-
-            def bd(i):
-                return tf.add(i, 1)
-
-            res = tf.while_loop(cd, bd, [i])
-            return tf.multiply(tf.add(20, res), 10)
-
-        def fn2(a, b):
-            return tf.add(10, 20)
-
-        x = tf.constant(7)
-        y = tf.constant(20)
-        z = tf.constant(10)
-        pred = tf.less(x, y)
-        r = tf.cond(pred, lambda: fn1(x, y), lambda: fn2(y, z))
-
-        with tf.Session() as sess:
-            tf_out = sess.run(r, feed_dict={x: 1, y: 2, z: 3, pred: True})
-
-    check_equal(graph, tf_out)
-
-
-def test_cond_in_loop():
-    graph = tf.Graph()
-    with graph.as_default():
-
-        def body(x):
-            x = tf.constant(7)
-            z = tf.constant(20)
-            res = tf.cond(tf.less(x, 10), lambda: tf.add(10, 20), lambda: tf.square(10))
-            return tf.multiply(res, x)
-
-        x = tf.constant(21)
-
-        def condition(x):
-            return tf.less(x, 100)
-
-        r = tf.while_loop(condition, body, loop_vars=[x])
-        with tf.Session() as sess:
-            tf_out = sess.run(r)
-
-    check_equal(graph, tf_out)
-
-
-def test_vanilla_loop_bound():
-    graph = tf.Graph()
-    with graph.as_default():
-        dshape = (2, 10)
-        dtype = "float32"
-        dname = "data"
-        np_data = np.random.uniform(size=dshape).astype(dtype)
-        data = tf.placeholder(shape=dshape, dtype=dtype, name=dname)
-        x = tf.slice(data, [1, 4], [1, 4])
-        outer = x + 5.0
-
-        def body(x, y):
-            res = tf.cond(tf.less(y, 10), lambda: tf.add(10.0, 20.0), lambda: tf.square(10.0))
-            z = tf.constant(7)
-            res = tf.cond(tf.less(z, 10), lambda: res * 5, lambda: res + 10)
-            return tf.multiply(res, x * outer), y + 1
-
-        y = tf.constant(0)
-
-        def condition(x, y):
-            return tf.less(y, 20)
-
-        r = tf.while_loop(condition, body, loop_vars=[x, y])
-        with tf.Session() as sess:
-            tf_out = sess.run(r, feed_dict={"%s:0" % dname: np_data})
-
-    check_equal(graph, tf_out, {dname: np_data})
-
-
-def test_nested_loop_bound():
-    graph = tf.Graph()
-    with graph.as_default():
-        dshape = (2, 10)
-        dtype = "float32"
-        dname = "data"
-        np_data = np.random.uniform(size=dshape).astype(dtype)
-        data = tf.placeholder(shape=dshape, dtype=dtype, name=dname)
-        x = tf.slice(data, [1, 4], [1, 4])
-        outer = x + 5.0
-
-        def body(x, y):
-            res = tf.cond(tf.less(y, 10), lambda: tf.add(10.0, 20.0), lambda: tf.square(10.0))
-
-            def nested_body(nx, ny):
-                return nx + 1, res + 2.0
-
-            def nested_cond(nx, ny):
-                return tf.less(nx, 15)
-
-            nx = tf.constant(0)
-            ny = tf.constant(0.0)
-            nested_res = tf.while_loop(nested_cond, nested_body, loop_vars=[nx, ny])
-            res = res + nested_res[1]
-            z = tf.constant(7)
-            res = tf.cond(tf.less(z, 10), lambda: res * 5, lambda: res + 10)
-            return tf.multiply(res, x * outer), y + 1
-
-        y = tf.constant(0)
-
-        def condition(x, y):
-            return tf.less(y, 20)
-
-        r = tf.while_loop(condition, body, loop_vars=[x, y])
-        with tf.Session() as sess:
-            tf_out = sess.run(r, feed_dict={"%s:0" % dname: np_data})
-
-    check_equal(graph, tf_out, {dname: np_data})
-
-
-def test_switch():
-    graph = tf.Graph()
-
-    with graph.as_default():
-        data_np = np.random.uniform(0, 5, size=(2, 4, 5, 1)).astype("float32")
-        dname = "data"
-        flag_name = "flag"
-        data = tf.placeholder(shape=data_np.shape, dtype=data_np.dtype, name=dname)
-        split = tf.split(data, 2, axis=0)
-        flag = tf.placeholder(shape={}, dtype=tf.bool, name=flag_name)
-        output_false, output_true = control_flow_ops.switch(split[1], flag)
-        with tf.Session() as sess:
-            tf_out = sess.run(output_false, feed_dict={data.name: data_np, flag.name: False})
-
-    check_equal(graph, tf_out, {dname: data_np, flag_name: False})
-
-
-def test_loop_tuple_input():
-    graph = tf.Graph()
-
-    with graph.as_default():
-        data_np = np.random.uniform(0, 5, size=(2, 4, 5, 1)).astype("float32")
-        dname = "data"
-        data = tf.placeholder(shape=data_np.shape, dtype=data_np.dtype, name=dname)
-        split = tf.split(data, 2, axis=0)
-
-        def body(x, y):
-            return x + 2, y + 1
-
-        start = tf.constant(0)
-
-        def condition(x, y):
-            return tf.less(y, 20)
-
-        r = tf.while_loop(condition, body, loop_vars=[split[1], start])
-        with tf.Session() as sess:
-            tf_out = sess.run(r, feed_dict={data.name: data_np})
-
-    check_equal(graph, tf_out, {dname: data_np})
-
-
-if __name__ == "__main__":
-    # tf.while_loop
-    test_vanilla_loop()
-    test_loop_2_vars()
-    test_loop_3_vars()
-    test_loop_conditions()
-    # TODO(@jroesch): Need to fix memory alloc to support closure
-    # test_loop_bodies()
-    test_callnode_loop_vars()
-
-    # tf.cond
-    test_vanilla_cond()
-    test_multiple_cond_vars()
-    test_cond_fn_parameters()
-
-    # nested cases
-    test_nested_loop()
-    test_nested_cond()
-    test_loop_in_cond()
-    test_cond_in_loop()
-    test_vanilla_loop_bound()
-    test_nested_loop_bound()
-
-    test_switch()
-    test_loop_tuple_input()
diff --git a/tests/python/frontend/tensorflow/test_debugging.py b/tests/python/frontend/tensorflow/test_debugging.py
deleted file mode 100644
index 0f7c4dd7d65a..000000000000
--- a/tests/python/frontend/tensorflow/test_debugging.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Unit tests for converting TensorFlow debugging ops to Relay."""
-try:
-    import tensorflow.compat.v1 as tf
-
-    tf.disable_v2_behavior()
-except ImportError:
-    import tensorflow as tf
-import numpy as np
-from tvm import relay, ir, testing
-from tvm.relay.frontend.tensorflow import from_tensorflow
-
-
-def run_relay(graph, shape_dict=None, *vars):
-    with testing.disable_span_filling():
-        mod, params = from_tensorflow(graph.as_graph_def(add_shapes=True), shape=shape_dict)
-    with testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_tensorflow(
-            graph.as_graph_def(add_shapes=True), shape=shape_dict
-        )
-    assert ir.structural_equal(mod["main"], mod_with_span["main"])
-
-    return relay.create_executor("debug", mod=mod).evaluate()(*vars)
-
-
-def test_assert_true():
-    g = tf.Graph()
-    shape = (1, 2)
-    with g.as_default():
-        x = tf.placeholder(tf.float32, shape=shape, name="input")
-        assert_op = tf.Assert(tf.reduce_all(tf.less_equal(x, x)), ["it failed"])
-
-        with tf.Session() as sess:
-            x_value = np.random.rand(*shape)
-            assert sess.run(assert_op, feed_dict={x: x_value}) is None
-
-        # In TVM, tf.assert is converted to a no-op which is actually a 0,
-        # though it should probably be none or an empty tuple.
-        #
-        # ToDo: It appears that the frontend converter gets confused here and
-        # entirely eliminates all operands from main(). Likely because x <= x
-        # is always true, so the placeholder can be eliminated. But TF doesn't
-        # do that, it's happening in Relay, and that optimization shouldn't
-        # affect the arity of the main function. We should have to pass in
-        # x_value here.
-        np.testing.assert_allclose(0, run_relay(g, {"input": shape}).numpy())
-
-
-def test_assert_true_var_capture():
-    g = tf.Graph()
-    with g.as_default():
-        x = tf.placeholder(tf.float32, shape=())
-
-        # It turns out that tf.assert() creates a large and complex subgraph if
-        # you capture a variable as part of the error message. So we need to
-        # test that, too.
-        assert_op = tf.Assert(tf.less_equal(x, x), ["it failed", x])
-
-        with tf.Session() as sess:
-            x_value = np.random.rand()
-            assert sess.run(assert_op, feed_dict={x: x_value}) is None
-
-        # TODO: The frontend converter notes the output of
-        # the graph as a boolean, which is not correct - as you can see above,
-        # TF believes that the value of this graph is None.
-        np.testing.assert_allclose(True, run_relay(g, None, x_value).numpy())
-
-
-def test_assert_false():
-    g = tf.Graph()
-    with g.as_default():
-        assert_op = tf.Assert(tf.constant(False), ["it failed"])
-
-        with tf.Session() as sess:
-            try:
-                print(sess.run(assert_op))
-                assert False  # TF should have thrown an exception
-            except tf.errors.InvalidArgumentError as e:
-                assert "it failed" in e.message
-
-        # In TVM, tf.assert is converted to a no-op which is actually a 0,
-        # though it should probably be none or an empty tuple. For the same
-        # reason, there should not be an error here, even though the assertion
-        # argument is false.
-        np.testing.assert_allclose(0, run_relay(g).numpy())
-
-
-if __name__ == "__main__":
-    test_assert_true()
-    test_assert_true_var_capture()
-    test_assert_false()
diff --git a/tests/python/frontend/tensorflow/test_forward.py b/tests/python/frontend/tensorflow/test_forward.py
deleted file mode 100644
index 354ed38a62ce..000000000000
--- a/tests/python/frontend/tensorflow/test_forward.py
+++ /dev/null
@@ -1,6100 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument, ungrouped-imports, wrong-import-order
-"""
-Tensorflow testcases
-====================
-This article is a test script to test tensorflow operator with Relay.
-"""
-from __future__ import print_function
-
-import threading
-import platform
-import os.path
-from packaging import version as package_version
-import numpy as np
-import pytest
-
-from PIL import Image
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import graph_util
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import nn
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.ops import init_ops
-from tensorflow.python.framework import function
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import gen_functional_ops
-from tensorflow.python.client import device_lib
-
-try:
-    import tensorflow.compat.v1 as tf
-
-    tf.disable_v2_behavior()
-except ImportError:
-    import tensorflow as tf
-
-import tvm
-from tvm import relay, ir
-from tvm.runtime.vm import VirtualMachine
-from tvm.relay.frontend.tensorflow import from_tensorflow
-from tvm.contrib import graph_executor
-from tvm.contrib import utils
-import tvm.testing
-import tvm.relay.testing.tf as tf_testing
-from relay.utils.tag_span import _set_span, _create_span, _verify_structural_equal_with_span
-
-
-# Only allow TF to run on half the GPU RAM to save the other half
-# For TVM
-gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
-gpu_sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
-gpu_sess.close()
-
-
-#######################################################################
-# Generic run functions for TVM & tensorflow
-# ------------------------------------------
-
-
-def convert_to_list(x):
-    if not isinstance(x, list):
-        x = [x]
-    return x
-
-
-tf_dtypes = {
-    "float32": tf.float32,
-    "float16": tf.float16,
-    "float64": tf.float64,
-    "int32": tf.int32,
-    "uint8": tf.uint8,
-    "int8": tf.int8,
-    "int16": tf.int16,
-    "uint16": tf.uint16,
-    "int64": tf.int64,
-}
-
-
-def vmobj_to_list(o):
-    """Converts TVM objects returned by VM execution to Python List."""
-    if isinstance(o, tvm.nd.NDArray):
-        return [o.numpy()]
-    elif isinstance(o, tvm.runtime.container.ADT):
-        result = []
-        for f in o:
-            result.extend(vmobj_to_list(f))
-        return result
-    elif isinstance(o, tvm.relay.backend.interpreter.ConstructorValue):
-        if o.constructor.name_hint == "Cons":
-            tl = vmobj_to_list(o.fields[1])
-            hd = vmobj_to_list(o.fields[0])
-            hd.extend(tl)
-            return hd
-        elif o.constructor.name_hint == "Nil":
-            return []
-        elif "tensor_nil" in o.constructor.name_hint:
-            return [0]
-        elif "tensor" in o.constructor.name_hint:
-            return [o.fields[0].numpy()]
-        else:
-            raise RuntimeError(f"Unknown object type: {o.constructor.name_hint}")
-    else:
-        raise RuntimeError(f"Unknown object type: {type(o)}")
-
-
-def run_tvm_graph(
-    graph_def,
-    input_data,
-    input_node,
-    num_output=1,
-    target="llvm",
-    out_names=None,
-    opt_level=3,
-    mode="graph_executor",
-    cuda_layout="NCHW",
-    layout=None,
-    disabled_pass=None,
-    ignore_in_shape=False,
-    serialize=False,
-    convert_config=None,
-):
-    """Generic function to compile on relay and execute on tvm"""
-    input_data = convert_to_list(input_data)
-    input_node = convert_to_list(input_node)
-    if target == "cuda":
-        layout = cuda_layout
-    target_host = None
-    if ignore_in_shape:
-        shape_dict = None
-    else:
-        shape_dict = {
-            e: i.shape if hasattr(i, "shape") else () for e, i in zip(input_node, input_data)
-        }
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_tensorflow(
-            graph_def,
-            layout=layout,
-            shape=shape_dict,
-            outputs=out_names,
-            convert_config=convert_config,
-        )
-    with tvm.testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_tensorflow(
-            graph_def,
-            layout=layout,
-            shape=shape_dict,
-            outputs=out_names,
-            convert_config=convert_config,
-        )
-    tvm.ir.assert_structural_equal(mod["main"], mod_with_span["main"], map_free_vars=True)
-
-    dev = tvm.device(target, 0)
-    if mode == "debug":
-        inputs = []
-        for param in mod["main"].params:
-            found = False
-            for i, n in enumerate(input_node):
-                if n == param.name_hint:
-                    found = True
-                    inputs.append(tvm.nd.array(input_data[i]))
-                    break
-            # Interpreter doesn't bind constants, so still need to find in params
-            if not found:
-                inputs.append(tvm.nd.array(params[param.name_hint]))
-        result = relay.create_executor(mode, mod=mod, device=tvm.cpu(), target="llvm").evaluate()(
-            *inputs
-        )
-        return vmobj_to_list(result)
-    elif mode == "vm":
-        with tvm.transform.PassContext(opt_level=opt_level, disabled_pass=disabled_pass):
-            mod = relay.transform.InferType()(mod)
-            vm_exec = relay.vm.compile(mod, target="llvm", params=params)
-        if serialize:
-            code, lib = vm_exec.save()
-            vm_exec = tvm.runtime.vm.Executable.load_exec(code, lib)
-        vm = VirtualMachine(vm_exec, tvm.cpu())
-        inputs = {}
-        for e, i in zip(input_node, input_data):
-            inputs[e] = tvm.nd.array(i)
-        result = vm.invoke("main", **inputs)
-        return vmobj_to_list(result)
-    else:
-        with tvm.transform.PassContext(opt_level=opt_level, disabled_pass=disabled_pass):
-            target = tvm.target.Target(target, target_host)
-            graph, lib, params = relay.build(mod, target=target, params=params)
-
-        m = graph_executor.create(graph, lib, dev)
-        # set inputs
-        for e, i in zip(input_node, input_data):
-            if e != "":
-                m.set_input(e, tvm.nd.array(i))
-
-        m.set_input(**params)
-        # execute
-        m.run()
-        # get outputs
-        assert out_names is None or num_output == len(
-            out_names
-        ), f"out_names: {out_names} num_output: {num_output}"
-        tvm_output_list = [m.get_output(i).numpy() for i in range(num_output)]
-        return tvm_output_list
-
-
-def run_tf_graph(sess, input_data, input_node, output_node):
-    """Generic function to execute tensorflow"""
-    input_data = convert_to_list(input_data)
-    input_node = convert_to_list(input_node)
-    output_node = convert_to_list(output_node)
-
-    tensor = [sess.graph.get_tensor_by_name(output_name) for output_name in output_node]
-
-    input_dict = {e: input_data[i] for i, e in enumerate(input_node)}
-    if len(input_node) == 1 and input_node[0] == "":
-        output_data = sess.run(tensor)
-    else:
-        output_data = sess.run(tensor, input_dict)
-    return output_data
-
-
-def compare_tf_with_tvm(
-    in_data,
-    in_name,
-    out_name,
-    init_global_variables=False,
-    no_gpu=False,
-    opt_level=3,
-    mode="graph_executor",
-    cuda_layout="NCHW",
-    add_shapes_to_graph_def=True,
-    targets=None,
-    ignore_in_shape=False,
-    convert_config=None,
-    atol=1e-5,
-    rtol=1e-5,
-):
-    """Generic function to generate and compare tensorflow and TVM output"""
-
-    def name_without_num(name):
-        return name.split(":")[0] if ":" in name else name
-
-    out_name = convert_to_list(out_name)
-    out_node = [name_without_num(name) for name in out_name]
-
-    in_data = convert_to_list(in_data)
-    in_name = convert_to_list(in_name)
-    in_node = [name_without_num(name) for name in in_name]
-    with tf.Session() as sess:
-        if init_global_variables:
-            sess.run(variables.global_variables_initializer())
-        final_graph_def = (
-            tf_testing.AddShapesToGraphDef(sess, out_node)
-            if add_shapes_to_graph_def
-            else tf.get_default_graph().as_graph_def()
-        )
-
-        tf_output = run_tf_graph(sess, in_data, in_name, out_name)
-
-        devices = targets if targets else ["llvm", "cuda"]
-
-        for device in devices:
-            _ = tvm.device(device, 0)
-            if not tvm.testing.device_enabled(device):
-                print(f"Skip because {device} is not enabled")
-                continue
-            if no_gpu and device == "cuda":
-                continue
-            if "cublas" in device and not tvm.get_global_func("tvm.contrib.cublas.matmul", True):
-                print(f"Skip because cublas is not enabled: {device}")
-                continue
-
-            tvm_output = run_tvm_graph(
-                final_graph_def,
-                in_data,
-                in_node,
-                target=device,
-                out_names=out_name,
-                num_output=len(out_name),
-                opt_level=opt_level,
-                mode=mode,
-                cuda_layout=cuda_layout,
-                ignore_in_shape=ignore_in_shape,
-                convert_config=convert_config,
-            )
-            # since the names from tensorflow and relay runs are not exactly same,
-            # first len(tf_output) will be compared
-            for i, tf_out in enumerate(tf_output):
-                if not isinstance(tf_out, np.ndarray):
-                    assert len(tvm_output[i].shape) == 0  # pylint: disable=len-as-condition
-                tvm.testing.assert_allclose(tf_out, tvm_output[i], atol=atol, rtol=rtol)
-
-        sess.close()
-
-
-def is_gpu_available():
-    """Verify gpu is available"""
-    local_device_protos = device_lib.list_local_devices()
-    gpu_list = [x.name for x in local_device_protos if x.device_type == "GPU"]
-    if gpu_list:
-        print("Tensorflow GPU:", gpu_list)
-        return True
-    else:
-        return False
-
-
-#######################################################################
-# Pooling
-# -------
-
-
-def _test_pooling_iteration(input_shape, **kwargs):
-    """One iteration of pool operation with given shapes and attributes"""
-
-    x = -np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) - 1
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=input_shape, dtype="float32")
-        nn_ops.pool(in_data, **kwargs)
-
-        if kwargs["pooling_type"] == "MAX":
-            out_name = "max_pool:0"
-        else:
-            out_name = "avg_pool:0"
-
-        compare_tf_with_tvm(x, "Placeholder:0", out_name)
-
-
-def _test_pooling(input_shape, **kwargs):
-    _test_pooling_iteration(input_shape, **kwargs)
-
-    if is_gpu_available():
-        if len(input_shape) == 4:
-            input_shape = [input_shape[ii] for ii in (0, 3, 1, 2)]
-            if isinstance(kwargs["padding"], list):
-                kwargs["padding"] = [kwargs["padding"][ii] for ii in (0, 3, 1, 2)]
-            kwargs["data_format"] = "NCHW"
-            _test_pooling_iteration(input_shape, **kwargs)
-
-
-def _test_pooling_dynamic(input_shape, np_shape, **kwargs):
-    """Pooling with dynamic height and width dimensions."""
-    x = -np.arange(np.prod(np_shape), dtype=np.float32).reshape(np_shape) - 1
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=input_shape, dtype="float32")
-        nn_ops.pool(in_data, **kwargs)
-
-        if kwargs["pooling_type"] == "MAX":
-            out_name = "max_pool:0"
-        else:
-            out_name = "avg_pool:0"
-
-        compare_tf_with_tvm(x, "Placeholder:0", out_name, mode="vm", ignore_in_shape=True)
-
-
-@tvm.testing.uses_gpu
-def test_forward_pooling():
-    """Pooling"""
-    # TensorFlow only supports NDHWC for max_pool3d on CPU
-    for pool_type in ["AVG", "MAX"]:
-        # NDHWC is the default layout for max_pool3d and avg_pool3d in TensorFlow
-        _test_pooling(
-            input_shape=[1, 3, 32, 32, 32],
-            window_shape=[2, 2, 2],
-            padding="VALID",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1, 1],
-            strides=[2, 2, 2],
-        )
-
-        _test_pooling(
-            input_shape=[1, 3, 32, 32, 32],
-            window_shape=[1, 1, 1],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1, 1],
-            strides=[1, 1, 1],
-        )
-
-        _test_pooling(
-            input_shape=[1, 3, 32, 32, 32],
-            window_shape=[2, 2, 2],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1, 1],
-            strides=[2, 2, 2],
-        )
-
-        _test_pooling_dynamic(
-            input_shape=[1, None, None, 3],
-            np_shape=[1, 32, 32, 3],
-            window_shape=[2, 2],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[1, 1],
-        )
-
-        # test cases for max_pool3d & avg_pool3d with layout NCDHW
-        # TensorFlow pool3d  doesn't support NCDHW on cpu
-        if is_gpu_available():
-            _test_pooling(
-                input_shape=[1, 3, 32, 32, 32],
-                window_shape=[1, 1, 1],
-                padding="SAME",
-                pooling_type=pool_type,
-                dilation_rate=[1, 1, 1],
-                strides=[1, 1, 1],
-                data_format="NCDHW",
-            )
-
-            _test_pooling(
-                input_shape=[1, 3, 32, 32, 32],
-                window_shape=[2, 2, 2],
-                padding="VALID",
-                pooling_type=pool_type,
-                dilation_rate=[1, 1, 1],
-                strides=[2, 2, 2],
-                data_format="NCDHW",
-            )
-
-        _test_pooling(
-            input_shape=[2, 9, 10, 2],
-            window_shape=[1, 1],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[1, 1],
-        )
-
-        _test_pooling(
-            input_shape=[2, 10, 9, 2],
-            window_shape=[1, 1],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[1, 1],
-        )
-
-        _test_pooling(
-            input_shape=[2, 9, 10, 2],
-            window_shape=[2, 1],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[1, 1],
-        )
-
-        _test_pooling(
-            input_shape=[2, 10, 9, 2],
-            window_shape=[2, 3],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[2, 1],
-        )
-
-        # Tests involving SpaceToBatchND
-        _test_pooling(
-            input_shape=[1, 1, 2, 1],
-            window_shape=[1, 1],
-            padding="VALID",
-            pooling_type=pool_type,
-            dilation_rate=[1, 2],
-        )
-
-        _test_pooling(
-            input_shape=[1, 2, 1],
-            window_shape=[1],
-            padding="VALID",
-            pooling_type=pool_type,
-            dilation_rate=[2],
-        )
-    # Explicit padding
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.4.1"):
-        _test_pooling(
-            input_shape=[2, 9, 10, 2],
-            window_shape=[4, 4],
-            padding=[[0, 0], [0, 1], [2, 3], [0, 0]],
-            pooling_type="MAX",
-            dilation_rate=[1, 1],
-            strides=[1, 1],
-        )
-
-
-#######################################################################
-# Convolution
-# -----------
-
-
-def _test_convolution(
-    opname,
-    tensor_in_sizes,
-    filter_in_sizes,
-    dilations,
-    strides,
-    padding,
-    data_format,
-    deconv_output_shape=None,
-    add_shapes_to_graph_def=True,
-):
-    """One iteration of convolution with given shapes and attributes"""
-    deconv_output_shape = deconv_output_shape or []
-    total_size_1 = np.prod(tensor_in_sizes)
-    total_size_2 = np.prod(filter_in_sizes)
-    # Initializes the input tensor with array containing incrementing
-    # numbers from 1.
-    data_array = [f * 1.0 for f in range(1, total_size_1 + 1)]
-    filter_array = [f * 1.0 for f in range(1, total_size_2 + 1)]
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32")
-        in_filter = constant_op.constant(filter_array, shape=filter_in_sizes, dtype="float32")
-        if data_format == "NHWC":
-            strides = [1] + strides + [1]
-            dilations = [1] + dilations + [1]
-        else:
-            strides = [1, 1] + strides
-            dilations = [1, 1] + dilations
-
-        if opname == "conv":
-            nn_ops.conv2d(
-                in_data,
-                in_filter,
-                strides=strides,
-                dilations=dilations,
-                padding=padding,
-                data_format=data_format,
-            )
-
-            compare_tf_with_tvm(
-                np.reshape(data_array, tensor_in_sizes).astype("float32"),
-                "Placeholder:0",
-                "Conv2D:0",
-                add_shapes_to_graph_def=add_shapes_to_graph_def,
-            )
-        elif opname == "conv_transpose":
-            nn_ops.conv2d_transpose(
-                in_data,
-                in_filter,
-                output_shape=deconv_output_shape,
-                strides=strides,
-                padding=padding,
-                data_format=data_format,
-            )
-
-            compare_tf_with_tvm(
-                np.reshape(data_array, tensor_in_sizes).astype("float32"),
-                "Placeholder:0",
-                "conv2d_transpose:0",
-                add_shapes_to_graph_def=add_shapes_to_graph_def,
-            )
-        else:
-            nn_ops.depthwise_conv2d_native(
-                in_data,
-                in_filter,
-                strides=strides,
-                dilations=dilations,
-                padding=padding,
-                data_format=data_format,
-            )
-
-            compare_tf_with_tvm(
-                np.reshape(data_array, tensor_in_sizes).astype("float32"),
-                "Placeholder:0",
-                "DepthwiseConv2dNative:0",
-                add_shapes_to_graph_def=add_shapes_to_graph_def,
-            )
-
-
-@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/10275")
-@tvm.testing.uses_gpu
-def test_forward_convolution():
-    """Convolution"""
-    if is_gpu_available():
-        _test_convolution("conv", [4, 176, 8, 8], [1, 1, 176, 32], [1, 1], [1, 1], "SAME", "NCHW")
-        _test_convolution("conv", [4, 19, 17, 17], [3, 3, 19, 19], [1, 1], [2, 2], "VALID", "NCHW")
-        _test_convolution("conv", [4, 124, 17, 17], [1, 1, 124, 19], [1, 1], [1, 1], "SAME", "NCHW")
-        _test_convolution("conv", [4, 12, 17, 17], [3, 3, 12, 32], [1, 1], [2, 2], "VALID", "NCHW")
-        _test_convolution(
-            "depthwise", [4, 176, 8, 8], [1, 1, 176, 1], [1, 1], [1, 1], "SAME", "NCHW"
-        )
-        _test_convolution(
-            "depthwise", [4, 19, 17, 17], [3, 3, 19, 1], [1, 1], [2, 2], "VALID", "NCHW"
-        )
-        _test_convolution(
-            "depthwise", [4, 124, 17, 17], [1, 1, 124, 1], [1, 1], [1, 1], "SAME", "NCHW"
-        )
-        _test_convolution(
-            "depthwise", [4, 12, 17, 17], [3, 3, 12, 1], [1, 1], [2, 2], "VALID", "NCHW"
-        )
-        _test_convolution(
-            "depthwise", [4, 12, 17, 17], [3, 3, 12, 2], [1, 1], [2, 2], "VALID", "NCHW"
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 32, 8, 8],
-            [1, 1, 176, 32],
-            [1, 1],
-            [1, 1],
-            "SAME",
-            "NCHW",
-            [4, 176, 8, 8],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 32, 8, 8],
-            [2, 2, 176, 32],
-            [1, 1],
-            [1, 1],
-            "SAME",
-            "NCHW",
-            [4, 176, 8, 8],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 32, 8, 8],
-            [2, 2, 176, 32],
-            [1, 1],
-            [2, 2],
-            "SAME",
-            "NCHW",
-            [4, 176, 15, 15],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 32, 8, 8],
-            [3, 3, 176, 32],
-            [1, 1],
-            [1, 1],
-            "SAME",
-            "NCHW",
-            [4, 176, 8, 8],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 32, 8, 8],
-            [3, 3, 176, 32],
-            [1, 1],
-            [2, 2],
-            "SAME",
-            "NCHW",
-            [4, 176, 15, 15],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 32, 8, 8],
-            [3, 3, 176, 32],
-            [1, 1],
-            [2, 2],
-            "SAME",
-            "NCHW",
-            [4, 176, 16, 16],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 19, 8, 8],
-            [3, 3, 19, 19],
-            [1, 1],
-            [2, 2],
-            "VALID",
-            "NCHW",
-            [4, 19, 17, 17],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 19, 17, 17],
-            [1, 1, 124, 19],
-            [1, 1],
-            [1, 1],
-            "SAME",
-            "NCHW",
-            [4, 124, 17, 17],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 19, 17, 17],
-            [3, 3, 124, 19],
-            [1, 1],
-            [1, 1],
-            "SAME",
-            "NCHW",
-            [4, 124, 17, 17],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 32, 8, 8],
-            [3, 3, 12, 32],
-            [1, 1],
-            [2, 2],
-            "VALID",
-            "NCHW",
-            [4, 12, 17, 17],
-        )
-        # kernel 2x2, strides (2,2)
-        _test_convolution(
-            "conv_transpose",
-            [4, 19, 8, 8],
-            [2, 2, 19, 19],
-            [1, 1],
-            [2, 2],
-            "VALID",
-            "NCHW",
-            [4, 19, 16, 16],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 32, 8, 8],
-            [2, 2, 12, 32],
-            [1, 1],
-            [2, 2],
-            "VALID",
-            "NCHW",
-            [4, 12, 16, 16],
-        )
-        # output channel is 1
-        _test_convolution(
-            "conv_transpose",
-            [1, 19, 8, 8],
-            [1, 1, 1, 19],
-            [1, 1],
-            [1, 1],
-            "VALID",
-            "NCHW",
-            [1, 1, 8, 8],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 19, 8, 8],
-            [2, 2, 66, 19],
-            [1, 1],
-            [2, 2],
-            "VALID",
-            "NCHW",
-            [4, 66, 16, 16],
-        )
-    _test_convolution("conv", [4, 8, 8, 176], [1, 1, 176, 32], [1, 1], [1, 1], "SAME", "NHWC")
-    _test_convolution("conv", [4, 17, 17, 19], [3, 3, 19, 19], [1, 1], [2, 2], "VALID", "NHWC")
-    _test_convolution("conv", [4, 17, 17, 124], [1, 1, 124, 19], [1, 1], [1, 1], "SAME", "NHWC")
-    _test_convolution("conv", [4, 17, 17, 12], [3, 3, 12, 32], [1, 1], [2, 2], "VALID", "NHWC")
-    _test_convolution(
-        "conv",
-        [4, 17, 17, 12],
-        [3, 3, 12, 32],
-        [1, 1],
-        [2, 2],
-        "VALID",
-        "NHWC",
-        add_shapes_to_graph_def=False,
-    )
-    _test_convolution("depthwise", [4, 8, 8, 176], [1, 1, 176, 1], [1, 1], [1, 1], "SAME", "NHWC")
-    _test_convolution("depthwise", [4, 17, 17, 19], [3, 3, 19, 1], [1, 1], [2, 2], "VALID", "NHWC")
-    _test_convolution("depthwise", [4, 17, 17, 124], [1, 1, 124, 1], [1, 1], [1, 1], "SAME", "NHWC")
-    _test_convolution("depthwise", [4, 17, 17, 12], [3, 3, 12, 1], [1, 1], [2, 2], "VALID", "NHWC")
-    _test_convolution("depthwise", [4, 17, 17, 12], [3, 3, 12, 2], [1, 1], [2, 2], "VALID", "NHWC")
-    _test_convolution(
-        "depthwise",
-        [4, 17, 17, 12],
-        [3, 3, 12, 2],
-        [1, 1],
-        [2, 2],
-        "VALID",
-        "NHWC",
-        add_shapes_to_graph_def=False,
-    )
-
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [1, 1, 176, 32],
-        [1, 1],
-        [1, 1],
-        "SAME",
-        "NHWC",
-        [4, 8, 8, 176],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [2, 2, 176, 32],
-        [1, 1],
-        [1, 1],
-        "SAME",
-        "NHWC",
-        [4, 8, 8, 176],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [2, 2, 176, 32],
-        [1, 1],
-        [2, 2],
-        "SAME",
-        "NHWC",
-        [4, 15, 15, 176],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [3, 3, 176, 32],
-        [1, 1],
-        [1, 1],
-        "SAME",
-        "NHWC",
-        [4, 8, 8, 176],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [3, 3, 176, 32],
-        [1, 1],
-        [2, 2],
-        "SAME",
-        "NHWC",
-        [4, 15, 15, 176],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [3, 3, 176, 32],
-        [1, 1],
-        [2, 2],
-        "SAME",
-        "NHWC",
-        [4, 16, 16, 176],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 19],
-        [3, 3, 19, 19],
-        [1, 1],
-        [2, 2],
-        "VALID",
-        "NHWC",
-        [4, 17, 17, 19],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 17, 17, 19],
-        [1, 1, 124, 19],
-        [1, 1],
-        [1, 1],
-        "SAME",
-        "NHWC",
-        [4, 17, 17, 124],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 17, 17, 19],
-        [3, 3, 124, 19],
-        [1, 1],
-        [1, 1],
-        "SAME",
-        "NHWC",
-        [4, 17, 17, 124],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [3, 3, 12, 32],
-        [1, 1],
-        [2, 2],
-        "VALID",
-        "NHWC",
-        [4, 17, 17, 12],
-    )
-    # kernel 2x2, strides (2,2)
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 19],
-        [2, 2, 19, 19],
-        [1, 1],
-        [2, 2],
-        "VALID",
-        "NHWC",
-        [4, 16, 16, 19],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [2, 2, 12, 32],
-        [1, 1],
-        [2, 2],
-        "VALID",
-        "NHWC",
-        [4, 16, 16, 12],
-    )
-    # output channel is 1
-    _test_convolution(
-        "conv_transpose",
-        [1, 8, 8, 19],
-        [1, 1, 1, 19],
-        [1, 1],
-        [1, 1],
-        "VALID",
-        "NHWC",
-        [1, 8, 8, 1],
-    )
-    # Test without adding shapes to graph def
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [1, 1, 176, 32],
-        [1, 1],
-        [1, 1],
-        "SAME",
-        "NHWC",
-        [4, 8, 8, 176],
-        add_shapes_to_graph_def=False,
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 19],
-        [2, 2, 66, 19],
-        [1, 1],
-        [2, 2],
-        "VALID",
-        "NHWC",
-        [4, 16, 16, 66],
-    )
-    # Explicit padding
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.4.1"):
-        _test_convolution(
-            "conv",
-            [4, 8, 8, 16],
-            [1, 1, 16, 32],
-            [1, 1],
-            [1, 1],
-            [[0, 0], [2, 3], [0, 1], [0, 0]],
-            "NHWC",
-        )
-        _test_convolution(
-            "depthwise",
-            [4, 8, 8, 16],
-            [1, 1, 16, 1],
-            [1, 1],
-            [1, 1],
-            [[0, 0], [2, 3], [0, 1], [0, 0]],
-            "NHWC",
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 8, 8, 32],
-            [3, 3, 176, 32],
-            [1, 1],
-            [2, 2],
-            [[0, 0], [1, 0], [1, 0], [0, 0]],
-            "NHWC",
-            [4, 16, 16, 176],
-        )
-
-
-#######################################################################
-# Convolution3D
-# -------------
-
-
-def _test_convolution3d(
-    opname,
-    tensor_in_sizes,
-    filter_in_sizes,
-    dilations,
-    strides,
-    padding,
-    data_format,
-    deconv_output_shape=None,
-    add_shapes_to_graph_def=True,
-):
-    """One iteration of 3D convolution with given shapes and attributes"""
-    deconv_output_shape = deconv_output_shape or []
-    total_size_1 = np.prod(tensor_in_sizes)
-    total_size_2 = np.prod(filter_in_sizes)
-    # Initializes the input tensor with array containing incrementing
-    # numbers from 1.
-    data_array = [f * 1.0 for f in range(1, total_size_1 + 1)]
-    filter_array = [f * 1.0 for f in range(1, total_size_2 + 1)]
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32")
-        in_filter = constant_op.constant(filter_array, shape=filter_in_sizes, dtype="float32")
-        if data_format == "NDHWC":
-            strides = [1] + strides + [1]
-            dilations = [1] + dilations + [1]
-        else:
-            strides = [1, 1] + strides
-            dilations = [1, 1] + dilations
-
-        if opname == "conv":
-            nn_ops.conv3d(
-                in_data,
-                in_filter,
-                strides=strides,
-                dilations=dilations,
-                padding=padding,
-                data_format=data_format,
-            )
-
-            compare_tf_with_tvm(
-                np.reshape(data_array, tensor_in_sizes).astype("float32"),
-                "Placeholder:0",
-                "Conv3D:0",
-                cuda_layout="NCDHW",
-                add_shapes_to_graph_def=add_shapes_to_graph_def,
-            )
-
-
-@tvm.testing.uses_gpu
-def test_forward_convolution3d():
-    """Convolution3d"""
-    if is_gpu_available():
-        _test_convolution3d(
-            "conv", [4, 176, 8, 8, 8], [1, 1, 1, 176, 32], [1, 1, 1], [1, 1, 1], "SAME", "NCDHW"
-        )
-        _test_convolution3d(
-            "conv", [4, 19, 17, 17, 17], [3, 3, 3, 19, 19], [1, 1, 1], [2, 2, 2], "VALID", "NCDHW"
-        )
-        _test_convolution3d(
-            "conv", [4, 124, 17, 17, 17], [1, 1, 1, 124, 19], [1, 1, 1], [1, 1, 1], "SAME", "NCDHW"
-        )
-        _test_convolution3d(
-            "conv", [4, 12, 17, 17, 17], [3, 3, 3, 12, 32], [1, 1, 1], [2, 2, 2], "VALID", "NCDHW"
-        )
-    _test_convolution3d(
-        "conv", [4, 8, 8, 8, 176], [1, 1, 1, 176, 32], [1, 1, 1], [1, 1, 1], "SAME", "NDHWC"
-    )
-    _test_convolution3d(
-        "conv", [4, 17, 17, 17, 19], [3, 3, 3, 19, 19], [1, 1, 1], [2, 2, 2], "VALID", "NDHWC"
-    )
-    _test_convolution3d(
-        "conv", [4, 17, 17, 17, 124], [1, 1, 1, 124, 19], [1, 1, 1], [1, 1, 1], "SAME", "NDHWC"
-    )
-    _test_convolution3d(
-        "conv", [4, 17, 17, 17, 12], [3, 3, 3, 12, 32], [1, 1, 1], [2, 2, 2], "VALID", "NDHWC"
-    )
-    # Test without adding shapes to graph def
-    _test_convolution3d(
-        "conv",
-        [4, 17, 17, 17, 12],
-        [3, 3, 3, 12, 32],
-        [1, 1, 1],
-        [2, 2, 2],
-        "VALID",
-        "NDHWC",
-        add_shapes_to_graph_def=False,
-    )
-
-
-#######################################################################
-# Convolution3D Transpose
-# -----------------------
-
-
-def _test_convolution3d_transpose(
-    data_shape,
-    filter_shape,
-    strides,
-    padding,
-    output_shape,
-    data_format="NCDHW",
-    add_shapes_to_graph_def=True,
-):
-    """One iteration of 3D convolution transpose with given shapes and attributes"""
-
-    dtype = "float32"
-    data_array = np.random.uniform(size=data_shape).astype(dtype)
-    filter_array = np.random.uniform(size=filter_shape).astype(dtype)
-    if data_format == "NDHWC":
-        strides = [1] + strides + [1]
-    else:
-        strides = [1, 1] + strides
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data_shape, dtype=dtype)
-        in_filter = constant_op.constant(filter_array, shape=filter_shape, dtype=dtype)
-
-        nn_ops.conv3d_transpose(
-            in_data,
-            in_filter,
-            output_shape=output_shape,
-            strides=strides,
-            padding=padding,
-            data_format=data_format,
-        )
-
-        compare_tf_with_tvm(
-            data_array,
-            "Placeholder:0",
-            "conv3d_transpose:0",
-            cuda_layout="NDHWC",
-            add_shapes_to_graph_def=add_shapes_to_graph_def,
-        )
-
-
-@tvm.testing.uses_gpu
-def test_forward_convolution3d_transpose():
-    """Convolution3d transpose"""
-    if is_gpu_available():
-        _test_convolution3d_transpose(
-            data_shape=[1, 10, 8, 8, 8],
-            filter_shape=[1, 1, 1, 6, 10],
-            strides=[1, 1, 1],
-            padding="VALID",
-            output_shape=[1, 6, 8, 8, 8],
-        )
-
-        _test_convolution3d_transpose(
-            data_shape=[4, 9, 8, 8, 8],
-            filter_shape=[1, 1, 1, 6, 9],
-            strides=[1, 1, 1],
-            padding="VALID",
-            output_shape=[4, 6, 8, 8, 8],
-        )
-
-        _test_convolution3d_transpose(
-            data_shape=[1, 3, 8, 8, 8],
-            filter_shape=[1, 1, 1, 6, 3],
-            strides=[2, 2, 2],
-            padding="SAME",
-            output_shape=[1, 6, 15, 15, 15],
-        )
-
-        _test_convolution3d_transpose(
-            data_shape=[1, 16, 8, 8, 8],
-            filter_shape=[3, 3, 3, 6, 16],
-            strides=[3, 3, 3],
-            padding="VALID",
-            output_shape=[1, 6, 24, 24, 24],
-        )
-
-    _test_convolution3d_transpose(
-        data_shape=[1, 8, 8, 8, 10],
-        filter_shape=[1, 1, 1, 6, 10],
-        strides=[1, 1, 1],
-        padding="VALID",
-        output_shape=[1, 8, 8, 8, 6],
-        data_format="NDHWC",
-    )
-
-    _test_convolution3d_transpose(
-        data_shape=[4, 8, 8, 8, 9],
-        filter_shape=[1, 1, 1, 6, 9],
-        strides=[1, 1, 1],
-        padding="VALID",
-        output_shape=[4, 8, 8, 8, 6],
-        data_format="NDHWC",
-    )
-
-    _test_convolution3d_transpose(
-        data_shape=[1, 8, 8, 8, 3],
-        filter_shape=[1, 1, 1, 6, 3],
-        strides=[2, 2, 2],
-        padding="SAME",
-        output_shape=[1, 15, 15, 15, 6],
-        data_format="NDHWC",
-    )
-
-    _test_convolution3d_transpose(
-        data_shape=[1, 8, 8, 8, 16],
-        filter_shape=[3, 3, 3, 6, 16],
-        strides=[3, 3, 3],
-        padding="VALID",
-        output_shape=[1, 24, 24, 24, 6],
-        data_format="NDHWC",
-    )
-
-    # Test without adding shapes to graph def
-    _test_convolution3d_transpose(
-        data_shape=[1, 8, 8, 8, 16],
-        filter_shape=[3, 3, 3, 6, 16],
-        strides=[3, 3, 3],
-        padding="VALID",
-        output_shape=[1, 24, 24, 24, 6],
-        data_format="NDHWC",
-        add_shapes_to_graph_def=False,
-    )
-
-
-#######################################################################
-# BiasAdd
-# -----------
-
-
-def _test_biasadd(tensor_in_sizes, data_format):
-    """One iteration of biasadd with given shapes and attributes"""
-
-    total_size_1 = 1
-    for s in tensor_in_sizes:
-        total_size_1 *= s
-    tensor_bias_sizes = [tensor_in_sizes[1]] if data_format == "NCHW" else [tensor_in_sizes[3]]
-    total_size_2 = tensor_bias_sizes[0]
-    # Initializes the input tensor with array containing incrementing
-    # numbers from 1.
-    data_array = [f * 1.0 for f in range(1, total_size_1 + 1)]
-    bias_array = [f * 1.0 for f in range(1, total_size_2 + 1)]
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32")
-        in_bias = constant_op.constant(bias_array, shape=tensor_bias_sizes, dtype="float32")
-        nn_ops.bias_add(in_data, in_bias, data_format=data_format)
-
-        compare_tf_with_tvm(
-            np.reshape(data_array, tensor_in_sizes).astype("float32"), "Placeholder:0", "BiasAdd:0"
-        )
-
-
-@tvm.testing.uses_gpu
-def test_forward_biasadd():
-    """Bias add"""
-    if is_gpu_available():
-        _test_biasadd([4, 176, 8, 8], "NCHW")
-        _test_biasadd([1, 100, 1, 1], "NCHW")
-        _test_biasadd([4, 19, 17, 17], "NCHW")
-        _test_biasadd([4, 124, 3, 3], "NCHW")
-
-    _test_biasadd([4, 8, 8, 176], "NHWC")
-    _test_biasadd([1, 1, 1, 100], "NHWC")
-    _test_biasadd([4, 17, 17, 19], "NHWC")
-    _test_biasadd([4, 3, 3, 124], "NHWC")
-
-
-def _test_forward_where(input_shape):
-    with tf.Graph().as_default():
-        dtype = tf.float32
-        t = tf.constant(
-            np.random.choice([0, 1, -2, 3, -1, 0.1, -0.2], size=input_shape).astype(dtype.name)
-        )
-        out = tf.where(t)
-        compare_tf_with_tvm([], [], out.name, mode="debug")
-        compare_tf_with_tvm([], [], out.name, mode="vm")
-
-
-def test_forward_argwhere():
-    _test_forward_where((5,))
-    _test_forward_where((5, 5))
-    _test_forward_where((5, 5, 5))
-    _test_forward_where((5, 5, 5, 5))
-    _test_forward_where((5, 5, 5, 5, 5))
-
-
-def _test_forward_where_with_broadcast(in_shape, cond_shape):
-    choice_list = list(np.arange(10).astype("float32"))
-    t1 = np.random.choice(choice_list, size=cond_shape)
-    t2 = np.random.choice(choice_list, size=cond_shape)
-    x = np.random.choice(choice_list, size=in_shape)
-    y = np.random.choice(choice_list, size=in_shape)
-
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=cond_shape, dtype="float32", name="in1")
-        in2 = tf.placeholder(shape=cond_shape, dtype="float32", name="in2")
-        condition = math_ops.less(in1, in2, name="less")
-        lhs = tf.placeholder(shape=in_shape, dtype="float32", name="x")
-        rhs = tf.placeholder(shape=in_shape, dtype="float32", name="y")
-        out = tf.where(condition, lhs, rhs)
-        compare_tf_with_tvm([t1, t2, x, y], ["in1:0", "in2:0", "x:0", "y:0"], out.name)
-
-
-def test_forward_where_with_broadcast():
-    _test_forward_where_with_broadcast((5, 2), (5,))
-    _test_forward_where_with_broadcast((5, 7), (5,))
-    _test_forward_where_with_broadcast((3, 2, 5), (3,))
-
-
-#######################################################################
-# SpaceToBatchND
-# --------------
-
-
-def _test_space_to_batch_nd(input_shape, block_shape, paddings, dtype="int32"):
-    data = np.random.uniform(0, 5, size=input_shape).astype(dtype)
-
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(shape=input_shape, dtype=dtype)
-        out = tf.space_to_batch_nd(in_data, block_shape, paddings)
-
-        compare_tf_with_tvm(data, in_data.name, out.name)
-
-
-def _test_space_to_batch_nd_infer_paddings(input_shape, block_shape, dtype="int32"):
-    data = np.random.uniform(0, 5, size=input_shape).astype(dtype)
-    padding_np = np.array([0, 1]).astype(np.int32).reshape((1, 2))
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(shape=input_shape, dtype=dtype)
-        const1 = tf.constant(padding_np, dtype=tf.int32)
-        # make paddings an input to tf.transpose, but not an input to the graph,
-        # so it can be extracted with infer_value_simulated
-        paddings = tf.reverse(const1, axis=[-1])
-        out = tf.space_to_batch_nd(in_data, block_shape, paddings)
-        compare_tf_with_tvm(data, in_data.name, out.name)
-
-
-def test_forward_space_to_batch_nd():
-    """SpaceToBatchNd"""
-    # test cases: https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/space-to-batch-n-d
-    _test_space_to_batch_nd(input_shape=[1, 2, 2, 1], block_shape=[2, 2], paddings=[[0, 0], [0, 0]])
-
-    _test_space_to_batch_nd(input_shape=[1, 2, 2, 3], block_shape=[2, 2], paddings=[[0, 0], [0, 0]])
-
-    _test_space_to_batch_nd(input_shape=[1, 4, 4, 1], block_shape=[2, 2], paddings=[[0, 0], [0, 0]])
-
-    _test_space_to_batch_nd(
-        input_shape=[2, 2, 4, 1], block_shape=[2, 2], paddings=[[0, 0], [2, 0]], dtype="int64"
-    )
-
-    # pylint: disable=line-too-long
-    # https://github.com/tensorflow/tensorflow/blob/24f578/tensorflow/python/kernel_tests/spacetobatch_op_test.py
-    _test_space_to_batch_nd(input_shape=[2, 3], block_shape=[2], paddings=[[1, 0]], dtype="float32")
-
-    _test_space_to_batch_nd(
-        input_shape=[2, 3, 2], block_shape=[2], paddings=[[1, 0]], dtype="float64"
-    )
-
-    _test_space_to_batch_nd_infer_paddings(input_shape=[2, 3, 2], block_shape=[2])
-
-
-#######################################################################
-# BatchToSpaceND
-# --------------
-
-
-def _test_batch_to_space_nd(input_shape, block_shape, crops, dtype="int32"):
-    data = np.random.uniform(0, 5, size=input_shape).astype(dtype)
-
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(shape=input_shape, dtype=dtype)
-        out = tf.batch_to_space_nd(in_data, block_shape, crops)
-
-        compare_tf_with_tvm(data, in_data.name, out.name)
-
-
-def test_forward_batch_to_space_nd():
-    """BatchToSpaceNd"""
-    # test cases: https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/batch-to-space-n-d
-    _test_batch_to_space_nd(input_shape=[4, 1, 1, 1], block_shape=[2, 2], crops=[[0, 0], [0, 0]])
-
-    _test_batch_to_space_nd(input_shape=[4, 1, 1, 3], block_shape=[2, 2], crops=[[0, 0], [0, 0]])
-
-    _test_batch_to_space_nd(input_shape=[4, 2, 2, 1], block_shape=[2, 2], crops=[[0, 0], [0, 0]])
-
-    _test_batch_to_space_nd(
-        input_shape=[8, 1, 3, 1], block_shape=[2, 2], crops=[[0, 0], [2, 0]], dtype="int64"
-    )
-
-    # pylint: disable=line-too-long
-    # https://github.com/tensorflow/tensorflow/blob/24f578/tensorflow/python/kernel_tests/batchtospace_op_test.py
-    _test_batch_to_space_nd(
-        input_shape=[18, 2, 1, 2], block_shape=[2, 3], crops=[[1, 1], [0, 0]], dtype="float32"
-    )
-
-    _test_batch_to_space_nd(
-        input_shape=[20, 5, 8, 7], block_shape=[2, 2], crops=[[1, 1], [1, 1]], dtype="float64"
-    )
-
-
-#######################################################################
-# Reshape
-# -------
-
-
-def _test_reshape(data, out_shape):
-    """One iteration of reshape operation with given data and out shape"""
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        array_ops.reshape(in_data, out_shape)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "Reshape:0")
-
-
-def _test_reshape_with_call():
-    """relay.expr.Call as shape"""
-    data = np.zeros((6, 4, 2))
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out_shape = tf.constant([1, 2, 3], dtype="int32")
-        out_shape = tf.multiply(out_shape, 2)
-        array_ops.reshape(in_data, out_shape)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "Reshape:0")
-
-
-def _test_reshape_like(data, shape_like):
-    """A special case for reshape."""
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        in_shape_like = array_ops.placeholder(shape=shape_like.shape, dtype=data.dtype)
-        out_shape = array_ops.shape(in_shape_like)
-        array_ops.reshape(in_data, out_shape)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "Reshape:0")
-
-
-def _test_reshape_symbolic(data, a_data, b_data):
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        a = array_ops.placeholder(shape=a_data.shape, dtype=a_data.dtype)
-        b = array_ops.placeholder(shape=b_data.shape, dtype=b_data.dtype)
-        newshape = tf.add(a, b)
-        out = array_ops.reshape(in_data, newshape)
-
-        for mode in ["debug", "vm"]:
-            compare_tf_with_tvm(
-                [data, a_data, b_data], [in_data.name, a.name, b.name], out.name, mode=mode
-            )
-
-
-def test_forward_reshape():
-    """Reshape"""
-    _test_reshape(np.arange(6.0), [2, 3])
-    _test_reshape(np.arange(6), [-1, 2])
-    _test_reshape(np.arange(6), [3, -1])
-    _test_reshape(np.arange(6), [-1])
-    _test_reshape_with_call()
-    _test_reshape_like(np.zeros((3, 6)), np.zeros((9, 2)))
-    _test_reshape_symbolic(np.arange(6.0), np.array([2, 0]), np.array([0, 3]))
-    _test_reshape_symbolic(np.arange(6), np.array([-1, 0]), np.array([0, 2]))
-    _test_reshape_symbolic(np.arange(6), np.array([3, 0]), np.array([3, -1]))
-    _test_reshape_symbolic(np.arange(6), np.array([0]), np.array([-1]))
-
-
-#######################################################################
-# DepthToSpace
-# ------------
-
-
-def _test_depthtospace(data, block_size):
-    """One iteration of depth_to_space operation with given data and block size"""
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        array_ops.depth_to_space(in_data, block_size)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "DepthToSpace:0")
-
-
-def test_forward_depthtospace():
-    _test_depthtospace(np.random.normal(size=[1, 32, 32, 4]), 2)
-    _test_depthtospace(np.random.normal(size=[1, 16, 8, 32]), 4)
-
-
-#######################################################################
-# SpaceToDepth
-# ------------
-
-
-def _test_spacetodepth(data, block_size):
-    """One iteration of space_to_depth operation with given data and block size"""
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        array_ops.space_to_depth(in_data, block_size)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "SpaceToDepth:0")
-
-
-def test_forward_spacetodepth():
-    _test_spacetodepth(np.random.normal(size=[1, 32, 32, 4]), 2)
-    _test_spacetodepth(np.random.normal(size=[1, 16, 8, 32]), 4)
-
-
-#######################################################################
-# Squeeze
-# -------
-
-
-def _test_squeeze(data, squeeze_dims=None):
-    """One iteration of squeeze"""
-
-    if squeeze_dims is None:
-        squeeze_dims = []
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-
-        if squeeze_dims:
-            array_ops.squeeze(in_data, squeeze_dims)
-        else:
-            array_ops.squeeze(in_data)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "Squeeze:0")
-
-
-def test_forward_squeeze():
-    """Squeeze"""
-
-    # Nothing to squeeze.
-    _test_squeeze(np.arange(2).reshape((2)))
-    _test_squeeze(np.arange(6).reshape((2, 3)))
-
-    # Squeeze the middle element away.
-    _test_squeeze(np.arange(4).reshape((2, 1, 2)))
-
-    # Squeeze on both ends.
-    _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)))
-
-    # Positive squeeze dim index.
-    _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [0])
-    _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [2, 4])
-    _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [0, 4, 2])
-
-    # Negative squeeze dim index.
-    _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-1])
-    _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-3, -5])
-    _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-3, -5, -1])
-
-
-#######################################################################
-# TensorArray
-# -----------
-def test_tensor_array_write_read():
-    """Tensor array write read"""
-
-    def run(dtype_str, infer_shape, element_shape):
-        with tf.Graph().as_default():
-            dtype = tf_dtypes[dtype_str]
-            np_data = np.array([[1.0, 2.0], [3.0, 4.0]]).astype(dtype_str)
-            _ = [np_data, np_data]
-            t1 = tf.constant(np_data, dtype=dtype)
-            t2 = tf.constant(np_data, dtype=dtype)
-            ta1 = tf.TensorArray(
-                dtype=dtype, size=2, infer_shape=infer_shape, element_shape=element_shape
-            )
-            ta2 = ta1.write(0, t1)
-            ta3 = ta2.write(1, t2)
-            _ = ta3.read(0)
-            _ = tf.get_default_graph()
-            compare_tf_with_tvm([], [], "TensorArrayReadV3:0", mode="vm")
-
-    for dtype in ["float32", "int8"]:
-        run(dtype, False, None)
-        run(dtype, False, tf.TensorShape([None, 2]))
-        run(dtype, True, None)
-
-
-def test_tensor_array_scatter():
-    """Tensor array scatter"""
-
-    def run(dtype_str, infer_shape):
-        with tf.Graph().as_default():
-            dtype = tf_dtypes[dtype_str]
-            if infer_shape:
-                element_shape = tf.TensorShape([tf.Dimension(None)])
-            else:
-                element_shape = None
-            ta0 = _construct_scatter(dtype, dtype_str, element_shape, infer_shape, 3)
-            _ = ta0.read(0)
-            _ = ta0.read(1)
-            _ = ta0.read(2)
-            ta1 = _construct_scatter(dtype, dtype_str, element_shape, infer_shape, 4)
-            out4 = ta1.read(0)
-            _ = tf.get_default_graph()
-            compare_tf_with_tvm([], [], ["TensorArrayReadV3:0"], mode="vm")
-            compare_tf_with_tvm([], [], ["TensorArrayReadV3_1:0"], mode="vm")
-            compare_tf_with_tvm([], [], ["TensorArrayReadV3_2:0"], mode="vm")
-            compare_tf_with_tvm([], [], ["TensorArrayReadV3_2:0", out4.name], mode="vm")
-
-    def _construct_scatter(dtype, dtype_str, element_shape, infer_shape, size):
-        arr = [[float(i)] for i in range(size)]  # pylint: disable=unnecessary-comprehension
-        indices_arr = list(range(size - 1, -1, -1))
-
-        t = tf.constant(np.array(arr).astype(dtype_str), dtype=dtype)
-        indices = tf.constant(indices_arr)
-        ta1 = tf.TensorArray(
-            dtype=dtype, size=size, infer_shape=infer_shape, element_shape=element_shape
-        )
-        ta2 = ta1.scatter(indices, t)
-        return ta2
-
-    for dtype in ["float32", "int8"]:
-        run(dtype, False)
-        run(dtype, True)
-
-
-def test_tensor_array_gather():
-    """tensor array gather"""
-
-    def run(dtype_str, infer_shape):
-        with tf.Graph().as_default():
-            dtype = tf_dtypes[dtype_str]
-            t = tf.constant(np.array([[1.0], [2.0], [3.0]]).astype(dtype_str))
-            scatter_indices = tf.constant([2, 1, 0])
-            gather_indices = tf.constant([1, 2])
-            ta1 = tf.TensorArray(dtype=dtype, size=3, infer_shape=infer_shape)
-            ta2 = ta1.scatter(scatter_indices, t)
-            _ = ta2.gather(gather_indices)
-            _ = tf.get_default_graph()
-            compare_tf_with_tvm([], [], ["TensorArrayGatherV3:0"], mode="vm")
-
-    for dtype in ["float32", "int8"]:
-        run(dtype, True)
-
-
-def test_tensor_array_split():
-    """tensor array split"""
-
-    def run(dtype_str, infer_shape):
-        with tf.Graph().as_default():
-            dtype = tf_dtypes[dtype_str]
-            t = tf.constant(
-                np.array([[1.0], [2.0], [3.0], [4.0], [5.0], [6.0], [7.0], [8.0]]).astype(
-                    dtype_str
-                ),
-                dtype=dtype,
-            )
-            split_length = tf.constant([2, 2, 2, 2], dtype=tf.int32)
-            ta1 = tf.TensorArray(dtype=dtype, size=4, infer_shape=infer_shape)
-            ta2 = ta1.split(t, split_length)
-            _ = ta2.read(0)
-            _ = ta2.read(1)
-            _ = ta2.read(2)
-            _ = ta2.read(3)
-            _ = tf.get_default_graph()
-            compare_tf_with_tvm([], [], ["TensorArrayReadV3:0"], mode="debug")
-            compare_tf_with_tvm([], [], ["TensorArrayReadV3_1:0"], mode="debug")
-            compare_tf_with_tvm([], [], ["TensorArrayReadV3_2:0"], mode="debug")
-            compare_tf_with_tvm([], [], ["TensorArrayReadV3_3:0"], mode="debug")
-
-    for dtype in ["float32", "int8"]:
-        run(dtype, False)
-        run(dtype, True)
-
-
-def test_tensor_array_concat():
-    """Tensor array concat"""
-
-    def run(dtype_str, infer_shape):
-        with tf.Graph().as_default():
-            dtype = tf_dtypes[dtype_str]
-            t = tf.constant(
-                np.array([[1.0], [2.0], [3.0], [4.0], [5.0], [6.0], [7.0], [8.0]]).astype(
-                    dtype_str
-                ),
-                dtype=dtype,
-            )
-            split_length = tf.constant([2, 2, 2, 2], dtype=tf.int32)
-            ta1 = tf.TensorArray(dtype=dtype, size=4, infer_shape=infer_shape)
-            ta2 = ta1.split(t, split_length)
-            t = ta2.concat()
-            _ = tf.identity(t)
-            compare_tf_with_tvm([], [], ["Identity:0"], mode="debug")
-
-    for dtype in ["float32", "int8"]:
-        run(dtype, False)
-        run(dtype, True)
-
-
-def test_tensor_array_size():
-    """Tensor array size"""
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"):
-        pytest.skip("Needs fixing for tflite >= 1.15.0")
-
-    def run(dtype_str, infer_shape):
-        with tf.Graph().as_default():
-            dtype = tf_dtypes[dtype_str]
-            np_data = np.array([[1.0, 2.0], [3.0, 4.0]]).astype(dtype_str)
-            _ = [np_data, np_data]
-            t1 = tf.constant(np_data, dtype=dtype)
-            t2 = tf.constant(np_data, dtype=dtype)
-            ta1 = tf.TensorArray(dtype=dtype, size=2, infer_shape=infer_shape)
-            ta2 = ta1.write(0, t1)
-            ta3 = ta2.write(1, t2)
-            _ = ta3.size()
-            _ = tf.get_default_graph()
-            compare_tf_with_tvm([], [], "TensorArraySizeV3:0", mode="debug")
-
-    for dtype in ["float32", "int8"]:
-        run(dtype, False)
-        run(dtype, True)
-
-
-def test_tensor_array_stack():
-    """Tensor array stack"""
-
-    def run(dtype_str, infer_shape):
-        if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"):
-            pytest.skip("Needs fixing for tflite >= 1.15.0")
-
-        with tf.Graph().as_default():
-            dtype = tf_dtypes[dtype_str]
-            t = tf.constant(np.array([[1.0], [2.0], [3.0]]).astype(dtype_str))
-            scatter_indices = tf.constant([2, 1, 0])
-            ta1 = tf.TensorArray(dtype=dtype, size=3, infer_shape=infer_shape)
-            ta2 = ta1.scatter(scatter_indices, t)
-            t1 = ta2.stack()
-            print(t1)
-            _ = tf.get_default_graph()
-
-            compare_tf_with_tvm([], [], ["TensorArrayStack/TensorArrayGatherV3:0"], mode="vm")
-
-    for dtype in ["float32", "int8"]:
-        run(dtype, True)
-
-
-def test_tensor_array_unstack():
-    """Tensor array unstack"""
-
-    def run(dtype_str, input_shape, infer_shape):
-        if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"):
-            pytest.skip("Needs fixing for tflite >= 1.15.0")
-
-        with tf.Graph().as_default():
-            dtype = tf_dtypes[dtype_str]
-            t = tf.constant(np.random.choice([0, 1, 2, 3], size=input_shape).astype(dtype.name))
-            ta1 = tf.TensorArray(dtype=dtype, infer_shape=infer_shape, size=input_shape[0])
-            ta2 = ta1.unstack(t)
-            _ = ta2.size()
-            _ = ta2.read(0)
-            compare_tf_with_tvm([], [], "TensorArraySizeV3:0", mode="debug")
-            compare_tf_with_tvm([], [], "TensorArrayReadV3:0", mode="debug")
-
-    for dtype in ["float32", "int8"]:
-        run(dtype, (5,), False)
-        run(dtype, (5, 5), True)
-        run(dtype, (5, 5, 5), False)
-        run(dtype, (5, 5, 5, 5), True)
-
-
-#######################################################################
-# ConcatV2
-# --------
-
-
-def _test_concat_v2(shape1, shape2, dim):
-    """One iteration of ConcatV2"""
-
-    with tf.Graph().as_default():
-        dtype = "float32"
-        in1 = tf.placeholder(shape=shape1, dtype=dtype, name="in1")
-        in2 = tf.placeholder(shape=shape2, dtype=dtype, name="in2")
-        array_ops.concat_v2([in1, in2], dim)
-
-        np_data1 = np.random.uniform(size=shape1).astype(dtype)
-        np_data2 = np.random.uniform(size=shape2).astype(dtype)
-
-        compare_tf_with_tvm([np_data1, np_data2], ["in1:0", "in2:0"], "ConcatV2:0")
-
-
-def test_forward_concat_v2():
-    if package_version.parse(tf.__version__) < package_version.parse("1.4.1"):
-        return
-
-    _test_concat_v2([2, 3], [2, 3], 0)
-    _test_concat_v2([10, 3, 5], [2, 3, 5], 0)
-    _test_concat_v2([2, 3], [2, 3], 1)
-    _test_concat_v2([5, 8], [5, 4], 1)
-    _test_concat_v2([2, 8, 5], [2, 8, 6], -1)
-
-
-#######################################################################
-# Sigmoid
-# -------
-
-
-def _test_sigmoid(data):
-    """One iteration of sigmoid"""
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        _ = math_ops.sigmoid(in_data)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "Sigmoid:0")
-
-
-def test_forward_sigmoid():
-    """Sigmoid"""
-
-    _test_sigmoid(np.random.uniform(size=(3, 4, 4, 3)).astype("float32"))
-
-
-#######################################################################
-# Argmin/Argmax
-# -------------
-
-
-def _test_argx(func, data, **kwargs):
-
-    with tf.Graph().as_default():
-        inp = array_ops.placeholder(shape=data.shape, dtype=data.dtype, name="c0")
-        func(inp, name="argx0", **kwargs)
-        compare_tf_with_tvm(data, "c0:0", "argx0:0")
-
-
-def test_forward_argminmax():
-    for output_type in [tf.int64, tf.int32]:
-        for axis in [None, 0, 1, 2]:
-            data = np.random.uniform(size=(8, 4, 9)).astype("float32")
-            _test_argx(tf.argmax, data=data, axis=axis, output_type=output_type)
-            _test_argx(tf.argmin, data=data, axis=axis, output_type=output_type)
-
-
-#######################################################################
-# Variable
-# --------
-
-
-def _test_variable(data):
-    """One iteration of a variable"""
-
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        input_op = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        input_tensor = array_ops.reshape(input_op, data.shape)
-
-        size = input_tensor.shape.dims[1]
-        with variable_scope.variable_scope("linear", reuse=None):
-            w = variable_scope.get_variable("w", shape=[size, size], dtype=input_tensor.dtype)
-        math_ops.matmul(input_tensor, w)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "MatMul:0", init_global_variables=True)
-
-
-def test_forward_variable():
-    """Variable type op test"""
-    _test_variable(np.random.uniform(size=(32, 100)).astype("float32"))
-
-
-@tvm.testing.parametrize_targets("llvm", "cuda")
-def test_read_variable_op(target, dev):
-    """Read Variable op test"""
-
-    tf.reset_default_graph()
-    data = np.random.uniform(size=(32, 100)).astype("float32")
-    input_tensor = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-
-    size = input_tensor.shape.dims[1]
-    var_data = np.random.uniform(-5, 5, size=[size, size]).astype(np.float32)
-    input_var = tf.Variable(var_data, name="var1", use_resource=True)
-    math_ops.matmul(input_tensor, input_var)
-
-    out_name = ["MatMul:0"]
-    out_node = ["MatMul"]
-    in_name = ["Placeholder:0"]
-    in_node = ["Placeholder"]
-    in_data = [data]
-
-    with tf.Session() as sess:
-        sess.run(variables.global_variables_initializer())
-
-        final_graph_def = sess.graph.as_graph_def(add_shapes=True)
-        tf_output = run_tf_graph(sess, in_data, in_name, out_name)
-
-        shape_dict = {e: i.shape for e, i in zip(in_name, in_data)}
-        with pytest.raises(Exception) as execinfo:
-            with tvm.testing.disable_span_filling():
-                mod, _ = relay.frontend.from_tensorflow(
-                    final_graph_def, layout=None, shape=shape_dict, outputs=None
-                )
-            with tvm.testing.enable_span_filling():
-                mod_with_span, _ = relay.frontend.from_tensorflow(
-                    final_graph_def, layout=None, shape=shape_dict, outputs=None
-                )
-            tvm.ir.assert_structural_equal(mod["main"], mod_with_span["main"])
-
-        assert execinfo.value.args[0].startswith("Graph is not frozen. Provide a frozen graph")
-
-        # Now convert the variables to constant and run inference on the converted graph
-        final_graph_def = tf.graph_util.convert_variables_to_constants(
-            sess,
-            sess.graph.as_graph_def(add_shapes=True),
-            out_node,
-        )
-
-        tvm_output = run_tvm_graph(
-            final_graph_def,
-            in_data,
-            in_node,
-            target=target,
-            out_names=out_name,
-            num_output=len(out_name),
-        )
-        for i, tf_out in enumerate(tf_output):
-            tvm.testing.assert_allclose(tf_out, tvm_output[i], atol=1e-4, rtol=1e-5)
-
-        sess.close()
-
-
-#######################################################################
-# MatMul, BatchMatMul, BatchMatMulV2
-# ----------------------------------
-
-
-def _test_matmul(i, j, k, dtype, outer=None):
-    """One iteration of matmul"""
-
-    A_shape_init = [i, j]
-    B_shape_init = [j, k]
-
-    for transpose_a in [False, True]:
-        for transpose_b in [False, True]:
-            outer = outer or []
-            A_shape = outer + (A_shape_init[::-1] if transpose_a else A_shape_init)
-            B_shape = outer + (B_shape_init[::-1] if transpose_b else B_shape_init)
-
-            with tf.Graph().as_default():
-                A = tf.placeholder(shape=A_shape, dtype=dtype, name="A")
-                B = tf.placeholder(shape=B_shape, dtype=dtype, name="B")
-                result = tf.matmul(A, B, transpose_a=transpose_a, transpose_b=transpose_b)
-
-                A_np = np.random.uniform(high=5.0, size=A_shape).astype(dtype)
-                B_np = np.random.uniform(high=5.0, size=B_shape).astype(dtype)
-                compare_tf_with_tvm(
-                    [A_np, B_np], [A.name, B.name], result.name, convert_config={"use_dense": True}
-                )
-                compare_tf_with_tvm(
-                    [A_np, B_np], [A.name, B.name], result.name, convert_config={"use_dense": False}
-                )
-
-
-def test_forward_matmul():
-    """MatMul op test"""
-    _test_matmul(1, 3, 6, "int32")
-    _test_matmul(5, 3, 1, "float64")
-
-
-def _test_batch_matmul(A_shape, B_shape, dtype, adjoint_a=False, adjoint_b=False):
-
-    with tf.Graph().as_default():
-        A = tf.placeholder(shape=A_shape, dtype=dtype, name="A")
-        B = tf.placeholder(shape=B_shape, dtype=dtype, name="B")
-        result = tf.matmul(A, B, adjoint_a=adjoint_a, adjoint_b=adjoint_b, name="batchmatmul")
-
-        A_np = np.random.uniform(high=5.0, size=A_shape).astype(dtype)
-        B_np = np.random.uniform(high=5.0, size=B_shape).astype(dtype)
-        compare_tf_with_tvm(
-            [A_np, B_np],
-            [A.name, B.name],
-            result.name,
-            convert_config={"use_nt_batch_matmul": True},
-        )
-        compare_tf_with_tvm(
-            [A_np, B_np],
-            [A.name, B.name],
-            result.name,
-            convert_config={"use_nt_batch_matmul": False},
-        )
-
-
-def _test_batch_matmul_dynamic(
-    A_shape, B_shape, A_np_shape, B_np_shape, dtype, adjoint_a=False, adjoint_b=False
-):
-    with tf.Graph().as_default():
-        A = tf.placeholder(shape=A_shape, dtype=dtype, name="A")
-        B = tf.placeholder(shape=B_shape, dtype=dtype, name="B")
-        result = tf.matmul(A, B, adjoint_a=adjoint_a, adjoint_b=adjoint_b, name="batchmatmul")
-
-        A_np = np.random.uniform(high=5.0, size=A_np_shape).astype(dtype)
-        B_np = np.random.uniform(high=5.0, size=B_np_shape).astype(dtype)
-        # for now, in TOPI, only llvm & cublas's implementation support dynamic shape
-        # TODO add more backends support in TOPI
-        compare_tf_with_tvm(
-            [A_np, B_np],
-            [A.name, B.name],
-            result.name,
-            mode="vm",
-            targets=["llvm", "cuda -libs=cublas"],
-            convert_config={"use_nt_batch_matmul": True},
-        )
-        compare_tf_with_tvm(
-            [A_np, B_np],
-            [A.name, B.name],
-            result.name,
-            mode="vm",
-            targets=["llvm", "cuda -libs=cublas"],
-            convert_config={"use_nt_batch_matmul": False},
-        )
-
-
-def test_forward_batch_matmul():
-    """TF op BatchMatMul, BatchMatMulV2 test"""
-    _test_batch_matmul((3, 5, 4), (3, 4, 5), "int32")
-    _test_batch_matmul((3, 5, 4), (3, 4, 5), "float32", True, True)
-    _test_batch_matmul((3, 5, 4), (3, 5, 4), "int32", True, False)
-    _test_batch_matmul((3, 5, 4), (3, 5, 4), "float32", False, True)
-    _test_batch_matmul((2, 3, 4, 5, 6), (2, 3, 4, 6, 5), "int32")
-    _test_batch_matmul((1, 2, 3, 4, 5, 6), (1, 2, 3, 4, 6, 5), "float32", True, True)
-    _test_batch_matmul((3, 4, 5, 6), (3, 4, 5, 6), "int32", True, False)
-    _test_batch_matmul((2, 3, 4, 2, 3, 4, 5, 6), (2, 3, 4, 2, 3, 4, 5, 6), "float32", False, True)
-    _test_batch_matmul((1, 8, 64, 2), (2, 1), "float32", False, False)
-    _test_batch_matmul((1, 8, 8, 64), (64, 1), "float32", False, False)
-    _test_batch_matmul((1, 8, 64), (64, 1), "float32", False, False)
-
-
-def test_forward_batch_matmul_dynamic():
-    """Dynamic batch matmul"""
-    _test_batch_matmul_dynamic((None, 5, 4), (None, 4, 5), (3, 5, 4), (3, 4, 5), "int32")
-    _test_batch_matmul_dynamic(
-        (None, 5, 4), (None, 4, 5), (3, 5, 4), (3, 4, 5), "float32", True, True
-    )
-    _test_batch_matmul_dynamic(
-        (None, 5, 4), (None, 5, 4), (3, 5, 4), (3, 5, 4), "int32", True, False
-    )
-    _test_batch_matmul_dynamic(
-        (None, 5, 4), (None, 5, 4), (3, 5, 4), (3, 5, 4), "float32", False, True
-    )
-    _test_batch_matmul_dynamic(
-        (None, 4, 5, 6), (None, 4, 6, 5), (3, 4, 5, 6), (3, 4, 6, 5), "float32"
-    )
-    _test_batch_matmul_dynamic(
-        (None, None, 5, 6), (None, None, 6, 5), (3, 4, 5, 6), (3, 4, 6, 5), "float32"
-    )
-    _test_batch_matmul_dynamic(
-        (None, None, None, 5, 6),
-        (None, None, None, 6, 5),
-        (2, 3, 4, 5, 6),
-        (2, 3, 4, 6, 5),
-        "float32",
-    )
-    _test_batch_matmul_dynamic(
-        (None, None, None, 5, 6),
-        (6, None),
-        (2, 3, 4, 5, 6),
-        (6, 1),
-        "float32",
-    )
-    _test_batch_matmul_dynamic(
-        (None, 5, 6),
-        (6, None),
-        (24, 5, 6),
-        (6, 1),
-        "float32",
-    )
-
-
-#######################################################################
-# SparseTensorDenseMatMul
-# ----------------------------------
-
-
-def _test_sparse_dense_matmul(indices, values, A_inp_shape, B_inp_shape, dtype, flip=False):
-    """One iteration of sparse_dense_matmul"""
-
-    for adjoint_a in [False, True]:
-        for adjoint_b in [False, True]:
-            A_shape = A_inp_shape[::-1] if adjoint_a else A_inp_shape
-            B_shape = B_inp_shape[::-1] if adjoint_b else B_inp_shape
-
-            with tf.Graph().as_default():
-                A_sp = tf.sparse.SparseTensor(indices=indices, values=values, dense_shape=A_shape)
-                B = tf.placeholder(shape=B_shape, dtype=dtype, name="B")
-
-                if flip:
-                    result = tf.sparse.sparse_dense_matmul(
-                        B, A_sp, adjoint_a=adjoint_b, adjoint_b=adjoint_a
-                    )
-                else:
-                    result = tf.sparse.sparse_dense_matmul(
-                        A_sp, B, adjoint_a=adjoint_a, adjoint_b=adjoint_b
-                    )
-
-                B_np = np.random.uniform(high=5.0, size=B_shape).astype(dtype)
-
-                compare_tf_with_tvm([B_np], [B.name], result.name)
-
-
-def test_forward_sparse_dense_matmul():
-    """sparse_dense_matmul op test"""
-    ###################################################################
-    #
-    # In order to create a SparseTensor, it requires 3 input as below:
-    #    SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
-    #
-    # Above Sparse can be represented in Dense as below :
-    #    [[1, 0, 0, 0]
-    #     [0, 0, 2, 0]
-    #     [0, 0, 0, 0]]
-    #
-    # ------------------------------------------------------------------
-
-    _test_sparse_dense_matmul([[0, 0], [1, 2]], [4.0, 8.0], [3, 4], [4, 3], "float32")
-    _test_sparse_dense_matmul([[0, 0], [1, 2]], [4.0, 8.0], [3, 3], [3, 3], "float32")
-    _test_sparse_dense_matmul([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], [5, 5], "float32")
-    _test_sparse_dense_matmul([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [7, 9], [9, 5], "float32")
-    _test_sparse_dense_matmul([[0, 0], [1, 2]], [4.0, 8.0], [4, 3], [3, 4], "float32", True)
-    _test_sparse_dense_matmul([[0, 0], [1, 2]], [4.0, 8.0], [3, 3], [3, 3], "float32", True)
-    _test_sparse_dense_matmul(
-        [[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], [5, 5], "float32", True
-    )
-    _test_sparse_dense_matmul(
-        [[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [9, 5], [7, 9], "float32", True
-    )
-
-
-#######################################################################
-# SparseFillEmptyRows
-# ------------
-
-
-def _test_sparse_fill_empty_rows(indices_np, values_np, dense_shape_np, default_value_int, use_dyn):
-    with tf.Graph().as_default():
-        if use_dyn:
-            indices = tf.placeholder(shape=(None, None), dtype=indices_np.dtype, name="indices")
-            values = tf.placeholder(shape=(None), dtype=values_np.dtype, name="values")
-            dense_shape = tf.placeholder(
-                shape=(None), dtype=dense_shape_np.dtype, name="dense_shape"
-            )
-        else:
-            indices = tf.placeholder(shape=indices_np.shape, dtype=indices_np.dtype, name="indices")
-            values = tf.placeholder(shape=values_np.shape, dtype=values_np.dtype, name="values")
-            dense_shape = tf.placeholder(
-                shape=dense_shape_np.shape, dtype=dense_shape_np.dtype, name="dense_shape"
-            )
-
-        default_value = tf.placeholder(shape=(), dtype=values_np.dtype, name="default_value")
-        sp_input = tf.sparse.SparseTensor(indices=indices, values=values, dense_shape=dense_shape)
-        _ = tf.sparse.fill_empty_rows(sp_input, default_value, name="sparse_fill_empty_rows")
-        compare_tf_with_tvm(
-            [indices_np, values_np, dense_shape_np, default_value_int],
-            [indices.name, values.name, dense_shape.name, default_value.name],
-            [
-                "sparse_fill_empty_rows/SparseFillEmptyRows:0",
-                "sparse_fill_empty_rows/SparseFillEmptyRows:1",
-                "sparse_fill_empty_rows/SparseFillEmptyRows:2",
-            ],
-            mode="vm",
-        )
-
-
-@pytest.mark.parametrize(
-    "sparse_indices_np, sparse_values_np, dense_shape_np, default_value_int",
-    [
-        (
-            np.array([[1, 1], [0, 3], [0, 1], [2, 0], [3, 1]], dtype=np.int64),
-            np.array([1, 2, 3, 4, 5], dtype=np.int64),
-            np.array([5, 6], dtype=np.int64),
-            10,
-        ),
-        (
-            np.array([[1, 1], [0, 3], [2, 0], [3, 1]], dtype=np.int64),
-            np.array([1, 2, 3, 4], dtype=np.int64),
-            np.array([5, 6], dtype=np.int64),
-            10,
-        ),
-        (
-            np.array([[0, 1], [0, 3], [2, 0], [3, 1]], dtype=np.int64),
-            np.array([1, 2, 3, 4], dtype=np.int64),
-            np.array([5, 6], dtype=np.int64),
-            10,
-        ),
-        (
-            np.array([[1, 1, 1], [1, 3, 1], [2, 0, 5], [3, 1, 6]], dtype=np.int64),
-            np.array([1, 2, 3, 4], dtype=np.int64),
-            np.array([7, 7, 7], dtype=np.int64),
-            5,
-        ),
-        (
-            np.array([[1], [2]], dtype=np.int64),
-            np.array([7, 8], dtype=np.int64),
-            np.array([5], dtype=np.int64),
-            4,
-        ),
-        (
-            np.ones((0, 1), dtype=np.int64),
-            np.array([], dtype=np.int64),
-            np.array([5], dtype=np.int64),
-            4,
-        ),
-        (
-            np.ones((0, 3), dtype=np.int64),
-            np.array([], dtype=np.int64),
-            np.array([9, 3, 7], dtype=np.int64),
-            100,
-        ),
-    ],
-)
-@pytest.mark.parametrize("use_dyn", [True, False])
-def test_forward_sparse_fill_empty_rows(
-    sparse_indices_np, sparse_values_np, dense_shape_np, default_value_int, use_dyn
-):
-    """sparse_fill_empty_rows op test"""
-    ###################################################################
-    #
-    # In order to create a SparseTensor, it requires 3 input as below:
-    #    SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
-    #
-    # Above Sparse can be represented in Dense as below :
-    #    [[1, 0, 0, 0]
-    #     [0, 0, 2, 0]
-    #     [0, 0, 0, 0]]
-    #
-    # ------------------------------------------------------------------
-    _test_sparse_fill_empty_rows(
-        sparse_indices_np, sparse_values_np, dense_shape_np, default_value_int, use_dyn
-    )
-
-
-#######################################################################
-# SparseReshape
-# ------------
-
-
-def _test_sparse_reshape(indices_np, values_np, prev_shape_np, new_shape_np, use_dyn=False):
-    with tf.Graph().as_default():
-        if use_dyn:
-            indices = tf.placeholder(shape=(None, None), dtype=indices_np.dtype, name="indices")
-            values = tf.placeholder(shape=(None), dtype=values_np.dtype, name="values")
-            prev_shape = tf.placeholder(shape=(None), dtype=prev_shape_np.dtype, name="prev_shape")
-            new_shape = tf.placeholder(shape=(None), dtype=new_shape_np.dtype, name="new_shape")
-        else:
-            indices = tf.placeholder(shape=indices_np.shape, dtype=indices_np.dtype, name="indices")
-            values = tf.placeholder(shape=values_np.shape, dtype=values_np.dtype, name="values")
-            prev_shape = tf.placeholder(
-                shape=prev_shape_np.shape, dtype=prev_shape_np.dtype, name="prev_shape"
-            )
-            new_shape = tf.placeholder(
-                shape=new_shape_np.shape, dtype=new_shape_np.dtype, name="new_shape"
-            )
-        sp_input = tf.sparse.SparseTensor(indices=indices, values=values, dense_shape=prev_shape)
-
-        _ = tf.sparse.reshape(sp_input, new_shape, name="sparse_reshape")
-        compare_tf_with_tvm(
-            [indices_np, values_np, prev_shape_np, new_shape_np],
-            [indices.name, values.name, prev_shape.name, new_shape.name],
-            ["sparse_reshape:0", "sparse_reshape:1", "sparse_reshape/Identity:0"],
-            mode="vm",
-        )
-
-
-@pytest.mark.parametrize(
-    "sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np",
-    [
-        (
-            np.ones((0, 1), dtype=np.int64),
-            np.array([], dtype=np.int64),
-            np.array([4], dtype=np.int64),
-            np.array([2, -1], dtype=np.int64),
-        ),
-        (
-            np.ones((0, 1), dtype=np.int64),
-            np.array([], dtype=np.int64),
-            np.array([4], dtype=np.int64),
-            np.array([2, 2], dtype=np.int64),
-        ),
-        (
-            np.ones((0, 2), dtype=np.int64),
-            np.array([], dtype=np.int64),
-            np.array([3, 6], dtype=np.int64),
-            np.array([-1, 2], dtype=np.int64),
-        ),
-        (
-            np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [1, 2, 3]], dtype=np.int64),
-            np.array([7, 5, 6, 3, 9], dtype=np.int64),
-            np.array([2, 3, 6], dtype=np.int64),
-            np.array([-1, 9], dtype=np.int64),
-        ),
-        (
-            np.array(
-                [
-                    [0, 0, 0, 0, 0],
-                    [0, 0, 1, 2, 3],
-                    [0, 1, 0, 3, 5],
-                    [1, 0, 0, 4, 6],
-                    [1, 2, 3, 6, 8],
-                ],
-                dtype=np.int64,
-            ),
-            np.array([7, 5, 6, 3, 9], dtype=np.int64),
-            np.array([2, 3, 6, 7, 9], dtype=np.int64),
-            np.array([9, -1, 7], dtype=np.int64),
-        ),
-        (
-            np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64),
-            np.array([7, 5, 6, 3, 9], dtype=np.int64),
-            np.array([9, 4], dtype=np.int64),
-            np.array([-1], dtype=np.int64),
-        ),
-        (
-            np.array([[0], [5], [10], [20], [24]], dtype=np.int64),
-            np.array([7, 5, 6, 3, 9], dtype=np.int64),
-            np.array([25], dtype=np.int64),
-            np.array([5, 5], dtype=np.int64),
-        ),
-        (
-            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
-            np.array([7, 5, 6, 3, 9], dtype=np.int64),
-            np.array([500, 20], dtype=np.int64),
-            np.array([500, 20], dtype=np.int64),
-        ),
-        (
-            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
-            np.array([7, 5, 6, 3, 9], dtype=np.int64),
-            np.array([500, 20], dtype=np.int64),
-            np.array([500, -1], dtype=np.int64),
-        ),
-        (
-            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
-            np.array([7, 5, 6, 3, 9], dtype=np.int64),
-            np.array([500, 20], dtype=np.int64),
-            np.array([250, 40], dtype=np.int64),
-        ),
-    ],
-)
-@pytest.mark.parametrize("use_dyn", [True, False])
-def test_forward_sparse_reshape(
-    sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np, use_dyn
-):
-    """sparse_reshape op test"""
-    ###################################################################
-    #
-    # In order to create a SparseTensor, it requires 3 input as below:
-    #    SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
-    #
-    # Above Sparse can be represented in Dense as below :
-    #    [[1, 0, 0, 0]
-    #     [0, 0, 2, 0]
-    #     [0, 0, 0, 0]]
-    #
-    # ------------------------------------------------------------------
-    _test_sparse_reshape(sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np, use_dyn)
-
-
-#######################################################################
-# Sparse Segment Variants
-# ------------
-
-
-def _test_sparse_segment_variant(
-    tf_op, data_np, indices_np, segment_ids_np, num_segments, use_dyn=False
-):
-    with tf.Graph().as_default():
-        if use_dyn:
-            data = tf.placeholder(
-                shape=[None for _ in data_np.shape], dtype=data_np.dtype, name="data"
-            )
-            indices = tf.placeholder(shape=[None], dtype=indices_np.dtype, name="indices")
-            segment_ids = tf.placeholder(
-                shape=(None), dtype=segment_ids_np.dtype, name="segment_ids"
-            )
-        else:
-            data = tf.placeholder(shape=data_np.shape, dtype=data_np.dtype, name="data")
-            indices = tf.placeholder(shape=indices_np.shape, dtype=indices_np.dtype, name="indices")
-            segment_ids = tf.placeholder(
-                shape=segment_ids_np.shape, dtype=segment_ids_np.dtype, name="segment_ids"
-            )
-
-        _ = tf_op(
-            data, indices, segment_ids, num_segments=num_segments, name="sparse_segment_variant"
-        )
-        compare_tf_with_tvm(
-            [data_np, indices_np, segment_ids_np],
-            [data.name, indices.name, segment_ids.name],
-            ["sparse_segment_variant:0"],
-            mode="vm",
-        )
-
-
-@pytest.mark.parametrize(
-    "data_np, indices_np, segment_ids_np, num_segments",
-    [
-        (
-            np.array([5, 1, 7, 2, 3, 4], dtype=np.float32),
-            np.array([0, 3, 4], dtype=np.int32),
-            np.array([0, 1, 1], dtype=np.int32),
-            None,
-        ),
-        (
-            np.array([[1, 2, 3, 4], [-1, -2, -3, -4], [5, 6, 7, 8]], dtype=np.float64),
-            np.array([0, 1], dtype=np.int32),
-            np.array([0, 2], dtype=np.int32),
-            4,
-        ),
-        (
-            np.random.random((6, 4, 5)),
-            np.array([0, 2, 4, 3, 1], dtype=np.int32),
-            np.array([0, 0, 1, 5, 5], dtype=np.int32),
-            100,
-        ),
-        (
-            np.random.random((6, 4, 5)),
-            np.array([0, 2, 4, 3, 1], dtype=np.int32),
-            np.array([0, 0, 1, 5, 5], dtype=np.int32),
-            None,
-        ),
-        (
-            np.array([[[1, 7]], [[3, 8]], [[2, 9]]], dtype=np.float64),
-            np.array([0, 1, 2], dtype=np.int32),
-            np.array([0, 0, 1], dtype=np.int32),
-            None,
-        ),
-        (
-            np.random.random((9, 4, 5, 7)),
-            np.array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=np.int32),
-            np.array([0, 0, 1, 3, 5, 6, 7, 7, 8], dtype=np.int32),
-            9,
-        ),
-        (
-            np.random.random((9, 4, 5, 7)),
-            np.array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=np.int32),
-            np.array([0, 0, 1, 3, 5, 6, 7, 7, 8], dtype=np.int32),
-            None,
-        ),
-        (
-            np.array([[1, 2, 3, 4], [-1, -2, -3, -4], [5, 6, 7, 8]], dtype=np.float64),
-            np.array([0, 1], dtype=np.int32),
-            np.array([0, 2], dtype=np.int32),
-            None,
-        ),
-        (
-            np.random.random((9, 4, 5, 7)),
-            np.array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=np.int32),
-            np.array([0, 0, 1, 3, 5, 5, 5, 5, 5], dtype=np.int32),
-            6,
-        ),
-    ],
-)
-@pytest.mark.parametrize("use_dyn", [True, False])
-@pytest.mark.parametrize(
-    "tf_op",
-    [
-        tf.sparse.segment_sum,
-        tf.sparse.segment_sqrt_n,
-        tf.sparse.segment_mean,
-    ],
-)
-def test_forward_sparse_segment_sum_variants(
-    tf_op,
-    data_np,
-    indices_np,
-    segment_ids_np,
-    num_segments,
-    use_dyn,
-):
-    """sparse segment sum variants tests"""
-    _test_sparse_segment_variant(tf_op, data_np, indices_np, segment_ids_np, num_segments, use_dyn)
-
-
-#######################################################################
-# Math SegmentSum
-# ------------
-
-
-def _test_math_segment_sum(data_np, segment_ids_np, use_dyn=False):
-    with tf.Graph().as_default():
-        if use_dyn:
-            data = tf.placeholder(
-                shape=[None for _ in data_np.shape], dtype=data_np.dtype, name="data"
-            )
-            segment_ids = tf.placeholder(
-                shape=(None), dtype=segment_ids_np.dtype, name="segment_ids"
-            )
-        else:
-            data = tf.placeholder(shape=data_np.shape, dtype=data_np.dtype, name="data")
-            segment_ids = tf.placeholder(
-                shape=segment_ids_np.shape, dtype=segment_ids_np.dtype, name="segment_ids"
-            )
-
-        _ = tf.math.segment_sum(data, segment_ids, name="segment_sum")
-        compare_tf_with_tvm(
-            [data_np, segment_ids_np],
-            [data.name, segment_ids.name],
-            ["segment_sum:0"],
-            mode="vm",
-        )
-
-
-@pytest.mark.parametrize(
-    "data_np, segment_ids_np",
-    [
-        (
-            np.array([5, 1, 7, 2, 3, 4], dtype=np.float32),
-            np.array([0, 0, 0, 1, 1, 1], dtype=np.int32),
-        ),
-        (
-            np.array([[1, 2, 3, 4], [-1, -2, -3, -4], [5, 6, 7, 8]], dtype=np.float64),
-            np.array([0, 0, 1], dtype=np.int32),
-        ),
-        (
-            np.random.random((6, 4, 5)),
-            np.array([0, 0, 1, 2, 2, 3], dtype=np.int64),
-        ),
-        (
-            np.array([[[1, 7]], [[3, 8]], [[2, 9]]], dtype=np.float32),
-            np.array([0, 0, 1], dtype=np.int32),
-        ),
-        (
-            np.random.random((9, 4, 5, 7)),
-            np.array([0, 0, 0, 1, 2, 3, 4, 4, 5], dtype=np.int64),
-        ),
-    ],
-)
-@pytest.mark.parametrize("use_dyn", [True, False])
-def test_forward_math_segment_sum(data_np, segment_ids_np, use_dyn):
-    """math segment sum test"""
-    _test_math_segment_sum(data_np, segment_ids_np, use_dyn)
-
-
-# tensorflow.compat.v1.sparse_to_dense
-# ---------------
-def _test_sparse_to_dense(sparse_indices, sparse_values, default_value, output_shape):
-    with tf.Graph().as_default():
-        indices = tf.placeholder(
-            shape=sparse_indices.shape, dtype=str(sparse_indices.dtype), name="indices"
-        )
-        values = tf.placeholder(
-            shape=sparse_values.shape, dtype=str(sparse_values.dtype), name="values"
-        )
-        oshape = tf.constant(output_shape, shape=output_shape.shape, dtype=str(output_shape.dtype))
-
-        # Output shape depends on a dynamic input, use VM.
-        if default_value is None:
-            output = tf.sparse_to_dense(indices, oshape, values)
-            compare_tf_with_tvm(
-                [sparse_indices, sparse_values], ["indices:0", "values:0"], output.name, mode="vm"
-            )
-        else:
-            dv = tf.placeholder(shape=(), dtype=str(default_value.dtype), name="default_value")
-            output = tf.sparse_to_dense(indices, oshape, values, dv)
-            compare_tf_with_tvm(
-                [sparse_indices, sparse_values, default_value],
-                ["indices:0", "values:0", "default_value:0"],
-                output.name,
-                mode="vm",
-            )
-
-
-def test_forward_sparse_to_dense():
-    """Sparse to dense"""
-    # scalar
-    _test_sparse_to_dense(
-        sparse_indices=np.int32(1),
-        sparse_values=np.int32(3),
-        default_value=np.int32(0),
-        output_shape=np.array([5]).astype("int32"),
-    )
-
-    # vector
-    _test_sparse_to_dense(
-        sparse_indices=np.array([0, 1, 4]).astype("int32"),
-        sparse_values=np.array([3, 3, 3]).astype("int32"),
-        default_value=np.int32(0),
-        output_shape=np.array([5]).astype("int32"),
-    )
-
-    # vector nXd
-    _test_sparse_to_dense(
-        sparse_indices=np.array([[0, 0], [1, 2]]).astype("int32"),
-        sparse_values=np.array([1, 2]).astype("int32"),
-        default_value=np.int32(0),
-        output_shape=np.array([3, 4]).astype("int32"),
-    )
-
-    _test_sparse_to_dense(
-        sparse_indices=np.array([[0, 0, 0], [1, 2, 3]]).astype("int32"),
-        sparse_values=np.array([1, 2]).astype("int32"),
-        default_value=np.int32(4),
-        output_shape=np.array([2, 3, 4]).astype("int32"),
-    )
-
-    # floats
-    _test_sparse_to_dense(
-        sparse_indices=np.array([0, 1, 4]).astype("int32"),
-        sparse_values=np.array([3.1, 3.1, 3.1]).astype("float32"),
-        default_value=np.float32(3.5),
-        output_shape=np.array([5]).astype("int32"),
-    )
-
-    # default value not specified
-    _test_sparse_to_dense(
-        sparse_indices=np.array([0, 1, 4]).astype("int32"),
-        sparse_values=np.array([3.1, 3.1, 3.1]).astype("float32"),
-        default_value=None,
-        output_shape=np.array([5]).astype("int32"),
-    )
-
-
-#######################################################################
-# tensorflow.sparse.to_dense
-# ---------------
-def _test_sparse_to_dense_v2(indices, values, A_shape, dtype, default_value=None):
-    with tf.Graph().as_default():
-        A_sp = tf.sparse.SparseTensor(indices=indices, values=values, dense_shape=A_shape)
-
-        result = tf.sparse.to_dense(A_sp, default_value=default_value)
-
-        # The output shape depends on a dynamic input, use VM.
-        compare_tf_with_tvm([], [], result.name, mode="vm")
-
-
-def test_forward_sparse_to_dense_v2():
-    _test_sparse_to_dense_v2([[1]], [3.0], [5], "float32")
-    _test_sparse_to_dense_v2([[1]], [3.0], [5], "float32", 0.3)
-    _test_sparse_to_dense_v2([[0, 0], [1, 2]], [4.0, 8.0], [3, 4], "float32")
-    _test_sparse_to_dense_v2([[0, 0], [1, 2]], [4.0, 8.0], [3, 4], "float32", 1.3)
-    _test_sparse_to_dense_v2([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], "float32")
-    _test_sparse_to_dense_v2([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], "float32", 1.9)
-
-
-#######################################################################
-# tensorflow.sparse.add
-# ----------------------------------
-
-
-def _test_sparse_add(indices, values, A_shape, B_shape, dtype, flip=False):
-    """One iteration of tf.sparse.add"""
-
-    # TODO(ANSHUMAN87): support cuda
-    # TODO(ANSHUMAN87): support both sparse input case
-
-    with tf.Graph().as_default():
-        A_sp = tf.sparse.SparseTensor(
-            indices=indices, values=np.array(values).astype(dtype), dense_shape=A_shape
-        )
-        B = tf.placeholder(shape=B_shape, dtype=dtype, name="B")
-
-        # TODO(ANSHUMAN87): support user input threashold values
-        if flip:
-            if package_version.parse(tf.VERSION) < package_version.parse("1.13.0"):
-                result = tf.sparse.add(B, A_sp, thresh=0)
-            else:
-                result = tf.sparse.add(B, A_sp, threshold=0)
-        else:
-            if package_version.parse(tf.VERSION) < package_version.parse("1.13.0"):
-                result = tf.sparse.add(A_sp, B, thresh=0)
-            else:
-                result = tf.sparse.add(A_sp, B, threshold=0)
-
-        B_np = np.random.uniform(high=5.0, size=B_shape).astype(dtype)
-
-        compare_tf_with_tvm([B_np], [B.name], result.name, no_gpu=True)
-
-
-def test_sparse_add():
-    """sparse.add op test"""
-    ###################################################################
-    #
-    # In order to create a SparseTensor, it requires 3 input as below:
-    #    SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
-    #
-    # Above Sparse can be represented in Dense as below :
-    #    [[1, 0, 0, 0]
-    #     [0, 0, 2, 0]
-    #     [0, 0, 0, 0]]
-    #
-    # ------------------------------------------------------------------
-    for dtype_inp in ["float32", "float64", "int32"]:
-        _test_sparse_add([[0, 0], [1, 2]], [4.0, 8.0], [3, 4], [3, 4], dtype_inp)
-        _test_sparse_add([[0, 0], [1, 2]], [4.0, 8.0], [3, 4], [3, 4], dtype_inp, True)
-        _test_sparse_add([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], [5, 5], dtype_inp)
-        _test_sparse_add([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], [5, 5], dtype_inp, True)
-
-
-#######################################################################
-# StridedSlice
-# ------------
-
-
-def _test_stridedslice(
-    ip_shape,
-    begin,
-    end,
-    stride,
-    dtype,
-    begin_mask=0,
-    end_mask=0,
-    new_axis_mask=0,
-    shrink_axis_mask=0,
-    ellipsis_mask=0,
-):
-    """One iteration of a Stridedslice"""
-
-    tf.reset_default_graph()
-    np_data = np.random.uniform(size=ip_shape).astype(dtype)
-    with tf.Graph().as_default():
-        if len(ip_shape) == 0:  # pylint: disable=len-as-condition
-            in_data = tf.constant(np_data, dtype)
-        else:
-            in_data = tf.placeholder(dtype, ip_shape, name="in_data")
-        tf.strided_slice(
-            in_data,
-            begin,
-            end,
-            stride,
-            begin_mask=begin_mask,
-            end_mask=end_mask,
-            new_axis_mask=new_axis_mask,
-            shrink_axis_mask=shrink_axis_mask,
-            ellipsis_mask=ellipsis_mask,
-            name="strided_slice",
-        )
-        if len(ip_shape) == 0:  # pylint: disable=len-as-condition
-            compare_tf_with_tvm(None, "", "strided_slice:0")
-        else:
-            compare_tf_with_tvm(np_data, "in_data:0", "strided_slice:0")
-
-
-def test_forward_stridedslice():
-    """test StridedSlice"""
-
-    _test_stridedslice([], [0], [0], [1], "float32", new_axis_mask=1)
-    _test_stridedslice([2], [1], [1], [1], "float32", shrink_axis_mask=1)
-    _test_stridedslice([4], [-1], [0], [1], "float32", shrink_axis_mask=1)
-    _test_stridedslice([2, 1], [0], [1], [1], "float32", shrink_axis_mask=1)
-    _test_stridedslice([2, 3, 4], [-2], [0], [1], "float32", shrink_axis_mask=8)
-    _test_stridedslice([2, 3, 4], [0], [1], [1], "float32", shrink_axis_mask=8)
-    _test_stridedslice([3, 4, 3], [1, -1, 0], [4, -5, 3], [2, -1, 1], "float32")
-    _test_stridedslice([3, 4, 3], [1, 0], [4, 3], [2, 1], "float32", ellipsis_mask=8)
-    _test_stridedslice([3, 4, 3], [1, 0], [4, 2], [2, 1], "float32", ellipsis_mask=2)
-    _test_stridedslice([3, 4, 5, 3], [1, 0], [4, 2], [2, 1], "float32", ellipsis_mask=2)
-    _test_stridedslice([3, 4, 5, 3], [1, 0, 1], [4, 2, 2], [2, 1, 1], "float32", ellipsis_mask=2)
-    _test_stridedslice([3, 4, 3], [1, 1, 0], [4, 4, 2], [2, 1, 1], "float32", new_axis_mask=5)
-    _test_stridedslice(
-        [3, 4, 3], [1, 1, 1], [4, 4, 1], [2, 1, 1], "float32", ellipsis_mask=2, new_axis_mask=4
-    )
-    _test_stridedslice(
-        [6, 4, 5], [1, 1, 1], [6, 3, 4], [2, 1, 1], "float32", ellipsis_mask=2, new_axis_mask=5
-    )
-    _test_stridedslice(
-        [3, 4, 3], [1, 1, 2], [4, 4, 3], [2, 1, 1], "float32", ellipsis_mask=4, new_axis_mask=2
-    )
-    _test_stridedslice(
-        [3, 4, 3], [1, 1, 2], [4, 4, 3], [2, 1, 1], "float32", ellipsis_mask=2, new_axis_mask=3
-    )
-    _test_stridedslice(
-        [3, 4, 3], [1, 1, 0], [4, 4, 1], [2, 1, 1], "float32", ellipsis_mask=2, new_axis_mask=3
-    )
-    _test_stridedslice(
-        [3, 4, 3], [1, 1, 2], [4, 4, 3], [2, 1, 1], "float32", ellipsis_mask=2, new_axis_mask=2
-    )
-    _test_stridedslice((3, 4), [1, 0], [4, 4], [1, 1], "float32", shrink_axis_mask=2)
-    _test_stridedslice(
-        [3, 4, 3], [1, 1, 0], [4, 4, 3], [2, 1, 1], "float32", shrink_axis_mask=2, new_axis_mask=2
-    )
-    _test_stridedslice(
-        [3, 4, 3], [1, 1, 0], [4, 4, 3], [2, 1, 1], "float32", shrink_axis_mask=1, new_axis_mask=2
-    )
-    _test_stridedslice(
-        [3, 4, 3], [1, 1, 0], [4, 4, 3], [2, 1, 1], "float32", shrink_axis_mask=2, new_axis_mask=1
-    )
-    _test_stridedslice(
-        [3, 4, 5, 4, 5, 6], [0, 0], [2, 3], [1, 1], "float32", shrink_axis_mask=5, new_axis_mask=1
-    )
-    _test_stridedslice(
-        [3, 4, 5, 4, 5, 6],
-        [0, 0, 1, 2, 1],
-        [2, 3, 4, 5, 3],
-        [1, 1, 2, 2, 1],
-        "float32",
-        shrink_axis_mask=5,
-        new_axis_mask=1,
-        ellipsis_mask=2,
-        begin_mask=8,
-        end_mask=8,
-    )
-    _test_stridedslice(
-        [3, 4, 5, 4, 5, 6],
-        [0, 0, 1, 2, 1],
-        [2, 3, 4, 5, 3],
-        [1, 1, 2, 2, 1],
-        "float32",
-        shrink_axis_mask=8,
-        new_axis_mask=1,
-        ellipsis_mask=2,
-        begin_mask=5,
-        end_mask=5,
-    )
-    _test_stridedslice(
-        [3, 4, 5, 4, 5, 6],
-        [0, 0, 1, 2, 1],
-        [2, 3, 4, 5, 3],
-        [1, 1, 2, 2, 1],
-        "float32",
-        shrink_axis_mask=16,
-        new_axis_mask=1,
-        ellipsis_mask=2,
-        begin_mask=5,
-        end_mask=5,
-    )
-    _test_stridedslice(
-        [3, 4, 5, 4, 5, 6],
-        [1, 2, 0, -3],
-        [4, 5, 3, 3],
-        [2, 2, 1, 1],
-        "float32",
-        shrink_axis_mask=8,
-        new_axis_mask=1,
-        ellipsis_mask=2,
-        begin_mask=5,
-        end_mask=8,
-    )
-    _test_stridedslice(
-        [1, 13, 13, 3, 2],
-        [0, 0],
-        [1, 1],
-        [1, -1],
-        "float32",
-        ellipsis_mask=1,
-        begin_mask=2,
-        end_mask=2,
-    )
-
-
-#######################################################################
-# FloorDiv, RealDiv
-# -----------------
-def _test_forward_divide(ip_shape, dtype):
-    np_numer = np.random.uniform(-100, 100, size=ip_shape).astype(dtype)
-    np_denomin = np.random.uniform(1, 100, size=ip_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        numerator = tf.placeholder(dtype, ip_shape, name="numer")
-        denominator = tf.placeholder(dtype, ip_shape, name="denomin")
-        tf.math.divide(numerator, denominator, name="RealDiv")
-        compare_tf_with_tvm([np_numer, np_denomin], ["numer:0", "denomin:0"], "RealDiv:0")
-
-
-def _test_forward_floordiv(ip_shape, dtype):
-    np_numer = np.random.uniform(1, 100, size=ip_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        numerator = tf.placeholder(dtype, ip_shape, name="numer")
-        tf.math.floordiv(numerator, tf.constant(5, dtype=dtype), name="FloorDiv")
-        compare_tf_with_tvm([np_numer], ["numer:0"], "FloorDiv:0")
-
-
-def test_forward_divide():
-    """test FloorDiv, RealDiv"""
-    _test_forward_divide((4,), "int32")
-    _test_forward_divide((4, 3, 7), "float32")
-    _test_forward_floordiv((4, 3, 7), "float32")
-    _test_forward_floordiv((4, 3, 7), "int32")
-
-
-#######################################################################
-# FloorMod
-# --------
-def _test_forward_floormod(in_shape, if_shape, dtype):
-    np_numer = np.random.uniform(1, 100, size=in_shape).astype(dtype)
-    np_factor = np.random.uniform(1, 100, size=if_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        numerator = tf.placeholder(dtype, in_shape, name="numer")
-        factor = tf.placeholder(dtype, if_shape, name="factor")
-        tf.floormod(numerator, factor, name="FloorMod")
-        compare_tf_with_tvm([np_numer, np_factor], ["numer:0", "factor:0"], "FloorMod:0")
-
-
-def test_forward_floormod():
-    """test FloorMod"""
-    _test_forward_floormod((10,), (10,), "float32")
-    _test_forward_floormod((8, 2), (1,), "float32")
-    _test_forward_floormod((4, 3, 7), (4, 3, 7), "float32")
-    _test_forward_floormod((4, 3, 7), (4, 3, 7), "int32")
-
-
-#######################################################################
-# TruncateMod
-# -----------
-def _test_forward_truncatemod(ip_shape, dtype):
-    np_data_1 = np.random.uniform(-100, 100, size=ip_shape).astype(dtype)
-    np_data_2 = np.random.uniform(1, 10, size=ip_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data_1 = tf.placeholder(dtype, ip_shape, name="in_data_1")
-        in_data_2 = tf.placeholder(dtype, ip_shape, name="in_data_2")
-        tf.truncatemod(in_data_1, in_data_2, name="truncatemod")
-        compare_tf_with_tvm([np_data_1, np_data_2], ["in_data_1:0", "in_data_2:0"], "truncatemod:0")
-
-
-def test_forward_truncatemod():
-    """test TruncateMod"""
-    _test_forward_truncatemod((4, 3, 7), "int32")
-
-
-#######################################################################
-# Gather, GatherV2
-# --------------------------
-
-
-def _test_gather(ip_shape, indice_shape, indice_value, axis, batch_dims, dtype):
-    """One iteration of a GatherV2"""
-
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, ip_shape, name="in_data")
-        indices = tf.placeholder("int32", indice_shape, name="indices")
-        out = tf.gather(in_data, indices, axis=axis, batch_dims=batch_dims)
-        np_data = np.random.uniform(1, 10, size=ip_shape).astype(dtype)
-
-        def _fill_indices(indice_value):
-            indices = np.array(ip_shape, dtype=dtype)
-            if isinstance(indice_value, int):
-                indices = np.array([indice_value], dtype="int32")
-            else:
-                indices = np.asarray(indice_value, dtype="int32")
-            return indices
-
-        np_indices = _fill_indices(indice_value)
-        compare_tf_with_tvm([np_data, np_indices], ["in_data:0", "indices:0"], out.name)
-
-
-def test_forward_gather():
-    """test Gather/GatherV2 layer"""
-    _test_gather((4,), (1,), 1, 0, 1, "int32")
-    _test_gather((4,), (1,), 1, 0, 0, "float32")
-    _test_gather((1, 4), (1,), [0], 0, 0, "int32")
-    _test_gather((4,), (1, 2, 2), [[[1, 0], [0, 1]]], 0, 0, "float32")
-    _test_gather((2, 2), (1, 2, 2), [[[1, 0], [0, 1]]], 0, 0, "int32")
-    _test_gather((2, 2), (1, 2, 2), [[[1, 0], [0, 1]]], 1, 0, "int32")
-    _test_gather((2, 2), (1, 2, 2), [[[1, 0], [0, 1]]], 0, 0, "float32")
-    _test_gather((3, 3, 3), (1, 1, 2), [[[1, 0]]], 0, 0, "int32")
-    _test_gather((3, 3, 3), (1, 1, 2), [[[1, 0]]], 2, 0, "int32")
-    _test_gather((4, 3, 5, 6), (1, 4), [[2, 1, 0, 0]], 0, 0, "float32")
-    _test_gather((2, 2), (2, 2), [[0, 0], [0, 0]], 1, 1, "float32")
-    _test_gather(
-        (2, 2, 3, 6), (2, 2, 3), [[[1, 1, 0], [0, 0, 1]], [[0, 1, 0], [1, 0, 1]]], 2, 2, "float32"
-    )
-    _test_gather(
-        (2, 2, 3, 6), (2, 2, 3), [[[1, 1, 0], [0, 0, 1]], [[0, 1, 0], [1, 0, 1]]], 3, 1, "float32"
-    )
-    _test_gather(
-        (2, 2, 3, 6), (2, 2, 3), [[[1, 1, 0], [0, 0, 1]], [[0, 1, 0], [1, 0, 1]]], 3, 2, "float32"
-    )
-    _test_gather(
-        (2, 2, 3, 6), (2, 2, 3), [[[1, 1, 0], [0, 0, 1]], [[0, 1, 0], [1, 0, 1]]], 3, 0, "float32"
-    )
-
-
-#######################################################################
-# GatherND
-# --------------------------
-
-
-def _test_gather_nd(ip_shape, indice_value, dtype):
-    """test operator GatherNd"""
-    np_data = np.random.uniform(1, 100, size=ip_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, ip_shape, name="in_data")
-        tf.gather_nd(in_data, indices=indice_value, name="gather_nd")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "gather_nd:0")
-
-
-def test_forward_gather_nd():
-    """test operator GatherNd"""
-    _test_gather_nd((2, 2), [[0, 0], [1, 1]], "float32")
-    _test_gather_nd((2, 2, 2), [[1, 0, 0], [0, 0, 0]], "float32")
-    _test_gather_nd((4,), [1], "float32")
-    _test_gather_nd((4,), [1], "int32")
-    _test_gather_nd((1, 4), [0, 3], "int32")
-    _test_gather_nd((2, 2), [[[1, 0], [0, 1]]], "int32")
-    _test_gather_nd((2, 2), [[[1, 0], [0, 1]]], "float32")
-    _test_gather_nd((3, 3, 3), [[[1, 0]]], "int32")
-    _test_gather_nd((3, 3, 3), [[[1, 0]]], "int32")
-    _test_gather_nd((4, 3, 5, 6), [[2, 1, 0, 0]], "float32")
-    _test_gather_nd((3, 3, 3), [[[2, 1]]], "int32")
-
-
-#######################################################################
-# BiasAdd
-# -------
-def test_forward_bias_add():
-    """test Op BiasAdd"""
-
-    def check_bias_add(lh_shpae, rh_shape, dtype):
-        tf.reset_default_graph()
-        lh_data = np.random.uniform(size=lh_shpae).astype(dtype)
-        rh_data = np.random.uniform(size=rh_shape).astype(dtype)
-        with tf.Graph().as_default():
-            lft_data = tf.placeholder(dtype, name="lft_data")
-            rgt_data = tf.placeholder(dtype, name="rgt_data")
-            tf.nn.bias_add(lft_data, rgt_data, name="BiasAdd")
-            compare_tf_with_tvm([lh_data, rh_data], ["lft_data:0", "rgt_data:0"], "BiasAdd:0")
-
-    check_bias_add((10, 8, 16, 32), (32,), dtype="int32")
-    check_bias_add((10, 20), (20,), dtype="float32")
-
-
-#######################################################################
-# Split
-# -----
-
-
-def _test_split(in_shape, axis, num_or_size_splits, dtype):
-    """One iteration of a Split"""
-    np_data = np.random.uniform(-5, 5, size=in_shape).astype(dtype)
-
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, in_shape, name="in_data")
-        _ = len(num_or_size_splits) if isinstance(num_or_size_splits, list) else num_or_size_splits
-        split = tf.split(in_data, num_or_size_splits, axis=axis)
-        relu = [tf.nn.relu(i) for i in split]
-
-        compare_tf_with_tvm([np_data], ["in_data:0"], [n.name for n in relu])
-
-    # and now test together with concat
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, in_shape, name="in_data")
-        splitted = tf.split(in_data, num_or_size_splits, axis=axis)
-        concat = tf.concat(splitted, axis)
-        compare_tf_with_tvm([np_data], "in_data:0", concat.name)
-
-
-def test_forward_split():
-    """test split layer"""
-    # rank 1
-    _test_split((3,), 0, 1, "float32")
-    _test_split((3,), 0, 3, "float32")
-    _test_split((6,), 0, 3, "float32")
-    # rank 2
-    _test_split((6, 2), 0, 3, "float32")
-    _test_split((2, 6), 1, 6, "float32")
-    # rank 3
-    _test_split((6, 2, 4), 0, 2, "int32")
-    _test_split((2, 6, 4), 1, 3, "float32")
-    _test_split((2, 4, 6), 2, 1, "float32")
-    # rank 4
-    _test_split((6, 1, 3, 5), 0, 3, "float32")
-    _test_split((1, 6, 3, 5), 1, 3, "float32")
-    _test_split((1, 3, 6, 5), 2, 3, "float32")
-    _test_split((1, 3, 5, 6), 3, 3, "float32")
-    # split along negative axis
-    _test_split((6, 1, 3, 5), -4, 3, "float32")
-    _test_split((1, 6, 3, 5), -3, 3, "float32")
-    _test_split((1, 3, 6, 5), -2, 3, "float32")
-    _test_split((1, 3, 5, 6), -1, 3, "float32")
-    # size_splits list
-    _test_split((6,), 0, [1, 2, 3], "int32")
-    _test_split((3, 6, 4), -2, [1, 4, 1], "float32")
-
-
-######################################################################
-# TopKV2
-# ------
-
-
-def _test_forward_top_k_v2(in_shape, k):
-    np_data = np.random.uniform(-100, 100, size=in_shape).astype("float32")
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder("float32", in_shape, name="in_data")
-        tf.math.top_k(in_data, k, name="TopK")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "TopK:0")
-
-
-def test_forward_top_k_v2():
-    _test_forward_top_k_v2((3,), 1)
-    _test_forward_top_k_v2((3,), 3)
-    _test_forward_top_k_v2((3, 5, 7), 3)
-    _test_forward_top_k_v2((3, 5, 7), 3)
-
-
-#######################################################################
-# Unstack
-# -------
-
-
-def _test_unstack(ip_shape, axis, dtype):
-    np_data = np.random.uniform(-5, 5, size=ip_shape).astype(dtype)
-
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, ip_shape, name="in_data")
-        unstack = tf.unstack(in_data, axis=axis)
-
-        compare_tf_with_tvm([np_data], ["in_data:0"], [n.name for n in unstack])
-
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, ip_shape, name="in_data")
-        tf.stack(tf.unstack(in_data, axis=axis), axis=axis)
-
-        compare_tf_with_tvm([np_data], ["in_data:0"], "stack:0")
-
-
-def test_forward_unstack():
-    """test unstack layer"""
-    _test_unstack((6,), 0, "int32")
-    _test_unstack((2, 6), 1, "float64")
-    # negative axis
-    _test_unstack((1, 4), -1, "int32")
-    _test_unstack((3, 6, 4), -2, "float32")
-
-
-#######################################################################
-# Tile
-# ----
-
-
-def _test_tile(in_shape, multiples, dtype):
-    np_data = np.random.uniform(-5, 5, size=in_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, in_shape, name="in_data")
-        tf.tile(in_data, multiples=multiples, name="tile")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "tile:0")
-
-
-def test_forward_tile():
-    """test Tile"""
-    _test_tile((2,), (3,), "int32")
-    _test_tile((2, 2), (2, 3), "float32")
-    _test_tile((2, 4, 6), (6, 7, 8), "float64")
-
-
-#######################################################################
-# ClipByValue
-# -----------
-
-
-def _test_forward_clip_by_value(ip_shape, clip_value_min, clip_value_max, dtype):
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, ip_shape, name="in_data")
-        tf.clip_by_value(in_data, clip_value_min, clip_value_max, name="ClipByValue")
-        np_data = np.random.uniform(-100, 100, size=ip_shape).astype(dtype)
-        compare_tf_with_tvm([np_data], ["in_data:0"], "ClipByValue:0")
-
-
-def test_forward_clip_by_value():
-    """test ClipByValue op"""
-    if package_version.parse(tf.__version__) < package_version.parse("1.9"):
-        _test_forward_clip_by_value((4,), 0.1, 5.0, "float32")
-        _test_forward_clip_by_value((4, 4), 1, 5, "int32")
-
-
-#######################################################################
-# Multi Input to graph
-# --------------------
-
-
-def test_forward_multi_input():
-    """Multi Input"""
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(tf.int32, shape=[3, 3], name="in1")
-        in2 = tf.placeholder(tf.int32, shape=[3, 3], name="in2")
-        in3 = tf.placeholder(tf.int32, shape=[3, 3], name="in3")
-        in4 = tf.placeholder(tf.int32, shape=[3, 3], name="in4")
-
-        out1 = tf.add(in1, in2, name="out1")
-        out2 = tf.subtract(in3, in4, name="out2")
-        _ = tf.multiply(out1, out2, name="out")
-        in_data = np.arange(9, dtype="int32").reshape([3, 3])
-
-        compare_tf_with_tvm(
-            [in_data, in_data, in_data, in_data], ["in1:0", "in2:0", "in3:0", "in4:0"], "out:0"
-        )
-
-
-#######################################################################
-# Multi Output to Graph
-# ---------------------
-
-
-def test_forward_multi_output():
-    """Multi Output"""
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(tf.int32, shape=[3, 3], name="in1")
-        in2 = tf.placeholder(tf.int32, shape=[3, 3], name="in2")
-        in3 = tf.placeholder(tf.int32, shape=[3, 3], name="in3")
-        in4 = tf.placeholder(tf.int32, shape=[3, 3], name="in4")
-
-        _ = tf.add(in1, in2, name="out1")
-        _ = tf.subtract(in3, in4, name="out2")
-        in_data = np.arange(9, dtype="int32").reshape([3, 3])
-        in_data = [in_data] * 4
-        in_name = ["in1:0", "in2:0", "in3:0", "in4:0"]
-        out_name = ["out1:0", "out2:0"]
-        out_node = [out.strip(":0") for out in out_name]
-        in_node = [inp.strip(":0") for inp in in_name]
-
-        with tf.Session() as sess:
-            final_graph_def = tf.graph_util.convert_variables_to_constants(
-                sess,
-                sess.graph.as_graph_def(add_shapes=True),
-                out_node,
-            )
-            tf_output = run_tf_graph(sess, in_data, in_name, out_name)
-            tvm_output = run_tvm_graph(
-                final_graph_def, in_data, in_node, target="llvm", out_names=out_node, num_output=2
-            )
-            for i, tf_out in enumerate(tf_output):
-                tvm.testing.assert_allclose(tf_out, tvm_output[i], atol=1e-5, rtol=1e-5)
-
-
-#######################################################################
-# Resize Bilinear, Nearest_Neighbor
-# ---------------------------------
-
-
-def _test_resize_bilinear(in_shape, to_shape, align_corners):
-    """One iteration of resize bilinear"""
-
-    data = np.random.uniform(size=in_shape).astype("float32")
-    shape_data = np.array(to_shape).astype("int32")
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        shape_data = constant_op.constant(
-            shape_data, shape=shape_data.shape, dtype=shape_data.dtype
-        )
-        tf.image.resize_bilinear(in_data, shape_data, align_corners=align_corners)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "ResizeBilinear:0")
-
-
-def _test_resize_bilinear_from_tensor(in_shape, align_corners):
-    """One iteration of resize bilinear with non-constant output shape, requires
-    value inference to get proper output shape."""
-
-    data = np.random.uniform(size=in_shape).astype("float32")
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(
-            shape=[in_shape[0], None, None, in_shape[3]], dtype=data.dtype
-        )
-        to_shape = tf.shape(in_data)[1:3]
-        tf.image.resize_bilinear(in_data, to_shape, align_corners=align_corners)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "ResizeBilinear:0")
-
-
-def _test_resize_nearest_neighbor(in_shape, to_shape):
-    """One iteration of resize nearest neighbor"""
-
-    data = np.random.uniform(size=in_shape).astype("float32")
-    shape_data = np.array(to_shape).astype("int32")
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        shape_data = constant_op.constant(
-            shape_data, shape=shape_data.shape, dtype=shape_data.dtype
-        )
-        tf.image.resize_nearest_neighbor(in_data, shape_data, name="resize_nearest_neighbor")
-
-        compare_tf_with_tvm(data, "Placeholder:0", "resize_nearest_neighbor:0")
-
-
-def _test_resize_nearest_neighbor_dynamic_shape(in_shape, scale):
-    """One iteration of resize nearest neighbor for graph with dynamic input shape"""
-
-    data = np.random.uniform(size=in_shape).astype("float32")
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=None, dtype=data.dtype)
-        # multiply input shape by scale factor
-        new_shape = tf.shape(in_data)[1:3] * tf.constant(scale, dtype=tf.int32)
-        tf.image.resize_nearest_neighbor(in_data, new_shape, name="resize_nearest_neighbor")
-
-        compare_tf_with_tvm(data, "Placeholder:0", "resize_nearest_neighbor:0")
-
-
-def test_forward_resize():
-    """Resize Bilinear, Nearest_Neighbor"""
-    # TF default layout is NHWC
-    _test_resize_bilinear((4, 32, 32, 3), [50, 50], False)
-    _test_resize_bilinear((6, 32, 32, 3), [20, 20], True)
-    _test_resize_bilinear_from_tensor((4, 32, 32, 3), False)
-    _test_resize_bilinear_from_tensor((6, 50, 50, 3), True)
-    _test_resize_nearest_neighbor((6, 32, 32, 3), [20, 20])
-    _test_resize_nearest_neighbor_dynamic_shape((1, 16, 16, 3), scale=[2, 2])
-
-
-#######################################################################
-# BroadcastArgs
-# -----------
-
-
-def _test_broadcast_args(in_shape_1, in_shape_2):
-    """One iteration of broadcast_args"""
-
-    shape_1 = np.array(in_shape_1).astype("int32")
-    shape_2 = np.array(in_shape_2).astype("int32")
-
-    with tf.Graph().as_default():
-        shape_1 = constant_op.constant(shape_1, shape=shape_1.shape, dtype=shape_1.dtype)
-        shape_2 = constant_op.constant(shape_2, shape=shape_2.shape, dtype=shape_2.dtype)
-        tf.raw_ops.BroadcastArgs(s0=shape_1, s1=shape_2)
-
-        compare_tf_with_tvm(None, "", "BroadcastArgs:0", opt_level=0)
-
-
-def test_forward_broadcast_args():
-    """Resize Bilinear"""
-
-    _test_broadcast_args((4, 1, 32, 32), [4, 8, 32, 32])
-    _test_broadcast_args((6, 32, 32, 1), [6, 32, 32, 16])
-    _test_broadcast_args((32, 32, 16), [6, 32, 32, 16])
-
-
-#######################################################################
-# BroadcastTo
-# -----------
-
-
-def _test_broadcast_to(in_shape, to_shape):
-    """One iteration of broadcast_to"""
-
-    data = np.random.uniform(size=in_shape).astype("float32")
-    shape_data = np.array(to_shape).astype("int32")
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        shape_data = constant_op.constant(
-            shape_data, shape=shape_data.shape, dtype=shape_data.dtype
-        )
-        tf.broadcast_to(in_data, shape_data)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "BroadcastTo:0", opt_level=0)
-
-
-def _test_broadcast_to_from_tensor(in_shape):
-    """One iteration of broadcast_to with unknown shape at graph build"""
-
-    data = np.random.uniform(size=in_shape).astype("float32")
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=[None], dtype=data.dtype)
-
-        shape_data = tf.multiply(tf.shape(in_data), 32)
-        tf.broadcast_to(in_data, shape_data)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "BroadcastTo:0")
-
-
-def test_forward_broadcast_to():
-    """Resize Bilinear"""
-
-    _test_broadcast_to((4, 1, 32, 32), [4, 8, 32, 32])
-    _test_broadcast_to((6, 32, 32, 1), [6, 32, 32, 16])
-    _test_broadcast_to_from_tensor((1))
-
-
-#######################################################################
-# Fill
-# ----
-
-
-def _test_fill(in_shape):
-    """Use the fill op to create a tensor of ones with non-constant shape."""
-
-    with tf.Graph().as_default():
-        tf.ones(shape=in_shape, dtype="float32")
-        compare_tf_with_tvm(in_shape, [], "ones:0", opt_level=1)
-
-
-def _test_fill_from_tensor(in_shape):
-    """Use the fill op to create a tensor of ones with non-constant shape.
-    Some extra ops need to be added here to prevent the graph from
-    being fully constant and folded away."""
-
-    data = np.random.uniform(size=in_shape).astype("float32")
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(
-            shape=[in_shape[0], in_shape[1], None, None], dtype=data.dtype
-        )
-
-        x = tf.ones(shape=2 * tf.shape(in_data), dtype=data.dtype)
-        _ = tf.math.add(in_data, tf.reduce_mean(x), name="out1")
-        compare_tf_with_tvm(data, "Placeholder:0", "out1:0")
-
-
-def _test_fill_symbolic_inputs(in_shape_data, in_value_data, dtype):
-    with tf.Graph().as_default():
-        in_shape = tf.placeholder(shape=[in_shape_data.shape[0]], dtype=in_shape_data.dtype)
-        in_value = tf.placeholder(shape=(), dtype=dtype)
-        out = tf.fill(in_shape, in_value)
-        for mode in ["debug", "vm"]:
-            compare_tf_with_tvm(
-                [in_shape_data, in_value_data], [in_shape.name, in_value.name], out.name, mode=mode
-            )
-
-
-def test_forward_fill():
-    """Resize Bilinear"""
-
-    _test_fill((32))
-    _test_fill((6, 32, 64, 64))
-    _test_fill_from_tensor((6, 32, 64, 64))
-    _test_fill_symbolic_inputs(np.array((2,)), np.int32(9), tf.int32)
-    _test_fill_symbolic_inputs(np.array((2, 3)), 9, tf.int64)
-    _test_fill_symbolic_inputs(np.array((2, 3, 4)), np.float32(9.0), tf.float32)
-
-
-#######################################################################
-# Crop to bounding box
-# --------------------
-
-
-def _test_crop(in_shape, off_h, off_w, tar_h, tar_w):
-    """Crop to bounding box"""
-    data = np.random.uniform(size=in_shape).astype("float32")
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        tf.image.crop_to_bounding_box(in_data, off_h, off_w, tar_h, tar_w)
-        compare_tf_with_tvm(data, "Placeholder:0", "crop_to_bounding_box/Slice:0")
-
-
-def test_forward_crop():
-    """Crop to bounding box"""
-    _test_crop((1, 224, 224, 3), 20, 20, 120, 120)
-
-
-#######################################################################
-# CropAndResize
-# -------------
-
-
-def _test_forward_crop_and_resize(
-    img_shape,
-    boxes,
-    box_idx,
-    crop_size,
-    extrapolation_value=0.0,
-    method="bilinear",
-    dtype="float32",
-    atol=1e-4,
-    rtol=1e-4,
-):
-    image = np.random.uniform(0, 10, size=img_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(dtype, image.shape, name="in_data")
-        tf.image.crop_and_resize(
-            in_data,
-            boxes=boxes,
-            box_ind=box_idx,
-            crop_size=crop_size,
-            method=method,
-            extrapolation_value=extrapolation_value,
-            name="crop_and_resize",
-        )
-        compare_tf_with_tvm([image], ["in_data:0"], "crop_and_resize:0", atol=atol, rtol=rtol)
-
-
-def test_forward_crop_and_resize():
-    """CropAndResize"""
-    _test_forward_crop_and_resize([1, 6, 6, 3], [[0, 0, 1, 1]], [0], [3, 3])
-    _test_forward_crop_and_resize([1, 6, 6, 3], [[0, 0, 1, 1]], [0], [3, 3], 0.2)
-    _test_forward_crop_and_resize([1, 6, 6, 3], [[0, 0, 1, 1]], [0], [3, 3], 0.2, "nearest")
-    _test_forward_crop_and_resize([1, 11, 11, 3], [[0.3, 0.3, 1, 1]], [0], [21, 21])
-    _test_forward_crop_and_resize([1, 41, 41, 3], [[0.2, 0.4, 0.8, 0.8]], [0], [21, 11])
-    _test_forward_crop_and_resize([1, 100, 100, 3], [[0, 0, 0.9, 0.9]], [0], [30, 30])
-    _test_forward_crop_and_resize([1, 249, 249, 3], [[0, 0, 1, 1]], [0], [9, 9])
-    _test_forward_crop_and_resize([1, 201, 301, 3], [[0.2, 0.3, 0.7, 0.8]], [0], [51, 51])
-    _test_forward_crop_and_resize(
-        img_shape=[10, 11, 11, 3],
-        boxes=[[0, 0, 0.9, 0.9], [0.2, 0.2, 0.8, 0.8]],
-        box_idx=[0, 1],
-        crop_size=[5, 5],
-    )
-
-    if platform.machine() == "aarch64":
-        pytest.skip("Currently failing on AArch64")
-    _test_forward_crop_and_resize([1, 224, 224, 3], [[0.1, 0.2, 1, 1]], [0], [9, 9])
-    _test_forward_crop_and_resize(
-        img_shape=[20, 576, 576, 3],
-        boxes=[[0, 0, 1, 1], [0, 0, 0.8, 0.8], [0.1, 0.2, 0.9, 1], [0.2, 0, 1, 1]],
-        box_idx=[1, 0, 2, 3],
-        crop_size=[24, 24],
-        extrapolation_value=0.3,
-        atol=1e-3,
-        rtol=1e-3,
-    )
-    _test_forward_crop_and_resize(
-        img_shape=[20, 229, 229, 3],
-        boxes=[[0, 0, 0.9, 0.9], [0.3, 0.3, 1, 1], [0.2, 0.1, 0.7, 0.8], [0, 0, 1, 1]],
-        box_idx=[3, 0, 2, 1],
-        crop_size=[58, 58],
-        extrapolation_value=0.2,
-        method="nearest",
-        atol=1e-3,
-        rtol=1e-3,
-    )
-
-
-#######################################################################
-# Non Max Suppression
-# -------------------
-def _test_forward_nms_v3(
-    bx_shape, score_shape, iou_threshold, score_threshold, out_size, dtype="float32"
-):
-    boxes = np.random.uniform(0, 10, size=bx_shape).astype(dtype)
-    scores = np.random.uniform(size=score_shape).astype(dtype)
-    max_output_size = np.int32(out_size)
-    tf.reset_default_graph()
-    in_data_1 = tf.placeholder(dtype, boxes.shape, name="in_data_1")
-    in_data_2 = tf.placeholder(dtype, scores.shape, name="in_data_2")
-    in_data_3 = tf.placeholder(tf.int32, name="in_data_3")
-    tf.image.non_max_suppression(
-        boxes=in_data_1,
-        scores=in_data_2,
-        max_output_size=in_data_3,
-        iou_threshold=iou_threshold,
-        score_threshold=score_threshold,
-        name="nms",
-    )
-    compare_tf_with_tvm(
-        [boxes, scores, max_output_size],
-        ["in_data_1:0", "in_data_2:0", "in_data_3:0"],
-        "nms/NonMaxSuppressionV3:0",
-        mode="vm",
-    )
-    compare_tf_with_tvm(
-        [boxes, scores, max_output_size],
-        ["in_data_1:0", "in_data_2:0", "in_data_3:0"],
-        "nms/NonMaxSuppressionV3:0",
-        mode="debug",
-    )
-
-
-def _test_forward_nms_v4(
-    bx_shape, score_shape, iou_threshold, score_threshold, out_size, dtype="float32"
-):
-    boxes = np.random.uniform(0, 10, size=bx_shape).astype(dtype)
-    scores = np.random.uniform(size=score_shape).astype(dtype)
-    max_output_size = np.int32(out_size)
-    tf.reset_default_graph()
-    in_data_1 = tf.placeholder(dtype, boxes.shape, name="in_data_1")
-    in_data_2 = tf.placeholder(dtype, scores.shape, name="in_data_2")
-    in_data_3 = tf.placeholder(tf.int32, name="in_data_3")
-    indices_padded, num_valid = tf.image.non_max_suppression_padded(
-        boxes=in_data_1,
-        scores=in_data_2,
-        max_output_size=in_data_3,
-        iou_threshold=iou_threshold,
-        score_threshold=score_threshold,
-        name="nms",
-        pad_to_max_output_size=True,
-    )
-    num_valid = tf.reshape(num_valid, shape=(-1,))
-    indices_padded = tf.reshape(indices_padded, shape=(-1,))
-    tf.slice(indices_padded, tf.constant([0]), num_valid, name="SlicedIndices")
-    compare_tf_with_tvm(
-        [boxes, scores, max_output_size],
-        ["in_data_1:0", "in_data_2:0", "in_data_3:0"],
-        ["nms/NonMaxSuppressionV4:1", "SlicedIndices:0"],
-        mode="vm",
-    )
-    compare_tf_with_tvm(
-        [boxes, scores, max_output_size],
-        ["in_data_1:0", "in_data_2:0", "in_data_3:0"],
-        ["nms/NonMaxSuppressionV4:1", "SlicedIndices:0"],
-        mode="debug",
-    )
-
-
-def _test_forward_nms_v5(
-    bx_shape, score_shape, iou_threshold, score_threshold, out_size, dtype="float32"
-):
-    boxes = np.random.uniform(0, 10, size=bx_shape).astype(dtype)
-    scores = np.random.uniform(size=score_shape).astype(dtype)
-    max_output_size = np.int32(out_size)
-    tf.reset_default_graph()
-    in_data_1 = tf.placeholder(dtype, boxes.shape, name="in_data_1")
-    in_data_2 = tf.placeholder(dtype, scores.shape, name="in_data_2")
-    in_data_3 = tf.placeholder(tf.int32, name="in_data_3")
-    tf.image.non_max_suppression_with_scores(
-        boxes=in_data_1,
-        scores=in_data_2,
-        max_output_size=in_data_3,
-        iou_threshold=iou_threshold,
-        score_threshold=score_threshold,
-        name="nms",
-    )
-    compare_tf_with_tvm(
-        [boxes, scores, max_output_size],
-        ["in_data_1:0", "in_data_2:0", "in_data_3:0"],
-        ["nms/NonMaxSuppressionV5:0", "nms/NonMaxSuppressionV5:1"],
-        mode="vm",
-    )
-
-
-def test_forward_nms():
-    """NonMaxSuppressionV3,5"""
-    for _test_forward_nms in [_test_forward_nms_v3, _test_forward_nms_v5]:
-        _test_forward_nms((5, 4), (5,), 0.7, 0.5, 5)
-        _test_forward_nms((20, 4), (20,), 0.5, 0.6, 10)
-        _test_forward_nms((1000, 4), (1000,), 0.3, 0.7, 1000)
-        _test_forward_nms((2000, 4), (2000,), 0.4, 0.6, 7)
-
-
-def _test_forward_combined_nms(
-    bx_shape,
-    score_shape,
-    iou_threshold,
-    score_threshold,
-    out_size,
-    total_size,
-    clip_boxes=False,
-    dtype="float32",
-):
-    def get_random_scores(size, dtype):
-        size1d = np.prod(size)
-        scores = np.linspace(0, 1, num=size1d)
-        np.random.shuffle(scores)
-        return scores.reshape(size).astype(dtype)
-
-    boxes = np.random.uniform(-1, 2, size=bx_shape).astype(dtype)
-    scores = get_random_scores(score_shape, dtype)
-    max_output_size = np.int32(out_size)
-    tf.reset_default_graph()
-    in_data_1 = tf.placeholder(dtype, boxes.shape, name="in_data_1")
-    in_data_2 = tf.placeholder(dtype, scores.shape, name="in_data_2")
-    in_data_3 = tf.placeholder(tf.int32, name="in_data_3")
-    tf.image.combined_non_max_suppression(
-        boxes=in_data_1,
-        scores=in_data_2,
-        max_output_size_per_class=in_data_3,
-        max_total_size=total_size,
-        iou_threshold=iou_threshold,
-        score_threshold=score_threshold,
-        pad_per_class=False,
-        clip_boxes=clip_boxes,
-        name="nms",
-    )
-    compare_tf_with_tvm(
-        [boxes, scores, max_output_size],
-        ["in_data_1:0", "in_data_2:0", "in_data_3:0"],
-        [
-            "nms/CombinedNonMaxSuppression:0",
-            "nms/CombinedNonMaxSuppression:1",
-            "nms/CombinedNonMaxSuppression:2",
-            "nms/CombinedNonMaxSuppression:3",
-        ],
-    )
-
-
-def test_forward_combined_nms():
-    """CombinedNonMaxSuppression"""
-    _test_forward_combined_nms((1, 64, 1, 4), (1, 64, 1), 0.7, 0.5, 64, 64)
-    _test_forward_combined_nms((1, 32, 1, 4), (1, 32, 1), 0.7, 0.5, 10, 64)
-    _test_forward_combined_nms((1, 32, 1, 4), (1, 32, 2), 0.7, 0.5, 32, 64)
-    _test_forward_combined_nms((1, 64, 1, 4), (1, 64, 20), 0.7, 0.5, 64, 10)
-    # This workload seems flaky on CI.
-    # See https://github.com/apache/tvm/issues/8140
-    # _test_forward_combined_nms((1, 64, 20, 4), (1, 64, 20), 0.7, 0.5, 64, 64, clip_boxes=True)
-    _test_forward_combined_nms((2, 200, 1, 4), (2, 200, 1), 0.4, 0.6, 100, 100)
-    _test_forward_combined_nms((2, 200, 1, 4), (2, 200, 10), 0.4, 0.2, 150, 1000)
-
-
-#######################################################################
-# LSTM
-# ----
-
-
-def _test_lstm_cell(batch_size, num_hidden, num_layers, forget_bias, dtype):
-    """One iteration of a LSTM cell"""
-
-    tf.reset_default_graph()
-    input_size = num_hidden
-    input_data = np.full((batch_size, input_size), 1.0, dtype=dtype)
-    in_state_c = np.full((batch_size, num_hidden), 0.1, dtype=dtype)
-    in_state_h = np.full((batch_size, num_hidden), 0.1, dtype=dtype)
-
-    def _get_tensorflow_output():
-        with tf.Session() as sess:
-            with variable_scope.variable_scope(
-                "root", initializer=init_ops.constant_initializer(0.5)
-            ):
-                m0 = tf.placeholder(dtype, [batch_size, num_hidden], name="m0")
-                m1 = tf.placeholder(dtype, [batch_size, num_hidden], name="m1")
-                x = tf.placeholder(shape=(batch_size, input_size), dtype=dtype, name="input")
-                g, ((out_m0, out_m1)) = tensorflow.contrib.rnn.LSTMBlockCell(
-                    num_hidden, forget_bias=forget_bias
-                )(x, (m0, m1))
-                sess.run([variables.global_variables_initializer()])
-                res = sess.run(
-                    [g, out_m0, out_m1],
-                    {
-                        x.name: np.array([[1.0, 1.0]]),
-                        m0.name: in_state_c,
-                        m1.name: in_state_h,
-                    },
-                )
-            graph_def = sess.graph.as_graph_def(add_shapes=True)
-            final_graph_def = graph_util.convert_variables_to_constants(
-                sess, graph_def, ["root/lstm_cell/LSTMBlockCell"]
-            )
-
-            return final_graph_def, res
-
-    graph_def, tf_out = _get_tensorflow_output()
-    tvm_output = run_tvm_graph(
-        graph_def,
-        [input_data, in_state_c, in_state_h],
-        ["root/input", "root/m0", "root/m1"],
-        num_output=7,
-    )
-    assert isinstance(tvm_output, list)
-
-    tvm.testing.assert_allclose(tf_out[0], tvm_output[6], rtol=1e-3, atol=1e-3)
-    tvm.testing.assert_allclose(tf_out[1], tvm_output[1], rtol=1e-3, atol=1e-3)
-
-
-def test_forward_lstm():
-    """test LSTM block cell"""
-    if package_version.parse(tf.VERSION) < package_version.parse("2.0.0"):
-        # in 2.0, tf.contrib.rnn.LSTMBlockCell is removed
-        _test_lstm_cell(1, 2, 1, 0.5, "float32")
-
-
-#######################################################################
-# Pack
-# ---
-def _test_pack(axis, shape, **kwargs):
-
-    a = np.arange(np.prod(shape), dtype=np.float32).reshape(shape)
-    b = np.arange(np.prod(shape), dtype=np.float32).reshape(shape)
-
-    with tf.Graph().as_default():
-        tf_a = array_ops.placeholder(shape=shape, dtype="float32", name="pl_a")
-        tf_b = array_ops.placeholder(shape=shape, dtype="float32", name="pl_b")
-        tf_c = tf.stack([tf_a, tf_b], axis=axis, **kwargs)
-        assert tf_c.op.op_def.name == "Pack", "tf.stack() is expected to produce 'Pack' operation"
-
-        compare_tf_with_tvm([a, b], ["pl_a:0", "pl_b:0"], "stack:0")
-
-
-def test_forward_pack():
-    for axis in range(-3, 3):
-        _test_pack(axis, [3, 2, 1])
-    for axis in range(-1, 1):
-        _test_pack(axis, [3])
-    _test_pack(0, [])
-
-
-#######################################################################
-# Unpack
-# ------
-def _test_forward_unpack(in_shape, axis, dtype):
-    """test operator Unpack"""
-    np_data = np.random.uniform(-100, 100, size=in_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, in_shape, name="in_data")
-        tf.unstack(in_data, axis=axis, name="Unpack")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "Unpack:0")
-
-
-def test_forward_unpack():
-    _test_forward_unpack((3,), 0, "int32")
-    _test_forward_unpack((3,), -1, "int16")
-    _test_forward_unpack((21, 23, 3), 2, "float32")
-
-
-#######################################################################
-# Range
-# -----
-
-
-def test_forward_range():
-    """test operator Range"""
-    for dtype in [tf.int32, tf.int64]:
-        tf.reset_default_graph()
-        with tf.Graph().as_default():
-            tf.range(1, 18, 3, name="range", dtype=dtype)
-            compare_tf_with_tvm([], [], "range:0")
-
-    # test type assignment for operator Range
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        tf.range(1, 256 + 1, 1, dtype=tf.float32)
-        compare_tf_with_tvm([], [], "range:0")
-
-
-#######################################################################
-# Einsum
-# -----
-
-
-def _test_einsum(equation, dtype, *shape_of_input_tensors):
-    """Test Einsum Op"""
-
-    with tf.Graph().as_default():
-        inputs_placeholders = []
-        input_data = []
-        for idx, shape in enumerate(shape_of_input_tensors):
-            input_name = f"input_{idx}"
-            inputs_placeholders.append(tf.placeholder(shape=shape, dtype=dtype, name=input_name))
-            input_data.append(np.random.normal(size=shape).astype(dtype))
-
-        result = tf.einsum(equation, *inputs_placeholders)
-
-        compare_tf_with_tvm(input_data, [ph.name for ph in inputs_placeholders], result.name)
-
-
-def test_forward_einsum():
-    for dtype in ["float32"]:
-        _test_einsum("ij,jk->ik", dtype, [2, 3], [3, 5])  # Matmul
-        _test_einsum("ij,jk", dtype, [2, 3], [3, 5])  # Matmul
-        _test_einsum("i,i->", dtype, [2], [2])  # Dot product
-        _test_einsum("i,j->ij", dtype, [3], [5])  # Outer produce
-        _test_einsum("ij->ji", dtype, [2, 3])  # Transpose
-        _test_einsum("ii->i", dtype, [3, 3])  # Diag
-        _test_einsum("ii", dtype, [3, 3])  # Trace of a square matrix
-        _test_einsum("bij,bjk->bik", dtype, [7, 5, 3], [7, 3, 2])  # Batch matmul
-
-
-#######################################################################
-# Pad
-# ---
-
-
-def _test_pad(input_shape, paddings, mode, **kwargs):
-    """One iteration of pad operation with given shape"""
-
-    x = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape)
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=input_shape, dtype="float32")
-        pad_values = constant_op.constant(paddings)
-        _ = tf.pad(in_data, paddings=pad_values, mode=mode, **kwargs)
-
-        if mode == "CONSTANT":
-            if "constant_values" in kwargs:
-                out_name = "PadV2:0"
-            else:
-                out_name = "Pad:0"
-        else:
-            out_name = "MirrorPad:0"
-
-        compare_tf_with_tvm(x, "Placeholder:0", out_name)
-
-
-def test_forward_pad():
-    """Pad"""
-    _test_pad((2, 3), [[1, 1], [2, 2]], mode="CONSTANT")
-    _test_pad((2, 3), [[1, 1], [2, 2]], mode="CONSTANT", constant_values=1.0)
-    _test_pad((2, 3), [[1, 1], [2, 2]], mode="SYMMETRIC")
-    _test_pad((2, 3), [[1, 1], [2, 2]], mode="REFLECT")
-
-
-#######################################################################
-# Logical operators
-# --------------------
-
-
-def test_logical_and():
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in1")
-        in2 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in2")
-        _ = tf.logical_and(in1, in2, name="out")
-        in_data1 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool")
-        in_data2 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool")
-        compare_tf_with_tvm([in_data1, in_data2], ["in1:0", "in2:0"], "out:0")
-
-
-def test_logical_or():
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in1")
-        in2 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in2")
-        _ = tf.logical_or(in1, in2, name="out")
-        in_data1 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool")
-        in_data2 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool")
-        compare_tf_with_tvm([in_data1, in_data2], ["in1:0", "in2:0"], "out:0")
-
-
-def test_logical_xor():
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in1")
-        in2 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in2")
-        _ = tf.logical_xor(in1, in2, name="out")
-        in_data1 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool")
-        in_data2 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool")
-        compare_tf_with_tvm([in_data1, in_data2], ["in1:0", "in2:0"], "out:0")
-
-
-def test_logical_not():
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in1")
-        _ = tf.logical_not(in1, name="out")
-        in_data1 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool")
-        compare_tf_with_tvm(in_data1, "in1:0", "out:0")
-
-
-def test_forward_logical():
-    test_logical_and()
-    test_logical_or()
-    test_logical_xor()
-    test_logical_not()
-
-
-#######################################################################
-# Where, Select, SelectV2
-# -------------
-def test_forward_where():
-    """Where: return elements depending on conditions"""
-    with tf.Graph().as_default():
-        with tf.Session() as _:
-            input1 = tf.placeholder(tf.int32, shape=[1, 4, 4, 3], name="input1")
-            input2 = tf.placeholder(tf.int32, shape=[1, 4, 4, 3], name="input2")
-            mask = input1 > input2
-            tf.where(mask, input1 + 1, input2 * 2)
-            in_data1 = np.random.uniform(0, 10, size=(1, 4, 4, 3)).astype("uint32")
-            in_data2 = np.random.uniform(0, 10, size=(1, 4, 4, 3)).astype("uint32")
-            compare_tf_with_tvm([in_data1, in_data2], ["input1:0", "input2:0"], "Select:0")
-
-
-#######################################################################
-# Inception V3
-# ------------
-@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/10275")
-def test_forward_inception_v3():
-    """test inception V3 model"""
-    with tf.Graph().as_default():
-        graph_def = tf_testing.get_workload(
-            "InceptionV3/inception_v3_2016_08_28_frozen-with_shapes.pb"
-        )
-        # Call the utility to import the graph definition into default graph.
-        graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
-        data = np.random.uniform(size=(1, 299, 299, 3)).astype("float32")
-
-        with tf.Session() as sess:
-            tf_output = run_tf_graph(sess, data, "input:0", "InceptionV3/Predictions/Reshape_1:0")
-            tvm_output = run_tvm_graph(graph_def, data, "input")
-            tvm.testing.assert_allclose(tf_output[0], tvm_output[0], rtol=1e-5, atol=1e-5)
-
-
-#######################################################################
-# Inception V1
-# ------------
-
-
-def test_forward_inception_v1():
-    """test inception V1 model"""
-    with tf.Graph().as_default():
-        graph_def = tf_testing.get_workload("InceptionV1/classify_image_graph_def-with_shapes.pb")
-        # Call the utility to import the graph definition into default graph.
-        graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
-        # Build an image from random data.
-        img_array = np.random.uniform(size=(1, 600, 600, 3)).astype("uint8")
-        img = Image.frombuffer("RGB", (600, 600), img_array.tostring(), "raw", "RGB", 0, 1)
-        temp = utils.tempdir()
-        img_path = temp.relpath("tf-test.jpg")
-        img.save(img_path)
-
-        if not tf.gfile.Exists(os.path.join(img_path)):
-            tf.logging.fatal("File does not exist %s", img_path)
-        data = tf.gfile.FastGFile(os.path.join(img_path), "rb").read()
-
-        temp.remove()
-
-        # Extract tensorflow decoded image frame for tvm input
-        with tf.Session() as sess:
-            tvm_data = run_tf_graph(sess, data, "DecodeJpeg/contents:0", "DecodeJpeg:0")
-
-        with tf.Session() as sess:
-            tf_output = run_tf_graph(sess, data, "DecodeJpeg/contents:0", "softmax:0")
-            tvm_output = run_tvm_graph(graph_def, tvm_data, "DecodeJpeg/contents")
-            tvm.testing.assert_allclose(tf_output[0], tvm_output[0], rtol=1e-5, atol=1e-5)
-
-
-#######################################################################
-# Mobilenet
-# ---------
-
-
-def test_forward_mobilenet():
-    """test mobilenet model"""
-    # MobilenetV2
-    with tf.Graph().as_default():
-        graph_def = tf_testing.get_workload(
-            "https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz",
-            "mobilenet_v2_1.4_224_frozen.pb",
-        )
-        # Call the utility to import the graph definition into default graph.
-        graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
-        data = np.random.uniform(size=(1, 224, 224, 3)).astype("float32")
-        out_node = "MobilenetV2/Predictions/Reshape_1"
-
-        with tf.Session() as sess:
-            # Add shapes to the graph.
-            graph_def = tf_testing.AddShapesToGraphDef(sess, out_node)
-            tf_output = run_tf_graph(sess, data, "input:0", out_node + ":0")
-            tvm_output = run_tvm_graph(graph_def, data, "input")
-            tvm.testing.assert_allclose(
-                np.squeeze(tvm_output[0]), np.squeeze(tf_output[0]), rtol=1e-5, atol=1e-5
-            )
-
-
-#######################################################################
-# ResnetV2
-# --------
-
-
-@tvm.testing.requires_gpu
-def test_forward_resnetv2():
-    """test resnet model"""
-    if is_gpu_available():
-        with tf.Graph().as_default():
-            graph_def = tf_testing.get_workload(
-                "ResnetV2/resnet-20180601_resnet_v2_imagenet-shapes.pb"
-            )
-            # Call the utility to import the graph definition into default graph.
-            graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
-            data = np.random.uniform(size=(128, 224, 224, 3)).astype("float32")
-            out_node = "ArgMax"
-
-            with tf.Session() as sess:
-                tf_output = run_tf_graph(sess, data, "input_tensor:0", out_node + ":0")
-                for device in ["llvm", "cuda"]:
-                    _ = tvm.device(device, 0)
-                    if not tvm.testing.device_enabled(device):
-                        print(f"Skip because {device} is not enabled")
-                        continue
-                    tvm_output = run_tvm_graph(
-                        graph_def, data, "input_tensor", len(tf_output), target=device
-                    )
-                    tvm.testing.assert_allclose(
-                        np.squeeze(tvm_output[0]), np.squeeze(tf_output[0]), rtol=1e-5, atol=1e-5
-                    )
-
-
-#######################################################################
-# SSD
-# ---
-
-
-def _test_ssd_impl():
-    """Test SSD with backbone MobileNet V1"""
-    with tf.Graph().as_default():
-        graph_def = tf_testing.get_workload(
-            "object_detection/ssd_mobilenet_v1_ppn_shared_"
-            "box_predictor_300x300_coco14_sync_2018_07_03.pb"
-        )
-        # Call the utility to import the graph definition into default graph.
-        graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
-        data = np.random.uniform(0.0, 255.0, size=(1, 512, 512, 3)).astype("uint8")
-        in_node = "image_tensor"
-        out_node = ["detection_boxes", "detection_scores", "detection_classes"]
-
-        with tf.Session() as sess:
-            tf_output = run_tf_graph(
-                sess, data, f"{in_node}:0", [f"{oname}:0" for oname in out_node]
-            )
-            # TODO(kevinthesun): enable gpu test when VM heterogeneous execution is ready.
-            for device in ["llvm"]:
-                _ = tvm.device(device, 0)
-                if not tvm.testing.device_enabled(device):
-                    print(f"Skip because {device} is not enabled")
-                    continue
-                tvm_output = run_tvm_graph(
-                    graph_def,
-                    data,
-                    in_node,
-                    len(out_node),
-                    target=device,
-                    layout="NCHW",
-                    out_names=out_node,
-                    mode="vm",
-                    disabled_pass=["FoldScaleAxis"],
-                    serialize=True,
-                )
-                for i in range(len(out_node)):
-                    tvm.testing.assert_allclose(tvm_output[i], tf_output[i], rtol=1e-3, atol=1e-3)
-
-
-@pytest.mark.skip(
-    reason="Use of threading module here hides errors, see https://github.com/apache/tvm/pull/10231"
-)
-def test_forward_ssd():
-    run_thread = threading.Thread(target=_test_ssd_impl, args=())
-    old_stack_size = threading.stack_size(100 * 1024 * 1024)
-    run_thread.start()
-    run_thread.join()
-    threading.stack_size(old_stack_size)
-
-
-#######################################################################
-# Placeholder
-# -----------
-
-
-def test_forward_placeholder():
-    """test a simple pb with Placeholder node in the end of GraphDef"""
-    with tf.Graph().as_default():
-        graph_def = tf_testing.get_workload("Custom/placeholder.pb")
-        # Call the utility to import the graph definition into default graph.
-        graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
-        data = np.random.uniform(size=(1, 224, 224, 3)).astype("float32")
-        out_node = "mul"
-
-        with tf.Session() as sess:
-            # Add shapes to the graph.
-            graph_def = tf_testing.AddShapesToGraphDef(sess, out_node)
-            tf_output = run_tf_graph(sess, data, "Placeholder:0", out_node + ":0")
-            tvm_output = run_tvm_graph(graph_def, data, "Placeholder")
-            tvm.testing.assert_allclose(
-                np.squeeze(tvm_output[0]), np.squeeze(tf_output[0]), rtol=1e-5, atol=1e-5
-            )
-
-
-#######################################################################
-# PTB
-# ---
-try:
-    # Load contrib for running ptb model in tf version before 2.0
-    import tensorflow.contrib
-except ImportError:
-    pass
-
-
-def test_forward_ptb():
-    """test ptb model"""
-    config = tf_testing.get_config()
-    num_steps = config.num_steps
-    num_hidden = config.hidden_size
-    num_layers = config.num_layers
-    batch_size = config.batch_size
-    vocab_size = config.vocab_size
-    out_sample_shape = (batch_size, vocab_size)
-    out_state_shape = (batch_size, num_hidden)
-    # Sample input
-    inpt = "we have no useful information on"
-    cnt_sample = 20
-
-    def _pretty_print(items, is_char_model, id2word):
-        if not is_char_model:
-            return " ".join([id2word[x] for x in items])
-        else:
-            return "".join([id2word[x] for x in items]).replace("_", " ")
-
-    def _get_tvm_graph_module(graph_def):
-        # Cell inputs 'c and 'h' consist of all layers values
-        shape_dict = {"Model/Placeholder": (batch_size, num_steps)}
-
-        with tvm.testing.disable_span_filling():
-            mod, params = relay.frontend.from_tensorflow(
-                graph_def,
-                shape=shape_dict,
-                outputs=[
-                    "Model/Softmax:0",
-                    "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell:1",
-                    "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell:6",
-                    "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_1:1",
-                    "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_1:6",
-                ],
-            )
-        with tvm.testing.enable_span_filling():
-            mod_with_span, _ = relay.frontend.from_tensorflow(
-                graph_def,
-                shape=shape_dict,
-                outputs=[
-                    "Model/Softmax:0",
-                    "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell:1",
-                    "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell:6",
-                    "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_1:1",
-                    "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_1:6",
-                ],
-            )
-        tvm.ir.assert_structural_equal(mod["main"], mod_with_span["main"])
-
-        target = "llvm"
-        with tvm.transform.PassContext(opt_level=0):
-            graph, lib, params = relay.build(mod, target, params=params)
-
-        dev = tvm.cpu(0)
-        return params, graph_executor.create(graph, lib, dev)
-
-    def _do_tvm_sample(model, data, in_states, params, num_samples):
-        """Sampled from the model"""
-        samples = []
-        state = in_states
-        sample = None
-
-        def _get_sample(data, state):
-            input_data = np.full((batch_size, num_steps), data, dtype="int32")
-
-            model.set_input("Model/Placeholder", tvm.nd.array(input_data.astype("int32")))
-            model.set_input(
-                "Model/MultiRNNCellZeroState/LSTMBlockCellZeroState/zeros",
-                tvm.nd.array(state[0].astype("float32")),
-            )
-            model.set_input(
-                "Model/MultiRNNCellZeroState/LSTMBlockCellZeroState/zeros_1",
-                tvm.nd.array(state[1].astype("float32")),
-            )
-            model.set_input(
-                "Model/MultiRNNCellZeroState/LSTMBlockCellZeroState_1/zeros",
-                tvm.nd.array(state[2].astype("float32")),
-            )
-            model.set_input(
-                "Model/MultiRNNCellZeroState/LSTMBlockCellZeroState_1/zeros_1",
-                tvm.nd.array(state[3].astype("float32")),
-            )
-            model.set_input(**params)
-            model.run()
-            tvm_output = model.get_output(0, tvm.nd.empty(out_sample_shape, "float32")).numpy()
-
-            state_output = []
-            for i in range(4):
-                state_output.append(
-                    model.get_output(i + 1, tvm.nd.empty(out_state_shape, "float32")).numpy()
-                )
-            sample = tf_testing.pick_from_weight(tvm_output[0])
-
-            return sample, state_output
-
-        for x in data:
-            sample, state = _get_sample(x, state)
-
-        if sample is not None:
-            samples.append(sample)
-        else:
-            samples.append(0)
-
-        k = 1
-        while k < num_samples:
-            sample, state = _get_sample(samples[-1], state)
-            samples.append(sample)
-            k += 1
-        return samples, state
-
-    with tf.Graph().as_default():
-        word_to_id, id_to_word, graph_def = tf_testing.get_workload_ptb()
-        vocab_size = len(word_to_id)
-        # Call the utility to import the graph definition into default graph.
-        graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-        sess = tf.Session()
-
-    # TVM graph module creation
-    params, m = _get_tvm_graph_module(graph_def)
-
-    # Create 10 predicted statments of 20 words
-    cnt_stm = 0
-    while cnt_stm < 10:
-        cnt_stm += 1
-        in_state = [np.full((batch_size, num_hidden), 0, dtype="float32")] * 2 * num_layers
-        seed_for_sample = inpt.split()
-        tvm_samples, _ = _do_tvm_sample(
-            m, [word_to_id[word] for word in seed_for_sample], in_state, params, cnt_sample
-        )
-        tvm_sample_str = _pretty_print(tvm_samples, False, id_to_word)
-        tf_samples, _ = tf_testing.do_tf_sample(
-            sess, [word_to_id[word] for word in seed_for_sample], in_state, cnt_sample
-        )
-        tf_sample_str = _pretty_print(tf_samples, False, id_to_word)
-        inpt = tvm_sample_str
-        tvm.testing.assert_allclose(tf_samples, tvm_samples, rtol=1e-5, atol=1e-5)
-        assert tvm_sample_str == tf_sample_str
-
-
-#######################################################################
-# LRN (Local Response Normalization)
-# ----------------------------------
-
-
-def _test_lrn(ishape, size, axis, bias, alpha, beta):
-    """testing local response normalization"""
-    lrn_depth_radius = size / 2
-
-    inp_array = np.random.uniform(size=ishape).astype(np.float32)
-
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype, name="lrn0_data")
-        nn_ops.local_response_normalization(
-            in1, name="lrn", depth_radius=lrn_depth_radius, bias=bias, alpha=alpha, beta=beta
-        )
-
-        compare_tf_with_tvm(inp_array, "lrn0_data:0", "lrn:0")
-
-
-def test_forward_lrn():
-    _test_lrn((1, 3, 20, 20), 3, 1, 1.0, 1.0, 0.5)
-
-
-#######################################################################
-# l2_normalize
-# ------------
-
-
-def _test_l2_normalize(ishape, eps, axis):
-    """testing l2 normalize (uses max, sum, square, sqrt frontend operators)"""
-
-    inp_array = np.random.uniform(size=ishape).astype(np.float32)
-
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-        nn.l2_normalize(in1, axis=axis, epsilon=eps, name=None, dim=None)
-
-        compare_tf_with_tvm(inp_array, "Placeholder:0", "l2_normalize:0")
-
-
-def test_forward_l2_normalize():
-    _test_l2_normalize((1, 3, 20, 20), 0.001, (0,))
-
-
-#######################################################################
-# transpose
-# ---------
-
-
-def _test_forward_transpose(ishape, axes=None):
-    data = np.random.uniform(size=ishape).astype(np.float32)
-
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=data.shape, dtype=data.dtype, name="transpose_data")
-
-        if axes is None:
-            tf.transpose(in1)
-        else:
-            tf.transpose(in1, perm=axes)
-
-        compare_tf_with_tvm(data, "transpose_data:0", "transpose:0")
-
-
-def _test_forward_tranapose_axes_input(ishape, axes):
-    data = np.random.uniform(size=ishape).astype(np.float32)
-    axes_np = np.array(axes).astype(np.int32)
-
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=data.shape, dtype=data.dtype, name="transpose_data")
-
-        const1 = tf.constant(axes_np, dtype=tf.int32)
-
-        # make axes an input to tf.transpose, but not an input to the graph,
-        # so it can be extracted with infer_value_simulated
-        axes = tf.reverse(const1, axis=[-1])
-        tf.transpose(in1, axes)
-
-        compare_tf_with_tvm([data], ["transpose_data:0"], "transpose:0")
-
-
-def test_forward_transpose():
-    _test_forward_transpose((2, 3, 4), (1, 2, 0))
-    _test_forward_transpose((2, 3, 4))
-    _test_forward_transpose((7, 8, 8, 10))
-    _test_forward_transpose((2, 3, 4), (1, 2, 0))
-    _test_forward_transpose((2, 3, 4), (0, 1, 2))
-    _test_forward_transpose((2, 3, 4, 5), (3, 0, 1, 2))
-    _test_forward_tranapose_axes_input((2, 3, 4), (1, 2, 0))
-    _test_forward_tranapose_axes_input((2, 3, 4, 5), (3, 0, 1, 2))
-
-
-def _test_forward_slice_operation_input(input_value, begin_value, size_value):
-    input_data = np.array(input_value, dtype=np.float32)
-    with tf.Graph().as_default():
-        input_tensor = tf.placeholder(shape=input_data.shape, dtype=input_data.dtype, name="input")
-        tf.slice(input_tensor, begin_value, size_value, name="slice_output")
-        compare_tf_with_tvm([input_data], ["input:0"], "slice_output:0")
-
-
-def test_forward_slice():
-    _test_forward_slice_operation_input([1, 1], [0], [2])
-    _test_forward_slice_operation_input([0, 1, 2, 3], [3], [-1])
-    _test_forward_slice_operation_input(
-        [[0, 1, 2, 3], [4, 5, 6, 7]], begin_value=[0, 1], size_value=[-1, -1]
-    )
-
-
-def test_forward_ceil():
-    ishape = (1, 3, 10, 10)
-    inp_array = np.random.uniform(size=ishape).astype(np.float32)
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-        tf.ceil(in1)
-        compare_tf_with_tvm(inp_array, "Placeholder:0", "Ceil:0")
-
-
-def test_forward_floor():
-    ishape = (1, 3, 10, 10)
-    inp_array = np.random.uniform(size=ishape).astype(np.float32)
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-        tf.floor(in1)
-        compare_tf_with_tvm(inp_array, "Placeholder:0", "Floor:0")
-
-
-def test_forward_relu():
-    ishape = (1, 3, 10, 10)
-    inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
-    for mode in ["graph_executor", "vm"]:
-        with tf.Graph().as_default():
-            in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-            tf.nn.relu(in1)
-            compare_tf_with_tvm(inp_array, "Placeholder:0", "Relu:0", mode=mode)
-
-
-def test_forward_leaky_relu():
-    ishape = (1, 3, 10, 10)
-    inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
-    for mode in ["graph_executor", "vm"]:
-        with tf.Graph().as_default():
-            in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-            tf.nn.leaky_relu(in1, alpha=0.4)
-            compare_tf_with_tvm(inp_array, "Placeholder:0", "LeakyRelu:0", mode=mode)
-
-
-def test_forward_elu():
-    ishape = (1, 3, 10, 10)
-    inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-        tf.nn.elu(in1)
-        compare_tf_with_tvm(inp_array, "Placeholder:0", "Elu:0")
-
-
-def test_forward_selu():
-    ishape = (1, 3, 10, 10)
-    inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-        tf.nn.selu(in1)
-        compare_tf_with_tvm(inp_array, "Placeholder:0", "Selu:0")
-
-
-def test_forward_tanh():
-    ishape = (1, 3, 10, 10)
-    inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-        tf.nn.tanh(in1)
-        compare_tf_with_tvm(inp_array, "Placeholder:0", "Tanh:0")
-
-
-#######################################################################
-# Softmax
-# -------
-def test_forward_softmax():
-    """test operator Softmax"""
-
-    def check_softmax(in_shape, axis, dtype):
-        np_data = np.random.uniform(-100, 100, size=in_shape).astype(dtype)
-        tf.reset_default_graph()
-        with tf.Graph().as_default():
-            in_data = tf.placeholder(dtype, in_shape, name="in_data")
-            tf.nn.softmax(in_data, axis=axis, name="Softmax")
-            compare_tf_with_tvm([np_data], ["in_data:0"], "Softmax:0")
-
-    check_softmax((2, 3, 5), 2, "float32")
-    check_softmax((2, 3, 5), -1, "float32")
-
-
-#######################################################################
-# Tensor
-# ------
-
-
-def test_forward_round():
-    """test Round"""
-    np_data = np.random.uniform(-10, 10, size=(5, 7)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (5, 7), name="in_data")
-        tf.round(in_data, name="round")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "round:0")
-
-
-def test_forward_abs():
-    """test operator Abs"""
-    np_data = np.random.uniform(1, 100, size=(9, 11)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (9, 11), name="in_data")
-        tf.math.abs(in_data, name="abs")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "abs:0")
-
-
-def _test_forward_zeros_like(in_shape, dtype):
-    np_data = np.random.uniform(-10, 10, size=in_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, in_shape, name="in_data")
-        tf.zeros_like(in_data, name="zeros_like")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "zeros_like:0")
-
-
-def test_forward_zeros_like():
-    if package_version.parse(tf.__version__) < package_version.parse("1.2"):
-        _test_forward_zeros_like((2, 3), "int32")
-        _test_forward_zeros_like((2, 3, 5), "int8")
-        _test_forward_zeros_like((2, 3, 5, 7), "uint16")
-        _test_forward_zeros_like((2, 3, 11), "float32")
-        _test_forward_zeros_like((2, 3, 11), "float64")
-
-
-def test_forward_squared_difference():
-    ishape = (1, 3, 10, 14)
-    inp_array_a = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
-    inp_array_b = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array_a.shape, dtype=inp_array_a.dtype, name="in1")
-        in2 = tf.placeholder(shape=inp_array_b.shape, dtype=inp_array_b.dtype, name="in2")
-        out = tf.math.squared_difference(in1, in2)
-        compare_tf_with_tvm([inp_array_a, inp_array_b], [in1.name, in2.name], out.name)
-
-
-def _test_forward_reverse_v2(in_shape, axis, dtype):
-    np_data = np.random.uniform(-10, 10, size=in_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, in_shape, name="in_data")
-        tf.reverse(in_data, axis=[axis], name="reverse")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "reverse:0")
-
-
-def test_forward_reverse_v2():
-    """test ReverseV2"""
-    _test_forward_reverse_v2((2, 3), 0, "int32")
-    _test_forward_reverse_v2((2, 3, 5), 2, "float32")
-    _test_forward_reverse_v2((2, 3, 5, 7), 1, "float32")
-    _test_forward_reverse_v2((2, 3, 5), -1, "float64")
-    _test_forward_reverse_v2((2, 3, 5), -3, "float64")
-
-
-def test_forward_sign():
-    """test Sign"""
-    np_data = np.random.uniform(-10, 10, size=(5, 7, 11)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (5, 7, 11), name="in_data")
-        tf.sign(in_data, name="sign")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "sign:0")
-
-
-def test_forward_square():
-    """test operator Square"""
-    np_data = np.random.uniform(1, 100, size=(2, 3, 5)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (2, 3, 5), name="in_data")
-        tf.square(in_data, name="square")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "square:0")
-
-
-def test_forward_pow_exp():
-    """test Pow and Exp"""
-    np_in1 = np.random.uniform(-2, 2, size=(5, 7, 11)).astype(np.float32)
-    np_in2 = np.random.uniform(-2, 2, size=(5, 7, 11)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(tf.float32, (5, 7, 11), name="in1")
-        in2 = tf.placeholder(tf.float32, (5, 7, 11), name="in2")
-        _ = tf.pow(in1, in2, name="pow")
-        _ = tf.exp(in1, name="exp")
-        compare_tf_with_tvm([np_in1, np_in2], ["in1:0", "in2:0"], "pow:0")
-        compare_tf_with_tvm([np_in1], ["in1:0"], "exp:0")
-
-
-def test_forward_unary():
-    """Unary"""
-
-    def _test_forward_unary(op, a_min=1, a_max=5, dtype=np.float32):
-        """test unary operators"""
-        np_data = np.random.uniform(a_min, a_max, size=(2, 3, 5)).astype(dtype)
-        tf.reset_default_graph()
-        with tf.Graph().as_default():
-            in_data = tf.placeholder(dtype, (2, 3, 5), name="in_data")
-            out = op(in_data)
-            compare_tf_with_tvm([np_data], ["in_data:0"], out.name)
-
-    _test_forward_unary(tf.acos, -1, 1)
-    _test_forward_unary(tf.asin, -1, 1)
-    _test_forward_unary(tf.atanh, -1, 1)
-    _test_forward_unary(tf.sinh)
-    _test_forward_unary(tf.cosh)
-    _test_forward_unary(tf.acosh)
-    _test_forward_unary(tf.asinh)
-    _test_forward_unary(tf.atan)
-    _test_forward_unary(tf.sin)
-    _test_forward_unary(tf.cos)
-    _test_forward_unary(tf.tan)
-    _test_forward_unary(tf.tanh)
-    _test_forward_unary(tf.erf)
-    _test_forward_unary(tf.log)
-    _test_forward_unary(tf.log1p)
-
-
-def test_forward_atan2():
-    """test operator tan"""
-    tf.disable_eager_execution()
-    np_data_1 = np.random.uniform(1, 100, size=(2, 3, 5)).astype(np.float32)
-    np_data_2 = np.random.uniform(1, 100, size=(2, 3, 5)).astype(np.float32)
-    tf.reset_default_graph()
-    in_data_1 = tf.placeholder(tf.float32, (2, 3, 5), name="in_data_1")
-    in_data_2 = tf.placeholder(tf.float32, (2, 3, 5), name="in_data_2")
-    tf.atan2(in_data_1, in_data_2, name="atan2")
-    compare_tf_with_tvm([np_data_1, np_data_2], ["in_data_1:0", "in_data_2:0"], "atan2:0")
-
-
-def test_forward_expm1():
-    """test operator expm1"""
-
-    def _test_forward_expm1(shape):
-        tf.disable_eager_execution()
-        np_data = np.random.uniform(1, 10, size=shape).astype(np.float32)
-        tf.reset_default_graph()
-        in_data = tf.placeholder(tf.float32, shape, name="in_data")
-        tf.expm1(in_data, name="expm1")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "expm1:0")
-
-    _test_forward_expm1([1, 100])
-    _test_forward_expm1([1, 10, 10])
-    _test_forward_expm1([2, 5, 2, 5])
-
-
-def test_forward_softsign():
-    """test operator softsign"""
-
-    def _test_forward_softsign(shape):
-        tf.disable_eager_execution()
-        np_data = np.random.uniform(1, 100, size=shape).astype(np.float32)
-        tf.reset_default_graph()
-        in_data = tf.placeholder(tf.float32, shape, name="in_data")
-        tf.nn.softsign(in_data, name="softsign")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "softsign:0")
-
-    _test_forward_softsign([1, 100])
-    _test_forward_softsign([1, 10, 10])
-    _test_forward_softsign([2, 5, 2, 5])
-
-
-def test_forward_rint():
-    """test operator rint"""
-
-    def _test_forward_rint(shape):
-        tf.disable_eager_execution()
-        np_data = np.random.uniform(-100, 100, size=shape).astype(np.float32)
-        tf.reset_default_graph()
-        in_data = tf.placeholder(tf.float32, shape, name="in_data")
-        tf.math.rint(in_data, name="rint")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "rint:0")
-
-    _test_forward_rint([100])
-    _test_forward_rint([1, 100])
-    _test_forward_rint([1, 10, 10])
-    _test_forward_rint([2, 5, 2, 5])
-
-
-def test_forward_negative():
-    """test tf operator Neg"""
-    np_data = np.random.uniform(-100, 255, size=(224, 224, 3)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (224, 224, 3), name="in_data")
-        tf.negative(in_data, name="negative")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "negative:0")
-
-
-def test_forward_log_softmax():
-    """test operator LogSoftmax"""
-    np_data = np.random.uniform(1, 100, size=(9, 11)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (9, 11), name="in_data")
-        tf.math.log_softmax(in_data, name="LogSoftmax")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "LogSoftmax:0")
-
-
-def test_forward_softplus():
-    """test operator Softplus"""
-    np_data = np.random.uniform(1, 10, size=(2, 3, 5)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (2, 3, 5), name="in_data")
-        tf.nn.softplus(in_data, name="softplus")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "softplus:0")
-
-
-def test_forward_rsqrt():
-    """test Rsqrt"""
-    np_data = np.random.uniform(1, 100, size=(5, 7, 11)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (5, 7, 11), name="in_data")
-        tf.rsqrt(in_data, name="rsqrt")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "rsqrt:0")
-
-
-def test_forward_sqrt():
-    """test Sqrt"""
-    np_data = np.random.uniform(1, 100, size=(5, 7, 11)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (5, 7, 11), name="in_data")
-        tf.sqrt(in_data, name="sqrt")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "sqrt:0")
-
-
-def _test_forward_right_shift(in_shape, dtype):
-    """test operator RightShift"""
-    lh_data = np.random.randint(1, 3, size=in_shape).astype(dtype)
-    rh_data = np.random.randint(1, 8, size=in_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        lft_data = tf.placeholder(dtype, in_shape, name="lft_data")
-        rgt_data = tf.placeholder(dtype, in_shape, name="rgt_data")
-        tf.bitwise.right_shift(lft_data, rgt_data, name="RightShift")
-        compare_tf_with_tvm([lh_data, rh_data], ["lft_data:0", "rgt_data:0"], "RightShift:0")
-
-
-def test_forward_right_shift():
-    _test_forward_right_shift((7,), "int32")
-    _test_forward_right_shift((3, 11), "int16")
-
-
-def _test_forward_left_shift(in_shape, dtype):
-    """test operator LeftShift"""
-    lh_data = np.random.randint(100, 1000000, size=in_shape).astype(dtype)
-    rh_data = np.random.randint(1, 3, size=in_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        lft_data = tf.placeholder(dtype, in_shape, name="lft_data")
-        rgt_data = tf.placeholder(dtype, in_shape, name="rgt_data")
-        tf.bitwise.left_shift(lft_data, rgt_data, name="LeftShift")
-        compare_tf_with_tvm([lh_data, rh_data], ["lft_data:0", "rgt_data:0"], "LeftShift:0")
-
-
-def test_forward_left_shift():
-    _test_forward_left_shift((10,), "int32")
-    _test_forward_left_shift((224, 224, 3), "int16")
-
-
-#######################################################################
-# Mean
-# ----
-
-
-def test_forward_mean():
-    """Mean"""
-
-    def check_mean(ishape, **kwargs):
-        inp_array = np.random.uniform(size=ishape).astype(np.float32)
-        with tf.Graph().as_default():
-            in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-            tf.keras.backend.mean(in1, **kwargs)
-            compare_tf_with_tvm(inp_array, "Placeholder:0", "Mean:0", no_gpu=True)
-
-    check_mean((10, 8, 16, 32))
-    check_mean((10, 8, 16, 32), axis=(2, 3))
-    check_mean((10, 8, 16, 32), axis=(1, 2), keepdims=True)
-
-
-#######################################################################
-# Size
-# ----
-
-
-def test_forward_size():
-    """Size"""
-
-    def check_size(ishape):
-        np_input = np.random.uniform(size=ishape).astype(np.float32)
-
-        # if all dimensions are constant, TF will optimize away size operator into constant
-        tf_input_shape = list(np_input.shape)
-        tf_input_shape[0] = None
-
-        with tf.Graph().as_default():
-            tf_input = tf.placeholder(shape=tf_input_shape, dtype=np_input.dtype, name="input")
-            tf.size(tf_input, name="size")
-            compare_tf_with_tvm([np_input], ["input:0"], "size:0")
-
-    check_size((10, 8, 16, 32))
-    check_size((10,))
-
-
-#######################################################################
-# All, Any, Max, Min, Prod, variance, std, logsumexp, euclidean_norm
-# ------------------------------------------------------------------
-
-
-def test_forward_reduce():
-    """Reduce"""
-
-    def _check_op(tf_op, ishape, axis, keepdims, dtype="float32"):
-        tf.reset_default_graph()
-        if dtype == "bool":
-            np_data = np.random.choice([True, False], size=ishape)
-        else:
-            np_data = np.random.uniform(size=ishape).astype(dtype)
-        if tf_op == tf.math.reduce_prod:
-            axis = 1
-            np_data = np_data.reshape(1, -1)
-        with tf.Graph().as_default():
-            in_data = tf.placeholder(dtype, name="in_data")
-            reduce_op = tf_op(in_data, axis=axis, keepdims=keepdims, name="reduce_std")
-            compare_tf_with_tvm([np_data], ["in_data:0"], reduce_op.name)
-
-    def _test_math_op(op, d_types=None):
-        d_types = d_types or ["int32", "float32"]
-        for dtype in d_types:
-            _check_op(op, (3, 10), axis=(-1), keepdims=False, dtype=dtype)
-            _check_op(op, (8, 16, 32), axis=(-1), keepdims=False, dtype=dtype)
-            _check_op(op, (1, 8, 8, 3), axis=(2, 3), keepdims=True, dtype=dtype)
-            _check_op(op, (2, 3, 10, 10), axis=(1, 2), keepdims=True, dtype=dtype)
-
-    _test_math_op(tf.math.reduce_all, d_types=["bool"])
-    _test_math_op(tf.math.reduce_any, d_types=["bool"])
-    _test_math_op(tf.math.reduce_max)
-    _test_math_op(tf.math.reduce_min)
-    _test_math_op(tf.math.reduce_prod)
-    _test_math_op(tf.math.reduce_variance, d_types=["float32"])
-    _test_math_op(tf.math.reduce_std, d_types=["float32"])
-    _test_math_op(tf.math.reduce_logsumexp, d_types=["float32"])
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"):
-        _test_math_op(tf.math.reduce_euclidean_norm)
-
-
-#######################################################################
-# All, Max, Min
-# ------------------------------------------------------------------
-
-
-def test_forward_raw_reduce():
-    """Raw reduce"""
-
-    def _check_op(tf_op, ishape, axis, keepdims, range_axis=False, dtype="float32"):
-        tf.reset_default_graph()
-        if dtype == "bool":
-            np_data = np.random.choice([True, False], size=ishape)
-        else:
-            np_data = np.random.uniform(size=ishape).astype(dtype)
-        if tf_op == tf.math.reduce_prod:
-            axis = 1
-            np_data = np_data.reshape(1, -1)
-        with tf.Graph().as_default():
-            if range_axis:
-                axis = tf.range(axis[0], axis[1], axis[2], name="range", dtype="int32")
-            in_data = tf.placeholder(dtype, name="in_data")
-            reduce_op = tf_op(input=in_data, axis=axis, keep_dims=keepdims, name="reduce_std")
-            compare_tf_with_tvm([np_data], ["in_data:0"], reduce_op.name)
-
-    def _test_raw_reduce_op(op, d_types=None):
-        d_types = d_types or ["int32", "float32"]
-        for dtype in d_types:
-            _check_op(op, (3, 10), axis=(-1), keepdims=False, dtype=dtype)
-            _check_op(op, (8, 16, 32), axis=(-1), keepdims=False, dtype=dtype)
-            _check_op(op, (1, 8, 8, 3), axis=(2, 3), keepdims=True, dtype=dtype)
-            _check_op(op, (2, 3, 10, 10), axis=(1, 2), keepdims=True, dtype=dtype)
-            _check_op(op, (1, 8, 8, 3), axis=(2, 4, 1), keepdims=True, range_axis=True, dtype=dtype)
-            _check_op(
-                op, (2, 3, 10, 10), axis=(1, 3, 1), keepdims=True, range_axis=True, dtype=dtype
-            )
-
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.4.1"):
-        _test_raw_reduce_op(tf.raw_ops.All, d_types=["bool"])
-        _test_raw_reduce_op(tf.raw_ops.Max)
-        _test_raw_reduce_op(tf.raw_ops.Min)
-
-
-#######################################################################
-# Relational operators
-# --------------------
-
-
-def _test_forward_rel_op(data, func):
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=data[0].shape, dtype=data[0].dtype, name="in1")
-        in2 = tf.placeholder(shape=data[1].shape, dtype=data[1].dtype, name="in2")
-        op = func(in1, in2, name="op")
-        _ = tf.cast(op, tf.int32, name="out1")
-        compare_tf_with_tvm([data[0], data[1]], ["in1:0", "in2:0"], "out1:0")
-
-
-def test_forward_rel_ops():
-    t1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
-    t2 = np.array([[9, 8, 7], [6, 5, 4], [3, 2, 1]])
-    _test_forward_rel_op([t1, t2], math_ops.less)
-    _test_forward_rel_op([t1, t2], math_ops.greater)
-    _test_forward_rel_op([t1, t2], math_ops.less_equal)
-    _test_forward_rel_op([t1, t2], math_ops.greater_equal)
-    _test_forward_rel_op([t1, t2], math_ops.equal)
-    _test_forward_rel_op([t1, t2], math_ops.not_equal)
-
-
-#######################################################################
-# ExpandDims
-# ----------
-
-
-def _test_forward_expand_dims(data, axis):
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=data.shape, dtype=data.dtype, name="in1")
-        out = tf.expand_dims(in1, axis)
-        compare_tf_with_tvm([data], [in1.name], out.name)
-
-
-def test_forward_expand_dims():
-    _test_forward_expand_dims(np.int32(1), 0)
-    _test_forward_expand_dims(np.array([1]), 0)
-    _test_forward_expand_dims(np.array([1]), -1)
-    _test_forward_expand_dims(np.array([[1], [2]]), 0)
-    _test_forward_expand_dims(np.array([[1], [2]]), 1)
-    _test_forward_expand_dims(np.array([[1], [2]]), -1)
-
-
-#######################################################################
-# Maximum, Minimum
-# ----------------
-def test_forward_maximum():
-    """test Op Maximum"""
-
-    def check_maximum(lh_shape, rh_shape, dtype):
-        tf.reset_default_graph()
-        lh_data = np.random.uniform(size=lh_shape).astype(dtype)
-        rh_data = np.random.uniform(size=rh_shape).astype(dtype)
-        with tf.Graph().as_default():
-            lft_data = tf.placeholder(dtype, name="lft_data")
-            rgt_data = tf.placeholder(dtype, name="rgt_data")
-            tf.math.maximum(lft_data, rgt_data, name="maximum")
-            compare_tf_with_tvm([lh_data, rh_data], ["lft_data:0", "rgt_data:0"], "maximum:0")
-
-    check_maximum((10, 8, 16, 32), (1,), dtype="int32")
-    check_maximum((10, 8, 16, 32), (10, 8, 16, 32), dtype="float32")
-
-
-def test_forward_minimum():
-    """test Op Minimum"""
-
-    def check_minimum(lh_shape, rh_shape, dtype):
-        tf.reset_default_graph()
-        lh_data = np.random.uniform(size=lh_shape).astype(dtype)
-        rh_data = np.random.uniform(size=rh_shape).astype(dtype)
-        with tf.Graph().as_default():
-            lft_data = tf.placeholder(dtype, name="lft_data")
-            rgt_data = tf.placeholder(dtype, name="rgt_data")
-            tf.math.minimum(lft_data, rgt_data, name="minimum")
-            compare_tf_with_tvm([lh_data, rh_data], ["lft_data:0", "rgt_data:0"], "minimum:0")
-
-    check_minimum((10, 8, 16, 32), (1,), dtype="int32")
-    check_minimum((10, 8, 16, 32), (10, 8, 16, 32), dtype="float32")
-
-
-#######################################################################
-# PlaceholderWithDefault
-# ----------------------
-def test_placeholder():
-    """Placeholder"""
-    with tf.Graph().as_default():
-        in_data1 = np.random.uniform(-5, 5, size=(3, 4, 5)).astype(np.float32)
-        var1 = tf.Variable(in_data1, name="in1")
-        var2 = array_ops.placeholder_with_default(var1, None, name="place1")
-
-        in_data2 = np.random.uniform(-5, 5, size=(3, 4, 5)).astype(np.float32)
-        place1 = array_ops.placeholder(shape=in_data1.shape, dtype=in_data1.dtype, name="in2")
-
-        out1 = tf.math.add(var1, var2, name="out1")
-        _ = tf.math.add(out1, place1, name="out2")
-
-        compare_tf_with_tvm(
-            [in_data1, in_data2], ["place1:0", "in2:0"], "out2:0", init_global_variables=True
-        )
-
-
-#######################################################################
-# OneHot
-# ----------------------
-
-
-def _test_forward_one_hot(indices_shape, depth, on_value, off_value, axis, out_dtype):
-    inp_array1 = np.random.randint(0, 5, size=indices_shape)
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array1.shape, dtype=inp_array1.dtype)
-        out = tf.one_hot(in1, depth, on_value, off_value, axis, dtype=out_dtype)
-        compare_tf_with_tvm(inp_array1, in1.name, out.name)
-
-
-def test_forward_one_hot():
-    _test_forward_one_hot((3,), 3, 1, 0, -1, "int32")
-    _test_forward_one_hot((3,), 3, 1.0, 0.0, -1, "float32")
-    _test_forward_one_hot((2, 2), 5, 2, -2, 0, "int32")
-    _test_forward_one_hot((2, 2), 5, 0.5, -0.5, 1, "float32")
-    _test_forward_one_hot((3, 2, 4, 5), 6, 1, 0, 1, "int32")
-    _test_forward_one_hot((3, 2, 4, 5), 6, 1.0, 0.0, 0, "float32")
-
-
-#######################################################################
-# AddN
-# ----------------------
-
-
-def _test_forward_add_n(inputs):
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        temp = []
-        for each in inputs:
-            temp.append(tf.placeholder(shape=each.shape, dtype=each.dtype))
-        output = tf.add_n(temp)
-        compare_tf_with_tvm(list(inputs), [each.name for each in temp], output.name)
-
-
-def test_forward_add_n():
-    """Add n"""
-    x = np.random.randint(1, 100, size=(3, 3, 3), dtype=np.int32)
-    y = np.random.randint(1, 100, size=(3, 3, 3), dtype=np.int32)
-    z = np.random.randint(1, 100, size=(3, 3, 3), dtype=np.int32)
-    m, n, o = x.astype(np.float32), y.astype(np.float32), z.astype(np.float32)
-    in0 = x
-    in1 = [x, y]
-    in2 = (x, y, z)
-    in3 = m
-    in4 = [m, n]
-    in5 = (m, n, o)
-    _test_forward_add_n(in0)
-    _test_forward_add_n(in1)
-    _test_forward_add_n(in2)
-    _test_forward_add_n(in3)
-    _test_forward_add_n(in4)
-    _test_forward_add_n(in5)
-
-
-#######################################################################
-# Sharing params case
-# ----------------------
-
-
-def test_sharing_node():
-    """Test the sharing params case."""
-    np_data = np.random.uniform(size=(2, 2, 2)).astype("float32")
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, shape=(2, 2, 2), name="in_data")
-        axis = tf.constant([-1], dtype=tf.int32, name="axis")
-        mean0 = tf.reduce_mean(in_data, axis=axis, keepdims=False, name="mean0")
-        mean1 = tf.reduce_mean(in_data, axis=axis, keepdims=False, name="mean1")
-        _ = tf.add(mean0, mean1, name="out")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "out:0")
-
-
-#######################################################################
-# Unravel Index
-# ----------------------
-def _test_forward_unravel_index(inputs):
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        temp = []
-        for each in inputs:
-            temp.append(tf.placeholder(shape=each.shape, dtype=each.dtype))
-        output = tf.unravel_index(temp[0], temp[1])
-        compare_tf_with_tvm(list(inputs), [each.name for each in temp], output.name)
-
-
-def _test_forward_unravel_index_scalar(x, y, dtype="int32"):
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        indices_1 = constant_op.constant(x, dtype=dtype)
-        dims_1 = constant_op.constant(y, dtype=dtype)
-        out_1 = array_ops.unravel_index(indices_1, dims_1)
-        compare_tf_with_tvm([], [], out_1.name)
-
-
-def test_forward_unravel_index():
-    """Unravel index"""
-    x = np.array([0, 1, 2, 3])
-    y = np.array([2, 2])
-    _test_forward_unravel_index([x, y])
-
-    x = np.array([0, 1, 2, 5])
-    y = np.array([2, 3])
-    _test_forward_unravel_index([x, y])
-
-    x = np.array([0, 1, 2, 5])
-    y = np.array([6])
-    _test_forward_unravel_index([x, y])
-
-    x = np.array([102, 300, 16])
-    y = np.array([10, 10, 9, 6])
-    _test_forward_unravel_index([x, y])
-
-    x = np.array([100])
-    y = np.array([10, 10, 9, 6])
-    _test_forward_unravel_index([x, y])
-
-    # Test scalar input
-    _test_forward_unravel_index_scalar(13, [1, 4, 5, 2])
-
-
-#######################################################################
-# Dilation2d
-# ----------------------
-def _test_dilation2d(tensor_in_sizes, filter_in_sizes, strides, dilations, padding):
-    """One iteration of dilation2d with given shapes and attributes"""
-
-    total_size_1 = np.prod(tensor_in_sizes)
-    total_size_2 = np.prod(filter_in_sizes)
-    # Initializes the input tensor with array containing incrementing
-    # numbers from 1.
-    data_array = [f * 1.0 for f in range(1, total_size_1 + 1)]
-    filter_array = [f * 1.0 for f in range(1, total_size_2 + 1)]
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32")
-        in_filter = constant_op.constant(filter_array, shape=filter_in_sizes, dtype="float32")
-
-        nn_ops.dilation2d(in_data, in_filter, strides=strides, rates=dilations, padding=padding)
-
-        compare_tf_with_tvm(
-            np.reshape(data_array, tensor_in_sizes).astype("float32"),
-            "Placeholder:0",
-            "Dilation2D:0",
-            no_gpu=True,
-        )
-
-
-def test_forward_dilation():
-    """Dilation2d"""
-    _test_dilation2d([1, 18, 18, 32], [4, 4, 32], [1, 1, 1, 1], [1, 2, 1, 1], "VALID")
-    _test_dilation2d([1, 15, 15, 32], [4, 4, 32], [1, 1, 1, 1], [1, 2, 1, 1], "SAME")
-    _test_dilation2d([1, 5, 5, 1], [2, 2, 1], [1, 1, 1, 1], [1, 1, 1, 1], "VALID")
-    _test_dilation2d([1, 5, 5, 1], [3, 3, 1], [1, 1, 1, 1], [1, 2, 2, 1], "VALID")
-    _test_dilation2d([1, 5, 5, 3], [3, 3, 3], [1, 1, 1, 1], [1, 1, 1, 1], "SAME")
-    _test_dilation2d([1, 28, 28, 3], [5, 5, 3], [1, 2, 2, 1], [1, 1, 1, 1], "VALID")
-    _test_dilation2d([1, 224, 224, 10], [8, 8, 10], [1, 1, 1, 1], [1, 1, 1, 1], "VALID")
-    _test_dilation2d([1, 18, 18, 32], [4, 4, 32], [1, 1, 1, 1], [1, 2, 1, 1], "SAME")
-    _test_dilation2d([1, 15, 15, 32], [4, 4, 32], [1, 1, 1, 1], [1, 2, 1, 1], "VALID")
-    _test_dilation2d([1, 5, 5, 1], [7, 2, 1], [1, 3, 1, 1], [1, 1, 1, 1], "SAME")
-    _test_dilation2d([1, 5, 5, 1], [3, 4, 1], [1, 2, 1, 1], [1, 2, 2, 1], "SAME")
-    _test_dilation2d([1, 5, 5, 3], [3, 3, 3], [1, 1, 4, 1], [1, 1, 1, 1], "VALID")
-    _test_dilation2d([1, 28, 28, 3], [5, 6, 3], [1, 1, 2, 1], [1, 1, 1, 1], "SAME")
-    _test_dilation2d([1, 224, 224, 10], [8, 8, 10], [1, 3, 1, 1], [1, 1, 1, 1], "SAME")
-    _test_dilation2d([1, 3, 3, 1], [2, 2, 1], [1, 1, 1, 1], [1, 2, 2, 1], "SAME")
-    _test_dilation2d([1, 3, 3, 1], [2, 2, 1], [1, 1, 1, 1], [1, 1, 2, 1], "VALID")
-
-
-def _test_identityn(data_np_list):
-    with tf.Graph().as_default():
-        data_tensors = []
-        data_tensors_name = []
-        for index, data_np in enumerate(data_np_list):
-            tensor_name = f"data_{index}"
-            data_tensors_name.append(tensor_name + ":0")
-            data_tensors.append(
-                tf.placeholder(shape=data_np.shape, dtype=str(data_np.dtype), name=tensor_name)
-            )
-
-        output = tf.identity_n(data_tensors)
-        output_names = [out.name for out in output]
-        compare_tf_with_tvm(
-            data_np_list,
-            data_tensors_name,
-            output_names,
-        )
-
-
-@pytest.mark.parametrize(
-    "data_np_list",
-    [
-        (
-            [
-                np.array([[1, 1], [0, 3], [0, 1], [2, 0], [3, 1]], dtype=np.int64),
-                np.array([1, 2, 3, 4, 5], dtype=np.int64),
-                np.array([5, 6], dtype=np.int64),
-            ]
-        ),
-        (
-            [
-                np.array([[1, 1], [0, 3], [2, 0], [3, 1]], dtype=np.int64),
-                np.array([1, 2, 3, 4], dtype=np.int64),
-                np.array([5, 6], dtype=np.int64),
-                np.array([True, False, True]),
-            ]
-        ),
-        (
-            [
-                np.array([]),
-                np.array([[]]),
-            ]
-        ),
-    ],
-)
-def test_forward_identityn(data_np_list):
-    """Identityn"""
-    _test_identityn(data_np_list)
-
-
-#######################################################################
-# infinity ops
-# ------------
-def _verify_infiniteness_ops(tf_op, name):
-    """test operator infinity ops"""
-
-    # Only float types are allowed in Tensorflow for isfinite and isinf
-    # float16 is failing on cuda
-    tf_dtypes = ["float32", "float64"]  # pylint: disable=redefined-outer-name
-    for tf_dtype in tf_dtypes:
-        shape = (8, 8)
-        data = np.random.uniform(size=shape).astype(tf_dtype)
-        data.ravel()[np.random.choice(data.size, int(data.size * 0.5), replace=False)] = np.inf
-        data.ravel()[np.random.choice(data.size, int(data.size * 0.5), replace=False)] = np.nan
-
-        tf.reset_default_graph()
-        in_data = tf.placeholder(tf_dtype, shape, name="in_data")
-        tf_op(in_data, name=name)
-        compare_tf_with_tvm([data], ["in_data:0"], f"{name}:0")
-
-
-def test_forward_isinf():
-    _verify_infiniteness_ops(tf.is_inf, "isinf")
-
-
-def test_forward_isfinite():
-    _verify_infiniteness_ops(tf.is_finite, "isfinite")
-
-
-def test_forward_isnan():
-    _verify_infiniteness_ops(tf.is_nan, "isnan")
-
-
-def _test_spop_placeholder_without_shape_info():
-    with tf.Graph().as_default():
-
-        @function.Defun(*[tf.int32] * 2)
-        def Forward(x, y):
-            print(x.name)
-            print(y.name)
-            b = tf.add(x, y)
-            return b
-
-        pl1 = tf.placeholder(tf.int32, name="pl1")
-        pl2 = tf.placeholder(tf.int32, name="pl2")
-        pl3 = tf.placeholder(tf.int32, name="pl3")
-        data = np.array([[-1, 1], [2, -2]], dtype=np.int32)
-        data2 = np.array([[-2, 3], [4, -6]], dtype=np.int32)
-        data3 = np.array([[-2, 3], [4, -6]], dtype=np.int32)
-        z1 = gen_functional_ops.StatefulPartitionedCall(args=[pl1, pl2], Tout=[tf.int32], f=Forward)
-        z2 = z1 + pl3
-        compare_tf_with_tvm(
-            [data, data2, data3],
-            ["pl1:0", "pl2:0", "pl3:0"],
-            ["StatefulPartitionedCall:0", z2.name],
-            mode="vm",
-            init_global_variables=True,
-        )
-
-
-def _test_spop_placeholder_with_shape_and_default_value():
-    with tf.Graph().as_default():
-        data = np.ones([1], dtype=int).astype(np.int32)
-        dataVar = tf.Variable(data, shape=data.shape)
-        pl1 = array_ops.placeholder_with_default(dataVar, shape=data.shape, name="pl1")
-        tpl = tf.convert_to_tensor(pl1, dtype=tf.int32)
-
-        @function.Defun(*[tf.int32])
-        def pl_with_default(pl):
-            return tf.expand_dims(tf.multiply(pl, pl), 0)
-
-        _ = gen_functional_ops.StatefulPartitionedCall(
-            args=[tpl], Tout=[tf.int32], f=pl_with_default
-        )
-        compare_tf_with_tvm(
-            data, ["pl1:0"], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True
-        )
-
-
-def _test_spop_placeholder_numpy_arange_feed():
-    with tf.Graph().as_default():
-        t1 = tf.placeholder(tf.int32, (3, 3, 3), "t1")
-        t1_data = np.arange(27, dtype=np.int32).reshape((3, 3, 3))
-        t2 = tf.placeholder(tf.int32, (3, 3, 3), "t2")
-        t2_data = np.arange(27, dtype=np.int32).reshape((3, 3, 3))
-
-        @tf.function
-        def add(x, y):
-            return tf.add(x, y, "add_t1_t2")
-
-        t3 = add(t1, t2)
-        compare_tf_with_tvm(
-            [t1_data, t2_data], ["t1:0", "t2:0"], [t3.name], mode="vm", init_global_variables=True
-        )
-
-
-def _test_spop_placeholder_numpy_array_feed():
-    with tf.Graph().as_default():
-        t1_data = np.array([[-1, 1, 3], [2, -2, 4], [2, -3, 14]], dtype=np.int32)
-        t2_data = np.array([[-2, 1, 2], [12, -2, 14], [12, -3, 4]], dtype=np.int32)
-        t1 = tf.placeholder(tf.int32, name="t1")
-        t2 = tf.placeholder(tf.int32, name="t2")
-
-        @tf.function
-        def add(x, y):
-            return tf.add(x, y, "add_t1_t2")
-
-        t3 = add(t1, t2)
-        compare_tf_with_tvm(
-            [t1_data, t2_data], ["t1:0", "t2:0"], [t3.name], mode="vm", init_global_variables=True
-        )
-
-
-def _test_spop_function_invocation_basic():
-    with tf.Graph().as_default():
-
-        def fun1(a):
-            return tf.multiply(a, a)
-
-        def fun2(b):
-            return tf.multiply(b, 10)
-
-        @tf.function
-        def fun3(x, y):
-            x = fun2(x)
-            y = fun1(y)
-            z = tf.add(x, y)
-            return z
-
-        t3 = fun3(tf.constant(10.5), tf.constant(20.4))
-
-        compare_tf_with_tvm([], [], [t3.name], mode="vm", init_global_variables=True)
-
-
-def _test_spop_function_invocation_nested():
-    with tf.Graph().as_default():
-        t1 = tf.placeholder(tf.int32, (3, 3, 3), name="t1")
-        t1_data = np.arange(27, dtype=np.int32).reshape((3, 3, 3))
-        t2 = tf.placeholder(tf.int32, name="t2")
-        t2_data = np.arange(27, dtype=np.int32).reshape((3, 3, 3))
-
-        @tf.function
-        def myfunc(x, y):
-            return tf.add(x, y, "myfunc")
-
-        @tf.function
-        def myfunc2(x, y):
-            z = myfunc(x, y)
-            l = myfunc(z, y)
-            m = myfunc(l, z)
-            return tf.add(l, m, "myfunc2")
-
-        res1 = myfunc(t1, t2)
-        res2 = myfunc2(res1, t1)
-
-        compare_tf_with_tvm(
-            [t1_data, t2_data], ["t1:0", "t2:0"], [res2.name], mode="vm", init_global_variables=True
-        )
-
-
-def _test_spop_function_invocation_no_autograph():
-    with tf.Graph().as_default():
-
-        @tf.function(autograph=False)
-        def fun1(a):
-            return tf.multiply(a, a)
-
-        @tf.function(autograph=False)
-        def fun2(b):
-            return tf.multiply(b, 10)
-
-        @tf.function
-        def fun3(x, y):
-            x = fun2(x)
-            y = fun1(y)
-            z = tf.add(x, y)
-            return z
-
-        t3 = fun3(tf.constant(10.5), tf.constant(20.4))
-
-        compare_tf_with_tvm([], [], [t3.name], mode="vm", init_global_variables=True)
-
-
-def _test_spop_function_invocation_defun():
-    with tf.Graph().as_default():
-
-        def fun1(a):
-            return tf.multiply(a, a)
-
-        def fun2(b):
-            return tf.multiply(b, b)
-
-        @function.Defun(dtypes.float32, dtypes.float32, func_name="Fun3")
-        def fun3(x, y):
-            x = fun2(x)
-            y = fun1(y)
-            z = tf.add(x, y)
-            return z
-
-        _ = gen_functional_ops.StatefulPartitionedCall(
-            args=[tf.constant(10.5), tf.constant(20.4)],
-            Tout=[dtypes.float32],
-            f=fun3,
-            name="SpopFnInvocation",
-        )
-        compare_tf_with_tvm([], [], "SpopFnInvocation:0", mode="vm", init_global_variables=True)
-
-
-def _test_spop_arithmetic():
-    with tf.Graph().as_default():
-
-        @function.Defun(*[dtypes.int32] * 3)
-        def arithmetic(m, x, c):
-            z = tf.add(tf.multiply(m, x), c)
-            return z
-
-        m = tf.constant(10)
-        x = tf.constant(20)
-        c = tf.constant(2)
-        _ = gen_functional_ops.StatefulPartitionedCall(
-            args=[m, x, c], Tout=[tf.int32], f=arithmetic
-        )
-
-        compare_tf_with_tvm(
-            [], [], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True
-        )
-
-
-def _test_spop_control_flow():
-    with tf.Graph().as_default():
-
-        @function.Defun(*[dtypes.float32] * 2)
-        def Body1(x, y):
-            with ops.device("/job:localhost/replica:0/task:0/device:CPU:0"):
-                z = math_ops.multiply(x, y)
-                i = 0
-                while i < 10:
-                    i += 1
-                    if i == 5:
-                        continue
-                    z = math_ops.multiply(x, y * i)
-            return z
-
-        _ = gen_functional_ops.StatefulPartitionedCall(
-            args=[constant_op.constant(32.0), constant_op.constant(100.0)],
-            Tout=[dtypes.float32],
-            f=Body1,
-        )
-        compare_tf_with_tvm(
-            [], [], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True
-        )
-
-
-def _test_spop_variables():
-    with tf.Graph().as_default():
-        const1 = tf.constant(10)
-        const2 = tf.constant(20)
-        var1 = tf.Variable(const1, dtype=tf.int32)
-        var2 = tf.Variable(const2, dtype=tf.int32)
-
-        @function.Defun(tf.int32, tf.int32)
-        def Forward(x, y):
-            return tf.multiply(x, y)
-
-        _ = gen_functional_ops.StatefulPartitionedCall(
-            args=[var1, var2], Tout=[tf.int32], f=Forward
-        )
-        compare_tf_with_tvm(
-            [], [], "StatefulPartitionedCall:0", init_global_variables=True, mode="vm"
-        )
-
-
-def _test_spop_constants():
-    with tf.Graph().as_default():
-
-        @function.Defun(*[dtypes.int32] * 2)
-        def constantsFn(x, y):
-            vv = tf.constant([2, 3, 4], name="vv")
-            z = tf.add(vv + x, y)
-            return z
-
-        a = tf.constant(20000, name="a")
-        b = tf.constant(40000, name="b")
-        _ = gen_functional_ops.StatefulPartitionedCall(args=[a, b], Tout=[tf.int32], f=constantsFn)
-
-        compare_tf_with_tvm(
-            [], [], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True
-        )
-
-
-def _test_spop_stateful():
-    # This test case is to test that TVM rejects any TF stateful operations
-    # (including Resource Variables) except StatefulPartitionedCall/PartitionedCall
-    # (as these two operators can still be used as container graphs to execute
-    # "stateless" operations internally.
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-
-        @tf.function
-        def FunctionWithStatefulOp_One(i):
-            b = tf.random.uniform(shape=[2, 4], maxval=10, dtype=tf.float32, seed=10)
-            y = tf.multiply(b, i)
-            return y
-
-        @tf.function
-        def FunctionWithStatefulOp(m, n):
-            a = tf.random.uniform(shape=[2, 4], maxval=10, dtype=tf.float32, seed=10)
-            x = tf.multiply(a, m)
-            y = FunctionWithStatefulOp_One(n)
-            z = tf.multiply(x, y)
-            return z
-
-        op = FunctionWithStatefulOp(constant_op.constant(1.0), constant_op.constant(2.0))
-        with pytest.raises(Exception) as execinfo:
-            compare_tf_with_tvm([], [], [op.name], init_global_variables=True, mode="vm")
-        assert execinfo.value.args[0].startswith("The following operators are not implemented")
-
-
-def _test_spop_device_assignment():
-    # This test case is to test that TVM rejects inconsistent device assignment
-    # while using StatefulPartitionedCall/PartitionedCall operators which in case of TVM will
-    # be used as container graphs to internally execute "stateless" operations.
-
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-
-        def fun1(a):
-            with ops.device("/GPU:0"):
-                return tf.multiply(a, a)
-
-        def fun2(b):
-            with ops.device("/job:localhost/replica:0/task:0/device:CPU:1"):
-                return tf.multiply(b, b)
-
-        @function.Defun(dtypes.float32, dtypes.float32, func_name="Fun3")
-        def fun3(x, y):
-            with ops.device("/CPU:0"):
-                x = fun2(x)
-            with ops.device("/job:localhost/replica:0/task:0/device:CPU:2"):
-                y = fun1(y)
-            with ops.device("/job:localhost/replica:0/task:0/device:CPU:3"):
-                z = tf.add(x, y)
-                return z
-
-        _ = gen_functional_ops.StatefulPartitionedCall(
-            args=[tf.constant(10.5), tf.constant(20.4)], Tout=[dtypes.float32], f=fun3
-        )
-        with pytest.raises(Exception) as execinfo:
-            compare_tf_with_tvm(
-                [], [], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True
-            )
-        assert execinfo.value.args[0].startswith("Found inconsistent Device assignment")
-
-
-def _test_spop_resource_variables():
-    # This test case is to test that TVM rejects any graph containing
-    # resource variables with StatefulPartitionedOp.
-
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-
-        const1 = tf.constant(10)
-        const2 = tf.constant(20)
-        var1 = tf.Variable(const1, dtype=tf.int32, use_resource=True)
-        var2 = tf.Variable(const2, dtype=tf.int32, use_resource=True)
-
-        @tf.function
-        def resourceVariablesTest(x, y):
-            return tf.multiply(x, y)
-
-        _ = resourceVariablesTest(var1, var2)
-        with pytest.raises(Exception) as execinfo:
-            compare_tf_with_tvm(
-                [], [], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True
-            )
-        # pylint: disable=implicit-str-concat
-        assert execinfo.value.args[0].startswith("Graph is not frozen." " Provide a frozen graph")
-
-
-def test_forward_spop():
-    """Spop"""
-    _test_spop_stateful()
-    _test_spop_device_assignment()
-    # tensorflow version upgrade support
-    # This test is expected to fail in TF version >= 2.6
-    # as the generated graph will be considered frozen, hence
-    # not passing the criteria for the test below.
-    if package_version.parse(tf.__version__) < package_version.parse("2.6.1"):
-        _test_spop_resource_variables()
-
-    # Placeholder test cases
-    _test_spop_placeholder_without_shape_info()
-    _test_spop_placeholder_with_shape_and_default_value()
-    _test_spop_placeholder_numpy_arange_feed()
-    _test_spop_placeholder_numpy_array_feed()
-
-    # Function Invocation test cases
-    _test_spop_function_invocation_basic()
-    _test_spop_function_invocation_nested()
-    _test_spop_function_invocation_no_autograph()
-    _test_spop_function_invocation_defun()
-
-    # Test cases for various other TF constructs
-    _test_spop_arithmetic()
-    _test_spop_control_flow()
-    _test_spop_variables()
-    _test_spop_constants()
-
-
-#######################################################################
-# Dynamic input shape
-# -------------------
-def test_forward_dynamic_input_shape():
-    """Dynamic input shape"""
-    tf.reset_default_graph()
-
-    with tf.Graph().as_default():
-        data = tf.placeholder(tf.float32, name="data", shape=(None,))
-        _ = data + 1
-        np_data = np.random.uniform(size=(2,)).astype("float32")
-        out_name = "add"
-
-        with tf.Session() as sess:
-            graph_def = tf_testing.AddShapesToGraphDef(sess, out_name)
-            tf_output = run_tf_graph(sess, np_data, "data:0", [f"{out_name}:0"])
-            # TODO(kevinthesun): enable gpu test when VM heterogeneous execution is ready.
-            for device in ["llvm"]:
-                _ = tvm.device(device, 0)
-                if not tvm.testing.device_enabled(device):
-                    print(f"Skip because {device} is not enabled")
-                    continue
-                tvm_output = run_tvm_graph(
-                    graph_def,
-                    np_data,
-                    ["data"],
-                    1,
-                    target=device,
-                    layout="NCHW",
-                    out_names=[out_name],
-                    mode="vm",
-                    ignore_in_shape=True,
-                )
-                tvm.testing.assert_allclose(tvm_output[0], tf_output[0], rtol=1e-5, atol=1e-5)
-
-
-def test_forward_dynmaic_rnn_lstmblockcell():
-    """Dynmaic rnn lstmblockcell"""
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.0.0"):
-        return
-
-    total_series_length = 50000
-    truncated_backprop_length = 15
-    state_size = 4
-    echo_step = 3
-    batch_size = 5
-    num_layers = 5
-
-    def generateData():
-        x = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]))
-        y = np.roll(x, echo_step)
-        y[0:echo_step] = 0
-
-        x = x.reshape((batch_size, -1))  # The first index changing slowest, subseries as rows
-        y = y.reshape((batch_size, -1))
-
-        return (x, y)
-
-    batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
-
-    init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])
-
-    state_per_layer_list = tf.unstack(init_state, axis=0)
-    rnn_tuple_state = tuple(
-        list(
-            tf.nn.rnn_cell.LSTMStateTuple(
-                state_per_layer_list[idx][0], state_per_layer_list[idx][1]
-            )
-            for idx in range(num_layers)
-        )
-    )
-
-    # Forward passes
-    def lstm_cell():
-        return tensorflow.contrib.rnn.LSTMBlockCell(state_size)
-
-    cell = tf.nn.rnn_cell.MultiRNNCell(
-        [lstm_cell() for _ in range(num_layers)], state_is_tuple=True
-    )
-    states_series, current_state = tf.nn.dynamic_rnn(
-        cell, tf.expand_dims(batchX_placeholder, -1), initial_state=rnn_tuple_state
-    )
-
-    with tf.Session() as sess:
-        sess.run(tf.global_variables_initializer())
-        x, _ = generateData()
-        _current_state = np.zeros((num_layers, 2, batch_size, state_size))
-
-        start_idx = 0
-        end_idx = start_idx + truncated_backprop_length
-
-        batchX = x[:, start_idx:end_idx]
-
-        # Save current state for TVM
-        current_state_tvm = _current_state
-
-        _current_state, _states_series = sess.run(
-            [current_state, states_series],
-            feed_dict={batchX_placeholder: batchX, init_state: _current_state},
-        )
-
-        # Organize results and corresponding names
-        tf_output = [_states_series]
-
-        for c in _current_state:
-            tf_output.append(c.c)
-            tf_output.append(c.h)
-
-        name = [states_series.name.split(":")[0]]
-
-        for t in current_state:
-            name.append(t.c.name.split(":")[0])
-            name.append(t.h.name.split(":")[0])
-
-        graph_def = sess.graph.as_graph_def(add_shapes=True)
-
-        final_graph_def = graph_util.convert_variables_to_constants(sess, graph_def, name)
-
-        _ = run_tvm_graph(
-            final_graph_def,
-            [batchX.astype("float32"), current_state_tvm.astype("float32")],
-            ["Placeholder", "Placeholder_1"],
-            out_names=name,
-            num_output=len(name),
-            mode="vm",
-            disabled_pass=["FoldScaleAxis"],
-        )
-
-        # Compare result
-        for _, tf_out in enumerate(tf_output):
-            tvm.testing.assert_allclose(tf_out, tf_out, atol=1e-5, rtol=1e-5)
-
-
-#######################################################################
-# Unique
-# ------------
-
-
-def _test_unique(n, dtype, is_dyn):
-    tf.reset_default_graph()
-    np_data = np.random.randint(100, size=n).astype(dtype)
-    with tf.Graph().as_default():
-        if is_dyn:
-            in_data = tf.placeholder(dtype, [n], name="in_data")
-        else:
-            in_data = tf.constant(np_data, dtype, name="in_data")
-        tf.unique(in_data)
-        if is_dyn:
-            compare_tf_with_tvm(np_data, "in_data:0", ["Unique:0", "Unique:1"], mode="vm")
-        else:
-            compare_tf_with_tvm(np_data, "", ["Unique:0", "Unique:1"], mode="vm")
-
-
-def test_forward_unique():
-    """test Unique"""
-
-    for dtype in ["int32", "int64"]:
-        for is_dyn in [False, True]:
-            _test_unique(50, dtype, is_dyn)
-            _test_unique(100, dtype, is_dyn)
-
-
-#######################################################################
-# Unique with counts
-# ------------
-
-
-def _test_unique_with_counts(n, dtype, is_dyn):
-    tf.reset_default_graph()
-    np_data = np.random.randint(100, size=n).astype(dtype)
-    with tf.Graph().as_default():
-        if is_dyn:
-            in_data = tf.placeholder(dtype, [n], name="in_data")
-        else:
-            in_data = tf.constant(np_data, dtype, name="in_data")
-        tf.unique_with_counts(in_data)
-        if is_dyn:
-            compare_tf_with_tvm(
-                np_data,
-                "in_data:0",
-                ["UniqueWithCounts:0", "UniqueWithCounts:1", "UniqueWithCounts:2"],
-                mode="vm",
-            )
-        else:
-            compare_tf_with_tvm(
-                np_data,
-                "",
-                ["UniqueWithCounts:0", "UniqueWithCounts:1", "UniqueWithCounts:2"],
-                mode="vm",
-            )
-
-
-def test_forward_unique_with_counts():
-    """test UniqueWithCounts"""
-
-    for dtype in ["int32", "int64"]:
-        for is_dyn in [False, True]:
-            _test_unique_with_counts(10, dtype, is_dyn)
-            _test_unique_with_counts(20, dtype, is_dyn)
-
-
-#######################################################################
-# check graph ir for nn.moments
-# ------------
-
-
-def test_moments():
-    """NN.moments"""
-    g = tf.Graph()
-    shape = [4, 176, 8, 8]
-    dtype = "float32"
-    with g.as_default():
-        A = tf.placeholder(shape=shape, dtype=dtype, name="A")
-        _ = tf.placeholder(shape=shape, dtype=dtype, name="B")
-        mean, variance = tf.nn.moments(A, [1], keep_dims=True)
-        _ = (A - mean) / tf.sqrt(variance + 0.0005)
-
-    with tvm.testing.disable_span_filling():
-        mod, _ = from_tensorflow(g.as_graph_def(add_shapes=True))
-    with tvm.testing.enable_span_filling():
-        mod_with_span, _ = from_tensorflow(g.as_graph_def(add_shapes=True))
-    tvm.ir.assert_structural_equal(mod["main"], mod_with_span["main"], map_free_vars=True)
-
-    program = """
-    def @main(%A: Tensor[(4, 176, 8, 8), float32]) {
-        %527 = mean(%A, axis=[1], keepdims=True) /* moments/mean */;
-        %528 = subtract(%A, %527) /* sub */;
-        %529 = subtract(%A, %527);
-        %530 = multiply(%529, %529) /* moments/SquaredDifference */;
-        %531 = mean(%530, axis=[1], keepdims=True) /* moments/variance */;
-        %532 = add(%531, 0.0005f) /* add */;
-        %533 = sqrt(%532) /* Sqrt */;
-        divide(%528, %533) /* truediv */
-    }
-    """
-    mod_golden = tvm.relay.parse('#[version = "0.0.5"]\n' + program)
-    tvm.ir.assert_structural_equal(mod["main"].body, mod_golden["main"].body, map_free_vars=True)
-
-
-#######################################################################
-# invert_permutation
-# --------------------
-
-
-def test_invert_permutation():
-    """test InvertPermutation"""
-    tf.reset_default_graph()
-
-    input_shape = [6]
-    x = np.array([3, 4, 0, 2, 1, 5]).astype("int32")
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(shape=input_shape, dtype="int32")
-        tf.invert_permutation(in_data)
-        out_name = "InvertPermutation:0"
-        compare_tf_with_tvm(x, "Placeholder:0", out_name, no_gpu=False)
-
-
-#######################################################################
-# Bincount
-# ----
-
-
-def _test_bincount(in_shape, size, weights):
-    with tf.Graph().as_default():
-        inputs = []
-        data = []
-        inputs.append(tf.placeholder(shape=in_shape, dtype="int32", name="input0"))
-        data.append(np.random.uniform(0, size, size=in_shape).astype("int32"))
-        inputs.append(tf.placeholder(shape=(), dtype="int32", name="size"))
-        data.append(np.array(size, "int32"))
-        if weights:
-            inputs.append(tf.placeholder(shape=in_shape, dtype="float32", name="weights"))
-            data.append(np.reshape(weights, in_shape).astype("float32"))
-        else:
-            inputs.append(tf.placeholder(shape=(0,), dtype="float32", name="weights"))
-            data.append(np.array([], "float32"))
-        result = tf.raw_ops.Bincount(arr=data[0], size=data[1], weights=data[2])
-        compare_tf_with_tvm(data, [a.name for a in inputs], result.name, mode="vm")
-
-
-def test_forward_bincount():
-    """Test Bincount Op"""
-    # 2D input
-    _test_bincount((3, 10), 20, [1.0] * 30)
-    _test_bincount((3, 10), 20, [1.5] * 30)
-    _test_bincount((3, 10), 20, None)
-    # 1D input
-    _test_bincount((10,), 20, [1.0] * 10)
-    _test_bincount((10,), 20, [1.5] * 10)
-    _test_bincount((10,), 20, None)
-
-
-#######################################################################
-# DenseBincount
-# ----
-
-
-def _test_dense_bincount(in_shape, size, weights, binary_output):
-    with tf.Graph().as_default():
-        inputs = []
-        data = []
-        inputs.append(tf.placeholder(shape=in_shape, dtype="int32", name="input0"))
-        data.append(np.random.uniform(0, size, size=in_shape).astype("int32"))
-        inputs.append(tf.placeholder(shape=(), dtype="int32", name="size"))
-        data.append(np.array(size, "int32"))
-        if weights:
-            inputs.append(tf.placeholder(shape=in_shape, dtype="float32", name="weights"))
-            data.append(np.reshape(weights, in_shape).astype("float32"))
-        else:
-            inputs.append(tf.placeholder(shape=(0,), dtype="float32", name="weights"))
-            data.append(np.array([], "float32"))
-        result = tf.raw_ops.DenseBincount(
-            input=data[0],
-            size=data[1],
-            weights=data[2],
-            binary_output=binary_output,
-        )
-        compare_tf_with_tvm(data, [a.name for a in inputs], result.name, mode="vm")
-
-
-def test_forward_dense_bincount():
-    """Test DenseBincount Op"""
-    for binary_output in [False, True]:
-        # 2D input
-        _test_dense_bincount((3, 10), 20, [1.0] * 30, binary_output)
-        _test_dense_bincount((3, 10), 20, [1.5] * 30, binary_output)
-        _test_dense_bincount((3, 10), 20, None, binary_output)
-        # 1D input
-        _test_dense_bincount((10,), 20, [1.0] * 10, binary_output)
-        _test_dense_bincount((10,), 20, [1.5] * 10, binary_output)
-        _test_dense_bincount((10,), 20, None, binary_output)
-
-
-#######################################################################
-# Test structural_equal and span of a model
-# --------------------------------------
-class TestSetSpan:
-    """Test Structure and span of frequently-used models"""
-
-    def _verify(self, res_fptr, golden_fptr):
-        with tvm.testing.enable_span_filling():
-            with_span = res_fptr()
-        with tvm.testing.disable_span_filling():
-            without_span = res_fptr()
-        tvm.ir.assert_structural_equal(with_span, without_span)
-        _verify_structural_equal_with_span(with_span, golden_fptr())
-
-    def test_conv2d_bias_add_span(self):
-        """Test Structure and span of conv2d and bias add model match to the expected result"""
-
-        def _res():
-            in_shape = (1, 5, 5, 1)
-            kernel_shpae = (2, 2, 1, 2)
-            kernel_in = np.ones(kernel_shpae)
-            bias_val_shape = tuple([2])
-            bias_val_in = np.ones(bias_val_shape)
-
-            with tf.Graph().as_default() as g:
-                x = array_ops.placeholder(shape=in_shape, dtype="float32", name="input")
-                kernel = tf.constant(kernel_in, dtype=tf.float32, name="filter_weight")
-                bias_val_tensor = tf.constant(bias_val_in, dtype=tf.float32, name="conv2d_bias")
-                conv2d = tf.nn.conv2d(
-                    x, kernel, strides=[1, 1, 1, 1], padding="VALID", name="conv2d"
-                )
-                _ = tf.nn.bias_add(conv2d, bias_val_tensor, name="bias_add")
-
-                mod, _ = relay.frontend.from_tensorflow(
-                    g.as_graph_def(), shape={"input": in_shape}, outputs=["bias_add"]
-                )
-                return mod["main"]
-
-        def _golden():
-            model_in = relay.var(
-                "input", relay.TensorType([1, 5, 5, 1]), span=_create_span("input")
-            )
-            weight = relay.var(
-                "filter_weight", relay.TensorType([2, 2, 1, 2]), span=_create_span("filter_weight")
-            )
-            bias = relay.var("conv2d_bias", relay.TensorType([2]), span=_create_span("conv2d_bias"))
-            conv2d = _set_span(
-                relay.nn.conv2d(
-                    model_in,
-                    weight,
-                    channels=2,
-                    kernel_size=[2, 2],
-                    data_layout="NHWC",
-                    kernel_layout="HWIO",
-                ),
-                "conv2d",
-            )
-            add = _set_span(relay.op.add(conv2d, bias), "bias_add")
-            mod = ir.IRModule.from_expr(add)
-            return mod["main"]
-
-        self._verify(_res, _golden)
-
-    def test_fully_connected_bias_add_span(self):
-        """Test Structure and span of fully connected model match to the expected result"""
-
-        def _res():
-            in_shape = (1, 10)
-            kernel_shpae = (10, 10)
-            kernel_in = np.ones(kernel_shpae)
-            bias_val_shape = tuple([10])
-            bias_val_in = np.ones(bias_val_shape)
-
-            with tf.Graph().as_default() as g:
-                x = array_ops.placeholder(shape=in_shape, dtype="float32", name="input")
-                in_filter = tf.constant(kernel_in, dtype=tf.float32, name="filter_weight")
-                bias_val_tensor = tf.constant(bias_val_in, dtype=tf.float32, name="dense_bias")
-                mat_mul = math_ops.mat_mul(x, in_filter, name="dense")
-                _ = tf.nn.bias_add(mat_mul, bias_val_tensor, name="bias_add")
-
-                mod, _ = relay.frontend.from_tensorflow(
-                    g.as_graph_def(),
-                    shape={"input": in_shape},
-                    outputs=["bias_add"],
-                    convert_config={"use_dense": True},
-                )
-                return mod["main"]
-
-        def _golden():
-            model_in = relay.var("input", relay.TensorType([1, 10]), span=_create_span("input"))
-            weight = relay.var(
-                "filter_weight", relay.TensorType([10, 10]), span=_create_span("filter_weight")
-            )
-            bias = relay.var("dense_bias", relay.TensorType([10]), span=_create_span("dense_bias"))
-            transpose = _set_span(relay.transpose(weight, [1, 0]), "dense")
-            dense = _set_span(relay.nn.dense(model_in, transpose, units=10), "dense")
-            add = _set_span(relay.op.add(dense, bias), "bias_add")
-            mod = ir.IRModule.from_expr(add)
-            return mod["main"]
-
-        self._verify(_res, _golden)
-
-    def test_reshape_span(self):
-        """Test Structure and span of reshape model match to the expected result"""
-
-        def _res():
-            in_shape = (1, 10)
-            output_shape = (2, 5)
-
-            with tf.Graph().as_default() as g:
-                x = array_ops.placeholder(shape=in_shape, dtype="float32", name="input")
-                _ = array_ops.reshape(x, output_shape, "reshape")
-
-                mod, _ = relay.frontend.from_tensorflow(
-                    g.as_graph_def(), shape={"input": in_shape}, outputs=["reshape"]
-                )
-                return mod["main"]
-
-        def _golden():
-            model_in = relay.var("input", relay.TensorType([1, 10]), span=_create_span("input"))
-            reshape = _set_span(relay.reshape(model_in, [2, 5]), "reshape")
-            mod = ir.IRModule.from_expr(reshape)
-            return mod["main"]
-
-        self._verify(_res, _golden)
-
-    def test_batch_norm_span(self):
-        """Test Structure and span of batchnorm model match to the expected result"""
-
-        def _res():
-            in_shape = (1, 12, 12, 32)
-            with tf.Graph().as_default() as g:
-                input_tensor = tf.placeholder(tf.float32, shape=in_shape, name="input")
-                alpha = tf.constant(
-                    np.ones(
-                        in_shape[-1],
-                    ),
-                    dtype=tf.float32,
-                    name="alpha",
-                )
-                beta = tf.constant(
-                    np.ones(
-                        in_shape[-1],
-                    ),
-                    dtype=tf.float32,
-                    name="beta",
-                )
-                _ = tf.nn.fused_batch_norm(x=input_tensor, offset=beta, scale=alpha, name="bn")
-                mod, _ = relay.frontend.from_tensorflow(
-                    g.as_graph_def(), shape={"input": in_shape}, outputs=["bn"]
-                )
-                return mod["main"]
-
-        def _golden():
-            model_in = relay.var(
-                "input", relay.TensorType([1, 12, 12, 32]), span=_create_span("input")
-            )
-            alpha = relay.var("alpha", relay.TensorType([32]), span=_create_span("alpha"))
-            beta = relay.var("beta", relay.TensorType([32]), span=_create_span("beta"))
-            mean = _set_span(relay.op.mean(model_in, axis=[3], exclude=True), "bn")
-            variance_mean = _set_span(
-                relay.op.mean(model_in, axis=[3], keepdims=True, exclude=True), "bn"
-            )
-            variance = _set_span(
-                relay.op._make._variance(model_in, variance_mean, [3], False, True, False), "bn"
-            )
-            bn = _set_span(
-                relay.nn.batch_norm(model_in, alpha, beta, mean, variance, axis=3, epsilon=0.001),
-                "bn",
-            )
-            mod = ir.IRModule.from_expr(bn[0])
-            return mod["main"]
-
-        self._verify(_res, _golden)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/tensorflow/test_no_op.py b/tests/python/frontend/tensorflow/test_no_op.py
deleted file mode 100644
index bc6be5c3059c..000000000000
--- a/tests/python/frontend/tensorflow/test_no_op.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Unit tests for converting TensorFlow debugging ops to Relay."""
-try:
-    import tensorflow.compat.v1 as tf
-
-    tf.disable_v2_behavior()
-except ImportError:
-    import tensorflow as tf
-import numpy as np
-from tvm import relay, ir, testing
-from tvm.relay.frontend.tensorflow import from_tensorflow
-
-
-def run_relay(graph):
-    with testing.disable_span_filling():
-        mod, params = from_tensorflow(graph.as_graph_def(add_shapes=True))
-    with testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_tensorflow(graph.as_graph_def(add_shapes=True))
-    assert ir.structural_equal(mod["main"], mod_with_span["main"])
-
-    return relay.create_executor("debug", mod=mod).evaluate()(**params)
-
-
-def test_no_op():
-    g = tf.Graph()
-    with g.as_default():
-        no_op = tf.no_op()
-        with tf.Session() as sess:
-            # In TF, the type of a no-op is None.
-            assert sess.run(no_op) is None
-
-        # In TVM, no-op is currently translated to 0, though it should
-        # probably be none or an empty tuple.
-        np.testing.assert_allclose(0, run_relay(g).numpy())
-
-
-if __name__ == "__main__":
-    test_no_op()
diff --git a/tests/python/frontend/tensorflow2/common.py b/tests/python/frontend/tensorflow2/common.py
deleted file mode 100644
index f9bf00e4239e..000000000000
--- a/tests/python/frontend/tensorflow2/common.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition, broad-except
-# pylint: disable=import-outside-toplevel, redefined-builtin
-"""TF2 to relay converter test utilities"""
-
-import tvm
-from tvm import relay
-
-from tvm.runtime.vm import VirtualMachine
-import tvm.contrib.graph_executor as runtime
-from tvm.relay.frontend.tensorflow2 import from_tensorflow
-import tvm.testing
-from tvm.relay.testing.tf import vmobj_to_list as vmobj_to_list
-
-import tensorflow as tf
-from tensorflow.python.eager.def_function import Function
-
-
-def run_tf_code(func, input_):
-    if type(func) is Function:
-        f_out = func(input_)
-        if isinstance(f_out, (list, tuple)):
-            np_out = [x.numpy() for x in f_out]
-        else:
-            np_out = [f_out.numpy()]
-    else:
-        f_out = func(tf.constant(input_))
-        if type(f_out) is dict:
-            np_out = [f_out[k].numpy() for k in sorted(f_out.keys())]
-        elif type(f_out) is list:
-            np_out = [x.numpy() for x in f_out]
-        else:
-            np_out = f_out.numpy()
-    return np_out
-
-
-def compile_graph_executor(mod, params, target="llvm", target_host="llvm", opt_level=3):
-    with tvm.transform.PassContext(opt_level):
-        lib = relay.build(mod, target=tvm.target.Target(target, host=target_host), params=params)
-    return lib
-
-
-def compile_vm(mod, params, target="llvm", target_host="llvm", opt_level=3, disabled_pass=None):
-    with tvm.transform.PassContext(opt_level, disabled_pass=disabled_pass):
-        vm_exec = relay.vm.compile(
-            mod, target=tvm.target.Target(target, host=target_host), params=params
-        )
-    return vm_exec
-
-
-def run_vm(vm_exec, input_, ctx=tvm.cpu(0)):
-    vm = VirtualMachine(vm_exec, ctx)
-    _out = vm.invoke("main", input_)
-    return vmobj_to_list(_out)
-
-
-def run_graph_executor(lib, input_, ctx=tvm.cpu(0)):
-    mod = runtime.GraphModule(lib["default"](ctx))
-    mod.set_input(0, input_)
-    mod.run()
-    return [mod.get_output(i).numpy() for i in range(mod.get_num_outputs())]
-
-
-def compare_tf_tvm(gdef, input_, output_, runtime="vm", output_tensors=None):
-    """compare tf and tvm execution for the same input.
-
-    Parameters
-    ----------
-    gdef: TF2 graph def extracted to be fed into from_tensorflow parser.
-    (https://www.tensorflow.org/code/tensorflow/core/framework/graph.proto)
-
-    input_: a single numpy array object
-
-    output_: the expected output from TF to match TVM output with
-
-    runtime: choose TVM runtime; either "vm" for VirtualMachine or "graph" for GraphExecutor
-
-    output_tensors : List of output tensor names (Optional)
-            if not specified then the last node is assumed as graph output.
-    """
-    mod, params = from_tensorflow(gdef, outputs=output_tensors)
-    if runtime == "vm":
-        exec_ = compile_vm(mod, params)
-        tvm_out = run_vm(exec_, input_)
-    elif runtime == "graph":
-        lib = compile_graph_executor(mod, params)
-        tvm_out = run_graph_executor(lib, input_)
-    else:
-        raise RuntimeError("Runtime input not supported: %s" % runtime)
-
-    tvm.testing.assert_allclose(output_, tvm_out, atol=1e-5)
diff --git a/tests/python/frontend/tensorflow2/test_functional_models.py b/tests/python/frontend/tensorflow2/test_functional_models.py
deleted file mode 100644
index 53ece82217a1..000000000000
--- a/tests/python/frontend/tensorflow2/test_functional_models.py
+++ /dev/null
@@ -1,649 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition, broad-except
-# pylint: disable=import-outside-toplevel, redefined-builtin
-"""TF2 to relay converter test: tests basic examples"""
-
-import tempfile
-import tensorflow as tf
-import numpy as np
-import pytest
-from common import compare_tf_tvm
-from common import run_tf_code
-
-
-def _function_graph(TestClass):
-    f = TestClass().func
-    gdef = f.get_concrete_function().graph.as_graph_def()
-    gdef_ops = list(set([n.op for n in gdef.node]))
-    input_ = TestClass().get_input()
-    output = run_tf_code(f, input_)
-    return gdef, input_, output
-
-
-def _model_graph(TestClass):
-    model = TestClass()
-    with tempfile.TemporaryDirectory() as model_path:
-        tf.saved_model.save(model, model_path)
-        imported = tf.saved_model.load(model_path)
-
-    f = imported.signatures["serving_default"]
-    gdef = f.graph.as_graph_def(add_shapes=True)
-
-    input_ = model.get_input()
-    output = run_tf_code(f, input_)
-    return gdef, input_, output
-
-
-def run_func_graph(TestClass, runtime="vm", outputs=None):
-    compare_tf_tvm(*_function_graph(TestClass), runtime=runtime, output_tensors=outputs)
-
-
-def run_model_graph(TestClass, outputs=None):
-    compare_tf_tvm(*_model_graph(TestClass), runtime="vm", output_tensors=outputs)
-
-
-def run_all(TestClass):
-    run_model_graph(TestClass)
-    for runtime_ in ["vm", "graph"]:
-        run_func_graph(TestClass, runtime=runtime_)
-
-
-def test_add_one():
-    class AddOne(tf.Module):
-        """simple function to test x=x+1; scalar as input"""
-
-        def get_input(self):
-            return np.array(1.0, dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(), dtype=tf.float32)])
-        def func(self, x):
-            return x + 1
-
-    run_all(AddOne)
-
-
-def test_add_one_2d():
-    class AddOne2D(tf.Module):
-        """2D array as input"""
-
-        def get_input(self):
-            return np.ones((2, 2), dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-        def func(self, x):
-            return x + 1
-
-    run_all(AddOne2D)
-
-
-def test_add_one_2d_constant():
-    class AddOne2DConstant(tf.Module):
-        """2D array as input with 2D constant as well; 2D constant stored in params after convert"""
-
-        def get_input(self):
-            return np.ones((2, 2), dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-        def func(self, x):
-            return x + np.ones((2, 2), dtype="float32")
-
-    run_all(AddOne2DConstant)
-
-
-def test_sub_one_2d_constant():
-    class SubOne2DConstant(tf.Module):
-        """2D array as input with 2D constant as well; 2D constant stored in params after convert"""
-
-        def get_input(self):
-            return np.ones((2, 2), dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-        def func(self, x):
-            return x - np.ones((2, 2), dtype="float32")
-
-    run_all(SubOne2DConstant)
-
-
-def test_mul_one_2d_constant():
-    class MulOne2DConstant(tf.Module):
-        """2D array as input with 2D constant as well; 2D constant stored in params after convert"""
-
-        def get_input(self):
-            return np.ones((2, 2), dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-        def func(self, x):
-            return x * np.ones((2, 2), dtype="float32")
-
-    run_all(MulOne2DConstant)
-
-
-def test_div_one_2d_constant():
-    class DivOne2DConstant(tf.Module):
-        """2D array as input with 2D constant as well; 2D constant stored in params after convert"""
-
-        def get_input(self):
-            return np.ones((2, 2), dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-        def func(self, x):
-            return x / np.ones((2, 2), dtype="float32")
-
-    run_all(DivOne2DConstant)
-
-
-def test_strided_slice():
-    class StridedSlice(tf.Module):
-        def get_input(self):
-            return np.ones((3, 2, 3), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(3, 2, 3), dtype=tf.float32)])
-        def func(self, x):
-            return tf.strided_slice(x, [1, 0, 0], [2, 1, 3], [1, 1, 1])
-
-    run_all(StridedSlice)
-
-
-def test_split():
-    class Split(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            a, b, c = tf.split(x, 3, axis=1)
-            return tf.raw_ops.Pack(values=[a, b, c], axis=1)
-
-    run_all(Split)
-
-
-def test_shape():
-    class Shape(tf.Module):
-        def get_input(self):
-            return np.ones((3, 2, 3), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(3, 2, 3), dtype=tf.float32)])
-        def func(self, x):
-            a = tf.ones_like(tf.raw_ops.Shape(input=x), dtype=tf.float32)
-            return a + x
-
-    run_all(Shape)
-
-
-def test_pack():
-    class Pack(tf.Module):
-        def get_input(self):
-            return np.ones((2, 3), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 3), dtype=tf.float32)])
-        def func(self, x):
-            return tf.raw_ops.Pack(values=[x, x], axis=0)
-
-    run_all(Pack)
-
-
-def test_max():
-    class Maximum(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            a, b = tf.split(x, 2, axis=1)
-            return tf.math.maximum(a, b, name=None)
-
-    run_all(Maximum)
-
-
-def test_less():
-    class Less(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            a, b = tf.split(x, 2, axis=1)
-            return tf.math.less(a, b, name=None)
-
-    run_all(Less)
-
-
-def test_equal():
-    class Equal(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            a, b = tf.split(x, 2, axis=1)
-            return tf.math.equal(a, b, name=None)
-
-    run_all(Equal)
-
-
-def test_cast():
-    class Cast(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            return tf.cast(x, tf.int32)
-
-    run_all(Cast)
-
-
-def test_expand_dims():
-    class ExpandDims(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            return tf.expand_dims(x, axis=2)
-
-    run_all(ExpandDims)
-
-
-def test_transpose():
-    class Transpose(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            x = tf.expand_dims(x, axis=2)
-            return tf.transpose(x, perm=[0, 2, 1])
-
-    run_all(Transpose)
-
-
-def test_reshape():
-    class Reshape(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            return tf.reshape(x, (1, 2, 15))
-
-    run_all(Reshape)
-
-
-def test_tanh():
-    class Tanh(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            return tf.math.tanh(x)
-
-    run_all(Tanh)
-
-
-def test_sigmoid():
-    class Sigmoid(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            return tf.math.sigmoid(x)
-
-    run_all(Sigmoid)
-
-
-def test_relu():
-    class Relu(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            return tf.nn.relu(x)
-
-    run_all(Relu)
-
-
-def test_floor():
-    class Floor(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            return tf.math.floor(x)
-
-    run_all(Floor)
-
-
-def test_floor_mod():
-    class FloorMod(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            a, b = tf.split(x, 2, axis=1)
-            return tf.math.floormod(a, b)
-
-    run_all(FloorMod)
-
-
-def test_concat_v2():
-    class ConcatV2(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            a, b, c = tf.split(x, 3, axis=1)
-            axis = tf.add(tf.constant(1, dtype="int32"), tf.constant(0, dtype="int32"))
-            return tf.raw_ops.ConcatV2(values=[a, b, c], axis=axis)
-
-    run_all(ConcatV2)
-
-
-def test_multi_output():
-    class MultiOutput(tf.Module):
-        def get_input(self):
-            return np.ones((2, 2), dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-        def func(self, x):
-            y = 2 * x
-            return x, y
-
-    run_func_graph(MultiOutput, runtime="vm", outputs=["Identity:output:0", "Identity_1:output:0"])
-    run_func_graph(
-        MultiOutput, runtime="graph", outputs=["Identity:output:0", "Identity_1:output:0"]
-    )
-    run_model_graph(MultiOutput, outputs=["Identity:output:0"])
-
-
-def test_if():
-    def create_if_class(_condition=True):
-        class If(tf.Module):
-            def get_input(self):
-                return np.ones((2, 2), dtype="float32")
-
-            @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-            def func(self, x):
-                @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-                def double(x):
-                    return 2 * x
-
-                @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-                def triple(x):
-                    return 3 * x
-
-                output = tf.raw_ops.If(
-                    cond=_condition,
-                    input=[x],
-                    Tout=[tf.float32],
-                    output_shapes=[(2, 2)],
-                    then_branch=double.get_concrete_function(),
-                    else_branch=triple.get_concrete_function(),
-                )
-                return output[0]
-
-        return If
-
-    for cond in [True, False]:
-        if_class = create_if_class(_condition=cond)
-        run_func_graph(if_class, runtime="vm")
-        run_model_graph(if_class)
-
-
-def test_stateless_while():
-    class StatelessWhile(tf.Module):
-        def get_input(self):
-            return np.array([6], dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1,), dtype=tf.float32)])
-        def func(self, x):
-            i = tf.constant(3.0)
-            cond = lambda i: tf.less(i, x)
-            body = lambda i: (tf.add(i, 2),)
-            r = tf.while_loop(cond, body, [i])
-            return r[0]
-
-    run_func_graph(StatelessWhile, runtime="vm")
-    run_model_graph(StatelessWhile)
-
-
-def test_stateless_while_2var():
-    class StatelessWhile2Var(tf.Module):
-        def get_input(self):
-            return np.array([20], dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1,), dtype=tf.float32)])
-        def func(self, x):
-            i = tf.constant(3.0)
-            j = tf.constant(5.0)
-            cond = lambda i, j: tf.less(i + j, x)
-            body = lambda i, j: (tf.add(i, 2), tf.add(j, 3))
-            r = tf.while_loop(cond, body, [i, j])
-            return r
-
-    run_func_graph(
-        StatelessWhile2Var, runtime="vm", outputs=["Identity:output:0", "Identity_1:output:0"]
-    )
-    run_model_graph(StatelessWhile2Var, outputs=["Identity:output:0"])
-
-
-def test_tensorlist():
-    def run_test(elem_shape):
-        class TensorList(tf.Module):
-            def get_input(self):
-                in_tens = np.ones((2, 3), dtype="float32")
-                in_tens[1, :] = np.zeros((3,), dtype="float32")
-                return in_tens
-
-            @tf.function(input_signature=[tf.TensorSpec(shape=(2, 3), dtype=tf.float32)])
-            def func(self, x):
-                dtype = tf.float32
-                tl = tf.raw_ops.TensorListReserve(
-                    element_shape=elem_shape, num_elements=2, element_dtype=dtype
-                )
-                tl = tf.raw_ops.TensorListSetItem(input_handle=tl, index=0, item=x[0, :])
-                tl = tf.raw_ops.TensorListSetItem(input_handle=tl, index=1, item=x[1, :])
-                output = tf.raw_ops.TensorListGetItem(
-                    input_handle=tl, index=0, element_shape=elem_shape, element_dtype=dtype
-                )
-                return output
-
-        run_model_graph(TensorList)
-        run_func_graph(TensorList, runtime="vm")
-
-    run_test((3,))
-    run_test((-1,))
-
-
-def test_tensorlist_stack():
-    def run_test(elem_shape):
-        class TensorListStack(tf.Module):
-            def get_input(self):
-                in_tens = np.ones((2, 3), dtype="float32")
-                in_tens[1] = np.zeros((3,), dtype="float32")
-                return in_tens
-
-            @tf.function(input_signature=[tf.TensorSpec(shape=(2, 3), dtype=tf.float32)])
-            def func(self, x):
-                dtype = tf.float32
-                tl = tf.raw_ops.TensorListReserve(
-                    element_shape=elem_shape, num_elements=2, element_dtype=dtype
-                )
-                tl = tf.raw_ops.TensorListFromTensor(tensor=x, element_shape=elem_shape)
-                output = tf.raw_ops.TensorListStack(
-                    input_handle=tl, element_shape=elem_shape, element_dtype=dtype
-                )
-                return output
-
-        run_model_graph(TensorListStack)
-        run_func_graph(TensorListStack, runtime="vm")
-
-    run_test((3,))
-    run_test((-1,))
-
-
-def test_tensorlist_2d():
-    def run_test(elem_shape):
-        class TensorList2D(tf.Module):
-            def get_input(self):
-                in_tens = np.ones((2, 3, 4), dtype="float32")
-                in_tens[1, :, :] = np.zeros((3, 4), dtype="float32")
-                return in_tens
-
-            @tf.function(input_signature=[tf.TensorSpec(shape=(2, 3, 4), dtype=tf.float32)])
-            def func(self, x):
-                dtype = tf.float32
-                tl = tf.raw_ops.TensorListReserve(
-                    element_shape=elem_shape, num_elements=2, element_dtype=dtype
-                )
-                tl = tf.raw_ops.TensorListSetItem(input_handle=tl, index=0, item=x[0, :, :])
-                tl = tf.raw_ops.TensorListSetItem(input_handle=tl, index=1, item=x[1, :, :])
-                output = tf.raw_ops.TensorListGetItem(
-                    input_handle=tl, index=0, element_shape=elem_shape, element_dtype=dtype
-                )
-                return output
-
-        run_model_graph(TensorList2D)
-        run_func_graph(TensorList2D, runtime="vm")
-
-    run_test((3, 4))
-    run_test((-1, -1))
-
-
-def test_tensorlist_stack_2d():
-    def run_test(elem_shape):
-        class TensorListStack2D(tf.Module):
-            def get_input(self):
-                in_tens = np.ones((2, 3, 4), dtype="float32")
-                in_tens[1, :, :] = np.zeros((3, 4), dtype="float32")
-                return in_tens
-
-            @tf.function(input_signature=[tf.TensorSpec(shape=(2, 3, 4), dtype=tf.float32)])
-            def func(self, x):
-                dtype = tf.float32
-                tl = tf.raw_ops.TensorListReserve(
-                    element_shape=elem_shape, num_elements=2, element_dtype=dtype
-                )
-                tl = tf.raw_ops.TensorListFromTensor(tensor=x, element_shape=elem_shape)
-                output = tf.raw_ops.TensorListStack(
-                    input_handle=tl, element_shape=elem_shape, element_dtype=dtype
-                )
-                return output
-
-        run_model_graph(TensorListStack2D)
-        run_func_graph(TensorListStack2D, runtime="vm")
-
-    run_test((3, 4))
-    run_test((-1, -1))
-
-
-def test_tensorlist_stack_unpack():
-    def run_test(elem_shape):
-        class TensorListStack2D(tf.Module):
-            def get_input(self):
-                in_tens = np.ones((1, 3, 4), dtype="float32")
-                return in_tens
-
-            @tf.function(input_signature=[tf.TensorSpec(shape=(1, 3, 4), dtype=tf.float32)])
-            def func(self, x):
-                dtype = tf.float32
-                tl = tf.raw_ops.TensorListReserve(
-                    element_shape=elem_shape, num_elements=1, element_dtype=dtype
-                )
-                tl = tf.raw_ops.TensorListSetItem(input_handle=tl, index=0, item=x[0, :, :])
-                output = tf.raw_ops.TensorListStack(
-                    input_handle=tl, element_shape=elem_shape, element_dtype=dtype, num_elements=1
-                )
-                output = tf.raw_ops.Unpack(value=output, num=1, axis=0)
-                return output
-
-        run_model_graph(TensorListStack2D)
-        run_func_graph(TensorListStack2D, runtime="vm")
-
-    run_test((3, 4))
-    run_test((-1, -1))
-
-
-def test_bincount_1d():
-    def run_test(weights, minlength, maxlength, axis, binary_output):
-        class Bincount1D(tf.Module):
-            def get_input(self):
-                return np.random.uniform(low=0, high=maxlength, size=(100,)).astype("int32")
-
-            @tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.int32)])
-            def func(self, x):
-                return tf.math.bincount(
-                    x,
-                    weights=weights,
-                    minlength=minlength,
-                    maxlength=maxlength,
-                    axis=axis,
-                    binary_output=binary_output,
-                )
-
-        run_model_graph(Bincount1D)
-        run_func_graph(Bincount1D, runtime="vm")
-
-    for axis in [None, 0, -1]:
-        run_test(weights=None, minlength=20, maxlength=20, axis=axis, binary_output=False)
-        run_test(weights=None, minlength=20, maxlength=20, axis=axis, binary_output=True)
-
-    # weights and axis=None need operator UnsortedSegmentSum to be implemented. Skip axis=None
-    weights = np.random.uniform(low=0.2, high=5, size=(100,)).astype("float32")
-    for axis in [0, -1]:
-        run_test(weights=weights, minlength=20, maxlength=20, axis=axis, binary_output=False)
-
-
-def test_bincount_2d():
-    def run_test(weights, minlength, maxlength, axis, binary_output):
-        class Bincount2D(tf.Module):
-            def get_input(self):
-                return np.random.uniform(low=0, high=maxlength, size=(3, 100)).astype("int32")
-
-            @tf.function(input_signature=[tf.TensorSpec([None, None], tf.int32)])
-            def func(self, x):
-                return tf.math.bincount(
-                    x,
-                    weights=weights,
-                    minlength=minlength,
-                    maxlength=maxlength,
-                    axis=axis,
-                    binary_output=binary_output,
-                )
-
-        run_model_graph(Bincount2D)
-        run_func_graph(Bincount2D, runtime="vm")
-
-    for axis in [None, 0, -1]:
-        run_test(weights=None, minlength=20, maxlength=20, axis=axis, binary_output=False)
-        run_test(weights=None, minlength=20, maxlength=20, axis=axis, binary_output=True)
-
-    # weights and axis=None need operator UnsortedSegmentSum to be implemented. Skip axis=None
-    weights = np.random.uniform(low=0.2, high=5, size=(3, 100)).astype("float32")
-    for axis in [0, -1]:
-        run_test(weights=weights, minlength=20, maxlength=20, axis=axis, binary_output=False)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/tensorflow2/test_sequential_models.py b/tests/python/frontend/tensorflow2/test_sequential_models.py
deleted file mode 100644
index 2ad41508630c..000000000000
--- a/tests/python/frontend/tensorflow2/test_sequential_models.py
+++ /dev/null
@@ -1,168 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition, broad-except
-# pylint: disable=import-outside-toplevel, redefined-builtin
-"""TF2 to relay converter test: testing models built with tf.keras.Sequential()"""
-
-import tempfile
-import numpy as np
-import pytest
-import tensorflow as tf
-from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
-
-from common import compare_tf_tvm
-from common import run_tf_code
-
-
-def run_sequential_model(model_fn, input_shape):
-    def get_input(shape):
-        _input = np.random.uniform(0, 1, shape).astype(dtype="float32")
-        return _input
-
-    def save_and_reload(_model):
-        with tempfile.TemporaryDirectory() as model_path:
-            tf.saved_model.save(_model, model_path)
-            loaded = tf.saved_model.load(model_path)
-            func = loaded.signatures["serving_default"]
-            frozen_func = convert_variables_to_constants_v2(func)
-        return frozen_func
-
-    def model_graph(model, input_shape):
-        _input = get_input(input_shape)
-        f = save_and_reload(model(input_shape))
-        _output = run_tf_code(f, _input)
-        gdef = f.graph.as_graph_def(add_shapes=True)
-        return gdef, _input, _output
-
-    compare_tf_tvm(*model_graph(model_fn, input_shape), runtime="vm")
-
-
-def test_dense_model():
-    def dense_model(input_shape, num_units=128):
-        return tf.keras.Sequential(
-            [tf.keras.layers.Flatten(input_shape=input_shape[1:]), tf.keras.layers.Dense(num_units)]
-        )
-
-    run_sequential_model(dense_model, input_shape=(1, 28, 28))
-
-
-def test_mnist_model():
-    def mnist_model(input_shape):
-        return tf.keras.Sequential(
-            [
-                tf.keras.layers.Flatten(input_shape=input_shape[1:]),
-                tf.keras.layers.Dense(128, activation="relu"),
-                tf.keras.layers.Dense(10),
-            ]
-        )
-
-    run_sequential_model(mnist_model, input_shape=(1, 28, 28))
-
-
-def test_conv2d_model():
-    def conv2d_model(input_shape, kernel=(3, 3), filters=16):
-        model = tf.keras.Sequential(
-            [
-                tf.keras.layers.Input(shape=input_shape[1:], batch_size=1),
-                tf.keras.layers.Conv2D(filters, kernel),
-            ]
-        )
-        return model
-
-    run_sequential_model(conv2d_model, input_shape=(1, 32, 32, 3))
-
-
-def test_maxpool_model():
-    def maxpool_model(input_shape, pool_size=(2, 2)):
-        model = tf.keras.Sequential(
-            [tf.keras.layers.MaxPool2D(pool_size=pool_size, input_shape=input_shape[1:])]
-        )
-        return model
-
-    run_sequential_model(maxpool_model, input_shape=(1, 32, 32, 3))
-
-
-def test_maxpool_batchnorm_model():
-    def maxpool_batchnorm_model(input_shape, pool_size=(2, 2)):
-        model = tf.keras.Sequential(
-            [
-                tf.keras.layers.MaxPool2D(pool_size=pool_size, input_shape=input_shape[1:]),
-                tf.keras.layers.BatchNormalization(),
-            ]
-        )
-        return model
-
-    run_sequential_model(maxpool_batchnorm_model, input_shape=(1, 32, 32, 3))
-
-
-def test_tensorlist_stack_model():
-    def tensorlist_stack_model(input_shape):
-        class TensorArrayStackLayer(tf.keras.layers.Layer):
-            def __init__(self):
-                super().__init__()
-
-            def call(self, inputs):
-                inputs = tf.squeeze(inputs)
-                outputs = tf.TensorArray(
-                    tf.float32,
-                    size=inputs.shape[0],
-                    infer_shape=False,
-                    element_shape=inputs.shape[1:],
-                )
-                outputs = outputs.unstack(inputs)
-
-                return outputs.stack()
-
-        input_shape = (3, 32)
-        model = tf.keras.Sequential(
-            [tf.keras.layers.Input(shape=input_shape, batch_size=1), TensorArrayStackLayer()]
-        )
-        return model
-
-    run_sequential_model(tensorlist_stack_model, input_shape=(3, 32))
-
-
-def test_tensorlist_read_model():
-    def tensorlist_read_model(input_shape):
-        class TensorArrayReadLayer(tf.keras.layers.Layer):
-            def __init__(self):
-                super().__init__()
-
-            def call(self, inputs):
-                inputs = tf.squeeze(inputs)
-                outputs = tf.TensorArray(
-                    tf.float32,
-                    size=inputs.shape[0],
-                    infer_shape=False,
-                    element_shape=inputs.shape[1:],
-                )
-                for i in range(inputs.shape[0]):
-                    outputs = outputs.write(i, inputs[i, :])
-
-                return outputs.read(0)
-
-        input_shape = (3, 32)
-        model = tf.keras.Sequential(
-            [tf.keras.layers.Input(shape=input_shape, batch_size=1), TensorArrayReadLayer()]
-        )
-        return model
-
-    run_sequential_model(tensorlist_read_model, input_shape=(3, 32))
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/test_common.py b/tests/python/frontend/test_common.py
deleted file mode 100644
index 2b35ae71f2d6..000000000000
--- a/tests/python/frontend/test_common.py
+++ /dev/null
@@ -1,220 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import numpy as np
-
-from tvm import relay, testing, transform
-from tvm.relay.frontend.common import StrAttrsDict, set_span
-from relay.utils.tag_span import _set_span, _create_span, _verify_structural_equal_with_span
-
-
-def test_key_is_present():
-    attrs = StrAttrsDict({"a": 1})
-    assert attrs.has_attr("a")
-
-
-def test_key_is_not_present():
-    attrs = StrAttrsDict({"a": 1})
-    assert not attrs.has_attr("b")
-
-
-class TestSetSpan:
-    def test_pass_ctx_switch(self):
-        def _res(should_fill):
-            if should_fill:
-                with testing.enable_span_filling():
-                    return set_span(relay.var("x", shape=(1, 64, 56, 56)), "x_var")
-            else:
-                with testing.disable_span_filling():
-                    return set_span(relay.var("x", shape=(1, 64, 56, 56)), "x_var")
-
-        disable = relay.var("x", shape=(1, 64, 56, 56))
-        enable = relay.var("x", shape=(1, 64, 56, 56), span=_create_span("x_var"))
-
-        _verify_structural_equal_with_span(_res(False), disable)
-        _verify_structural_equal_with_span(_res(True), enable)
-
-    # Should tag all exprs without span, and stop when expr is span-tagged
-    def test_builtin_tuple(self):
-        def _res():
-            a = relay.const(np.ones([1, 1, 1]), dtype="int64", span=_create_span("a"))
-            b = relay.const(np.zeros([1, 1, 1]), dtype="int64")
-            return set_span(tuple([a, b]), "tuple")
-
-        def _golden():
-            a = relay.const(np.ones([1, 1, 1]), dtype="int64", span=_create_span("a"))
-            b = relay.const(np.zeros([1, 1, 1]), dtype="int64", span=_create_span("tuple"))
-            return tuple([a, b])
-
-        res_tuple, golden_tuple = _res(), _golden()
-        assert len(res_tuple) == len(golden_tuple)
-        for i in range(len(res_tuple)):
-            _verify_structural_equal_with_span(res_tuple[i], golden_tuple[i])
-
-    def test_builtin_list(self):
-        def _res():
-            a = relay.const(np.ones([1, 1, 1]), dtype="int64", span=_create_span("a"))
-            b = relay.const(np.zeros([1, 1, 1]), dtype="int64")
-            t = relay.Tuple([a, b])
-            t_a = relay.TupleGetItem(t, 0)
-            t_b = relay.TupleGetItem(t, 1)
-            return set_span([t_a, t_b], "list")
-
-        def _golden():
-            a = relay.const(np.ones([1, 1, 1]), dtype="int64", span=_create_span("a"))
-            b = relay.const(np.zeros([1, 1, 1]), dtype="int64", span=_create_span("list"))
-            t = relay.Tuple([a, b], span=_create_span("list"))
-            t_a = relay.TupleGetItem(t, 0, span=_create_span("list"))
-            t_b = relay.TupleGetItem(t, 1, span=_create_span("list"))
-            return [t_a, t_b]
-
-        res_list, golden_list = _res(), _golden()
-        assert len(res_list) == len(golden_list)
-        for i in range(len(res_list)):
-            _verify_structural_equal_with_span(res_list[i], golden_list[i])
-
-    def test_var(self):
-        x = set_span(relay.var("x", shape=(1, 64, 56, 56)), "x_var")
-        x_expected = relay.var("x", shape=(1, 64, 56, 56), span=_create_span("x_var"))
-        _verify_structural_equal_with_span(x, x_expected)
-
-    def test_constant(self):
-        c = set_span(relay.const(np.ones([64, 64, 3, 3]), dtype="int64"), "const_c")
-        c_expected = relay.const(
-            np.ones([64, 64, 3, 3]), dtype="int64", span=_create_span("const_c")
-        )
-        _verify_structural_equal_with_span(c, c_expected)
-
-    def test_call(self):
-        def _res():
-            x = set_span(relay.var("x", shape=(1, 64, 56, 56)), "x_var")
-            w = relay.const(np.ones([64, 64, 3, 3]), dtype="int64")
-            y = set_span(
-                relay.nn.conv2d(x, w, channels=64, kernel_size=(3, 3), padding=(1, 1)), "conv2d"
-            )
-            return relay.Function([x], y)
-
-        def _golden():
-            x = relay.var("x", shape=(1, 64, 56, 56), span=_create_span("x_var"))
-            w = relay.const(np.ones([64, 64, 3, 3]), dtype="int64", span=_create_span("conv2d"))
-            y = _set_span(
-                relay.nn.conv2d(x, w, channels=64, kernel_size=(3, 3), padding=(1, 1)), "conv2d"
-            )
-            return relay.Function([x], y)
-
-        _verify_structural_equal_with_span(_res(), _golden())
-
-    def test_tuple(self):
-        def _res():
-            a = set_span(relay.const(np.ones([1, 1, 1]), dtype="int64"), "a")
-            b = relay.const(np.ones([1, 1, 1]), dtype="int64")
-            t = set_span(relay.Tuple([a, b]), "t")
-            return relay.Function([], t)
-
-        def _golden():
-            a = relay.const(np.ones([1, 1, 1]), dtype="int64", span=_create_span("a"))
-            b = relay.const(np.ones([1, 1, 1]), dtype="int64", span=_create_span("t"))
-            t = relay.Tuple([a, b], span=_create_span("t"))
-            return relay.Function([], t)
-
-        _verify_structural_equal_with_span(_res(), _golden())
-
-    def test_tuple_getitem(self):
-        def _res():
-            a = set_span(relay.const(np.ones([1, 1, 1]), dtype="int64"), "a")
-            b = relay.const(np.ones([1, 1, 1]), dtype="int64")
-            t = relay.Tuple([a, b])
-            i = set_span(relay.TupleGetItem(t, 0), "i")
-            return relay.Function([], i)
-
-        def _golden():
-            a = relay.const(np.ones([1, 1, 1]), dtype="int64", span=_create_span("a"))
-            b = relay.const(np.ones([1, 1, 1]), dtype="int64", span=_create_span("i"))
-            t = relay.Tuple([a, b], span=_create_span("i"))
-            i = relay.TupleGetItem(t, 0, span=_create_span("i"))
-            return relay.Function([], i)
-
-        _verify_structural_equal_with_span(_res(), _golden())
-
-    def test_let(self):
-        def _res():
-            x = set_span(relay.Var("x"), "x_var")
-            c_1 = relay.const(np.ones(10))
-            add = relay.add(x, x)
-            body = set_span(relay.Let(x, c_1, add), "let")
-
-            c_2 = set_span(relay.const(np.zeros(10)), "zeros")
-            y = set_span(relay.add(body, c_2), "add_2")
-            return relay.Function([x], y)
-
-        def _golden():
-            x = relay.Var("x", span=_create_span("x_var"))
-            c_1 = relay.const(np.ones(10), span=_create_span("let"))
-            add = _set_span(relay.add(x, x), "let")
-            body = relay.Let(x, c_1, add, span=_create_span("let"))
-
-            c_2 = relay.const(np.zeros(10), span=_create_span("zeros"))
-            y = _set_span(relay.add(body, c_2), "add_2")
-            return relay.Function([x], y)
-
-        _verify_structural_equal_with_span(_res(), _golden())
-
-    def test_if(self):
-        def _res():
-            x = set_span(relay.var("x", shape=[], dtype="float32"), "x_var")
-            y = set_span(relay.var("y", shape=[], dtype="float32"), "y_var")
-            eq = relay.equal(x, y)
-
-            true_branch = set_span(relay.add(x, y), "true_branch")
-            false_branch = relay.subtract(x, y)
-            ife = set_span(relay.If(eq, true_branch, false_branch), "if")
-            return relay.Function([x, y], ife)
-
-        def _golden():
-            x = relay.var("x", shape=[], dtype="float32", span=_create_span("x_var"))
-            y = relay.var("y", shape=[], dtype="float32", span=_create_span("y_var"))
-            eq = _set_span(relay.equal(x, y), "if")
-
-            true_branch = _set_span(relay.add(x, y), "true_branch")
-            false_branch = _set_span(relay.subtract(x, y), "if")
-            ife = relay.If(eq, true_branch, false_branch, span=_create_span("if"))
-            return relay.Function([x, y], ife)
-
-        _verify_structural_equal_with_span(_res(), _golden())
-
-    def test_fn(self):
-        def _res():
-            x = set_span(relay.var("x", shape=(1, 64, 56, 56)), "x_var")
-            w = relay.const(np.ones([64, 64, 3, 3]), dtype="int64")
-            y = relay.nn.conv2d(x, w, channels=64, kernel_size=(3, 3), padding=(1, 1))
-            f = set_span(relay.Function([x], y), "func")
-            return f
-
-        def _golden():
-            x = relay.var("x", shape=(1, 64, 56, 56), span=_create_span("x_var"))
-            w = relay.const(np.ones([64, 64, 3, 3]), dtype="int64", span=_create_span("func"))
-            y = _set_span(
-                relay.nn.conv2d(x, w, channels=64, kernel_size=(3, 3), padding=(1, 1)), "func"
-            )
-            f = relay.Function([x], y, span=_create_span("func"))
-            return f
-
-        _verify_structural_equal_with_span(_res(), _golden())
-
-
-if __name__ == "__main__":
-    testing.main()
diff --git a/tests/python/frontend/tflite/test_forward.py b/tests/python/frontend/tflite/test_forward.py
deleted file mode 100644
index cb0b17ea3fcf..000000000000
--- a/tests/python/frontend/tflite/test_forward.py
+++ /dev/null
@@ -1,5722 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=unused-argument, import-outside-toplevel, inconsistent-return-statements
-"""
-TFLite testcases
-================
-This article is a test script to test TFLite operator with Relay.
-"""
-from __future__ import print_function
-from functools import partial
-import platform
-import os
-import tempfile
-import typing
-from packaging import version as package_version
-import pytest
-import numpy as np
-
-from PIL import Image
-
-from tflite.BuiltinOperator import BuiltinOperator
-
-try:
-    import tensorflow.compat.v1 as tf
-
-    # tensorflow.python.framework.ops module itself is not part of
-    # TensorFlow's public API: the precise contents of that module
-    # may vary from one version to the next
-    import tensorflow.compat.v1 as ops
-except ImportError:
-    import tensorflow as tf
-    import tensorflow as ops
-from tensorflow.python.framework import constant_op
-
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import image_ops
-from tensorflow.python.ops import gen_array_ops
-from tensorflow.python.ops import nn_impl
-from tensorflow.python.ops import variables
-from tensorflow import raw_ops
-
-try:
-    from tensorflow import lite as interpreter_wrapper
-except ImportError:
-    from tensorflow.contrib import lite as interpreter_wrapper
-
-import tvm
-import tvm.relay.testing.tf as tf_testing
-from tvm.contrib.download import download_testdata
-from tvm import relay, ir
-from tvm.contrib import graph_executor
-from relay.utils.tag_span import _set_span, _create_span, _verify_structural_equal_with_span
-
-
-#######################################################################
-# Generic run functions for TVM & TFLite
-# --------------------------------------
-def convert_to_list(x):
-    if not isinstance(x, list):
-        x = [x]
-    return x
-
-
-#######################################################################
-# Get a real image for e2e testing
-# --------------------------------
-def get_real_image(im_height, im_width, quantized=True):
-    repo_base = "https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/"
-    img_name = "elephant-299.jpg"
-    image_url = os.path.join(repo_base, img_name)
-    img_path = download_testdata(image_url, img_name, module="data")
-    image = Image.open(img_path).resize((im_height, im_width))
-    x = np.array(image).astype("uint8") if quantized else np.array(image).astype("float32")
-    data = np.reshape(x, (1, im_height, im_width, 3))
-    return data
-
-
-def pre_processed_image(height, width):
-    """Image preprocessed"""
-    repo_base = "https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/"
-    img_name = "elephant-299.jpg"
-    image_url = os.path.join(repo_base, img_name)
-    img_path = download_testdata(image_url, img_name, module="data")
-    image = tf.io.read_file(img_path)
-    image = tf.image.decode_jpeg(image, channels=3)
-    with tf.name_scope("eval_image"):
-        if image.dtype != tf.float32:
-            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
-        image = tf.image.central_crop(image, central_fraction=0.875)
-    # Resize the image to the specified height and width.
-    image = tf.image.resize(image, [height, width], align_corners=False)
-    image = tf.expand_dims(image, axis=0)
-    return image
-
-
-def get_real_image_object_detection(im_height, im_width):
-    repo_base = "https://github.com/dmlc/web-data/raw/main/gluoncv/detection/"
-    img_name = "street_small.jpg"
-    image_url = os.path.join(repo_base, img_name)
-    img_path = download_testdata(image_url, img_name, module="data")
-    image = Image.open(img_path).resize((im_height, im_width))
-    x = np.array(image).astype("uint8")
-    data = np.reshape(x, (1, im_height, im_width, 3))
-    return data
-
-
-def vmobj_to_list(obj):
-    """Converts TVM objects returned by VM execution to Python List."""
-    if isinstance(obj, tvm.nd.NDArray):
-        return [obj.numpy().tolist()]
-    elif isinstance(obj, tvm.runtime.container.ADT):
-        result = []
-        for f in obj:
-            result.extend(vmobj_to_list(f))
-        return result
-    elif isinstance(obj, tvm.relay.backend.interpreter.ConstructorValue):
-        if obj.constructor.name_hint == "Cons":
-            t_l = vmobj_to_list(obj.fields[1])
-            h_d = vmobj_to_list(obj.fields[0])
-            h_d.extend(t_l)
-            return h_d
-        elif obj.constructor.name_hint == "Nil":
-            return []
-        elif "tensor_nil" in obj.constructor.name_hint:
-            return [0]
-        elif "tensor" in obj.constructor.name_hint:
-            return [obj.fields[0].numpy()]
-        else:
-            raise RuntimeError(f"Unknown object type: {obj.constructor.name_hint}")
-    else:
-        raise RuntimeError(f"Unknown object type: {type(obj)}")
-
-
-def _quantize_keras_model(
-    keras_model,
-    representative_data_gen,
-    is_float_input=False,
-    is_float_output=False,
-    int_quant_dtype=tf.int8,
-):
-    """Utility function to quantize a Keras model using TFLite converter."""
-    converter = interpreter_wrapper.TFLiteConverter.from_keras_model(keras_model)
-    if int_quant_dtype == tf.int8:
-        converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
-        converter.representative_dataset = representative_data_gen
-        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
-        inference_dtype = tf.uint8
-    elif int_quant_dtype == tf.int16:
-        converter.optimizations = [tf.lite.Optimize.DEFAULT]
-        converter.representative_dataset = representative_data_gen
-        converter.target_spec.supported_ops = [
-            tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
-        ]
-        inference_dtype = tf.uint16
-    else:
-        raise RuntimeError(
-            f"Invalid quantized dtype {int_quant_dtype}. Supported types: int8, int16."
-        )
-
-    # NOTE: If representative dataset is provided, and inference input type is not set,
-    #       then converter will self add quant & dequant Op accordingly.
-    if not is_float_input:
-        converter.inference_input_type = inference_dtype
-    if not is_float_output:
-        converter.inference_output_type = inference_dtype
-
-    return converter.convert()
-
-
-def run_tvm_graph(
-    tflite_model_buf,
-    input_data,
-    input_node,
-    num_output=1,
-    target="llvm",
-    out_names=None,
-    mode="graph_executor",
-    op_converter=relay.frontend.tflite.OperatorConverter,
-):
-    """Generic function to compile on relay and execute on tvm"""
-    # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1
-    try:
-        import tflite.Model
-
-        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0)
-    except AttributeError:
-        import tflite
-
-        tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0)
-    except ImportError as exc:
-        raise ImportError("The tflite package must be installed") from exc
-
-    input_data = convert_to_list(input_data)
-    input_node = convert_to_list(input_node)
-
-    shape_dict = {}
-    dtype_dict = {}
-    for i, node in enumerate(input_node):
-        shape_dict[node] = input_data[i].shape
-        dtype_dict[node] = input_data[i].dtype.name
-
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_tflite(
-            tflite_model, shape_dict=shape_dict, dtype_dict=dtype_dict, op_converter=op_converter
-        )
-    with tvm.testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_tflite(
-            tflite_model, shape_dict=shape_dict, dtype_dict=dtype_dict, op_converter=op_converter
-        )
-    tvm.ir.assert_structural_equal(mod["main"], mod_with_span["main"])
-
-    if mode in ["debug", "vm"]:
-        inputs = []
-        for param in mod["main"].params:
-            found = False
-            for i, n in enumerate(input_node):
-                if n == param.name_hint:
-                    found = True
-                    inputs.append(tvm.nd.array(input_data[i]))
-                    break
-            # Interpreter doesn't bind constants, so still need to find in params
-            if not found:
-                inputs.append(tvm.nd.array(params[param.name_hint]))
-        result = relay.create_executor(mode, mod=mod, device=tvm.cpu(), target="llvm").evaluate()(
-            *inputs
-        )
-        return vmobj_to_list(result)
-    else:
-        with tvm.transform.PassContext(opt_level=3):
-            lib = relay.build(mod, target, params=params)
-
-        dev = tvm.device(target, 0)
-
-        m = graph_executor.GraphModule(lib["default"](dev))
-        # set inputs
-        for i, node in enumerate(input_node):
-            m.set_input(node, tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
-        # execute
-        m.run()
-        # get outputs
-        assert out_names is None or num_output == len(
-            out_names
-        ), f"out_names: {out_names} num_output: {num_output}"
-        tvm_output_list = []
-        for i in range(0, num_output):
-            tvm_output = m.get_output(i)
-            tvm_output_list.append(tvm_output.numpy())
-        return tvm_output_list
-
-
-def run_tflite_graph(tflite_model_buf, input_data):
-    """Generic function to execute TFLite"""
-    input_data = convert_to_list(input_data)
-
-    interpreter = interpreter_wrapper.Interpreter(model_content=tflite_model_buf)
-    input_details = interpreter.get_input_details()
-    output_details = interpreter.get_output_details()
-
-    for i, input_detail in enumerate(input_details):
-        interpreter.resize_tensor_input(input_detail["index"], input_data[i].shape)
-    interpreter.allocate_tensors()
-
-    # set input
-    assert len(input_data) == len(input_details)
-    for i, input_detail in enumerate(input_details):
-        interpreter.set_tensor(input_detail["index"], input_data[i])
-
-    # Run
-    interpreter.invoke()
-
-    # get output
-    tflite_output = []
-    for _, output_detail in enumerate(output_details):
-        tflite_output.append(interpreter.get_tensor(output_detail["index"]))
-
-    return tflite_output
-
-
-def compare_tflite_with_tvm(
-    in_data: typing.List[np.ndarray],
-    in_name: typing.List[str],
-    input_tensors: typing.List,
-    output_tensors: typing.List,
-    init_global_variables: bool = False,
-    out_names=None,
-    quantized=False,
-    input_range=None,
-    mode="graph_executor",
-    experimental_new_converter=False,
-    fp16_quantized=False,
-    int_quant_dtype=tf.uint8,
-):
-    """Generic function to generate and compare TFLite and TVM output"""
-    in_data = convert_to_list(in_data)
-    in_name = convert_to_list(in_name)
-    out_names = convert_to_list(out_names)
-    in_node = [0] * len(in_name)
-    for i, _ in enumerate(in_name):
-        in_node[i] = in_name[i].split(":")[0] if ":" in in_name[i] else in_name[i]
-
-    with tf.Session() as sess:
-        if init_global_variables:
-            sess.run(variables.global_variables_initializer())
-        # convert to tflite model
-        converter = tf.lite.TFLiteConverter.from_session(sess, input_tensors, output_tensors)
-
-        if len(input_tensors) > 1:
-            if len(input_tensors[0].shape) <= 4 and len(input_tensors[1].shape) <= 4:
-                converter._experimental_disable_batchmatmul_unfold = True
-            else:
-                converter._experimental_disable_batchmatmul_unfold = False
-
-        converter.experimental_new_converter = experimental_new_converter
-        if quantized:
-            if int_quant_dtype == tf.int16:
-                converter.optimizations = [tf.lite.Optimize.DEFAULT]
-                converter.target_spec.supported_ops = [
-                    tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
-                ]
-            elif int_quant_dtype == tf.int8:
-                converter.inference_type = tf.lite.constants.INT8
-            else:
-                # default to int8 quantization
-                converter.inference_type = tf.lite.constants.QUANTIZED_UINT8
-
-            input_arrays = converter.get_input_arrays()
-            input_stats = {}
-            # calculate the mean and quantization scale for every input tensor,
-            # with respect to its fp32 input range, defined in fake_quant.
-            # s = 255/(fmax-fmin);  m = -fmin*s (the zero point)
-            for i in input_arrays:
-                try:
-                    quant_scale = 255 / (input_range[i][1] - input_range[i][0])
-                except ZeroDivisionError:
-                    print("Min and max of the input range for tensor " + i + " can't be equal")
-                mean = -input_range[i][0] * quant_scale
-                input_stats[i] = (mean, quant_scale)
-            converter.quantized_input_stats = input_stats
-        elif fp16_quantized:
-            converter.optimizations = [tf.lite.Optimize.DEFAULT]
-            converter.target_spec.supported_types = [tf.float16]
-
-        tflite_model_buffer = converter.convert()
-        tflite_output = run_tflite_graph(tflite_model_buffer, in_data)
-
-        for device in ["llvm"]:
-            _ = tvm.device(device, 0)
-            if not tvm.testing.device_enabled(device):
-                print(f"Skip because {device} is not enabled")
-                continue
-
-            tvm_output = run_tvm_graph(
-                tflite_model_buffer,
-                in_data,
-                in_node,
-                target=device,
-                num_output=len(out_names),
-                out_names=out_names,
-                mode=mode,
-            )
-            # WARNING: the results could well be random values clipped to 0 or 255 because of badly
-            # tuned output range for the specific operator. While adding test ensure that we aren't
-            # getting only clipped values in output tensors that still pass the assertion.
-            # For reference see _test_elemwise_qnn_out_range()
-            if quantized and not fp16_quantized:
-                for i, _ in enumerate(tflite_output):
-                    # allow absolute tolerance of 1 in the quantized results
-                    tvm.testing.assert_allclose(
-                        tflite_output[i],  # pylint: disable=unnecessary-list-index-lookup
-                        tvm_output[i],
-                        atol=1,
-                        rtol=1e-5,
-                    )
-            else:
-                for i, _ in enumerate(tflite_output):
-                    tvm.testing.assert_allclose(
-                        tflite_output[i],  # pylint: disable=unnecessary-list-index-lookup
-                        tvm_output[i],
-                        atol=1e-5,
-                        rtol=1e-5,
-                    )
-
-
-def with_fused_activation_function(input_tensor, fn_name):
-    """Fused activation function"""
-    if fn_name is None or fn_name == "NONE":
-        return input_tensor
-    if fn_name == "RELU":
-        return nn_ops.relu(input_tensor)
-    if fn_name == "RELU6":
-        return nn_ops.relu6(input_tensor)
-    if fn_name == "RELU_N1_TO_1":
-        return math_ops.maximum(-1, math_ops.minimum(input_tensor, 1))
-    if fn_name == "TANH":
-        return math_ops.tanh(input_tensor)
-    raise AssertionError(f"Unknown fused_activation_function {fn_name}")
-
-
-def _test_split(in_shape, axis, num_splits, dtype):
-    """internal split tester taking as parameters in_shape, number of tensors to split into
-    and dtype (data type)"""
-
-    np_data = np.random.uniform(-5, 5, size=in_shape).astype(dtype)
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=in_shape, dtype=dtype, name="in_data")
-        out = array_ops.split(in_data, num_splits, axis=axis)
-        num_splits = len(num_splits) if isinstance(num_splits, list) else num_splits
-        out_names = ["out_" + str(n) + ":0" for n in range(num_splits)]
-        compare_tflite_with_tvm([np_data], ["in_data"], [in_data], out, out_names=out_names)
-
-
-def test_forward_split():
-    """test split layer"""
-    # rank 1
-    _test_split((3,), 0, 1, "float32")
-    _test_split((3,), 0, 3, "float32")
-    _test_split((6,), 0, 3, "float32")
-    # rank 2
-    _test_split((6, 2), 0, 3, "float32")
-    _test_split((2, 6), 1, 6, "float32")
-    # rank 3
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_split((6, 2, 4), 0, 2, "int32")
-
-    _test_split((2, 6, 4), 1, 3, "float32")
-    _test_split((2, 4, 6), 2, 1, "float32")
-    # rank 4
-    _test_split((6, 1, 3, 5), 0, 3, "float32")
-    _test_split((1, 6, 3, 5), 1, 3, "float32")
-    _test_split((1, 3, 6, 5), 2, 3, "float32")
-    _test_split((1, 3, 5, 6), 3, 3, "float32")
-    # split along negative axis
-    _test_split((6, 1, 3, 5), -4, 3, "float32")
-    _test_split((1, 6, 3, 5), -3, 3, "float32")
-    _test_split((1, 3, 6, 5), -2, 3, "float32")
-    _test_split((1, 3, 5, 6), -1, 3, "float32")
-    # size_splits split
-    _test_split((6,), 0, [1, 2, 3], "float32")
-    _test_split((3, 6, 4), -2, [1, 4, 1], "float32")
-
-
-#######################################################################
-# slice
-# -----
-
-
-def _test_slice(data, begin, size):
-    """One iteration of SLICE"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = array_ops.slice(in_data, begin, size)
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_slice():
-    """SLICE"""
-    _test_slice(np.arange(4, dtype=np.float32).reshape((4,)), begin=[0], size=[2])
-    _test_slice(np.arange(18, dtype=np.int32).reshape((3, 2, 3)), begin=[1, 0, 0], size=[1, 1, 3])
-    # tflite 1.13 outputs nonsense values if size[i] == -1
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_slice(np.arange(8, dtype=np.int32).reshape((2, 4)), begin=[0, 1], size=[-1, -1])
-        _test_slice(np.arange(5, dtype=np.int32).reshape((5,)), begin=[4], size=[-1])
-
-
-#######################################################################
-# Topk
-# ----
-def _test_topk(in_shape, k=1):
-    """One iteration of TOPK"""
-    data = np.random.uniform(size=in_shape).astype("float32")
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = nn_ops.top_k(in_data, k, name="TopK")
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out[0]])
-
-
-def test_forward_topk():
-    """TOPK"""
-    _test_topk((3,), 1)
-    _test_topk((3,), 3)
-    _test_topk((3, 5, 7), 3)
-    _test_topk((3, 5, 7), 3)
-
-
-#######################################################################
-# Gather
-# ------
-
-
-def _test_gather(dshape, indices, axis, dtype, quantized=False, oob=False, wrap_idx=False):
-    """One iteration of Gather"""
-    indices = np.asarray(indices).astype("int32")
-    data = np.random.uniform(1, 10, size=dshape)
-    data = data.astype(np.uint8) if quantized else data.astype(dtype)
-    with tf.Graph().as_default():
-        if wrap_idx:
-            in_name = "in_indices"
-            indices_expr = array_ops.placeholder(
-                shape=indices.shape, dtype=indices.dtype, name=in_name
-            )
-            in_tensor_name = [in_name + ":0"]
-            in_indices = [indices_expr]
-        else:
-            indices_expr = indices
-            indices = []
-            in_tensor_name = []
-            in_indices = []
-
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype, name="in_data")
-        if axis:
-            out = array_ops.gather(in_data, indices_expr, axis=axis)
-        else:
-            out = array_ops.gather(in_data, indices_expr)  # tflite conversion fails for None axis
-        input_range = {"in_data": (-100, 100)} if quantized else None
-        try:
-            compare_tflite_with_tvm(
-                [data] + indices,
-                ["in_data:0"] + in_tensor_name,
-                [in_data] + in_indices,
-                [out],
-                quantized=quantized,
-                input_range=input_range,
-            )
-        except ValueError as exc:
-            if not oob:
-                raise exc
-        except Exception as exc:
-            raise exc
-
-
-def test_forward_gather():
-    """GATHER"""
-    for quantized in [False, True]:
-        for wrap_idx in [False, True]:
-            _test_gather((4,), [1], 0, "float32", quantized, wrap_idx)
-            _test_gather((4,), [1], None, "int32", quantized, wrap_idx)
-            _test_gather((1, 4), [0], 0, "int32", quantized, wrap_idx)
-            _test_gather((4,), [[[1, 0], [0, 1]]], 0, "float32", quantized, wrap_idx)
-            _test_gather((2, 2), [[[1, 0], [0, 1]]], 1, "int32", quantized, wrap_idx)
-            _test_gather((2, 2), [[[1, 0], [0, 1]]], None, "float32", quantized, wrap_idx)
-            _test_gather((3, 3, 3), [[[1, 0]]], 0, "int32", quantized, wrap_idx)
-            _test_gather((3, 3, 3), [[[1, 0]]], 2, "int32", quantized, wrap_idx)
-            _test_gather((4, 3, 5, 6), [[2, 1, 0, 0]], 0, "float32", quantized, wrap_idx)
-            _test_gather((3, 3, 3), [[[2, 1]]], -1, "int32", quantized, wrap_idx)
-        # Out of boundary error cannot be tested with wrapped index
-        _test_gather((4,), [16], 0, "float32", quantized, oob=True)
-        _test_gather((1, 3, 3), [12], 0, "int32", quantized, oob=True)
-        _test_gather((1, 3, 3), [20], 1, "float32", quantized, oob=True)
-        _test_gather((1, 3, 3), [20, 20], 2, "float32", quantized, oob=True)
-
-
-#######################################################################
-# Gather_ND
-# ---------
-
-
-def _test_gather_nd(data, indices):
-    """One iteration of GATHER_ND"""
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(shape=data.shape, dtype=data.dtype, name="data")
-        indices_data = tf.placeholder(shape=indices.shape, dtype=indices.dtype, name="indices")
-        out = tf.gather_nd(in_data, indices_data)
-
-        compare_tflite_with_tvm(
-            [data, indices], ["data:0", "indices:0"], [in_data, indices_data], [out]
-        )
-
-
-def test_forward_gather_nd():
-    """GATHER_ND"""
-    _test_gather_nd(
-        np.array([[[1.2, 2.0], [3.1, 4.1]], [[5.1, 6.1], [7.1, 8.1]]]).astype("float32"),
-        np.asarray([[0, 1], [1, 0]]).astype("int32"),
-    )
-    _test_gather_nd(
-        np.reshape(np.arange(30), [5, 6]).astype("int32"), np.asarray([[1, 2]]).astype("int32")
-    )
-    _test_gather_nd(
-        np.reshape(np.arange(12), [2, 3, 2]).astype("int32"),
-        np.asarray([[[0, 0], [0, 1]], [[1, 0], [1, 1]]]).astype("int32"),
-    )
-    _test_gather_nd(
-        np.reshape(np.arange(4), [4]).astype("float32"), np.asarray([1]).astype("int32")
-    )
-    _test_gather_nd(
-        np.reshape(np.arange(4), [1, 4]).astype("float32"), np.asarray([0]).astype("int32")
-    )
-    _test_gather_nd(
-        np.reshape(np.arange(4), [1, 4]).astype("float32"), np.asarray([0, 3]).astype("int32")
-    )
-
-
-#######################################################################
-# StridedSlice
-# ------------
-
-
-def _test_stridedslice(
-    ip_shape,
-    begin,
-    end,
-    stride,
-    dtype,
-    begin_mask=0,
-    end_mask=0,
-    new_axis_mask=0,
-    shrink_axis_mask=0,
-    ellipsis_mask=0,
-    quantized=False,
-):
-    """One iteration of a Stridedslice"""
-    data = np.random.uniform(size=ip_shape).astype(dtype)
-    data = data.astype(np.uint8) if quantized else data.astype(dtype)
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, ip_shape, name="in_data")
-        out = array_ops.strided_slice(
-            in_data,
-            begin,
-            end,
-            stride,
-            begin_mask=begin_mask,
-            end_mask=end_mask,
-            new_axis_mask=new_axis_mask,
-            shrink_axis_mask=shrink_axis_mask,
-            ellipsis_mask=ellipsis_mask,
-        )
-        input_range = {"in_data": (-100, 100)} if quantized else None
-        compare_tflite_with_tvm(
-            [data], ["in_data:0"], [in_data], [out], quantized=quantized, input_range=input_range
-        )
-
-
-def test_forward_stridedslice():
-    """test StridedSlice"""
-    for quantized in [False, True]:
-        _test_stridedslice(
-            (1, 3, 3),
-            [0, 0, 0],
-            [3, 3, 3],
-            [1, 1, 1],
-            "float32",
-            shrink_axis_mask=7,
-            quantized=quantized,
-        )
-        _test_stridedslice(
-            (1, 3, 3),
-            [0, 0, 0],
-            [3, 3, 3],
-            [1, 1, 1],
-            "float32",
-            shrink_axis_mask=5,
-            quantized=quantized,
-        )
-        _test_stridedslice((2), [1], [1], [1], "float32", shrink_axis_mask=1, quantized=quantized)
-        _test_stridedslice(
-            (3, 4, 3), [1, -1, 0], [4, -5, 3], [2, -1, 1], "float32", quantized=quantized
-        )
-        _test_stridedslice(
-            (3, 4), [1, 0], [4, 4], [1, 1], "float32", shrink_axis_mask=0, quantized=quantized
-        )
-        _test_stridedslice(
-            (4, 4), [1, 0], [4, 4], [1, 1], "float32", shrink_axis_mask=2, quantized=quantized
-        )
-        _test_stridedslice(
-            (3, 4), [-1, 0], [0, 3], [1, 1], "float32", shrink_axis_mask=1, quantized=quantized
-        )
-
-
-#######################################################################
-# transpose
-# ---------
-
-
-def _test_forward_transpose(ishape, axes=()):
-    data = np.random.uniform(size=ishape).astype(np.float32)
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-
-        if not axes:
-            out = array_ops.transpose(in_data)
-        else:
-            out = array_ops.transpose(in_data, axes)
-
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_transpose():
-    _test_forward_transpose((2, 2))
-    _test_forward_transpose((2, 3, 4))
-    _test_forward_transpose((7, 8, 8, 10))
-    _test_forward_transpose((2, 3, 4), (1, 2, 0))
-    _test_forward_transpose((2, 3, 4), (0, 1, 2))
-    _test_forward_transpose((2, 3, 4, 5), (3, 0, 1, 2))
-    _test_forward_transpose((2, 3, 4, 5), ())
-
-
-#######################################################################
-# Cast
-# ----
-
-
-def _test_cast(data, cast_dtype, use_mlir=False):
-    """One iteration of CAST"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = math_ops.cast(in_data, cast_dtype)
-        compare_tflite_with_tvm(
-            data, "Placeholder:0", [in_data], [out], experimental_new_converter=use_mlir
-        )
-
-
-def test_forward_cast():
-    """CAST"""
-    for use_mlir in [False, True]:
-        _test_cast(
-            np.arange(6.0, dtype=np.float32).reshape((1, 6)), cast_dtype=tf.int32, use_mlir=use_mlir
-        )
-        _test_cast(
-            np.arange(6.0, dtype=np.float32).reshape((1, 6)), cast_dtype=tf.uint8, use_mlir=use_mlir
-        )
-        _test_cast(
-            np.arange(6.0, dtype=np.int32).reshape((1, 6)), cast_dtype=tf.int64, use_mlir=use_mlir
-        )
-
-
-#######################################################################
-# Batch Mat Mul
-# ----
-def _test_batch_matmul(
-    a_shape, b_shape, dtype, out_dtype, adjoint_a=False, adjoint_b=False, quantized=False
-):
-    with tf.Graph().as_default():
-        a = array_ops.placeholder(shape=a_shape, dtype=dtype, name="A")
-        b = array_ops.placeholder(shape=b_shape, dtype=dtype, name="B")
-        print(tf.__version__)
-
-        result = raw_ops.BatchMatMulV3(
-            x=a, y=b, Tout=out_dtype, adj_x=adjoint_a, adj_y=adjoint_b, name="batchmatmul"
-        )
-        input_range = {"A": (-100, 100), "B": (-100, 100)} if quantized else None
-
-        a_np = np.random.uniform(high=5.0, size=a_shape).astype(dtype)
-        b_np = np.random.uniform(high=5.0, size=b_shape).astype(dtype)
-        compare_tflite_with_tvm(
-            [a_np, b_np],
-            [a.name, b.name],
-            [a, b],
-            [result],
-            experimental_new_converter=True,
-            quantized=quantized,
-            input_range=input_range,
-        )
-
-
-@pytest.mark.parametrize("config", [("int8", "int32", True), ("float32", "float32", False)])
-def test_forward_batch_matmul(config):
-    """BATCH_MAT_MUL"""
-    _test_batch_matmul(
-        (3, 5, 4), (3, 4, 5), dtype=config[0], out_dtype=config[1], quantized=config[2]
-    )
-    _test_batch_matmul(
-        (3, 5, 4),
-        (3, 4, 5),
-        dtype=config[0],
-        out_dtype=config[1],
-        adjoint_a=True,
-        adjoint_b=True,
-        quantized=config[2],
-    )
-    _test_batch_matmul(
-        (3, 5, 4),
-        (3, 5, 4),
-        dtype=config[0],
-        out_dtype=config[1],
-        adjoint_a=True,
-        adjoint_b=False,
-        quantized=config[2],
-    )
-    _test_batch_matmul(
-        (2, 3, 5, 4),
-        (1, 3, 5, 4),
-        dtype=config[0],
-        out_dtype=config[1],
-        adjoint_a=True,
-        adjoint_b=False,
-        quantized=config[2],
-    )
-    _test_batch_matmul(
-        (3, 5, 4),
-        (3, 5, 4),
-        dtype=config[0],
-        out_dtype=config[1],
-        adjoint_a=False,
-        adjoint_b=True,
-        quantized=config[2],
-    )
-    _test_batch_matmul(
-        (2, 3, 5, 4),
-        (1, 3, 5, 4),
-        dtype=config[0],
-        out_dtype=config[1],
-        adjoint_a=False,
-        adjoint_b=True,
-        quantized=config[2],
-    )
-    _test_batch_matmul(
-        (3, 4, 5, 6), (3, 4, 6, 5), dtype=config[0], out_dtype=config[1], quantized=config[2]
-    )
-    # BatchMatMul doesn't support larger than 4D tensors
-    # _test_batch_matmul(
-    #    (2, 3, 4, 5, 6), (2, 3, 4, 6, 5), dtype=config[0], out_dtype=config[1], quantized=config[2]
-    # )
-
-
-#######################################################################
-# Tile
-# ----
-
-
-def _test_forward_tile(in_shape, reps, dtype):
-    data = np.random.uniform(-5, 5, size=in_shape).astype(dtype)
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-
-        out = array_ops.tile(in_data, reps)
-
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_tile():
-    _test_forward_tile((2,), (3,), "int32")
-    _test_forward_tile((2, 2), (2, 3), "float32")
-
-
-######################################################################
-# BatchToSpaceND
-# --------------
-
-
-def _test_batch_to_space_nd(input_shape, block_shape, crops, dtype="int32"):
-    data = np.random.uniform(0, 5, size=input_shape).astype(dtype)
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=input_shape, dtype=dtype)
-
-        out = array_ops.batch_to_space_nd(in_data, block_shape, crops)
-
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_batch_to_space_nd():
-    # test cases: https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/batch-to-space-n-d
-    _test_batch_to_space_nd(input_shape=[4, 1, 1, 1], block_shape=[2, 2], crops=[[0, 0], [0, 0]])
-
-    _test_batch_to_space_nd(input_shape=[4, 1, 1, 3], block_shape=[2, 2], crops=[[0, 0], [0, 0]])
-
-    _test_batch_to_space_nd(input_shape=[4, 2, 2, 1], block_shape=[2, 2], crops=[[0, 0], [0, 0]])
-
-    _test_batch_to_space_nd(input_shape=[4, 3, 3, 1], block_shape=[2, 2], crops=[[0, 1], [0, 1]])
-
-
-######################################################################
-# SpaceToBatchND
-# --------------
-
-
-def _test_space_to_batch_nd(input_shape, block_shape, paddings, dtype="int32"):
-    data = np.random.uniform(0, 5, size=input_shape).astype(dtype)
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=input_shape, dtype=dtype)
-
-        out = array_ops.space_to_batch_nd(in_data, block_shape, paddings)
-
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_space_to_batch_nd():
-    # test cases: https://www.tensorflow.org/api_docs/python/tf/space_to_batch_nd
-    _test_space_to_batch_nd(input_shape=[1, 2, 2, 1], block_shape=[2, 2], paddings=[[0, 0], [0, 0]])
-
-    _test_space_to_batch_nd(input_shape=[1, 2, 2, 3], block_shape=[2, 2], paddings=[[0, 0], [0, 0]])
-
-    _test_space_to_batch_nd(input_shape=[1, 4, 4, 1], block_shape=[2, 2], paddings=[[0, 0], [0, 0]])
-
-    _test_space_to_batch_nd(input_shape=[2, 2, 4, 1], block_shape=[2, 2], paddings=[[0, 0], [2, 0]])
-
-
-#######################################################################
-# Pooling
-# -------
-def _test_pooling_iteration(input_shape, **kwargs):
-    """One iteration of pool operation with given shapes and attributes"""
-
-    x = -np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) - 1
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=input_shape, dtype="float32")
-        out = nn_ops.pool(in_data, **kwargs)
-
-        compare_tflite_with_tvm(x, "Placeholder:0", [in_data], [out])
-
-
-def _test_pooling(input_shape, **kwargs):
-    _test_pooling_iteration(input_shape, **kwargs)
-
-
-def test_forward_pooling():
-    """Pooling"""
-
-    for pool_type in ["AVG", "MAX"]:
-        _test_pooling(
-            input_shape=[2, 9, 10, 2],
-            window_shape=[1, 1],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[1, 1],
-        )
-
-        _test_pooling(
-            input_shape=[2, 10, 9, 2],
-            window_shape=[1, 1],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[1, 1],
-        )
-
-        _test_pooling(
-            input_shape=[2, 9, 10, 2],
-            window_shape=[2, 1],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[1, 1],
-        )
-
-        _test_pooling(
-            input_shape=[2, 10, 9, 2],
-            window_shape=[2, 3],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[2, 1],
-        )
-
-
-def _test_l2_pool2d(input_shape, ksize, strides, padding, data_format, fused_func_name=None):
-    x = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) - 1
-
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype=tf.float32, name="input", shape=input_shape)
-        out = tf.sqrt(
-            tf.nn.avg_pool(
-                tf.square(in_data),
-                ksize=ksize,
-                strides=strides,
-                padding=padding,
-                data_format=data_format,
-            )
-        )
-        out = with_fused_activation_function(out, fused_func_name)
-
-        compare_tflite_with_tvm(x, "input", [in_data], [out])
-
-
-def test_forward_l2_pool2d():
-    _test_l2_pool2d([1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], "SAME", "NHWC", "RELU6")
-    _test_l2_pool2d([2, 9, 10, 2], [1, 1, 1, 1], [1, 1, 1, 1], "SAME", "NHWC", "RELU6")
-    _test_l2_pool2d([2, 9, 10, 2], [1, 2, 1, 1], [1, 1, 1, 1], "SAME", "NHWC")
-    _test_l2_pool2d([2, 9, 10, 2], [1, 2, 1, 1], [1, 1, 2, 1], "SAME", "NHWC")
-    _test_l2_pool2d([1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], "VALID", "NHWC", "RELU")
-    _test_l2_pool2d([2, 9, 10, 2], [1, 1, 1, 1], [1, 1, 1, 1], "VALID", "NHWC")
-    _test_l2_pool2d([2, 9, 10, 2], [1, 2, 1, 1], [1, 1, 1, 1], "VALID", "NHWC")
-    _test_l2_pool2d([2, 9, 10, 2], [1, 2, 1, 1], [1, 1, 2, 1], "VALID", "NHWC", "RELU6")
-
-
-#######################################################################
-# Convolution
-# -----------
-
-
-def _test_tflite2_quantized_convolution(
-    input_shape,
-    kernel_shape,
-    filters,
-    padding="valid",
-    data_format=None,
-    int_quant_dtype=tf.int8,
-    groups=1,
-):
-    """One iteration of TFLite2 quantized convolution with given shapes and attributes"""
-    data_format = "channels_last" if data_format == "NHWC" else "channels_first"
-    data = np.random.uniform(0, 1, input_shape).astype("float32")
-    _ = np.random.uniform(0, 1, kernel_shape).astype("float32")
-
-    data_in = tf.keras.layers.Input(shape=data.shape[1:])
-    conv = tf.keras.layers.Conv2D(
-        filters=filters,
-        kernel_size=(kernel_shape[0], kernel_shape[1]),
-        activation=tf.nn.relu,
-        padding=padding,
-        data_format=data_format,
-        groups=groups,
-    )(data_in)
-    keras_model = tf.keras.models.Model(data_in, conv)
-
-    # To create quantized values with dynamic range of activations, needs representative dataset
-    def representative_data_gen():
-        for _ in range(1):
-            yield [data]
-
-    tflite_model_quant = _quantize_keras_model(
-        keras_model,
-        representative_data_gen,
-        is_float_input=True,
-        is_float_output=True,
-        int_quant_dtype=int_quant_dtype,
-    )
-    # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1
-    try:
-        import tflite.Model
-
-        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_quant, 0)
-    except AttributeError:
-        import tflite
-
-        tflite_model = tflite.Model.GetRootAsModel(tflite_model_quant, 0)
-    except ImportError as exc:
-        raise ImportError("The tflite package must be installed") from exc
-
-    subgraph = tflite_model.Subgraphs(0)
-    model_input = subgraph.InputsAsNumpy()
-    input_node = subgraph.Tensors(model_input).Name().decode("utf-8")
-
-    tflite_output = run_tflite_graph(tflite_model_quant, data)
-    if package_version.parse(tf.__version__) < package_version.parse("2.9"):
-        input_node = data_in.name.replace(":0", "")
-    else:
-        input_node = "serving_default_" + data_in.name + ":0"
-    tvm_output = run_tvm_graph(tflite_model_quant, data, input_node)
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-2, atol=1e-2
-    )
-
-
-def test_forward_quantized_convolution():
-    """Quantized convolution"""
-    for int_quant_dtype in [tf.int8, tf.int16]:
-        _test_tflite2_quantized_convolution(
-            (1, 28, 28, 1),
-            (1, 1),
-            12,
-            data_format="NHWC",
-            int_quant_dtype=int_quant_dtype,
-        )
-
-        _test_tflite2_quantized_convolution(
-            (1, 1, 28, 28),
-            (1, 1),
-            12,
-            data_format="NCWH",
-            int_quant_dtype=int_quant_dtype,
-        )
-
-        _test_tflite2_quantized_convolution(
-            (64, 2, 28, 28),
-            (1, 1),
-            12,
-            data_format="NCWH",
-            int_quant_dtype=int_quant_dtype,
-        )
-
-        _test_tflite2_quantized_convolution(
-            (2, 32, 28, 28),
-            (1, 1),
-            16,
-            data_format="NCWH",
-            int_quant_dtype=int_quant_dtype,
-            groups=8,
-        )
-
-        if platform.machine() == "aarch64":
-            pytest.skip(
-                reason=(
-                    "Grouped convolution type inference error for `arm_cpu`. "
-                    "See https://github.com/apache/tvm/issues/16532"
-                )
-            )
-
-        _test_tflite2_quantized_convolution(
-            (1, 16, 10, 10),
-            (3, 3),
-            2,
-            data_format="NCWH",
-            int_quant_dtype=int_quant_dtype,
-            groups=2,
-        )
-
-
-def test_forward_quantized_depthwise_convolution():
-    """Test qnn.conv2d depthwise compiled with TVM against TFLite reference."""
-    for int_quant_dtype in [tf.int8, tf.int16]:
-        _test_tflite2_quantized_depthwise_convolution(
-            [1, 17, 17, 12], [3, 3, 12, 1], [1, 1], [2, 2], "VALID", "NHWC", 1, int_quant_dtype
-        )
-        _test_tflite2_quantized_depthwise_convolution(
-            [1, 24, 24, 3], [7, 7, 3, 8], [1, 1], [2, 2], "SAME", "NHWC", 8, int_quant_dtype
-        )
-    _test_tflite2_quantized_depthwise_convolution(
-        [1, 8, 8, 128], [1, 1, 128, 1], [1, 1], [1, 1], "SAME", "NHWC", 1, tf.int8
-    )
-
-    if platform.machine() == "aarch64":
-        pytest.skip(
-            reason=(
-                "Tensor intrinsic data type mismatch error. "
-                "See https://github.com/apache/tvm/issues/16533"
-            )
-        )
-
-    _test_tflite2_quantized_depthwise_convolution(
-        [1, 8, 8, 128], [1, 1, 128, 1], [1, 1], [1, 1], "SAME", "NHWC", 1, tf.int16
-    )
-
-
-def _test_tflite2_quantized_depthwise_convolution(
-    input_shape,
-    kernel_shape,
-    dilations,
-    strides,
-    padding,
-    data_format,
-    depth_multiplier,
-    int_quant_dtype=tf.int8,
-):
-    """One iteration of TFLite2 quantized depthwise convolution with given shapes and attributes"""
-
-    data_format = "channels_last" if data_format == "NHWC" else "channels_first"
-    data = np.random.uniform(0, 1, input_shape).astype("float32")
-    kernel = np.random.uniform(0, 1, kernel_shape).astype("float32")
-
-    data_in = tf.keras.layers.Input(shape=data.shape[1:])
-    conv = tf.keras.layers.DepthwiseConv2D(
-        kernel_size=(kernel_shape[0], kernel_shape[1]),
-        strides=strides,
-        padding=padding,
-        data_format=data_format,
-        activation="relu",
-        use_bias=False,
-        depth_multiplier=depth_multiplier,
-    )(data_in)
-    keras_model = tf.keras.models.Model(data_in, conv)
-    keras_model.layers[1].set_weights([kernel])
-
-    # To create quantized values with dynamic range of activations, needs representative dataset
-    def representative_data_gen():
-        for _ in range(1):
-            yield [data]
-
-    tflite_model_quant = _quantize_keras_model(
-        keras_model,
-        representative_data_gen,
-        is_float_input=True,
-        is_float_output=True,
-        int_quant_dtype=int_quant_dtype,
-    )
-
-    # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1
-    try:
-        import tflite.Model
-
-        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_quant, 0)
-    except AttributeError:
-        import tflite
-
-        tflite_model = tflite.Model.GetRootAsModel(tflite_model_quant, 0)
-    except ImportError as exc:
-        raise ImportError("The tflite package must be installed") from exc
-
-    subgraph = tflite_model.Subgraphs(0)
-    model_input = subgraph.InputsAsNumpy()
-    input_node = subgraph.Tensors(model_input).Name().decode("utf-8")
-
-    tflite_output = run_tflite_graph(tflite_model_quant, data)
-    tvm_output = run_tvm_graph(tflite_model_quant, data, input_node)
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-2, atol=1e-2
-    )
-
-
-def _test_convolution(
-    tensor_in_sizes,
-    filter_in_sizes,
-    dilations,
-    strides,
-    padding,
-    data_format,
-    is_depthwise=False,
-    quantized=False,
-    fp16_quantized=False,
-):
-    """One iteration of convolution with given shapes and attributes"""
-
-    total_size_1 = 1
-    total_size_2 = 1
-    for s in tensor_in_sizes:
-        total_size_1 *= s
-    for s in filter_in_sizes:
-        total_size_2 *= s
-    # Initializes the input tensor with array containing incrementing
-    # numbers from 1.
-    if quantized:
-        data_array = np.random.uniform(0, 255, tensor_in_sizes).astype("uint8")
-        filter_array = np.random.uniform(0, 255, filter_in_sizes).astype("uint8")
-    else:
-        data_array = [f * 1.0 for f in range(1, total_size_1 + 1)]
-        filter_array = [f * 1.0 for f in range(1, total_size_2 + 1)]
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32", name="in_data")
-        in_filter = constant_op.constant(
-            filter_array, shape=filter_in_sizes, dtype="float32", name="in_filter"
-        )
-        strides = [1] + strides + [1]
-        dilations = [1] + dilations + [1]
-
-        if is_depthwise:
-            out = nn_ops.depthwise_conv2d_native(
-                in_data, in_filter, strides=strides, padding=padding, data_format=data_format
-            )
-        else:
-            out = nn_ops.conv2d(
-                in_data, in_filter, strides=strides, padding=padding, data_format=data_format
-            )
-
-        if quantized and not fp16_quantized:
-            if is_depthwise:
-                # Quantized the inputs and feed them to the convolution
-                inq_data = tf.quantization.fake_quant_with_min_max_args(
-                    in_data, min=-100, max=100, name="inq_data"
-                )
-                inq_filter = tf.quantization.fake_quant_with_min_max_args(
-                    in_filter, min=-100, max=100, name="inq_filter"
-                )
-                out = nn_ops.depthwise_conv2d_native(
-                    inq_data, inq_filter, strides=strides, padding=padding, data_format=data_format
-                )
-                out = tf.quantization.fake_quant_with_min_max_args(
-                    out, min=-200, max=200, name="out"
-                )
-
-                # Set the input quantization range
-                input_range = {"in_data": (-100, 100)} if quantized else None
-
-                # Compare
-                compare_tflite_with_tvm(
-                    data_array,
-                    "in_data",
-                    [in_data],
-                    [out],
-                    quantized=quantized,
-                    input_range=input_range,
-                    experimental_new_converter=True,
-                )
-            else:
-                # Quantized the inputs and feed them to the convolution
-                inq_data = tf.quantization.fake_quant_with_min_max_args(
-                    in_data, min=-100, max=100, name="inq_data"
-                )
-                inq_filter = tf.quantization.fake_quant_with_min_max_args(
-                    in_filter, min=-100, max=100, name="inq_filter"
-                )
-                out = nn_ops.conv2d(
-                    inq_data, inq_filter, strides=strides, padding=padding, data_format=data_format
-                )
-                out = tf.quantization.fake_quant_with_min_max_args(
-                    out, min=-200, max=200, name="out"
-                )
-
-                # Set the input quantization range
-                input_range = {"in_data": (-100, 100)} if quantized else None
-
-                # Compare
-                compare_tflite_with_tvm(
-                    data_array,
-                    "in_data",
-                    [in_data],
-                    [out],
-                    quantized=quantized,
-                    input_range=input_range,
-                    experimental_new_converter=True,
-                )
-        else:
-            data_array = np.reshape(data_array, tensor_in_sizes).astype("float32")
-            compare_tflite_with_tvm(data_array, "in_data", [in_data], [out])
-
-
-def test_forward_convolution():
-    """Convolution"""
-    for quantized in [False, True]:
-        for fp16_quantized in [False, True]:
-            _test_convolution(
-                [4, 8, 8, 176],
-                [1, 1, 176, 32],
-                [1, 1],
-                [1, 1],
-                "SAME",
-                "NHWC",
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-            _test_convolution(
-                [4, 17, 17, 19],
-                [3, 3, 19, 19],
-                [1, 1],
-                [2, 2],
-                "VALID",
-                "NHWC",
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-            _test_convolution(
-                [4, 17, 17, 124],
-                [1, 1, 124, 19],
-                [1, 1],
-                [1, 1],
-                "SAME",
-                "NHWC",
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-            _test_convolution(
-                [4, 17, 17, 12],
-                [3, 3, 12, 32],
-                [1, 1],
-                [2, 2],
-                "VALID",
-                "NHWC",
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-
-            # depthwise convolution
-            _test_convolution(
-                [4, 8, 8, 176],
-                [1, 1, 176, 1],
-                [1, 1],
-                [1, 1],
-                "SAME",
-                "NHWC",
-                True,
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-            _test_convolution(
-                [4, 17, 17, 19],
-                [3, 3, 19, 1],
-                [1, 1],
-                [2, 2],
-                "VALID",
-                "NHWC",
-                True,
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-            _test_convolution(
-                [4, 17, 17, 124],
-                [1, 1, 124, 1],
-                [1, 1],
-                [1, 1],
-                "SAME",
-                "NHWC",
-                True,
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-            _test_convolution(
-                [4, 17, 17, 12],
-                [3, 3, 12, 1],
-                [1, 1],
-                [2, 2],
-                "VALID",
-                "NHWC",
-                True,
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-            _test_convolution(
-                [4, 17, 17, 12],
-                [3, 3, 12, 2],
-                [1, 1],
-                [2, 2],
-                "VALID",
-                "NHWC",
-                True,
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-            # depthwise convolution with single input channel
-            _test_convolution(
-                [1, 76, 64, 1],
-                [9, 5, 1, 96],
-                [1, 1],
-                [1, 1],
-                "SAME",
-                "NHWC",
-                True,
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-
-    # TFLite2 quantized convolution testing
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.3.0"):
-        _test_convolution(
-            [1, 8, 8, 176], [1, 1, 176, 32], [1, 1], [1, 1], "SAME", "NHWC", quantized=True
-        )
-        _test_convolution(
-            [1, 17, 17, 12], [3, 3, 12, 32], [1, 1], [2, 2], "VALID", "NHWC", quantized=True
-        )
-        _test_convolution(
-            [1, 17, 17, 19], [3, 3, 19, 19], [1, 1], [2, 2], "VALID", "NHWC", quantized=True
-        )
-        _test_convolution(
-            [1, 17, 17, 124], [1, 1, 124, 19], [1, 1], [1, 1], "SAME", "NHWC", quantized=True
-        )
-
-
-#######################################################################
-# Transpose Convolution
-# ---------------------
-
-
-def _test_transpose_conv(
-    tensor_in_sizes,
-    filter_in_sizes,
-    output_shape,
-    strides,
-    padding,
-    quantized=False,
-    fp16_quantized=False,
-):
-    """One iteration of transpose convolution with given shapes and attributes"""
-
-    total_size_1 = 1
-    total_size_2 = 1
-    for s in tensor_in_sizes:
-        total_size_1 *= s
-    for s in filter_in_sizes:
-        total_size_2 *= s
-
-    with tf.Graph().as_default():
-        if quantized and not fp16_quantized:
-            # Initializes the input tensor with array containing incrementing
-            # numbers from 1.
-            data_array = [max(f, 255) for f in range(1, total_size_1 + 1)]
-            filter_array = [max(f, 255) for f in range(1, total_size_2 + 1)]
-            data_array = np.reshape(data_array, tensor_in_sizes).astype("uint8")
-            filter_array = np.reshape(filter_array, filter_in_sizes).astype("uint8")
-
-            in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32", name="in_data")
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-100, max=100, name="q_data"
-            )
-            input_range = {"q_data": (-100, 100)}
-
-            in_filter = constant_op.constant(
-                filter_array, shape=filter_in_sizes, dtype="float32", name="in_filter"
-            )
-            inq_filter = tf.quantization.fake_quant_with_min_max_args(
-                in_filter, min=-100, max=100, name="q_filter"
-            )
-
-            strides = [1] + strides + [1]
-
-            out = nn_ops.conv2d_transpose(
-                inq_data, inq_filter, output_shape=output_shape, strides=strides, padding=padding
-            )
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-100, max=100, name="out")
-            compare_tflite_with_tvm(
-                [data_array], ["q_data"], [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            # Initializes the input tensor with array containing incrementing
-            # numbers from 1.
-            data_array = [f * 1.0 for f in range(1, total_size_1 + 1)]
-            filter_array = [f * 1.0 for f in range(1, total_size_2 + 1)]
-
-            in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32", name="in_data")
-            in_filter = constant_op.constant(
-                filter_array, shape=filter_in_sizes, dtype="float32", name="in_filter"
-            )
-            strides = [1] + strides + [1]
-            # in_filter layout is HWOI
-            out = nn_ops.conv2d_transpose(
-                in_data, in_filter, output_shape=output_shape, strides=strides, padding=padding
-            )
-            data_array = np.reshape(data_array, tensor_in_sizes).astype("float32")
-            compare_tflite_with_tvm(
-                [data_array], ["in_data"], [in_data], [out], fp16_quantized=fp16_quantized
-            )
-
-
-def test_forward_transpose_conv():
-    """Transpose convolution"""
-    for quantized in [True, False]:
-        for fp16_quantized in [True, False]:
-            # odd size input, padding VALID
-            _test_transpose_conv(
-                [1, 5, 6, 16],
-                [2, 2, 16, 16],
-                [1, 10, 12, 16],
-                [2, 2],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-            # odd size input, padding SAME
-            _test_transpose_conv(
-                [1, 5, 6, 16],
-                [2, 2, 16, 16],
-                [1, 10, 12, 16],
-                [2, 2],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-            # kernel 3x3, padding VALID
-            _test_transpose_conv(
-                [4, 32, 32, 16],
-                [3, 3, 5, 16],
-                [4, 34, 34, 5],
-                [1, 1],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [3, 3, 5, 16],
-                [1, 65, 65, 5],
-                [2, 2],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [3, 3, 5, 16],
-                [1, 65, 34, 5],
-                [2, 1],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-
-            # kernel 3x3, padding SAME
-            _test_transpose_conv(
-                [4, 32, 32, 16],
-                [3, 3, 5, 16],
-                [4, 32, 32, 5],
-                [1, 1],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [3, 3, 5, 16],
-                [1, 64, 64, 5],
-                [2, 2],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [3, 3, 5, 16],
-                [1, 64, 32, 5],
-                [2, 1],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-
-            # kernel 2x2, padding VALID
-            _test_transpose_conv(
-                [4, 32, 32, 16],
-                [2, 2, 5, 16],
-                [4, 33, 33, 5],
-                [1, 1],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [2, 2, 5, 16],
-                [1, 64, 64, 5],
-                [2, 2],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [2, 2, 5, 16],
-                [1, 64, 33, 5],
-                [2, 1],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-
-            # kernel 2x2, padding SAME
-            _test_transpose_conv(
-                [4, 32, 32, 16],
-                [2, 2, 5, 16],
-                [4, 32, 32, 5],
-                [1, 1],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [2, 2, 5, 16],
-                [1, 64, 64, 5],
-                [2, 2],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [2, 2, 5, 16],
-                [1, 64, 32, 5],
-                [2, 1],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-
-            # kernel 1x1, padding VALID
-            _test_transpose_conv(
-                [4, 32, 32, 16],
-                [1, 1, 5, 16],
-                [4, 32, 32, 5],
-                [1, 1],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [1, 1, 5, 16],
-                [1, 63, 63, 5],
-                [2, 2],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [1, 1, 5, 16],
-                [1, 63, 32, 5],
-                [2, 1],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-
-            # kernel 1x1, padding SAME
-            _test_transpose_conv(
-                [4, 32, 32, 16],
-                [1, 1, 5, 16],
-                [4, 32, 32, 5],
-                [1, 1],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [1, 1, 5, 16],
-                [1, 63, 63, 5],
-                [2, 2],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [1, 1, 5, 16],
-                [1, 63, 32, 5],
-                [2, 1],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-
-
-def _test_tflite2_quantized_transpose_conv(
-    input_shape,
-    kernel_shape,
-    filters,
-    padding="valid",
-    strides=(1, 1),
-    data_format=None,
-    int_quant_dtype=tf.int8,
-):
-    """One iteration of TFLite2 quantized tranpose conv with given shapes and attributes"""
-    data_format = "channels_last" if data_format == "NHWC" else "channels_first"
-    data = np.random.uniform(0, 1, input_shape).astype("float32")
-    _ = np.random.uniform(0, 1, kernel_shape).astype("float32")
-
-    data_in = tf.keras.layers.Input(shape=data.shape[1:], batch_size=1)
-    transpose_conv = tf.keras.layers.Conv2DTranspose(
-        filters=filters,
-        kernel_size=(kernel_shape[0], kernel_shape[1]),
-        padding=padding,
-        strides=strides,
-        use_bias=True,
-    )(data_in)
-    keras_model = tf.keras.models.Model(data_in, transpose_conv)
-
-    # To create quantized values with dynamic range of activations, needs representative dataset
-    def representative_data_gen():
-        for _ in range(1):
-            yield [data]
-
-    tflite_model_quant = _quantize_keras_model(
-        keras_model,
-        representative_data_gen,
-        is_float_input=True,
-        is_float_output=True,
-        int_quant_dtype=int_quant_dtype,
-    )
-
-    # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1
-    try:
-        import tflite.Model
-
-        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_quant, 0)
-    except AttributeError:
-        import tflite
-
-        tflite_model = tflite.Model.GetRootAsModel(tflite_model_quant, 0)
-    except ImportError as exc:
-        raise ImportError("The tflite package must be installed") from exc
-
-    subgraph = tflite_model.Subgraphs(0)
-    model_input = subgraph.InputsAsNumpy()
-    input_node = subgraph.Tensors(model_input).Name().decode("utf-8")
-
-    tflite_output = run_tflite_graph(tflite_model_quant, data)
-
-    if package_version.parse(tf.__version__) < package_version.parse("2.9"):
-        input_node = data_in.name.replace(":0", "")
-    else:
-        input_node = "serving_default_" + data_in.name + ":0"
-
-    tvm_output = run_tvm_graph(tflite_model_quant, data, input_node)
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-2, atol=1e-2
-    )
-
-
-def test_forward_quantized_transpose_conv():
-    """Quantized convolution"""
-    for int_quant_dtype in [tf.int8, tf.int16]:
-        _test_tflite2_quantized_transpose_conv(
-            (1, 1, 5, 64),
-            (3, 3),
-            64,
-            padding="same",
-            strides=(1, 2),
-            data_format="NHWC",
-            int_quant_dtype=int_quant_dtype,
-        )
-
-
-#######################################################################
-# Reshape
-# -------
-
-
-def _test_reshape(data, out_shape, wrap_shape, quantized=False):
-    """One iteration of reshape operation with given data and out shape"""
-    if quantized:
-        with tf.Graph().as_default():
-            in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in")
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-100, max=100, name="inq_0"
-            )
-
-            input_range = {"inq_0": (-100, 100)}
-            out_shape = out_shape if not wrap_shape else np.array(out_shape, dtype=np.int32)
-
-            in_shape = (
-                out_shape
-                if not wrap_shape
-                else array_ops.placeholder(
-                    shape=out_shape.shape, dtype=out_shape.dtype, name="Newshape"
-                )
-            )
-
-            out = array_ops.reshape(inq_data, in_shape)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-200, max=200, name="out")
-            compare_tflite_with_tvm(
-                [data, out_shape] if wrap_shape else [data],
-                ["inq_0:0", "Newshape:0"] if wrap_shape else ["inq_0:0"],
-                [inq_data, in_shape] if wrap_shape else [inq_data],
-                [out],
-                quantized=True,
-                input_range=input_range,
-                mode="vm",
-            )
-    else:
-        # Test with tensor and constant
-        with tf.Graph().as_default():
-            in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-
-            out_shape = out_shape if not wrap_shape else np.array(out_shape, dtype=np.int32)
-
-            in_shape = (
-                out_shape
-                if not wrap_shape
-                else array_ops.placeholder(
-                    shape=out_shape.shape, dtype=out_shape.dtype, name="Newshape"
-                )
-            )
-
-            out = array_ops.reshape(in_data, in_shape)
-
-            compare_tflite_with_tvm(
-                [data, out_shape] if wrap_shape else [data],
-                ["Placeholder:0", "Newshape:0"] if wrap_shape else ["Placeholder:0"],
-                [in_data, in_shape] if wrap_shape else [in_data],
-                [out],
-                mode="vm",
-            )
-
-
-def test_forward_reshape():
-    for wrap in [True, False]:
-        _test_reshape(np.arange(6.0, dtype=np.float32), [2, 3], wrap)
-        _test_reshape(np.arange(6), [-1, 2], wrap)
-        _test_reshape(np.arange(6), [3, -1], wrap)
-        _test_reshape(np.arange(6), [-1], wrap)
-
-    _test_reshape(np.arange(6, dtype=np.uint8), [2, 3], False, True)
-    _test_reshape(np.arange(6, dtype=np.uint8), [-1, 2], False, True)
-
-
-#######################################################################
-# Resize
-# ------
-
-
-def _test_resize(
-    tf_resize_op, images_data, size_data, align_corners, half_pixel_centers, quantized=False
-):
-    """One iteration of Resize"""
-    # Test with tensor and constant
-    with tf.Graph().as_default():
-        images_tensor = array_ops.placeholder(shape=images_data.shape, dtype="float32", name="in")
-        size = ops.convert_to_tensor(size_data, dtype=size_data.dtype)
-
-        if quantized:
-            images_tensor_q = tf.quantization.fake_quant_with_min_max_args(
-                images_tensor, min=-3, max=2, name="in"
-            )
-            input_range = {"in": (-3, 2)}
-            out_tensor = tf_resize_op(
-                images=images_tensor_q,
-                size=size,
-                align_corners=align_corners,
-                half_pixel_centers=half_pixel_centers,
-            )
-            out_tensor = tf.quantization.fake_quant_with_min_max_args(
-                out_tensor, min=-3, max=2, name="out_tensor"
-            )
-
-            compare_tflite_with_tvm(
-                [images_data],
-                ["in:0"],
-                [images_tensor],
-                [out_tensor],
-                quantized=True,
-                input_range=input_range,
-            )
-        else:
-            out_tensor = tf_resize_op(
-                images=images_tensor,
-                size=size,
-                align_corners=align_corners,
-                half_pixel_centers=half_pixel_centers,
-            )
-            compare_tflite_with_tvm([images_data], ["in:0"], [images_tensor], [out_tensor])
-
-
-def test_all_resize():
-    """Resize"""
-    images_data = np.random.uniform(0, 255, (1, 16, 16, 3))
-    images_data_float32 = images_data.astype(np.float32)
-    images_data_uint8 = images_data.astype(np.uint8)
-    size_data = np.array([8, 8]).astype("int32")
-    ### RESIZE_BILINEAR
-    _test_resize(
-        tf.image.resize_bilinear,
-        images_data_float32,
-        size_data,
-        align_corners=False,
-        half_pixel_centers=False,
-        quantized=False,
-    )
-    _test_resize(
-        tf.image.resize_bilinear,
-        images_data_float32,
-        size_data,
-        align_corners=True,
-        half_pixel_centers=False,
-        quantized=False,
-    )
-    _test_resize(
-        tf.image.resize_bilinear,
-        images_data_uint8,
-        size_data,
-        align_corners=False,
-        half_pixel_centers=False,
-        quantized=True,
-    )
-    _test_resize(
-        tf.image.resize_bilinear,
-        images_data_uint8,
-        size_data,
-        align_corners=True,
-        half_pixel_centers=False,
-        quantized=True,
-    )
-    _test_resize(
-        tf.image.resize_bilinear,
-        images_data_uint8,
-        size_data,
-        align_corners=False,
-        half_pixel_centers=True,
-        quantized=True,
-    )
-    ### RESIZE_NEAREST_NEIGHBOR (was added in v1.13)
-    # According to topi resize.h
-    # Align corners not supported for nearest neighbour
-
-    if "RESIZE_NEAREST_NEIGHBOR" in dir(BuiltinOperator()):
-        _test_resize(
-            tf.image.resize_nearest_neighbor,
-            images_data_float32,
-            size_data,
-            align_corners=False,
-            half_pixel_centers=False,
-        )
-        _test_resize(
-            tf.image.resize_nearest_neighbor,
-            images_data_float32,
-            size_data,
-            align_corners=True,
-            half_pixel_centers=False,
-        )
-        _test_resize(
-            tf.image.resize_nearest_neighbor,
-            images_data_float32,
-            size_data,
-            align_corners=False,
-            half_pixel_centers=True,
-        )
-
-
-#######################################################################
-# Range
-# -----
-def _test_range(start, limit, delta):
-    # tflite 1.13 convert method does not accept empty shapes
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        tf.reset_default_graph()
-        with tf.Graph().as_default():
-            start_scalar, limit_scalar, delta_scalar = (
-                tf.placeholder(dtype=start.dtype, shape=(), name="start"),
-                tf.placeholder(dtype=limit.dtype, shape=(), name="limit"),
-                tf.placeholder(dtype=delta.dtype, shape=(), name="delta"),
-            )
-
-            out = tf.range(start_scalar, limit_scalar, delta_scalar, name="range")
-
-            compare_tflite_with_tvm(
-                [start, limit, delta],
-                ["start", "limit", "delta"],
-                [start_scalar, limit_scalar, delta_scalar],
-                [out],
-                mode="vm",
-                quantized=False,
-            )
-
-
-def _test_range_default():
-    # tflite 1.13 convert method does not accept empty shapes
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        tf.reset_default_graph()
-        with tf.Graph().as_default():
-            inputs = [
-                tf.placeholder(dtype=tf.int32, shape=(), name="p1"),
-                tf.placeholder(dtype=tf.int32, shape=(), name="p2"),
-            ]
-            outputs = [
-                tf.range(start=inputs[0], limit=inputs[1]),  # use default delta
-                tf.range(
-                    start=inputs[1]
-                ),  # use start as limit with 0 as the first item in the range
-            ]
-
-            compare_tflite_with_tvm(
-                [np.int32(1), np.int32(18)], ["p1", "p2"], inputs, outputs, mode="vm"
-            )
-
-
-def test_forward_range():
-    _test_range(np.int32(1), np.int32(18), np.int32(3))
-    _test_range(np.int32(1), np.int32(18), np.float32(3.1))  # increment is of type float
-    _test_range(np.float32(1.0), np.int32(18), np.int32(3.1))  # start is of type float
-    _test_range_default()
-
-
-#######################################################################
-# Shape
-# -----
-
-
-def _test_shape(dtype):
-    # tflite 1.13 convert method does not accept empty shapes
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        tf.reset_default_graph()
-        with tf.Graph().as_default():
-            data = np.array([1, 18, 3], dtype=np.int32)
-            start = tf.placeholder(dtype=tf.int32, shape=[], name="start")
-            limit = tf.placeholder(dtype=tf.int32, shape=[], name="limit")
-            delta = tf.placeholder(dtype=tf.int32, shape=[], name="delta")
-            tf_range = tf.range(start, limit, delta, tf.int32, name="range")
-            out = tf.shape(tf_range, out_type=dtype)
-            out = tf.add(out, tf.constant([1], dtype=dtype))
-            compare_tflite_with_tvm(
-                list(np.nditer(data)),
-                ["start", "limit", "delta"],
-                [start, limit, delta],
-                [out],
-                mode="vm",
-            )
-
-
-def test_forward_shape():
-    _test_shape(tf.int32)
-    _test_shape(tf.int64)
-
-
-#######################################################################
-# Concatenation
-# -------------
-
-
-def _test_concatenation(data, axis):
-    """One iteration of concatenation"""
-
-    assert len(data) >= 1
-
-    with tf.Graph().as_default():
-        in_data = [
-            array_ops.placeholder(shape=tensor.shape, dtype=tensor.dtype, name=f"in_{idx}")
-            for idx, tensor in enumerate(data)
-        ]
-        out = array_ops.concat(in_data, axis)
-        name = [f"in_{idx}:0" for idx in range(len(data))]
-
-        compare_tflite_with_tvm(data, name, in_data, [out])
-
-
-def test_forward_concatenation():
-
-    _test_concatenation([np.arange(6).reshape((1, 2, 1, 3)), np.arange(6).reshape((1, 2, 1, 3))], 1)
-
-    _test_concatenation([np.arange(6).reshape((3, 2)), np.arange(6).reshape((3, 2))], 1)
-
-    _test_concatenation(
-        [
-            np.arange(6).reshape((2, 1, 1, 3)),
-            np.arange(6).reshape((2, 1, 1, 3)),
-            np.arange(6).reshape((2, 1, 1, 3)),
-        ],
-        1,
-    )
-
-
-#######################################################################
-# Unary elemwise
-# --------------
-
-
-def _test_unary_elemwise(math_op, data, quantized, quant_range=(-6, 6), int_quant_dtype=tf.int8):
-    """One iteration of unary elemwise"""
-    if quantized:
-        with tf.Graph().as_default():
-            quant_min, quant_max = quant_range
-            in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=quant_min, max=quant_max, name="inq_0"
-            )
-            input_range = {"inq_0": (quant_min, quant_max)}
-            out = math_op(inq_data)
-            out = tf.quantization.fake_quant_with_min_max_args(
-                out, min=quant_min, max=quant_max, name="out"
-            )
-            compare_tflite_with_tvm(
-                data,
-                "inq_0:0",
-                [inq_data],
-                [out],
-                quantized=True,
-                input_range=input_range,
-                experimental_new_converter=True,
-                int_quant_dtype=int_quant_dtype,
-            )
-    else:
-        with tf.Graph().as_default():
-            in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype, name="in")
-            out = math_op(in_data)
-            compare_tflite_with_tvm(
-                data, ["in:0"], [in_data], [out], experimental_new_converter=True
-            )
-
-
-def _unary_elewise_create_model(math_op, data, offset=0, int_quant_dtype=tf.int8):
-    class Model(tf.Module):
-        @tf.function
-        def tf_function(self, x):
-            op = math_op(x)
-            return op
-
-    if int_quant_dtype in (tf.int8, tf.uint8):
-        _ = "int8"
-    elif int_quant_dtype in (tf.int16, tf.uint16):
-        _ = "int16"
-    else:
-        raise Exception(f"Unsupported dtype '{int_quant_dtype}' for unary elementwise test.")
-
-    model = Model()
-
-    # Save the model
-    export_dir = tempfile.gettempdir() + "/tf_model"
-    tf.saved_model.save(
-        model,
-        export_dir,
-        signatures=model.tf_function.get_concrete_function(
-            tf.TensorSpec(data.shape, tf.float32, name="input")
-        ),
-    )
-
-    # Convert the model
-    def representative_dataset():
-        for _ in range(100):
-            tmp_data = np.random.rand(*tuple(data.shape))
-            yield [tmp_data.astype(np.float32) * 2 - offset]
-
-    converter = tf.lite.TFLiteConverter.from_saved_model(export_dir)
-    converter.optimizations = [tf.lite.Optimize.DEFAULT]
-    converter.representative_dataset = representative_dataset
-
-    if int_quant_dtype in (tf.int16, tf.uint16):
-        converter.target_spec.supported_ops = [
-            tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
-        ]
-    else:
-        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
-
-    converter.inference_input_type = int_quant_dtype
-    converter.inference_output_type = int_quant_dtype
-
-    tflite_model = converter.convert()
-    return tflite_model
-
-
-#######################################################################
-# Abs
-# ----
-
-
-def _test_abs(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of abs"""
-    if quantized:
-        tflite_model_quant = _unary_elewise_create_model(
-            tf.math.abs, data, offset=1, int_quant_dtype=int_quant_dtype
-        )
-        tflite_output = run_tflite_graph(tflite_model_quant, data)
-
-        # TFLite 2.6.x upgrade support
-        if package_version.parse(tf.__version__) < package_version.parse("2.6.1"):
-            in_node = ["serving_default_input_int8"]
-        elif package_version.parse(tf.__version__) < package_version.parse("2.9"):
-            in_node = (
-                ["serving_default_input_int16"] if int_quant_dtype == tf.int16 else ["tfl.quantize"]
-            )
-        else:
-            in_node = "serving_default_input"
-
-        tvm_output = run_tvm_graph(tflite_model_quant, data, in_node)
-        tvm.testing.assert_allclose(
-            np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-2
-        )
-    else:
-        return _test_unary_elemwise(math_ops.abs, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Rsqrt
-# ----
-
-
-def _test_rsqrt(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of rsqrt"""
-
-    # tensorflow version upgrade support
-    if package_version.parse(tf.__version__) < package_version.parse("2.6.1") or not quantized:
-        return _test_unary_elemwise(
-            math_ops.rsqrt, data, quantized, quant_range=[1, 6], int_quant_dtype=int_quant_dtype
-        )
-    else:
-        tflite_model_quant = _unary_elewise_create_model(
-            tf.math.rsqrt, data, int_quant_dtype=int_quant_dtype
-        )
-        tflite_output = run_tflite_graph(tflite_model_quant, data)
-        if package_version.parse(tf.__version__) < package_version.parse("2.9"):
-            in_node = ["tfl.quantize"]
-        else:
-            in_node = "serving_default_input"
-        tvm_output = run_tvm_graph(tflite_model_quant, data, in_node)
-        tvm.testing.assert_allclose(
-            np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-2
-        )
-
-
-#######################################################################
-# Ceil
-# ----
-
-
-def _test_ceil(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of ceil"""
-    return _test_unary_elemwise(math_ops.ceil, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Floor
-# -----
-
-
-def _test_floor(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of floor"""
-    return _test_unary_elemwise(math_ops.floor, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Round
-# -----
-
-
-def _test_round(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of round"""
-    return _test_unary_elemwise(math_ops.round, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Exp
-# ---
-
-
-def _test_exp(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of exp"""
-    return _test_unary_elemwise(math_ops.exp, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Log
-# ---
-
-
-def _test_log(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of log"""
-    return _test_unary_elemwise(
-        math_ops.log, data, quantized, quant_range=[1, 6], int_quant_dtype=int_quant_dtype
-    )
-
-
-#######################################################################
-# Sin
-# ---
-
-
-def _test_sin(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of sin"""
-    return _test_unary_elemwise(math_ops.sin, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Cos
-# ---
-
-
-def _test_cos(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of cos"""
-    if quantized:
-        tflite_model_quant = _unary_elewise_create_model(
-            tf.math.cos, data, int_quant_dtype=int_quant_dtype
-        )
-        tflite_output = run_tflite_graph(tflite_model_quant, data)
-        if package_version.parse(tf.__version__) < package_version.parse("2.9"):
-            in_node = ["tfl.quantize"]
-        else:
-            in_node = "serving_default_input"
-        tvm_output = run_tvm_graph(tflite_model_quant, data, in_node)
-        tvm.testing.assert_allclose(
-            np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-2
-        )
-    else:
-        return _test_unary_elemwise(math_ops.cos, data, quantized)
-
-
-#######################################################################
-# Tan
-# ---
-
-
-def _test_tan(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of tan"""
-    return _test_unary_elemwise(math_ops.tan, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Square
-# ------
-
-
-def _test_square(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of square"""
-    return _test_unary_elemwise(math_ops.square, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Neg
-# ------
-
-
-def _test_neg(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of neg"""
-    return _test_unary_elemwise(math_ops.neg, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Sqrt
-# ------
-
-
-def _test_sqrt(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of sqrt"""
-    return _test_unary_elemwise(
-        math_ops.sqrt, data, quantized, quant_range=[1, 6], int_quant_dtype=int_quant_dtype
-    )
-
-
-#######################################################################
-# Elu
-# ---
-
-
-def _test_elu(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of elu"""
-    return _test_unary_elemwise(nn_ops.elu, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Gelu
-# ---
-
-
-def _test_gelu(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of elu"""
-    return _test_unary_elemwise(nn_ops.gelu, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-def _test_forward_unary_elemwise(test_op, int_quant_dtype=None, quantized=True, negative=True):
-    # input data
-    in_data, inq_data = [], []
-
-    np_dtype = int_quant_dtype.as_numpy_dtype if int_quant_dtype else np.uint8
-
-    # quantized input data
-    if quantized:
-        inq_data.append(np.arange(1, 240, 40, dtype=np_dtype))
-        inq_data.append(np.arange(1, 240, 40, dtype=np_dtype).reshape((2, 1, 3)))
-        if int_quant_dtype == np.int8:
-            inq_data.append(np.arange(-128, 127, 45, dtype=np.int8))
-
-    for data in inq_data:
-        test_op(data, quantized=True, int_quant_dtype=int_quant_dtype)
-
-    # normal input data
-    if negative:
-        in_data.append(np.arange(-2.0, 4.0, dtype=np.float32))
-        in_data.append(np.arange(-2.0, 4.0, dtype=np.float32).reshape((2, 1, 3)))
-    else:
-        in_data.append(np.arange(1.0, 7.0, dtype=np.float32))
-        in_data.append(np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 3)))
-
-    for data in in_data:
-        test_op(data, quantized=False, int_quant_dtype=int_quant_dtype)
-
-
-def test_all_unary_elemwise():
-    """All unary elemwise"""
-    _test_forward_unary_elemwise(_test_abs, int_quant_dtype=tf.int8)
-    _test_forward_unary_elemwise(_test_abs, int_quant_dtype=tf.int16)
-    _test_forward_unary_elemwise(_test_floor)
-    _test_forward_unary_elemwise(_test_exp)
-    _test_forward_unary_elemwise(_test_log, negative=False)
-    _test_forward_unary_elemwise(_test_square, int_quant_dtype=tf.int8)
-    _test_forward_unary_elemwise(_test_sin)
-    _test_forward_unary_elemwise(_test_neg)
-    _test_forward_unary_elemwise(_test_sqrt, negative=False)
-    _test_forward_unary_elemwise(_test_gelu, quantized=False)
-    # tensorflow version upgrade support
-    if package_version.parse(tf.VERSION) < package_version.parse("2.6.1"):
-        _test_forward_unary_elemwise(_test_rsqrt, negative=False, int_quant_dtype=tf.uint8)
-    else:
-        _test_forward_unary_elemwise(_test_rsqrt, negative=False, int_quant_dtype=tf.int8)
-    # ceil and cos come with TFLite 1.14.0.post1 fbs schema
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_forward_unary_elemwise(_test_ceil)
-        if package_version.parse(tf.VERSION) < package_version.parse("2.6.1"):
-            _test_forward_unary_elemwise(_test_cos, quantized=False)
-        else:
-            _test_forward_unary_elemwise(_test_cos, int_quant_dtype=tf.int8)
-        _test_forward_unary_elemwise(_test_round)
-        # This fails with TF and Tflite 1.15.2, this could not have been tested
-        # in CI or anywhere else. The failure mode is that we see a backtrace
-        # from the converter that we need to provide a custom Tan operator
-        # implementation.
-        # _test_forward_unary_elemwise(_test_tan)
-        _test_forward_unary_elemwise(_test_elu)
-
-
-#######################################################################
-# Element-wise
-# ------------
-
-
-def _test_elemwise(
-    math_op,
-    data,
-    fused_activation_function=None,
-    quantized=False,
-    qnn_op=None,
-    same_qnn_params=False,
-    comparison_op=False,
-    exclude_zero_point=False,
-):
-    """One iteration of elemwise"""
-
-    assert len(data) == 2
-
-    def __test_elemwise(in_data):
-        assert len(in_data) == 2
-        if quantized:
-            int_quant_dtype = None
-            if data[0].dtype == "int8":
-                int_quant_dtype = tf.int8
-            elif data[0].dtype == "uint8":
-                int_quant_dtype = tf.uint8
-            elif data[0].dtype == "int16":
-                int_quant_dtype = tf.int16
-            else:
-                assert False, "Unsupported conversion from numpy to tflite dtype!"
-
-            # set the fp32 output range with respect to the operation
-            out_min, out_max = _test_elemwise_qnn_out_range(qnn_op)
-            inq0_min, inq0_max = (-100, 100)
-            inq1_min, inq1_max = (-50, 50)
-
-            # if requested use same quantization parameters provided by _test_elemwise_qnn_out_range
-            if same_qnn_params:
-                inq0_min, inq0_max = (out_min, out_max)
-                inq1_min, inq1_max = (out_min, out_max)
-
-            if exclude_zero_point:
-                if inq1_max == inq1_min:
-                    raise ZeroDivisionError("Input range is 0.")
-
-                # only compute for rhs.
-                quant_scale = 255 / (inq1_max - inq1_min)
-                zero_point = int(round(-inq1_min * quant_scale))
-                data[1][data[1] == zero_point] += 1
-                data[1][data[1] == 0] += 1
-
-            # fake_quant will keep the tensors in float32 until the conversion in the session
-            inq_data = [
-                tf.quantization.fake_quant_with_min_max_args(
-                    in_data[0], min=out_min, max=out_max, name="inq_0"
-                )
-                if in_data[0] is not None
-                else tf.quantization.fake_quant_with_min_max_args(
-                    data[0], min=out_min, max=out_max, name="const_tensor0"
-                ),
-                tf.quantization.fake_quant_with_min_max_args(
-                    in_data[1], min=out_min, max=out_max, name="inq_1"
-                )
-                if in_data[1] is not None
-                else tf.quantization.fake_quant_with_min_max_args(
-                    data[1], min=out_min, max=out_max, name="const_tensor1"
-                ),
-            ]
-
-            input_range = {
-                x[1][0]: x[1][1]
-                for x in zip(
-                    in_data, (("inq_0", (inq0_min, inq0_max)), ("inq_1", (inq1_min, inq1_max)))
-                )
-                if x[0] is not None
-            }
-
-            if comparison_op:
-                out = math_op(inq_data[0], inq_data[1])
-                out = with_fused_activation_function(out, fused_activation_function)
-
-                compare_tflite_with_tvm(
-                    [x[1] for x in zip(in_data, data) if x[0] is not None],
-                    [x + ":0" for x in input_range.keys()],
-                    [x[1] for x in zip(in_data, inq_data) if x[0] is not None],
-                    [out],
-                    quantized=True,
-                    input_range=input_range,
-                    experimental_new_converter=same_qnn_params,
-                    int_quant_dtype=int_quant_dtype,
-                )
-            else:
-                out = math_op(inq_data[0], inq_data[1])
-                out = with_fused_activation_function(out, fused_activation_function)
-                out = tf.quantization.fake_quant_with_min_max_args(
-                    out, min=out_min, max=out_max, name="out"
-                )
-
-                # Note same_qnn_params uses experimental_new_converter as toco failed
-                compare_tflite_with_tvm(
-                    [x[1] for x in zip(in_data, data) if x[0] is not None],
-                    [x + ":0" for x in input_range.keys()],
-                    [x[1] for x in zip(in_data, inq_data) if x[0] is not None],
-                    [out],
-                    quantized=True,
-                    input_range=input_range,
-                    experimental_new_converter=same_qnn_params,
-                    int_quant_dtype=int_quant_dtype,
-                )
-        else:
-            out = math_op(
-                in_data[0]
-                if in_data[0] is not None
-                else ops.convert_to_tensor(data[0], dtype=data[0].dtype),
-                in_data[1]
-                if in_data[1] is not None
-                else ops.convert_to_tensor(data[1], dtype=data[1].dtype),
-            )
-            out = with_fused_activation_function(out, fused_activation_function)
-            compare_tflite_with_tvm(
-                [x[1] for x in zip(in_data, data) if x[0] is not None],
-                [x[1] for x in zip(in_data, ("in_0:0", "in_1:0")) if x[0] is not None],
-                [x for x in in_data if x is not None],
-                [out],
-            )
-
-    # Test with two tensors
-    with tf.Graph().as_default():
-        __test_elemwise(
-            in_data=[
-                array_ops.placeholder(shape=data[0].shape, dtype="float32", name="in_0"),
-                array_ops.placeholder(shape=data[1].shape, dtype="float32", name="in_1"),
-            ]
-        )
-    # Test with tensor and constant
-    with tf.Graph().as_default():
-        __test_elemwise(
-            in_data=[array_ops.placeholder(shape=data[0].shape, dtype="float32", name="in_0"), None]
-        )
-    # Test with constant and tensor
-    with tf.Graph().as_default():
-        __test_elemwise(
-            in_data=[None, array_ops.placeholder(shape=data[1].shape, dtype="float32", name="in_1")]
-        )
-
-
-#######################################################################
-# Add
-# ---
-
-
-def _test_add(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of add"""
-    return _test_elemwise(math_ops.add, data, fused_activation_function, quantized, qnn_op)
-
-
-#######################################################################
-# Subtract
-# --------
-
-
-def _test_sub(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of subtract"""
-    return _test_elemwise(math_ops.subtract, data, fused_activation_function, quantized, qnn_op)
-
-
-#######################################################################
-# Mul
-# ---
-
-
-def _test_mul(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of mul"""
-    return _test_elemwise(math_ops.multiply, data, fused_activation_function, quantized, qnn_op)
-
-
-#######################################################################
-# Divide
-# ------
-
-
-def _test_div(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of divide"""
-    return _test_elemwise(
-        math_ops.divide,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-        exclude_zero_point=True,
-    )
-
-
-#######################################################################
-# Power
-# -----
-
-
-def _test_pow(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of power"""
-    return _test_elemwise(
-        math_ops.pow,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-    )
-
-
-#######################################################################
-# Maximum
-# -------
-
-
-def _test_maximum(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of maximum"""
-    return _test_elemwise(
-        math_ops.maximum, data, fused_activation_function, quantized, qnn_op, same_qnn_params=True
-    )
-
-
-#######################################################################
-# Minimum
-# -------
-
-
-def _test_minimum(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of minimum"""
-    return _test_elemwise(
-        math_ops.minimum, data, fused_activation_function, quantized, qnn_op, same_qnn_params=True
-    )
-
-
-#######################################################################
-# Greater
-# -------
-
-
-def _test_greater(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of greater"""
-    return _test_elemwise(
-        math_ops.greater,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-        comparison_op=True,
-    )
-
-
-#######################################################################
-# Greater_equal
-# -------------
-
-
-def _test_greater_equal(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of greater_equal"""
-    return _test_elemwise(
-        math_ops.greater_equal,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-        comparison_op=True,
-    )
-
-
-#######################################################################
-# Less
-# ----
-
-
-def _test_less(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of less"""
-    return _test_elemwise(
-        math_ops.less,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-        comparison_op=True,
-    )
-
-
-#######################################################################
-# Less_equal
-# ----------
-
-
-def _test_less_equal(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of less_equal"""
-    return _test_elemwise(
-        math_ops.less_equal,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-        comparison_op=True,
-    )
-
-
-#######################################################################
-# Equal
-# -----
-
-
-def _test_equal(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of equal"""
-    return _test_elemwise(
-        math_ops.equal,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-        comparison_op=True,
-    )
-
-
-#######################################################################
-# Not_equal
-# ---------
-
-
-def _test_not_equal(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of not_equal"""
-    return _test_elemwise(
-        math_ops.not_equal,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-        comparison_op=True,
-    )
-
-
-#######################################################################
-# Squared_difference
-# ------------------
-
-
-def _test_squared_difference(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of squared difference"""
-    return _test_elemwise(
-        math_ops.squared_difference,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-    )
-
-
-#######################################################################
-# Floor_divide
-# ------------
-
-
-def _test_floor_divide(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of floor_div"""
-    return _test_elemwise(
-        math_ops.floordiv,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-        exclude_zero_point=True,
-    )
-
-
-#######################################################################
-# Floor_mod
-# ---------
-
-
-def _test_floor_mod(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of floor_mod"""
-    return _test_elemwise(
-        math_ops.floormod,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-    )
-
-
-def _test_forward_elemwise(testop):
-    """Elewise"""
-    testop(
-        [
-            np.arange(6.0, dtype=np.float32).reshape((2, 1, 1, 3)),
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 1, 3)),
-        ]
-    )
-    testop(
-        [
-            np.arange(6.0, dtype=np.float32).reshape((2, 1, 3)),
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 3)),
-        ]
-    )
-    testop(
-        [
-            np.arange(3.0, dtype=np.float32).reshape((1, 3)),
-            np.arange(1.0, 4.0, dtype=np.float32).reshape((1, 3)),
-        ]
-    )
-
-
-def _test_forward_elemwise_quantized(testop, dtype=np.uint8):
-    type_info = np.iinfo(dtype)
-    _min, _max = type_info.min, type_info.max
-    testop(
-        [
-            np.array(np.random.uniform(_min, _max, (3, 6)), dtype=dtype),
-            np.array(np.random.uniform(_min, _max, (3, 6)), dtype=dtype),
-        ],
-        quantized=True,
-        qnn_op=testop,
-    )
-
-
-def _test_elemwise_qnn_out_range(qnn_op):
-    # set the fake_quant output range with respect to the input tensors float32 range
-    qnn_out_range = {
-        _test_add: (-150, 150),
-        _test_sub: (-150, 150),
-        _test_mul: (-5e3, 5e3),
-        _test_div: (-150, 150),
-        _test_maximum: (-112, 111),
-        _test_minimum: (-128, 127),
-        _test_equal: (-150, 150),
-        _test_greater: (-150, 150),
-        _test_squared_difference: (0, 65025),
-        _test_floor_divide: (-150, 150),
-        _test_less: (-150, 150),
-        _test_floor_mod: (-150, 150),
-        _test_not_equal: (-150, 150),
-        _test_pow: (0, 3),
-        _test_less_equal: (-150, 150),
-        _test_greater_equal: (-150, 150),
-    }
-
-    return qnn_out_range[qnn_op]
-
-
-def test_all_elemwise():
-    """All_elemwise"""
-    _test_forward_elemwise(_test_add)
-    _test_forward_elemwise_quantized(_test_add)
-    _test_forward_elemwise(partial(_test_add, fused_activation_function="RELU"))
-    # this is broken with tf upgrade 1.15.2 and hits a segfault that needs
-    # further investigation.
-    # _test_forward_elemwise(partial(_test_add, fused_activation_function="RELU6"))
-    _test_forward_elemwise(_test_sub)
-    _test_forward_elemwise_quantized(_test_sub)
-    _test_forward_elemwise(partial(_test_sub, fused_activation_function="RELU"))
-    _test_forward_elemwise(partial(_test_sub, fused_activation_function="RELU6"))
-    _test_forward_elemwise(_test_mul)
-    _test_forward_elemwise_quantized(_test_mul)
-    _test_forward_elemwise(partial(_test_mul, fused_activation_function="RELU"))
-    _test_forward_elemwise(partial(_test_mul, fused_activation_function="RELU6"))
-    _test_forward_elemwise(_test_div)
-    _test_forward_elemwise(partial(_test_div, fused_activation_function="RELU"))
-    _test_forward_elemwise(partial(_test_div, fused_activation_function="RELU6"))
-    _test_forward_elemwise_quantized(_test_div)
-    _test_forward_elemwise(_test_pow)
-    _test_forward_elemwise_quantized(_test_pow)
-    _test_forward_elemwise(_test_maximum)
-    _test_forward_elemwise_quantized(_test_maximum)
-    _test_forward_elemwise(_test_minimum)
-    _test_forward_elemwise_quantized(_test_minimum)
-    _test_forward_elemwise(_test_greater)
-    _test_forward_elemwise_quantized(_test_greater)
-    _test_forward_elemwise(_test_squared_difference)
-    _test_forward_elemwise_quantized(_test_squared_difference, np.int8)
-    _test_forward_elemwise(_test_greater_equal)
-    _test_forward_elemwise_quantized(_test_greater_equal)
-    _test_forward_elemwise(_test_less)
-    _test_forward_elemwise_quantized(_test_less)
-    _test_forward_elemwise(_test_less_equal)
-    _test_forward_elemwise_quantized(_test_less_equal)
-    _test_forward_elemwise(_test_equal)
-    _test_forward_elemwise_quantized(_test_equal)
-    _test_forward_elemwise(_test_not_equal)
-    _test_forward_elemwise_quantized(_test_not_equal)
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_forward_elemwise(_test_floor_divide)
-        _test_forward_elemwise_quantized(_test_floor_divide)
-        _test_forward_elemwise(_test_floor_mod)
-        # This test of quantized floor mod is currently disabled due
-        # to flaky CI failures in main, failing approximately 45% of
-        # the time.
-        #
-        # _test_forward_elemwise_quantized(_test_floor_mod)
-
-
-#######################################################################
-# AddN
-# ----
-
-
-def _test_forward_add_n(inputs):
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        temp = []
-        for each in inputs:
-            temp.append(tf.placeholder(shape=each.shape, dtype=each.dtype))
-        output = tf.add_n(temp)
-        compare_tflite_with_tvm(
-            list(inputs),
-            [each.name for each in temp],
-            list(temp),
-            [output],
-        )
-
-
-def test_forward_add_n():
-    """Add n"""
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        x = np.random.randint(1, 100, size=(3, 3, 3), dtype=np.int32)
-        y = np.random.randint(1, 100, size=(3, 3, 3), dtype=np.int32)
-        z_1 = np.random.randint(1, 100, size=(3, 3, 3), dtype=np.int32)
-        x_1, x_2, z_2 = x.astype(np.float32), y.astype(np.float32), z_1.astype(np.float32)
-        in0 = x
-        in1 = [x, y]
-        in2 = (x, y, z_1)
-        in3 = x_1
-        in4 = [x_1, x_2]
-        in5 = (x_1, x_2, z_2)
-        _test_forward_add_n(in0)
-        _test_forward_add_n(in1)
-        _test_forward_add_n(in2)
-        _test_forward_add_n(in3)
-        _test_forward_add_n(in4)
-        _test_forward_add_n(in5)
-
-
-#######################################################################
-# Logical operators
-# -----------------
-
-
-def _test_logical_binary(logical_bin_op, data):
-
-    with tf.Graph().as_default():
-        in_data = [
-            array_ops.placeholder(shape=data[0].shape, dtype="bool", name="in_0"),
-            array_ops.placeholder(shape=data[1].shape, dtype="bool", name="in_1"),
-        ]
-        if logical_bin_op is math_ops.logical_not:
-            out = math_ops.logical_or(in_data[0], in_data[1], name="out1")
-            out = logical_bin_op(out, name="out")
-        else:
-            out = logical_bin_op(in_data[0], in_data[1], name="out")
-
-        compare_tflite_with_tvm(data, ["in_0:0", "in_1:0"], in_data, [out])
-
-
-def _test_forward_logical_and(data):
-    """One iteration of logical and"""
-    return _test_logical_binary(math_ops.logical_and, data)
-
-
-def _test_forward_logical_or(data):
-    """One iteration of logical or"""
-    return _test_logical_binary(math_ops.logical_or, data)
-
-
-def _test_forward_logical_not(data):
-    """One iteration of logical not"""
-    return _test_logical_binary(math_ops.logical_not, data)
-
-
-def test_all_logical():
-    data = [
-        np.random.choice(a=[False, True], size=(2, 3, 4)).astype("bool"),
-        np.random.choice(a=[False, True], size=(2, 3, 4)).astype("bool"),
-    ]
-    # boolean dtype is not supported by older versions than TFLite 1.15.0
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"):
-        _test_forward_logical_and(data)
-        _test_forward_logical_or(data)
-        _test_forward_logical_not(data)
-
-
-#######################################################################
-# Zeros like
-# ----------
-
-
-def _test_zeros_like(data):
-    """One iteration of ZEROS LIKE"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = gen_array_ops.zeros_like(in_data)
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_zeros_like():
-    """ZEROS LIKE"""
-    _test_zeros_like(np.arange(6.0, dtype=np.float32).reshape((1, 6)))
-
-
-#######################################################################
-# Fill
-# ----
-
-
-def _test_fill(dims, value_data, value_dtype):
-    """Use the fill op to create a tensor of value_data with constant dims."""
-
-    value_data = np.array(value_data, dtype=value_dtype)
-    # TF 1.13 TFLite convert method does not accept empty shapes
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        with tf.Graph().as_default():
-            value = array_ops.placeholder(dtype=value_dtype, name="value", shape=[])
-            out = tf.fill(dims, value)
-            compare_tflite_with_tvm([value_data], ["value"], [value], [out])
-
-    with tf.Graph().as_default():
-        input1 = array_ops.placeholder(dtype=value_dtype, name="input1", shape=dims)
-        # Fill op gets converted to static tensor during conversion
-        out = tf.fill(dims, value_data)
-        out1 = tf.add(out, input1)
-        input1_data = np.random.uniform(0, 5, size=dims).astype(value_dtype)
-        compare_tflite_with_tvm([input1_data], ["input1"], [input1], [out1])
-
-
-def test_forward_fill():
-    """Test FILL op"""
-
-    _test_fill((1, 2, 2, 4), 5, "int32")
-    _test_fill((1, 2, 2, 4), 5, "float32")
-    _test_fill((5,), 5, "int32")
-
-
-#######################################################################
-# Reduce
-# ------
-
-
-def _test_reduce(math_op, data, keep_dims=None):
-    """One iteration of reduce"""
-
-    assert len(data) == 2
-
-    # Test with tensor and constant
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data[0].shape, dtype=data[0].dtype, name="in")
-        out = math_op(in_data, data[1], keep_dims)
-        compare_tflite_with_tvm([data[0]], ["in:0"], [in_data], [out])
-
-
-def _test_reduce_quantize(math_op, data, keep_dims=None):
-    """One iteration of reduce"""
-
-    assert len(data) == 2
-
-    # Test with tensor and constant
-    with tf.Graph().as_default():
-        in_data = [array_ops.placeholder(shape=data[0].shape, dtype="float32", name="in")]
-        inq_data = [
-            tf.quantization.fake_quant_with_min_max_args(
-                in_data[0], min=-100, max=100, name="inq_0"
-            )
-        ]
-        input_range = {"inq_0": (-100, 100)}
-        out = math_op(inq_data, data[1], keep_dims)
-        out = tf.quantization.fake_quant_with_min_max_args(out, min=-200, max=200, name="out")
-        compare_tflite_with_tvm(
-            [data[0]], ["inq_0:0"], [inq_data[0]], [out], quantized=True, input_range=input_range
-        )
-
-
-#######################################################################
-# Reduce_min
-# ----------
-
-
-def _test_reduce_min(data, keep_dims=None):
-    """One iteration of reduce_min"""
-    return _test_reduce(math_ops.reduce_min, data, keep_dims)
-
-
-#######################################################################
-# Reduce_max
-# ----------
-
-
-def _test_reduce_max(data, keep_dims=None):
-    """One iteration of reduce_max"""
-    return _test_reduce(math_ops.reduce_max, data, keep_dims)
-
-
-#######################################################################
-# Reduce_mean
-# -----------
-
-
-def _test_reduce_mean(data, keep_dims=None, quantized=False):
-    """One iteration of reduce_mean"""
-    if quantized:
-        return _test_reduce_quantize(math_ops.reduce_mean, data, keep_dims)
-    else:
-        return _test_reduce(math_ops.reduce_mean, data, keep_dims)
-
-
-#######################################################################
-# Reduce_prod
-# -----------
-
-
-def _test_reduce_prod(data, keep_dims=None):
-    """One iteration of reduce_prod"""
-    return _test_reduce(math_ops.reduce_prod, data, keep_dims)
-
-
-#######################################################################
-# Reduce_sum
-# -----------
-
-
-def _test_reduce_sum(data, keep_dims=None):
-    """One iteration of reduce_sum"""
-    return _test_reduce(math_ops.reduce_sum, data, keep_dims)
-
-
-#######################################################################
-# Reduce_any
-# ----------
-
-
-def _test_reduce_any(data, keep_dims=None):
-    """One iteration of reduce_any"""
-    return _test_reduce(math_ops.reduce_any, data, keep_dims)
-
-
-def _test_forward_reduce(testop, dtype="float32"):
-    """Reduce"""
-    if dtype == "bool":
-        data0 = [np.random.choice(a=[False, True], size=(16, 16, 16, 16)).astype(dtype), None]
-        data1 = [
-            np.random.choice(a=[False, True], size=(16, 16, 16, 16)).astype(dtype),
-            np.array(1, dtype=np.int32),
-        ]
-        data2 = [
-            np.random.choice(a=[False, True], size=(16, 16, 16, 16)).astype(dtype),
-            np.array([1, 2], dtype=np.int32),
-        ]
-    else:
-        data0 = [np.random.rand(16, 16, 16, 16).astype(dtype), None]
-        data1 = [np.random.rand(16, 16, 16, 16).astype(dtype), np.array(1, dtype=np.int32)]
-        data2 = [np.random.rand(16, 16, 16, 16).astype(dtype), np.array([1, 2], dtype=np.int32)]
-
-    for data in [data0, data1, data2]:
-        testop(data)
-        testop(data, keep_dims=False)
-        testop(data, keep_dims=True)
-
-
-def _test_forward_reduce_quantized(testop):
-    data0 = [
-        np.array(np.random.uniform(0, 255, (3, 6)), dtype=np.uint8),
-        np.array([1, 2], dtype=np.int32),
-    ]
-    testop(data0, quantized=True)
-    testop(data0, keep_dims=False, quantized=True)
-    testop(data0, keep_dims=True, quantized=True)
-
-
-def test_all_reduce():
-    _test_forward_reduce(_test_reduce_min)
-    _test_forward_reduce(_test_reduce_max)
-    _test_forward_reduce(_test_reduce_mean)
-    _test_forward_reduce_quantized(_test_reduce_mean)
-    _test_forward_reduce(_test_reduce_prod)
-    _test_forward_reduce(_test_reduce_sum)
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"):
-        _test_forward_reduce(_test_reduce_any, dtype="bool")
-
-
-#######################################################################
-# Arg_min_max
-# -----------
-
-
-def _test_arg_min_max(math_op, data, axis, quantized=False):
-    """One iteration of arg_min_max"""
-
-    with tf.Graph().as_default():
-        t_name = "in"
-        in_data = array_ops.placeholder(shape=data.shape, dtype=np.float32, name=t_name)
-        input_range = None
-        qmin, qmax = -100, 102
-        if quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=qmin, max=qmax, name="q" + t_name
-            )
-            input_range = {inq_data.name.split(":")[0]: (qmin, qmax)}
-            out = math_op(input=inq_data, axis=axis)
-            compare_tflite_with_tvm(
-                [data], [inq_data.name], [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            out = math_op(input=in_data, axis=axis)
-            compare_tflite_with_tvm([data], [in_data.name], [in_data], [out])
-
-
-def test_forward_arg_min_max():
-    """Arg min max"""
-    # test quantized
-    for data in [np.array(np.random.uniform(-100, 100, (3, 4)), dtype=np.uint8)]:
-        # There is no quantized version of ArgMin
-        for axis in [None, 0, 1, -1]:
-            _test_arg_min_max(math_ops.argmax, data, axis, True)
-
-    for data in [np.array(np.random.uniform(-100, 100, (3, 4)), dtype=np.float32)]:
-        for axis in [None, 0, 1, -1]:
-            _test_arg_min_max(math_ops.argmax, data, axis)
-            _test_arg_min_max(math_ops.argmin, data, axis)
-
-
-#######################################################################
-# Select, Where
-# -------------
-
-
-def test_forward_select():
-    """Select"""
-    with tf.Graph().as_default():
-        with tf.Session() as _:
-            input1 = tf.placeholder(tf.int32, shape=[1, 4, 4, 3], name="input1")
-            input2 = tf.placeholder(tf.int32, shape=[1, 4, 4, 3], name="input2")
-            mask = input1 > input2
-            out = tf.where(mask, input1 + 1, input2 * 2)
-            in_data1 = np.random.uniform(0, 10, size=(1, 4, 4, 3)).astype("int32")
-            in_data2 = np.random.uniform(0, 10, size=(1, 4, 4, 3)).astype("int32")
-
-            compare_tflite_with_tvm(
-                [in_data1, in_data2], ["input1:0", "input2:0"], [input1, input2], [out]
-            )
-
-
-@pytest.mark.parametrize("quant_bits", [2, 4, 8, 16])
-@pytest.mark.parametrize(
-    "value, min_value, max_value",
-    [[-10.11, -6, 6], [-3.55, -6, 6], [0, -6, 6], [3.55, -6, 6], [10.11, -6, 6]],
-)
-def test_forward_fake_quant(value, min_value, max_value, quant_bits):
-    """Fake quant"""
-    with tf.Graph().as_default():
-        with tf.Session() as _:
-            input_placeholder = tf.placeholder(tf.float32, shape=[1], name="input")
-            out = tf.quantization.fake_quant_with_min_max_args(
-                input_placeholder, min=min_value, max=max_value, num_bits=quant_bits, name=None
-            )
-
-            in_data = np.float32(value)
-            compare_tflite_with_tvm([in_data], ["input:0"], [input_placeholder], [out])
-
-
-# Squeeze
-# -------
-
-
-def _test_squeeze(data, squeeze_dims=None):
-    """One iteration of squeeze"""
-
-    if squeeze_dims is None:
-        squeeze_dims = []
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-
-        if squeeze_dims:
-            out = array_ops.squeeze(in_data, squeeze_dims)
-        else:
-            out = array_ops.squeeze(in_data)
-
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_squeeze():
-    """Squeeze"""
-    _test_squeeze(np.arange(6).reshape((1, 2, 1, 3)), [0, 2])
-    _test_squeeze(np.arange(6).reshape((2, 1, 3, 1)), [1, 3])
-
-
-#######################################################################
-# Quantize/DeQuantize
-# -------------------
-
-
-def _test_quantize_dequantize(data):
-    """One iteration of quantize and dequantize"""
-
-    # Keras model to force TFLite converter to insert 2 TFLite quantize ops.
-    # First TFLite quantize op converts float32 tensor to int8 tensor - Qnn quantize.
-    # Second TFLite quantize op converts int8 tensor to int8 tensor - Qnn requantize.
-    data_in = tf.keras.layers.Input(shape=data.shape[1:])
-    relu = tf.keras.layers.ReLU()(data_in)
-    add = tf.keras.layers.Add()([data_in, relu])
-    concat = tf.keras.layers.Concatenate(axis=0)([relu, add])
-    keras_model = tf.keras.models.Model(inputs=data_in, outputs=concat)
-
-    # To create quantized values with dynamic range of activations, needs representative dataset
-    def representative_data_gen():
-        for _ in range(1):
-            yield [data]
-
-    tflite_model_quant = _quantize_keras_model(keras_model, representative_data_gen, True, True)
-
-    tflite_output = run_tflite_graph(tflite_model_quant, data)
-    if package_version.parse(tf.__version__) < package_version.parse("2.9"):
-        in_node = data_in.name.split(":")[0]
-    else:
-        in_node = "serving_default_" + data_in.name + ":0"
-    tvm_output = run_tvm_graph(tflite_model_quant, data, in_node)
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-2
-    )
-
-
-def _test_quantize_dequantize_const(data):
-    """One iteration of quantize and dequantize"""
-
-    # Keras model to force TFLite converter to insert 2 TFLite quantize ops.
-    # First TFLite quantize op converts float32 tensor to int8 tensor - Qnn quantize.
-    # Second TFLite quantize op converts int8 tensor to int8 tensor - Qnn requantize.
-    data_in = tf.keras.layers.Input(shape=data.shape[1:])
-    relu = tf.keras.layers.ReLU()(data_in)
-    add = tf.keras.layers.Add()([data, relu])
-    concat = tf.keras.layers.Concatenate(axis=0)([relu, add])
-    keras_model = tf.keras.models.Model(inputs=data_in, outputs=concat)
-
-    # To create quantized values with dynamic range of activations, needs representative dataset
-    def representative_data_gen():
-        for _ in range(1):
-            yield [data]
-
-    tflite_model_quant = _quantize_keras_model(keras_model, representative_data_gen, True, True)
-
-    tflite_output = run_tflite_graph(tflite_model_quant, data)
-    if package_version.parse(tf.__version__) < package_version.parse("2.9"):
-        in_node = data_in.name.split(":")[0]
-    else:
-        in_node = "serving_default_" + data_in.name + ":0"
-    tvm_output = run_tvm_graph(tflite_model_quant, data, in_node)
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-2
-    )
-
-
-def test_forward_quantize_dequantize():
-    """Quantize Dequantize"""
-    data = np.random.uniform(0, 1, (1, 4, 4, 3)).astype("float32")
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.1.0"):
-        _test_quantize_dequantize(data)
-        _test_quantize_dequantize_const(data)
-
-
-#######################################################################
-# Pad
-# ---
-
-
-def _test_pad(data, mode="CONSTANT", quantized=False):
-    """One iteration of PAD"""
-
-    assert len(data) == 2
-
-    # Test with tensor and constant
-    with tf.Graph().as_default():
-        in_data = [array_ops.placeholder(shape=data[0].shape, dtype="float32", name="in")]
-
-        if quantized:
-            # fake_quant will keep the tensors in float32 until the conversion in the session
-            input_range = {"inq_0": (-100, 100)}
-            inq_data = [
-                tf.quantization.fake_quant_with_min_max_args(
-                    in_data[0], min=-100, max=100, name="inq_0"
-                )
-            ]
-            out = array_ops.pad(
-                inq_data[0], ops.convert_to_tensor(data[1], dtype=data[1].dtype), mode=mode
-            )
-            compare_tflite_with_tvm(
-                [data[0]],
-                ["inq_0:0"],
-                inq_data,
-                [out],
-                quantized=True,
-                input_range=input_range,
-                experimental_new_converter=True,
-            )
-        else:
-            out = array_ops.pad(
-                in_data[0], ops.convert_to_tensor(data[1], dtype=data[1].dtype), mode=mode
-            )
-            compare_tflite_with_tvm([data[0]], ["in:0"], in_data, [out])
-
-
-def test_forward_pad():
-    """Pad"""
-    _test_pad(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 1, 3)),
-            np.array([[1, 1], [2, 2], [1, 1], [2, 2]], dtype=np.int32),
-        ]
-    )
-    _test_pad(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 3)),
-            np.array([[2, 2], [1, 1], [1, 1]], dtype=np.int32),
-        ]
-    )
-    _test_pad(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ]
-    )
-    _test_pad(
-        [
-            np.arange(1.0, 4.0, dtype=np.float32).reshape((1, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ]
-    )
-    _test_pad(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ],
-        mode="REFLECT",
-    )
-    _test_pad(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ],
-        mode="SYMMETRIC",
-    )
-    _test_pad(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int64),
-        ],
-        mode="REFLECT",
-    )
-    _test_pad(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int64),
-        ],
-        mode="SYMMETRIC",
-    )
-    _test_pad(
-        [
-            np.arange(0, 256, dtype=np.uint8).reshape((1, 256)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ],
-        quantized=True,
-    )
-    _test_pad(
-        [
-            np.arange(0, 256, dtype=np.uint8).reshape((1, 256)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ],
-        quantized=True,
-        mode="SYMMETRIC",
-    )
-    _test_pad(
-        [
-            np.arange(0, 256, dtype=np.uint8).reshape((1, 256)),
-            np.array([[0, 0], [2, 2]], dtype=np.int32),
-        ],
-        quantized=True,
-        mode="REFLECT",
-    )
-
-
-#######################################################################
-# PADV2
-# -----
-
-
-def _test_padv2(data, mode="CONSTANT", quantized=False):
-    """One iteration of PADV2"""
-
-    assert len(data) == 2 or len(data) == 3
-
-    with_constant_values = len(data) == 3
-
-    # Test with tensor and constant
-    with tf.Graph().as_default():
-        in_data = [array_ops.placeholder(shape=data[0].shape, dtype="float32", name="in")]
-
-        if quantized:
-            # fake_quant will keep the tensors in float32 until the conversion in the session
-            input_range = {"inq_0": (-100, 100)}
-            inq_data = [
-                tf.quantization.fake_quant_with_min_max_args(
-                    in_data[0], min=-100, max=100, name="inq_0"
-                )
-            ]
-            if with_constant_values:
-                in_constant_values = constant_op.constant(
-                    data[2], shape=data[2].shape, dtype="float32", name="in_constant_values"
-                )
-                inq_constant_values = tf.quantization.fake_quant_with_min_max_args(
-                    in_constant_values, min=-100, max=100, name="inq_constant_values"
-                )
-                out = array_ops.pad_v2(
-                    inq_data[0],
-                    ops.convert_to_tensor(data[1], dtype=data[1].dtype),
-                    constant_values=inq_constant_values,
-                    mode=mode,
-                )
-                out = tf.quantization.fake_quant_with_min_max_args(
-                    out, min=-100, max=100, name="out"
-                )
-            else:
-                out = array_ops.pad_v2(
-                    inq_data[0], ops.convert_to_tensor(data[1], dtype=data[1].dtype), mode=mode
-                )
-            compare_tflite_with_tvm(
-                [data[0]], ["inq_0:0"], inq_data, [out], quantized=True, input_range=input_range
-            )
-        else:
-            if with_constant_values:
-                out = array_ops.pad_v2(
-                    in_data[0],
-                    ops.convert_to_tensor(data[1], dtype=data[1].dtype),
-                    constant_values=ops.convert_to_tensor(data[2], dtype=data[2].dtype),
-                    mode=mode,
-                )
-            else:
-                out = array_ops.pad_v2(
-                    in_data[0], ops.convert_to_tensor(data[1], dtype=data[1].dtype), mode=mode
-                )
-            compare_tflite_with_tvm([data[0]], ["in:0"], in_data, [out])
-
-
-def test_forward_padv2():
-    """PADV2"""
-    # Tests without Constant_values
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 1, 3)),
-            np.array([[1, 1], [2, 2], [1, 1], [2, 2]], dtype=np.int32),
-        ]
-    )
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 3)),
-            np.array([[2, 2], [1, 1], [1, 1]], dtype=np.int32),
-        ]
-    )
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ]
-    )
-    _test_padv2(
-        [
-            np.arange(1.0, 4.0, dtype=np.float32).reshape((1, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ]
-    )
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ],
-        mode="REFLECT",
-    )
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ],
-        mode="SYMMETRIC",
-    )
-    _test_padv2(
-        [
-            np.arange(0, 256, dtype=np.uint8).reshape((1, 256)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ],
-        quantized=True,
-    )
-
-    # Tests with Constant_values
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 1, 3)),
-            np.array([[1, 1], [2, 2], [1, 1], [2, 2]], dtype=np.int32),
-            np.array([2], dtype=np.float32),
-        ]
-    )
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 3)),
-            np.array([[2, 2], [1, 1], [1, 1]], dtype=np.int32),
-            np.array([1], dtype=np.float32),
-        ]
-    )
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-            np.array([-1], dtype=np.float32),
-        ]
-    )
-    _test_padv2(
-        [
-            np.arange(1.0, 4.0, dtype=np.float32).reshape((1, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-            np.array([2], dtype=np.float32),
-        ]
-    )
-    # NOTE: In versions > 2.1.0, there is a bug in Tensorflow package for this scenario.
-    #       Hence, it is disabled temporarily for TF version > 2.1.0 .
-    if package_version.parse(tf.VERSION) <= package_version.parse("2.1.0"):
-        _test_padv2(
-            [
-                np.arange(0, 256, dtype=np.uint8).reshape((1, 256)),
-                np.array([[1, 1], [2, 2]], dtype=np.int32),
-                np.array([2], dtype=np.float32),
-            ],
-            quantized=True,
-        )
-
-    # Constant Values input can be scalar
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 1, 3)),
-            np.array([[1, 1], [2, 2], [1, 1], [2, 2]], dtype=np.int32),
-            np.float32(2),
-        ]
-    )
-    # NOTE: In versions > 2.1.0, there is a bug in Tensorflow package for this scenario.
-    #       Hence, it is disabled temporarily for TF versions > 2.1.0.
-    if package_version.parse(tf.VERSION) <= package_version.parse("2.1.0"):
-        _test_padv2(
-            [
-                np.arange(0, 256, dtype=np.uint8).reshape((1, 256)),
-                np.array([[1, 1], [2, 2]], dtype=np.int32),
-                np.uint8(10),
-            ],
-            quantized=True,
-        )
-
-
-#######################################################################
-# EXPAND_DIMS
-# -----------
-
-
-def _test_expand_dims(input_shape, input_type, axis, quantized=False):
-    """One iteration of EXPAND_DIMS"""
-    with tf.Graph().as_default():
-        axis = ops.convert_to_tensor(axis, dtype=axis.dtype)
-
-        if quantized:
-            # ignoring input_type as quantized requires uint8
-            input_array = np.random.uniform(0, 256, input_shape).astype("uint8")
-            in_input = tf.placeholder(dtype="float32", shape=input_array.shape, name="input")
-
-            input_range = {"q_input": (-100, 100)}
-            inq_input = tf.quantization.fake_quant_with_min_max_args(
-                in_input, min=-100, max=100, name="q_input"
-            )
-
-            out = array_ops.expand_dims(inq_input, axis=axis)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-100, max=100, name="out")
-
-            compare_tflite_with_tvm(
-                [input_array],
-                ["q_input"],
-                [inq_input],
-                [out],
-                quantized=True,
-                input_range=input_range,
-            )
-        else:
-            input_array = np.random.uniform(-100, 100, input_shape).astype(input_type)
-            in_input = tf.placeholder(
-                dtype=input_array.dtype, shape=input_array.shape, name="input"
-            )
-
-            out = array_ops.expand_dims(in_input, axis=axis)
-
-            compare_tflite_with_tvm([input_array], ["input"], [in_input], [out])
-
-
-def test_forward_expand_dims():
-    """EXPAND_DIMS"""
-    for quantized in [False, True]:
-        _test_expand_dims((6, 2, 7, 5), "float32", np.int32(0), quantized=quantized)
-        _test_expand_dims((1, 2, 3), "int32", np.int32(-2), quantized=quantized)
-        _test_expand_dims((2, 4, 5), "float32", np.array([1], dtype=np.int32), quantized=quantized)
-
-
-#######################################################################
-# ONE_HOT
-# -------
-
-
-def _test_one_hot(indices, depth, on_value, off_value, axis=None):
-    """One iteration of One_Hot"""
-    with tf.Graph().as_default():
-        in_indices = tf.placeholder(dtype=indices.dtype, shape=indices.shape, name="indices")
-        in_depth = ops.convert_to_tensor(depth, dtype=depth.dtype)
-        in_on_value = tf.placeholder(dtype=on_value.dtype, shape=on_value.shape, name="on_value")
-        in_off_value = tf.placeholder(
-            dtype=off_value.dtype, shape=off_value.shape, name="off_value"
-        )
-        if axis is not None:
-            out = array_ops.one_hot(in_indices, in_depth, in_on_value, in_off_value, axis=axis)
-        else:
-            out = array_ops.one_hot(in_indices, in_depth, in_on_value, in_off_value)
-        compare_tflite_with_tvm(
-            [indices, on_value, off_value],
-            ["indices", "on_value", "off_value"],
-            [in_indices, in_on_value, in_off_value],
-            [out],
-        )
-
-
-def test_forward_one_hot():
-    """One_Hot"""
-    _test_one_hot(np.int32(2), np.int32(8), np.int32(1), np.int32(0))
-    _test_one_hot(np.int32(4), np.int32(8), np.float32(1), np.float32(0))
-    _test_one_hot(np.array([1, 2, 3], dtype=np.int32), np.int32(8), np.int32(3), np.int32(-1))
-    _test_one_hot(
-        np.array([1, 2, 3], dtype=np.int32), np.int32(8), np.int32(3), np.int32(-1), axis=0
-    )
-
-
-#######################################################################
-# Pack
-# ----
-
-
-def _test_pack(data, is_var, axis, quantized=False):
-    """One iteration of pack"""
-
-    assert len(data) >= 1
-    assert len(data) == len(is_var)
-    if quantized:
-        with tf.Graph().as_default():
-            in_data = [
-                array_ops.placeholder(shape=d.shape, dtype="float32", name="in_" + str(idx))
-                if is_var[idx]
-                else constant_op.constant(
-                    d, shape=d.shape, dtype="float32", name="in_constant_" + str(idx)
-                )
-                for idx, d in enumerate(data)
-            ]
-            inq_data = [
-                tf.quantization.fake_quant_with_min_max_args(
-                    i_data, min=-100, max=100, name=f"inq_{idx}"
-                )
-                for idx, i_data in enumerate(in_data)
-            ]
-            input_range = {}
-            for i in range(len(data)):
-                input_range[f"inq_{i}"] = (-100, 100)
-
-            out = array_ops.pack(inq_data, axis=axis)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-100, max=100, name="out")
-            name = [f"inq_{idx}:0" for idx in range(len(data))]
-            compare_tflite_with_tvm(
-                data, name, inq_data, [out], quantized=True, input_range=input_range
-            )
-    else:
-        with tf.Graph().as_default():
-            in_data = [
-                array_ops.placeholder(shape=d.shape, dtype=d.dtype, name="in_" + str(idx))
-                if is_var[idx]
-                else constant_op.constant(
-                    d, shape=d.shape, dtype=d.dtype, name="in_constant_" + str(idx)
-                )
-                for idx, d in enumerate(data)
-            ]
-
-            out = array_ops.pack(in_data, axis=axis)
-            name = [_.name for _ in in_data]
-            compare_tflite_with_tvm(data, name, in_data, [out], experimental_new_converter=True)
-
-
-def test_forward_pack():
-    """Pack"""
-    _test_pack([np.int32(1), np.int32(5)], [False, False], 0)
-    _test_pack([np.array([1, 4]), np.array([2, 5]), np.array([3, 6])], [True, False, False], 0)
-    _test_pack(
-        [np.arange(6).reshape((1, 2, 1, 3)), np.arange(6).reshape((1, 2, 1, 3))], [True, True], 1
-    )
-
-    _test_pack([np.arange(6).reshape((3, 2)), np.arange(6).reshape((3, 2))], [True, True], 1)
-
-    _test_pack(
-        [
-            np.arange(6).reshape((2, 1, 1, 3)),
-            np.arange(6).reshape((2, 1, 1, 3)),
-            np.arange(6).reshape((2, 1, 1, 3)),
-        ],
-        [True, True, True],
-        1,
-    )
-
-    _test_pack(
-        [
-            np.arange(6, dtype=np.uint8).reshape((2, 1, 1, 3)),
-            np.arange(6, dtype=np.uint8).reshape((2, 1, 1, 3)),
-            np.arange(6, dtype=np.uint8).reshape((2, 1, 1, 3)),
-        ],
-        [True, True, True],
-        1,
-        quantized=True,
-    )
-
-
-#######################################################################
-# Unpack
-# ------
-
-
-def _test_unpack(data, axis, num_unpacks):
-    """One iteration of UNPACK"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = gen_array_ops.unpack(in_data, num=num_unpacks, axis=axis, name="unpack")
-        out_names = ["out_" + str(n) + ":0" for n in range(num_unpacks)]
-        compare_tflite_with_tvm([data], "Placeholder:0", [in_data], out, out_names=out_names)
-
-
-def test_forward_unpack():
-    """UNPACK"""
-    _test_unpack(np.array(np.random.uniform(0, 5, (3, 1)), dtype=np.int32), axis=1, num_unpacks=1)
-    _test_unpack(np.array(np.random.uniform(0, 5, (3, 4)), dtype=np.float32), axis=0, num_unpacks=3)
-    _test_unpack(
-        np.array(np.random.uniform(0, 5, (3, 1, 2)), dtype=np.float32), axis=0, num_unpacks=3
-    )
-    # tflite 1.13 doesn't accept negative axis
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_unpack(
-            np.array(np.random.uniform(0, 5, (3, 6)), dtype=np.int32), axis=-2, num_unpacks=3
-        )
-        _test_unpack(
-            np.array(np.random.uniform(0, 5, (2, 3, 4)), dtype=np.int32), axis=-3, num_unpacks=2
-        )
-
-
-#######################################################################
-# Local response normalization
-# ----------------------------
-
-
-def _test_local_response_normalization(data, depth_radius, bias, alpha, beta):
-    """One iteration of LOCAL_RESPONSE_NORMALIZATION"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-        out = nn_ops.local_response_normalization(
-            in_data, depth_radius=depth_radius, bias=bias, alpha=alpha, beta=beta
-        )
-        compare_tflite_with_tvm(data, "in_0:0", [in_data], [out])
-
-
-def test_forward_local_response_normalization():
-    """LOCAL_RESPONSE_NORMALIZATION"""
-    data = np.random.uniform(size=(1, 6, 4, 3)).astype("float32")
-    # LOCAL_RESPONSE_NORMALIZATION come with TFLite >= 1.14.0 fbs schema
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_local_response_normalization(data, depth_radius=5, bias=1, alpha=1, beta=0.5)
-
-
-#######################################################################
-# L2 normalization
-# ----------------
-
-
-def _test_l2_normalization(data, axis, fused_activation_function=None):
-    """One iteration of L2_NORMALIZATION"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = nn_impl.l2_normalize(in_data, axis)
-        out = with_fused_activation_function(out, fused_activation_function)
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_l2_normalization():
-    """L2_NORMALIZATION"""
-    data = np.random.uniform(size=(3, 6, 4)).astype("float32")
-    _test_l2_normalization(data, axis=2)
-    _test_l2_normalization(data, axis=2, fused_activation_function="RELU")
-
-
-#######################################################################
-# Logistic
-# --------
-
-
-def _test_logistic(data, quantized=False):
-    """One iteration of LOGISTIC"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-
-        if quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-5, max=5, name="inq_0"
-            )
-            input_range = {"inq_0": (-5, 5)}
-            out = math_ops.sigmoid(inq_data)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=0, max=1, name="out")
-            compare_tflite_with_tvm(
-                data, "inq_0:0", [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            out = math_ops.sigmoid(in_data)
-            compare_tflite_with_tvm(data, "in_0:0", [in_data], [out])
-
-
-def test_forward_logistic():
-    """LOGISTIC"""
-    _test_logistic(np.arange(6.0, dtype=np.float32).reshape((1, 6)))
-    _test_logistic(np.random.uniform(0, 255, (3, 6)).astype(np.uint8), quantized=True)
-
-
-#######################################################################
-# Softmax
-# -------
-
-
-def _test_softmax(data):
-    """One iteration of softmax"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = nn_ops.softmax(in_data)
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_softmax():
-    """Softmax"""
-    _test_softmax(np.arange(6.0, dtype=np.float32).reshape((1, 6)))
-    _test_softmax(np.arange(6.0, dtype=np.float32).reshape((1, 2, 3)))
-
-
-######################################################################
-# Log_softmax
-# -----------
-
-
-def _test_log_softmax(data, quantized=False):
-    """One iteration of log_softmax"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-
-        if quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-10, max=10, name="inq_0"
-            )
-            input_range = {"inq_0": (-10, 10)}
-            # tflite log_softmax supports only the case when axis is not specified
-            out = nn_ops.log_softmax(inq_data)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-20, max=0, name="out")
-            compare_tflite_with_tvm(
-                data, "inq_0:0", [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            out = nn_ops.log_softmax(in_data)
-            compare_tflite_with_tvm(data, "in_0:0", [in_data], [out])
-
-
-def test_forward_log_softmax():
-    """Log_softmax"""
-    _test_log_softmax(np.random.uniform(-10, 10, size=(3, 6)).astype(np.float32))
-    _test_log_softmax(np.random.uniform(0, 255, (3, 6)).astype(np.uint8), quantized=True)
-
-
-#######################################################################
-# Tanh
-# ----
-
-
-def _test_tanh(data, quantized=False):
-    """One iteration of TANH"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-
-        if quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-3, max=3, name="inq_0"
-            )
-            input_range = {"inq_0": (-3, 3)}
-            out = math_ops.tanh(inq_data)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-1, max=1, name="out")
-            compare_tflite_with_tvm(
-                data, "inq_0:0", [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            out = math_ops.tanh(in_data)
-            compare_tflite_with_tvm(data, "in_0:0", [in_data], [out])
-
-
-def test_forward_tanh():
-    """TANH"""
-    _test_tanh(np.arange(6.0, dtype=np.float32).reshape((1, 6)), quantized=False)
-    _test_tanh(np.arange(0, 256, 30, dtype=np.uint8), quantized=True)
-
-
-#######################################################################
-# ReLu
-# ----
-
-
-def _test_relu(data, quantized=False):
-    """One iteration of ReLU"""
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-
-        if quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-10, max=10, name="inq_0"
-            )
-            input_range = {"inq_0": (-10, 10)}
-            out = nn_ops.relu(inq_data)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=0, max=6, name="out")
-            compare_tflite_with_tvm(
-                data, "inq_0:0", [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            out = nn_ops.relu(in_data)
-            compare_tflite_with_tvm(data, "in_0:0", [in_data], [out])
-
-
-def test_forward_relu():
-    """ReLU"""
-    _test_relu(np.arange(6.0, dtype=np.float32).reshape((1, 6)))
-    _test_relu(np.random.uniform(0, 255, (3, 6)).astype(np.uint8), quantized=True)
-
-
-#######################################################################
-# ReLU6
-# -----
-
-
-def _test_relu6(data, quantized=False):
-    """One iteration of ReLU6"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-
-        if quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-10, max=10, name="inq_0"
-            )
-            input_range = {"inq_0": (-10, 10)}
-            out = nn_ops.relu6(inq_data)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=0, max=6, name="out")
-            compare_tflite_with_tvm(
-                data, "inq_0:0", [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            out = nn_ops.relu6(in_data)
-            compare_tflite_with_tvm(data, "in_0:0", [in_data], [out])
-
-
-def test_forward_relu6():
-    """ReLU6"""
-    _test_relu6(np.random.uniform(-10, 10, size=(3, 6)).astype(np.float32))
-    _test_relu6(np.random.uniform(0, 255, (3, 6)).astype(np.uint8), quantized=True)
-
-
-#######################################################################
-# Leaky_ReLU
-# ----------
-
-
-def _test_leaky_relu(data, alpha, quantized=False):
-    """One iteration of Leaky_ReLU"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-
-        if quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-3, max=2, name="inq_0"
-            )
-            input_range = {"inq_0": (-3, 2)}
-            out = nn_ops.leaky_relu(inq_data, alpha)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-3, max=2, name="out")
-            compare_tflite_with_tvm(
-                data, "inq_0:0", [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            out = nn_ops.leaky_relu(in_data, alpha)
-            compare_tflite_with_tvm(data, "in_0:0", [in_data], [out])
-
-
-def test_forward_leaky_relu():
-    """Leaky_ReLU"""
-    _test_leaky_relu(np.random.uniform(-5, 5, (1, 6)).astype(np.float32), alpha=0.2)
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_leaky_relu(
-            np.random.uniform(0, 255, (2, 3)).astype(np.uint8), alpha=0.3, quantized=True
-        )
-
-
-#######################################################################
-# ReLU_n1_to_1
-# ------------
-
-
-def _test_relu_n1_to_1(data, quantized=False):
-    """One iteration of ReLU_n1_to_1"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-
-        if quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-3, max=3, name="inq_0"
-            )
-            input_range = {"inq_0": (-3, 3)}
-            # There is no such tf operation.
-            # The specific pattern will be replaced into RELU_N1_TO_1 by tflite
-            out = math_ops.maximum(-1.0, math_ops.minimum(inq_data, 1.0))
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-1, max=1, name="out")
-            compare_tflite_with_tvm(
-                data, "inq_0:0", [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            out = math_ops.maximum(-1.0, math_ops.minimum(in_data, 1.0))
-            compare_tflite_with_tvm(data, "in_0:0", [in_data], [out])
-
-
-def test_forward_relu_n1_to_1():
-    """ReLU_n1_to_1"""
-    _test_relu_n1_to_1(np.random.uniform(-3, 3, (1, 6)).astype(np.float32))
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_relu_n1_to_1(np.random.uniform(0, 255, (3, 6)).astype(np.uint8), quantized=True)
-
-
-#######################################################################
-# PReLU
-# -----
-
-
-def _test_prelu(data, alpha):
-    """One iteration of PReLU"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        # This specific pattern will be replaced into PRelu by tflite
-        out = nn_ops.relu(in_data) + (-alpha * nn_ops.relu(-in_data))
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_prelu():
-    """PReLU"""
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 32, 32, 3)).astype("float32"),
-        np.full((3,), 0.2, dtype="float32"),
-    )
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 32, 32, 3)).astype("float32"),
-        np.full((1, 3), 0.2, dtype="float32"),
-    )
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 32, 32, 3)).astype("float32"),
-        np.full((1, 1, 3), 0.2, dtype="float32"),
-    )
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 32, 32, 3)).astype("float32"),
-        np.full((1, 1, 1, 3), 0.2, dtype="float32"),
-    )
-    #
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 32, 32, 3)).astype("float32"),
-        np.full((32, 3), 0.2, dtype="float32"),
-    )
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 32, 32, 3)).astype("float32"),
-        np.full((32, 32, 3), 0.2, dtype="float32"),
-    )
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 32, 32, 3)).astype("float32"),
-        np.full((1, 32, 1, 3), 0.2, dtype="float32"),
-    )
-    #
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 1, 3)).astype("float32"),
-        np.full((3,), 0.2, dtype="float32"),
-    )
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 32, 3)).astype("float32"),
-        np.full((32, 3), 0.2, dtype="float32"),
-    )
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(32, 3)).astype("float32"), np.full((3), 0.2, dtype="float32")
-    )
-
-
-#######################################################################
-# DepthToSpace
-# ------------
-
-
-def _test_depthtospace(data, block_size):
-    """One iteration of depth_to_space operation with given data and block size"""
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = array_ops.depth_to_space(in_data, block_size)
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_depthtospace():
-    # DEPTH_TO_SPACE comes with TFLite >= 1.15.0 fbs schema
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"):
-        _test_depthtospace(np.random.normal(size=[1, 32, 32, 4]).astype("float32"), 2)
-        _test_depthtospace(np.random.normal(size=[1, 16, 8, 32]).astype("float32"), 4)
-
-
-#######################################################################
-# SpaceToDepth
-# ------------
-
-
-def _test_spacetodepth(data, block_size):
-    """One iteration of space_to_depth operation with given data and block size"""
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = array_ops.space_to_depth(in_data, block_size)
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_spacetodepth():
-    _test_spacetodepth(np.random.normal(size=[1, 32, 32, 4]).astype("float32"), 2)
-    _test_spacetodepth(np.random.normal(size=[1, 16, 8, 32]).astype("float32"), 4)
-
-
-#######################################################################
-# ReverseSequence
-# ---------------
-
-
-def _test_reverse_sequence(shape, dtype, seq_lengths, batch_axis, seq_axis):
-    """One iteration of reverse_sequence operation with given data and attributes"""
-
-    data = np.random.uniform(0, 100, size=shape).astype(dtype)
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(dtype=dtype, name="input", shape=shape)
-        out = tf.reverse_sequence(
-            in_data, seq_lengths=seq_lengths, batch_axis=batch_axis, seq_axis=seq_axis
-        )
-
-        compare_tflite_with_tvm(data, "input", [in_data], [out])
-
-
-def test_forward_reverse_sequence():
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_reverse_sequence([4, 3], "float32", [3, 2, 1], 1, 0)
-        _test_reverse_sequence([4, 3], "float32", [3, 2, 1, 3], 0, 1)
-        _test_reverse_sequence([2, 3, 3, 3], "float32", [2, 3, 2], 2, 1)
-        _test_reverse_sequence([2, 4, 6, 4, 5], "float32", [5, 3], 0, 2)
-        _test_reverse_sequence([2, 4, 6, 4, 5], "float32", [5, 3, 1, 4], 3, 2)
-
-
-#######################################################################
-# Sparse To Dense
-# ---------------
-def _test_sparse_to_dense(sparse_indices, sparse_values, default_value, output_shape):
-    # tflite 1.13 convert method does not accept empty shapes
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        with tf.Graph().as_default():
-            indices = tf.placeholder(
-                shape=sparse_indices.shape, dtype=str(sparse_indices.dtype), name="indices"
-            )
-            values = tf.placeholder(
-                shape=sparse_values.shape, dtype=str(sparse_values.dtype), name="values"
-            )
-            oshape = tf.constant(
-                output_shape, shape=output_shape.shape, dtype=str(output_shape.dtype)
-            )
-
-            if default_value is None:
-                output = tf.sparse_to_dense(indices, oshape, values)
-                compare_tflite_with_tvm(
-                    [sparse_indices, sparse_values],
-                    ["indices", "values"],
-                    [indices, values],
-                    [output],
-                )
-            else:
-                dv_placeholder = tf.placeholder(
-                    shape=(), dtype=str(default_value.dtype), name="default_value"
-                )
-                output = tf.sparse_to_dense(indices, oshape, values, dv_placeholder)
-                compare_tflite_with_tvm(
-                    [sparse_indices, sparse_values, default_value],
-                    ["indices", "values", "default_value"],
-                    [indices, values, dv_placeholder],
-                    [output],
-                )
-
-
-def test_forward_sparse_to_dense():
-    """
-    Works in tvm/topi/tensorflow. But tflite converter breaks this test case
-    _test_sparse_to_dense(
-        np.int32(1),
-        np.int32(3),
-        np.int32(0),
-        np.array([5]).astype("int32")
-    )
-    """
-    # vector
-    _test_sparse_to_dense(
-        np.array([0, 1, 4]).astype("int32"),
-        np.array([3, 3, 3]).astype("int32"),
-        np.int32(0),
-        np.array([5]).astype("int32"),
-    )
-    # vector nXd
-    _test_sparse_to_dense(
-        np.array([[0, 0], [1, 2]]).astype("int32"),
-        np.array([1, 2]).astype("int32"),
-        np.int32(0),
-        np.array([3, 4]).astype("int32"),
-    )
-    _test_sparse_to_dense(
-        np.array([[0, 0, 0], [1, 2, 3]]).astype("int32"),
-        np.array([1, 2]).astype("int32"),
-        np.int32(4),
-        np.array([2, 3, 4]).astype("int32"),
-    )
-    # floats
-    _test_sparse_to_dense(
-        np.array([0, 1, 4]).astype("int32"),
-        np.array([3.1, 3.1, 3.1]).astype("float32"),
-        np.float32(3.5),
-        np.array([5]).astype("int32"),
-    )
-    # default value not specified
-    _test_sparse_to_dense(
-        np.array([0, 1, 4]).astype("int32"),
-        np.array([3.1, 3.1, 3.1]).astype("float32"),
-        None,
-        np.array([5]).astype("int32"),
-    )
-
-
-#######################################################################
-# Fully Connected
-# ---------------
-def _test_fully_connected(
-    tensor_in_sizes,
-    const_input,
-    filter_in_sizes,
-    bias_in_size=None,
-    quantized=False,
-    fp16_quantized=False,
-):
-    """One iteration of fully connected"""
-
-    total_size_1 = np.prod(tensor_in_sizes)
-    total_size_2 = np.prod(filter_in_sizes)
-
-    assert (
-        int(total_size_1 / tensor_in_sizes[0]) == filter_in_sizes[0]
-    ), "input size and filter size are mismatched"
-
-    # Initializes the input tensor with array containing incrementing
-    # numbers from 1.
-    data_array = np.arange(
-        1, total_size_1 + 1, dtype=np.uint8 if quantized and not fp16_quantized else np.float32
-    )
-    filter_array = np.arange(
-        1, total_size_2 + 1, dtype=np.uint8 if quantized and not fp16_quantized else np.float32
-    )
-    in_name = "input"
-
-    with tf.Graph().as_default():
-        in_data = (
-            constant_op.constant(data_array, shape=tensor_in_sizes, dtype=np.float32, name=in_name)
-            if const_input
-            else array_ops.placeholder(shape=tensor_in_sizes, dtype=np.float32, name=in_name)
-        )
-
-        in_filter = constant_op.constant(filter_array, shape=filter_in_sizes, dtype=np.float32)
-        data_array = np.reshape(data_array, tensor_in_sizes)
-
-        # if we have bias
-        if bias_in_size:
-            assert bias_in_size[0] == filter_in_sizes[1], "bias and filter size are mismatched"
-            bias_array = np.arange(
-                1, bias_in_size[0] + 1, dtype=np.uint8 if quantized else np.float32
-            )
-            in_bias = constant_op.constant(bias_array, shape=bias_in_size, dtype=np.float32)
-
-        if quantized and not fp16_quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-100, max=100, name="inq_0"
-            )
-            input_range = {"inq_0": (-100, 100)}
-            inq_filter = tf.quantization.fake_quant_with_min_max_args(
-                in_filter, min=-100, max=100, name="inq_1"
-            )
-            input_range = {"inq_0": (-100, 100), "inq_1": (-100, 100)}
-            # reshape N H W C into N H*W*C
-            inq_data_reshape = array_ops.reshape(inq_data, [tensor_in_sizes[0], -1])
-            out = math_ops.mat_mul(inq_data_reshape, inq_filter)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-100, max=100, name="out")
-
-            # if we have bias
-            if bias_in_size:
-                out = nn_ops.bias_add(out, in_bias)
-
-            compare_tflite_with_tvm(
-                data_array,
-                inq_data.name,
-                [inq_data],
-                [out],
-                quantized=True,
-                input_range=input_range,
-                experimental_new_converter=True,
-            )
-        else:
-            # reshape N H W C into N H*W*C
-            in_data_reshape = array_ops.reshape(in_data, [tensor_in_sizes[0], -1])
-            out = math_ops.mat_mul(in_data_reshape, in_filter)
-            # TODO : Need to construct a fc op with (keep_num_dims == True)
-
-            # if we have bias
-            if bias_in_size:
-                out = nn_ops.bias_add(out, in_bias)
-
-            compare_tflite_with_tvm(
-                data_array,
-                in_data.name,
-                [in_data],
-                [out],
-                experimental_new_converter=True,
-                fp16_quantized=fp16_quantized,
-            )
-
-
-def test_forward_fully_connected():
-    """Fully Connected"""
-    for input_shape, weight_shape, bias_shape in [
-        ([1, 4], [4, 4], None),
-        ([1, 4], [4, 4], [4]),
-        ([1, 1, 1, 5], [5, 5], None),
-        ([1, 1, 10], [10, 103], None),
-        ([1, 1, 1, 150], [150, 100], None),
-        ([1, 1, 1, 150], [150, 100], None),
-        ([1, 1, 1, 150], [150, 100], [100]),
-        ([5, 1, 1, 150], [150, 100], None),
-        ([5, 1, 1, 150], [150, 100], [100]),
-    ]:
-        for const_input in [False, True]:
-            for quantized in [False, True]:
-                for fp16_quantized in [False, True]:
-                    _test_fully_connected(
-                        input_shape,
-                        const_input,
-                        weight_shape,
-                        bias_shape,
-                        quantized,
-                        fp16_quantized,
-                    )
-
-
-#######################################################################
-# REVERSE_V2
-# ----------
-
-
-def _test_reverse_v2(input_shape, axis, dtype):
-    """One iteration of REVERSE_V2"""
-    with tf.Graph().as_default():
-        input_array = np.random.randint(0, 100, size=input_shape).astype(dtype)
-        in_input = tf.placeholder(dtype=input_array.dtype, shape=input_array.shape, name="input")
-        in_axis = ops.convert_to_tensor(axis, dtype=axis.dtype)
-
-        out = array_ops.reverse(in_input, in_axis)
-
-        compare_tflite_with_tvm([input_array], ["input"], [in_input], [out])
-
-
-def test_forward_reverse_v2():
-    """REVERSE_V2"""
-    for dtype in ["float32", "int32"]:
-        _test_reverse_v2((5), np.array([0], dtype="int32"), dtype)
-        _test_reverse_v2((5, 6, 4, 2), np.array([2], dtype="int32"), dtype)
-
-
-#######################################################################
-# MATRIX_SET_DIAG
-# ---------------
-
-
-def _test_matrix_set_diag(input_shape, input_type, quantized=False):
-    """One iteration of MATRIX_SET_DIAG"""
-    with tf.Graph().as_default():
-        diagonal_shape = list(input_shape[:-2])
-        diagonal_shape.append(min(input_shape[-2], input_shape[-1]))
-
-        if quantized:
-            # ignoring input_type as quantized requires uint8
-            input_array = np.random.uniform(0, 256, input_shape).astype("uint8")
-            in_input = tf.placeholder(dtype="float32", shape=input_array.shape, name="input")
-            inq_input = tf.quantization.fake_quant_with_min_max_args(
-                in_input, min=-100, max=100, name="q_input"
-            )
-
-            diagonal = np.random.uniform(0, 256, diagonal_shape).astype("uint8")
-            in_diagonal = tf.placeholder(dtype="float32", shape=diagonal.shape, name="diagonal")
-            inq_diagonal = tf.quantization.fake_quant_with_min_max_args(
-                in_diagonal, min=-100, max=100, name="q_diagonal"
-            )
-
-            input_range = {"q_input": (-100, 100), "q_diagonal": (-100, 100)}
-
-            out = array_ops.matrix_set_diag(inq_input, inq_diagonal)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-100, max=100, name="out")
-
-            compare_tflite_with_tvm(
-                [input_array, diagonal],
-                ["q_input", "q_diagonal"],
-                [inq_input, inq_diagonal],
-                [out],
-                quantized=True,
-                input_range=input_range,
-            )
-        else:
-            input_array = np.random.uniform(0, 100, input_shape).astype(input_type)
-            diagonal = np.random.uniform(0, 100, diagonal_shape).astype(input_type)
-
-            in_input = tf.placeholder(
-                dtype=input_array.dtype, shape=input_array.shape, name="input"
-            )
-            in_diagonal = tf.placeholder(
-                dtype=diagonal.dtype, shape=diagonal.shape, name="diagonal"
-            )
-
-            out = array_ops.matrix_set_diag(in_input, in_diagonal)
-
-            compare_tflite_with_tvm(
-                [input_array, diagonal], ["input", "diagonal"], [in_input, in_diagonal], [out]
-            )
-
-
-def test_forward_matrix_set_diag():
-    """MATRIX_SET_DIAG"""
-    for dtype in [np.float32, np.int32]:
-        _test_matrix_set_diag((4, 4), dtype)
-        _test_matrix_set_diag((5, 4, 3, 4), dtype)
-        _test_matrix_set_diag((4, 4, 2), dtype)
-
-    _test_matrix_set_diag((4, 4), np.uint8, quantized=True)
-    _test_matrix_set_diag((5, 4, 3, 4), np.uint8, quantized=True)
-    _test_matrix_set_diag((4, 4, 2), np.uint8, quantized=True)
-
-
-#######################################################################
-# MATRIX_DIAG
-# -----------
-
-
-def _test_matrix_diag(diagonal_shape, dtype):
-    """One iteration of MATRIX_DIAG"""
-    with tf.Graph().as_default():
-        diagonal = np.random.uniform(0, 100, diagonal_shape).astype(dtype)
-        in_diagonal = tf.placeholder(dtype=diagonal.dtype, shape=diagonal.shape, name="diagonal")
-
-        out = array_ops.matrix_diag(in_diagonal)
-
-        compare_tflite_with_tvm(
-            [diagonal], ["diagonal"], [in_diagonal], [out], experimental_new_converter=True
-        )
-
-
-def test_forward_matrix_diag():
-    """MATRIX_DIAG"""
-    for dtype in [np.float32, np.int32]:
-        _test_matrix_diag((4), dtype)
-        _test_matrix_diag((5, 4, 3), dtype)
-        _test_matrix_diag((2, 3), dtype)
-
-
-#######################################################################
-# Custom Operators
-# ----------------
-
-
-def _test_detection_postprocess(tf_model_file, box_encodings_size, class_predictions_size):
-    """One iteration of detection postProcess with given model and shapes"""
-    converter = tf.lite.TFLiteConverter.from_frozen_graph(
-        tf_model_file,
-        input_arrays=["raw_outputs/box_encodings", "raw_outputs/class_predictions"],
-        output_arrays=[
-            "TFLite_Detection_PostProcess",
-            "TFLite_Detection_PostProcess:1",
-            "TFLite_Detection_PostProcess:2",
-            "TFLite_Detection_PostProcess:3",
-        ],
-        input_shapes={
-            "raw_outputs/box_encodings": box_encodings_size,
-            "raw_outputs/class_predictions": class_predictions_size,
-        },
-    )
-    converter.allow_custom_ops = True
-    converter.inference_type = tf.lite.constants.FLOAT
-    tflite_model = converter.convert()
-    np.random.seed(0)
-    box_encodings = np.random.uniform(size=box_encodings_size).astype("float32")
-    class_predictions = np.random.uniform(size=class_predictions_size).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model, [box_encodings, class_predictions])
-    tvm_output = run_tvm_graph(
-        tflite_model,
-        [box_encodings, class_predictions],
-        ["raw_outputs/box_encodings", "raw_outputs/class_predictions"],
-        num_output=4,
-    )
-
-    # Check all output shapes are equal
-    assert all(
-        list(
-            tvm_tensor.shape == tflite_tensor.shape
-            for (tvm_tensor, tflite_tensor) in zip(tvm_output, tflite_output)
-        )
-    )
-
-    # Check valid count is the same
-    assert tvm_output[3] == tflite_output[3]
-    valid_count = tvm_output[3][0]
-
-    # For boxes that do not have any detections, TFLite puts random values. Therefore, we compare
-    # tflite and tvm tensors for only valid boxes.
-    for i in range(0, valid_count):
-        # Check bounding box co-ords
-        tvm.testing.assert_allclose(
-            np.squeeze(tvm_output[0][0][i]),
-            np.squeeze(tflite_output[0][0][i]),
-            rtol=1e-5,
-            atol=1e-5,
-        )
-
-        # Check the class
-        # Stricter check to ensure class remains same
-        np.testing.assert_equal(np.squeeze(tvm_output[1][0][i]), np.squeeze(tflite_output[1][0][i]))
-
-        # Check the score
-        tvm.testing.assert_allclose(
-            np.squeeze(tvm_output[2][0][i]),
-            np.squeeze(tflite_output[2][0][i]),
-            rtol=1e-5,
-            atol=1e-5,
-        )
-
-
-def test_detection_postprocess():
-    """Detection PostProcess"""
-
-    # Fast-NMS
-    box_encodings_size = (1, 1917, 4)
-    class_predictions_size = (1, 1917, 91)
-    tf_model_file = tf_testing.get_workload_official(
-        "http://download.tensorflow.org/models/object_detection/"
-        "ssd_mobilenet_v2_quantized_300x300_coco_2019_01_03.tar.gz",
-        "ssd_mobilenet_v2_quantized_300x300_coco_2019_01_03/tflite_graph.pb",
-    )
-    _test_detection_postprocess(tf_model_file, box_encodings_size, class_predictions_size)
-
-    # Fast-NMS
-    box_encodings_size = (1, 2034, 4)
-    class_predictions_size = (1, 2034, 91)
-    tf_model_file = download_testdata(
-        "https://github.com/czh978/models_for_tvm_test/raw/main/tflite_graph_with_postprocess.pb",
-        "tflite_graph_with_postprocess.pb",
-    )
-    _test_detection_postprocess(tf_model_file, box_encodings_size, class_predictions_size)
-
-    # Regular NMS
-    box_encodings_size = (1, 1917, 4)
-    class_predictions_size = (1, 1917, 91)
-    tf_model_file = download_testdata(
-        (
-            "https://github.com/Grovety/ModelZoo/raw/52fb82156ae8c8e3f62c7d7caf6867b25261dda4/"
-            "models/object_detection/ssd_mobilenet_v1/tflite_int8/tflite_graph_with_regular_nms.pb"
-        ),
-        "tflite_graph_with_regular_nms.pb",
-    )
-    _test_detection_postprocess(tf_model_file, box_encodings_size, class_predictions_size)
-
-
-#######################################################################
-# Custom Converter
-# ----------------
-
-
-def test_custom_op_converter():
-    """Test case for user-defined operator converter in TFLite frontend"""
-
-    class DummyOperatorConverter(relay.frontend.tflite.OperatorConverter):
-        """Operator Converter for converting TFLite ops to relay ops"""
-
-        def __init__(self, model, subgraph, exp_tab):
-            super().__init__(model, subgraph, exp_tab)
-            self.allow_custom_ops = True
-
-            convert_map_overwrite = {"SUB": self.convert_sub_dummy}
-
-            self.convert_map.update(convert_map_overwrite)
-
-        def convert_sub_dummy(self, op):
-            """Convert TFLite SUB"""
-            input_tensors = self.get_input_tensors(op)
-            assert len(input_tensors) == 2, "input tensors length should be 2"
-
-            lhs_tensor = input_tensors[0]
-            rhs_tensor = input_tensors[1]
-
-            lhs_expr = self.get_expr(lhs_tensor.tensor_idx)
-            rhs_expr = self.get_expr(rhs_tensor.tensor_idx)
-
-            temp_expr = relay.op.negative(rhs_expr)
-            out = relay.op.add(lhs_expr, temp_expr)
-
-            return out
-
-    with tf.Graph().as_default():
-        # Generate TFLite model for single addition
-        data = [
-            np.arange(6.0, dtype=np.float32).reshape((2, 1, 1, 3)),
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 1, 3)),
-        ]
-        in_data = [
-            array_ops.placeholder(shape=data[0].shape, dtype="float32", name="in_0"),
-            array_ops.placeholder(shape=data[1].shape, dtype="float32", name="in_1"),
-        ]
-        out = math_ops.subtract(in_data[0], in_data[1])
-        in_name = [x[1] for x in zip(in_data, ("in_0:0", "in_1:0"))]
-        input_tensors = in_data
-        output_tensors = [out]
-        in_node = [0] * len(in_name)
-        for i, _ in enumerate(in_name):
-            in_node[i] = in_name[i].split(":")[0]
-
-        with tf.Session() as sess:
-            converter = tf.lite.TFLiteConverter.from_session(sess, input_tensors, output_tensors)
-            tflite_model_buf = converter.convert()
-    in_data = [x[1] for x in zip(in_data, data)]
-    tvm_output_orig = run_tvm_graph(tflite_model_buf, in_data, in_node)
-    tvm_output_dummy = run_tvm_graph(
-        tflite_model_buf, in_data, in_node, op_converter=DummyOperatorConverter
-    )
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output_orig[0]), np.squeeze(tvm_output_dummy[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-#######################################################################
-# Mobilenet
-# ---------
-
-
-def test_forward_mobilenet_v1():
-    """Test the Mobilenet V1 TF Lite model."""
-    # MobilenetV1
-    tflite_model_file = tf_testing.get_workload_official(
-        "http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz",
-        "mobilenet_v1_1.0_224.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(1, 224, 224, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-def test_forward_mobilenet_v2():
-    """Test the Mobilenet V2 TF Lite model."""
-    # MobilenetV2
-    tflite_model_file = tf_testing.get_workload_official(
-        "http://download.tensorflow.org/models/tflite_11_05_08/mobilenet_v2_1.0_224.tgz",
-        "mobilenet_v2_1.0_224.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(1, 224, 224, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-#######################################################################
-# Mobilenet V3
-# ------------
-
-
-def test_forward_mobilenet_v3():
-    """Test the Mobilenet V3 TF Lite model."""
-    # In MobilenetV3, some ops are not supported before tf 1.15 fbs schema
-    if package_version.parse(tf.VERSION) < package_version.parse("1.15.0"):
-        return
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_1.0_float.tgz",
-        "v3-large_224_1.0_float/v3-large_224_1.0_float.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(1, 224, 224, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-#######################################################################
-# Mobilenet V1 Sparse
-# -----------------
-
-
-def test_forward_sparse_mobilenet_v1():
-    """Test the Sparse version of Mobilenet V1 TF Lite model."""
-    # MobilenetV1
-    tflite_model_file = download_testdata(
-        "https://storage.googleapis.com/fast-convnets/tflite-models/mbv1_140_90_12b4_720.tflite",
-        "mbv1_140_90_12b4_720.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(1, 224, 224, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "float_image_input")
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-#######################################################################
-# Mobilenet V2 Sparse
-# -----------------
-
-
-def test_forward_sparse_mobilenet_v2():
-    """Test the Sparse version of Mobilenet V2 TF Lite model."""
-    # MobilenetV1
-    tflite_model_file = download_testdata(
-        "https://storage.googleapis.com/fast-convnets/tflite-models/mbv2_200_85_11-16b2_744.tflite",
-        "mbv2_200_85_11-16b2_744.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(1, 224, 224, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "float_image_input")
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-#######################################################################
-# Inception
-# ---------
-
-
-def test_forward_inception_v3_net():
-    """Test the Inception V3 TF Lite model."""
-    # InceptionV3
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/"
-        "upload_20180427/inception_v3_2018_04_27.tgz",
-        "inception_v3.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(1, 299, 299, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-def test_forward_inception_v4_net():
-    """Test the Inception V4 TF Lite model."""
-    # InceptionV4
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/"
-        "tflite/model_zoo/upload_20180427/"
-        "inception_v4_2018_04_27.tgz",
-        "inception_v4.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(1, 299, 299, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-def test_forward_inception_v4_net_batched():
-    """Test the Inception V4 TF Lite model."""
-    # InceptionV4
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/"
-        "tflite/model_zoo/upload_20180427/"
-        "inception_v4_2018_04_27.tgz",
-        "inception_v4.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(4, 299, 299, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-def test_forward_qnn_inception_v1_net():
-    """Test the Quantized TFLite Inception model."""
-    # InceptionV1
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/"
-        "inception_v1_224_quant_20181026.tgz",
-        "inception_v1_224_quant.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-
-    # Test image. Checking the labels because the requantize implementation is different between
-    # TFLite and Relay. This cause final output numbers to mismatch. So, testing accuracy via
-    # labels. Also, giving a real image, instead of random inputs.
-    data = get_real_image(224, 224)
-
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tflite_predictions = np.squeeze(tflite_output)
-    tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm_predictions = np.squeeze(tvm_output)
-    tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-    tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-def test_forward_qnn_mobilenet_v1_net():
-    """Test the Quantized TFLite Mobilenet V1 model."""
-    # MobilenetV1
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_08_02/"
-        "mobilenet_v1_1.0_224_quant.tgz",
-        "mobilenet_v1_1.0_224_quant.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-
-    # Test image. Checking the labels because the requantize implementation is different between
-    # TFLite and Relay. This cause final output numbers to mismatch. So, testing accuracy via
-    # labels. Also, giving a real image, instead of random inputs.
-    data = get_real_image(224, 224)
-
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tflite_predictions = np.squeeze(tflite_output)
-    tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm_predictions = np.squeeze(tvm_output)
-    tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-    tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-def test_forward_qnn_mobilenet_v2_net():
-    """Test the Quantized TFLite Mobilenet V2 model."""
-    # MobilenetV2
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/"
-        "mobilenet_v2_1.0_224_quant.tgz",
-        "mobilenet_v2_1.0_224_quant.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-
-    # Test image. Checking the labels because the requantize implementation is different between
-    # TFLite and Relay. This cause final output numbers to mismatch. So, testing accuracy via
-    # labels. Also, giving a real image, instead of random inputs.
-    data = get_real_image(224, 224)
-
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tflite_predictions = np.squeeze(tflite_output)
-    tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm_predictions = np.squeeze(tvm_output)
-    tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-    tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-#######################################################################
-# Mobilenet V3 Quantized
-# ----------------------
-
-
-def test_forward_qnn_mobilenet_v3_net():
-    """Test the Quantized TFLite Mobilenet V3 model."""
-    # In MobilenetV3, some ops are not supported before tf 1.15 fbs schema
-    if package_version.parse(tf.VERSION) < package_version.parse("1.15.0"):
-        pytest.skip("Unsupported in tflite < 1.15.0")
-    else:
-        pytest.skip("This segfaults with tensorflow 1.15.2 and above")
-
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_1.0_uint8.tgz",
-        "v3-large_224_1.0_uint8/v3-large_224_1.0_uint8.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-
-    # Test image. Checking the labels because the requantize implementation is different between
-    # TFLite and Relay. This cause final output numbers to mismatch. So, testing accuracy via
-    # labels. Also, giving a real image, instead of random inputs.
-    data = get_real_image(224, 224)
-
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tflite_predictions = np.squeeze(tflite_output)
-    tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm_predictions = np.squeeze(tvm_output)
-    tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-    tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-@pytest.mark.skipif(
-    platform.machine() == "aarch64",
-    reason="Fails with an output mismatch. See https://github.com/apache/tvm/issues/16534",
-)
-def test_forward_tflite2_qnn_resnet50():
-    """Test the Quantized TFLite version 2.1.0 Resnet50 model."""
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.1.0"):
-        tflite_model_file = download_testdata(
-            "https://raw.githubusercontent.com/dmlc/web-data/main/tensorflow/models/Quantized/"
-            "resnet_50_quantized.tflite",
-            "resnet_50_quantized.tflite",
-        )
-        with open(tflite_model_file, "rb") as f:
-            tflite_model_buf = f.read()
-
-        data = pre_processed_image(224, 224)
-
-        tflite_output = run_tflite_graph(tflite_model_buf, data)
-        tflite_predictions = np.squeeze(tflite_output)
-        tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-        tvm_output = run_tvm_graph(tflite_model_buf, np.array(data), "input_1")
-        tvm_predictions = np.squeeze(tvm_output)
-        tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-        tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-def test_forward_tflite2_qnn_inception_v1():
-    """Test the Quantized TFLite version 2.1.0 Inception V1 model."""
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.1.0"):
-        tflite_model_file = download_testdata(
-            "https://raw.githubusercontent.com/dmlc/web-data/main/tensorflow/models/Quantized/"
-            "inception_v1_quantized.tflite",
-            "inception_v1_quantized.tflite",
-        )
-        with open(tflite_model_file, "rb") as f:
-            tflite_model_buf = f.read()
-
-        data = pre_processed_image(224, 224)
-
-        tflite_output = run_tflite_graph(tflite_model_buf, data)
-        tflite_predictions = np.squeeze(tflite_output)
-        tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-        tvm_output = run_tvm_graph(tflite_model_buf, np.array(data), "input_1")
-        tvm_predictions = np.squeeze(tvm_output)
-        tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-        tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-def test_forward_tflite2_qnn_mobilenet_v2():
-    """Test the Quantized TFLite version 2.1.0 Mobilenet V2 model."""
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.1.0"):
-        tflite_model_file = download_testdata(
-            "https://raw.githubusercontent.com/dmlc/web-data/main/tensorflow/models/Quantized/"
-            "mobilenet_v2_quantized.tflite",
-            "mobilenet_v2_quantized.tflite",
-        )
-        with open(tflite_model_file, "rb") as f:
-            tflite_model_buf = f.read()
-
-        data = pre_processed_image(224, 224)
-
-        tflite_output = run_tflite_graph(tflite_model_buf, data)
-        tflite_predictions = np.squeeze(tflite_output)
-        tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-        tvm_output = run_tvm_graph(tflite_model_buf, np.array(data), "input_1")
-        tvm_predictions = np.squeeze(tvm_output)
-        tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-        tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-def test_forward_tflite_float16():
-    """Test float16 quantized model"""
-    # MobilenetV2
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_02_22/"
-        "mobilenet_v1_0.25_128.tgz",
-        "mobilenet_v1_0.25_128_frozen.pb",
-    )
-
-    converter = tf.lite.TFLiteConverter.from_frozen_graph(
-        tflite_model_file, ["input"], ["MobilenetV1/Predictions/Reshape_1"]
-    )
-    converter.optimizations = [tf.lite.Optimize.DEFAULT]
-    converter.target_spec.supported_types = [tf.float16]
-    tflite_model_buf = converter.convert()
-
-    # Test image. Checking the labels because the requantize implementation is different between
-    # TFLite and Relay. This cause final output numbers to mismatch. So, testing accuracy via
-    # labels. Also, giving a real image, instead of random inputs.
-    data = get_real_image(128, 128, quantized=False)
-
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tflite_predictions = np.squeeze(tflite_output)
-    tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm_predictions = np.squeeze(tvm_output)
-    tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-    tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-@pytest.mark.skipif(
-    platform.machine() == "aarch64",
-    reason="Fails during leagalization due to int16 datatype. "
-    "See https://github.com/apache/tvm/issues/16535",
-)
-def test_forward_mobilenet_int16():
-    """Test int16 quantized model"""
-    # MobilenetV2
-    model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_02_22/"
-        "mobilenet_v1_0.25_128.tgz",
-        "mobilenet_v1_0.25_128_frozen.pb",
-    )
-
-    # Test image. Checking the labels because the requantize implementation is different between
-    # TFLite and Relay. This cause final output numbers to mismatch. So, testing accuracy via
-    # labels. Also, giving a real image, instead of random inputs.
-    #
-    # According to TFLite documentation, despite the quantization being done to make this model
-    # use int16 types, inputs and outputs are kept float32 by default.
-    # https://www.tensorflow.org/lite/performance/post_training_integer_quant_16x8
-    data = get_real_image(128, 128, quantized=False)
-
-    converter = tf.lite.TFLiteConverter.from_frozen_graph(
-        model_file, ["input"], ["MobilenetV1/Predictions/Reshape_1"]
-    )
-
-    def representative_dataset():
-        for _ in range(1):
-            yield [data]
-
-    converter.optimizations = [tf.lite.Optimize.DEFAULT]
-    converter.target_spec.supported_ops = [
-        tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
-    ]
-    converter.representative_dataset = representative_dataset
-    tflite_model_buf = converter.convert()
-
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tflite_predictions = np.squeeze(tflite_output)
-    tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm_predictions = np.squeeze(tvm_output)
-    tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-    tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-@pytest.mark.skipif(
-    platform.machine() == "aarch64",
-    reason="Fails during leagalization due to int16 datatype. "
-    "See https://github.com/apache/tvm/issues/16535",
-)
-def test_forward_ds_cnn_int16():
-    """Test DS_CNN int16 quantized model"""
-    tflite_model_file = download_testdata(
-        "https://github.com/ARM-software/ML-zoo/blob/48f458af1e9065d9aad2ad94d24b58d6e7c00817/"
-        "models/keyword_spotting/ds_cnn_small/tflite_int16/ds_cnn_quantized.tflite?raw=true",
-        "ds_cnn_quantized_int16.tflite",
-    )
-
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-
-    data = np.random.uniform(size=(1, 490)).astype("int16")
-
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tflite_predictions = np.squeeze(tflite_output)
-    tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "serving_default_input:0")
-    tvm_predictions = np.squeeze(tvm_output)
-    tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-    tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-#######################################################################
-# Unidirectional Sequence LSTM
-# ---------------------
-def test_forward_unidirectional_sequence_lstm():
-    """Test the UnidirectionalSequenceLSTM TFLite"""
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.1.0"):
-        tflite_model_file = download_testdata(
-            "https://github.com/SebastianBoblestETAS/nn_models/blob/"
-            "ce49c5de64889493161ca4194a20e0fd5eb707e6/lstm_1_in_3_out_2_ts_4.tflite?raw=true",
-            "lstm_1_in_3_out_2_ts_4.tflite",
-        )
-        with open(tflite_model_file, "rb") as f:
-            tflite_model_buf = f.read()
-
-        data = np.array(
-            [
-                [
-                    [0.5488135, 0.71518934, 0.60276335],
-                    [0.5448832, 0.4236548, 0.6458941],
-                    [0.4375872, 0.891773, 0.96366274],
-                    [0.3834415, 0.79172504, 0.5288949],
-                ]
-            ],
-            dtype="float32",
-        )
-
-        tflite_output = run_tflite_graph(tflite_model_buf, data)
-        tvm_output = run_tvm_graph(tflite_model_buf, data, "serving_default_input_1:0")
-        tvm.testing.assert_allclose(tflite_output, tvm_output)
-
-
-#######################################################################
-# Quantized SSD Mobilenet
-# -----------------------
-
-
-def test_forward_qnn_coco_ssd_mobilenet_v1():
-    """Test the quantized Coco SSD Mobilenet V1 TF Lite model."""
-    pytest.skip(
-        "LLVM bug - getExtendedVectorNumElements - "
-        + "https://discuss.tvm.apache.org/t/segfault-in-llvm/3567. The workaround is to use a "
-        + "specific target, for example, llvm -mpcu=core-avx2"
-    )
-
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/tflite/"
-        "coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip",
-        "detect.tflite",
-    )
-
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-
-    data = get_real_image_object_detection(300, 300)
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(
-        tflite_model_buf, data, "normalized_input_image_tensor", num_output=4
-    )
-
-    # Check all output shapes are equal
-    assert all(
-        list(
-            tvm_tensor.shape == tflite_tensor.shape
-            for (tvm_tensor, tflite_tensor) in zip(tvm_output, tflite_output)
-        )
-    )
-
-    # Check valid count is the same
-    assert tvm_output[3] == tflite_output[3]
-    valid_count = tvm_output[3][0]
-
-    # For boxes that do not have any detections, TFLite puts random values. Therefore, we compare
-    # tflite and tvm tensors for only valid boxes.
-    for i in range(0, valid_count):
-        # We compare the bounding boxes whose prediction score is above 60%. This is typical in end
-        # to end application where a low prediction score is discarded. This is also needed because
-        # multiple low score bounding boxes can have same score and TFlite and TVM can have
-        # different orderings for same score bounding boxes. Another reason for minor differences in
-        # low score bounding boxes is the difference between TVM and TFLite for requantize operator.
-        if tvm_output[2][0][i] > 0.6:
-            # Check bounding box co-ords. The tolerances have to be adjusted, from 1e-5 to 1e-2,
-            # because of differences between for requantiize operator in TFLite and TVM.
-            tvm.testing.assert_allclose(
-                np.squeeze(tvm_output[0][0][i]),
-                np.squeeze(tflite_output[0][0][i]),
-                rtol=1e-2,
-                atol=1e-2,
-            )
-
-            # Check the class
-            # Stricter check to ensure class remains same
-            np.testing.assert_equal(
-                np.squeeze(tvm_output[1][0][i]), np.squeeze(tflite_output[1][0][i])
-            )
-
-            # Check the score
-            tvm.testing.assert_allclose(
-                np.squeeze(tvm_output[2][0][i]),
-                np.squeeze(tflite_output[2][0][i]),
-                rtol=1e-5,
-                atol=1e-5,
-            )
-
-
-#######################################################################
-# SSD Mobilenet
-# -------------
-
-
-def test_forward_coco_ssd_mobilenet_v1():
-    """Test the FP32 Coco SSD Mobilenet V1 TF Lite model."""
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://raw.githubusercontent.com/dmlc/web-data/main/tensorflow/models/object_detection/"
-        "ssd_mobilenet_v1_coco_2018_01_28.tgz",
-        "ssd_mobilenet_v1_coco_2018_01_28.tflite",
-    )
-
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-
-    np.random.seed(0)
-    data = np.random.uniform(size=(1, 300, 300, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(
-        tflite_model_buf, data, "normalized_input_image_tensor", num_output=4
-    )
-
-    # Check all output shapes are equal
-    assert all(
-        list(
-            tvm_tensor.shape == tflite_tensor.shape
-            for (tvm_tensor, tflite_tensor) in zip(tvm_output, tflite_output)
-        )
-    )
-
-    # Check valid count is the same
-    assert tvm_output[3] == tflite_output[3]
-    valid_count = tvm_output[3][0]
-
-    # For boxes that do not have any detections, TFLite puts random values. Therefore, we compare
-    # tflite and tvm tensors for only valid boxes.
-    for i in range(0, valid_count):
-        # Check bounding box co-ords
-        tvm.testing.assert_allclose(
-            np.squeeze(tvm_output[0][0][i]),
-            np.squeeze(tflite_output[0][0][i]),
-            rtol=1e-5,
-            atol=1e-5,
-        )
-        # Check the class
-        np.testing.assert_equal(np.squeeze(tvm_output[1][0][i]), np.squeeze(tflite_output[1][0][i]))
-
-        # Check the score
-        tvm.testing.assert_allclose(
-            np.squeeze(tvm_output[2][0][i]),
-            np.squeeze(tflite_output[2][0][i]),
-            rtol=1e-5,
-            atol=1e-5,
-        )
-
-
-#######################################################################
-# MediaPipe
-# -------------
-def test_forward_mediapipe_hand_landmark():
-    """Test MediaPipe 2D hand landmark TF Lite model."""
-    # MediaPipe 2D hand landmark TF
-    tflite_model_file = download_testdata(
-        "https://github.com/google/mediapipe/raw/v0.7.4/mediapipe/models/hand_landmark.tflite",
-        "hand_landmark.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(1, 256, 256, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input_1", num_output=2)
-    for i in range(2):
-        tvm.testing.assert_allclose(
-            np.squeeze(tvm_output[i]), np.squeeze(tflite_output[i]), rtol=1e-5, atol=1e-5
-        )
-
-
-#######################################################################
-# Test check for Tensorflow "dynamic range quantization" optimization
-# --------------
-def test_prevent_tensorflow_dynamic_range():
-    """
-    Should prevent running "dynamic range quantization" optimized TFLite graph
-    """
-    data_array = np.random.randint(0, 2, (1, 1024, 1024)).astype(dtype=np.float32)
-    filter_array = np.random.randint(0, 2, (1024, 1024)).astype(dtype=np.float32)
-    data_in = tf.keras.layers.Input(shape=data_array.shape[1:])
-    dense = tf.keras.layers.Dense(units=filter_array.shape[-1], use_bias=False)(data_in)
-    keras_model = tf.keras.models.Model(data_in, dense)
-    keras_model.layers[1].set_weights([filter_array])
-
-    converter = interpreter_wrapper.TFLiteConverter.from_keras_model(keras_model)
-    converter.optimizations = [tf.lite.Optimize.DEFAULT]
-    tflite_model = converter.convert()
-    with pytest.raises(tvm.error.OpNotImplemented):
-        _ = run_tvm_graph(tflite_model, data_array, data_in.name.replace(":0", ""))
-
-
-def _test_nms_v5(
-    bx_shape, score_shape, iou_threshold, score_threshold, max_output_size, dtype="float32"
-):
-    """One iteration of nms_v5 with given attributes"""
-    boxes = np.random.uniform(0, 10, size=bx_shape).astype(dtype)
-    scores = np.random.uniform(size=score_shape).astype(dtype)
-
-    tf.reset_default_graph()
-    tf.compat.v1.disable_eager_execution()
-    in_data_1 = array_ops.placeholder(dtype, boxes.shape, name="in_data_1")
-    in_data_2 = array_ops.placeholder(dtype, scores.shape, name="in_data_2")
-    out = image_ops.non_max_suppression_with_scores(
-        boxes=in_data_1,
-        scores=in_data_2,
-        max_output_size=max_output_size,
-        iou_threshold=iou_threshold,
-        score_threshold=score_threshold,
-        name="nms",
-    )
-
-    compare_tflite_with_tvm(
-        [boxes, scores],
-        ["in_data_1:0", "in_data_2:0"],
-        [in_data_1, in_data_2],
-        [out[0], out[1]],
-        out_names=[out[0].name, out[1].name],
-        experimental_new_converter=True,
-    )
-
-
-def test_forward_nms_v5():
-    """test nms_v5"""
-    _test_nms_v5((10000, 4), (10000,), 0.5, 0.4, 100)
-    _test_nms_v5((1000, 4), (1000,), 0.7, 0.3, 50)
-
-
-#######################################################################
-# Test structural_equal and span of a model
-# --------------------------------------
-def test_structure_and_span():
-    """Test Structure and span of frequently-used models"""
-
-    def _verify(res_fptr, golden_fptr):
-        with tvm.testing.enable_span_filling():
-            with_span = res_fptr()
-        with tvm.testing.disable_span_filling():
-            without_span = res_fptr()
-        tvm.ir.assert_structural_equal(with_span, without_span)
-        _verify_structural_equal_with_span(with_span, golden_fptr())
-
-    def _tf_to_tflite(
-        input_tensors, output_tensors, init_global_variables=False, experimental_new_converter=False
-    ):
-        with tf.Session() as sess:
-            if init_global_variables:
-                sess.run(variables.global_variables_initializer())
-            converter = tf.lite.TFLiteConverter.from_session(sess, input_tensors, output_tensors)
-            converter.experimental_new_converter = experimental_new_converter
-
-            tflite_model_buffer = converter.convert()
-
-        try:
-            import tflite.Model
-
-            tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buffer, 0)
-        except AttributeError:
-            import tflite
-
-            tflite_model = tflite.Model.GetRootAsModel(tflite_model_buffer, 0)
-        except ImportError:
-            raise ImportError("The tflite package must be installed")
-        return tflite_model
-
-    def _test_conv2d_bias_add_span():
-        def _res():
-            in_shape = (1, 5, 5, 1)
-            kernel_shpae = (2, 2, 1, 2)
-            kernel_in = np.ones(kernel_shpae)
-
-            with tf.Graph().as_default():
-                x = array_ops.placeholder(shape=in_shape, dtype="float32", name="input")
-                kernel = tf.constant(kernel_in, dtype=tf.float32, name="filter_weight")
-                tf_model = tf.nn.conv2d(
-                    x, kernel, strides=[1, 1, 1, 1], padding="VALID", name="conv2d"
-                )
-                tflite_model = _tf_to_tflite([x], [tf_model])
-
-            mod, _ = relay.frontend.from_tflite(
-                tflite_model,
-                shape_dict={"input": in_shape},
-                dtype_dict={"input": "float32"},
-                op_converter=relay.frontend.tflite.OperatorConverter,
-            )
-            return mod["main"]
-
-        def _golden():
-            in_input = relay.var(
-                "input", relay.TensorType([1, 5, 5, 1]), span=_create_span("input")
-            )
-            weight = relay.var(
-                "_param_1", relay.TensorType([2, 2, 1, 2]), span=_create_span("filter_weight")
-            )
-            bias = relay.var("_param_2", relay.TensorType([2]), span=_create_span("conv2d_bias"))
-            conv2d = _set_span(
-                relay.nn.conv2d(
-                    in_input,
-                    weight,
-                    channels=2,
-                    kernel_size=[2, 2],
-                    data_layout="NHWC",
-                    kernel_layout="HWIO",
-                ),
-                "conv2d",
-            )
-            bias_add = _set_span(relay.nn.bias_add(conv2d, bias, axis=3), "conv2d")
-            attrs = ir.make_node("DictAttrs", **{"output_tensor_names": ["conv2d"]})
-            func = relay.Function([in_input, weight, bias], bias_add, attrs=attrs)
-            mod = ir.IRModule.from_expr(func)
-            return mod["main"]
-
-        _verify(_res, _golden)
-
-    def _test_fully_connected_bias_add_span():
-        def _res():
-            in_shape = (1, 10)
-            kernel_shpae = (10, 10)
-            kernel_in = np.ones(kernel_shpae)
-
-            with tf.Graph().as_default():
-                x = array_ops.placeholder(shape=in_shape, dtype="float32", name="input")
-                weight = tf.constant(kernel_in, dtype=tf.float32, name="filter_weight")
-                tf_model = math_ops.mat_mul(x, weight, name="dense")
-                tflite_model = _tf_to_tflite([x], [tf_model])
-
-            mod, _ = relay.frontend.from_tflite(
-                tflite_model,
-                shape_dict={"input": in_shape},
-                dtype_dict={"input": "float32"},
-                op_converter=relay.frontend.tflite.OperatorConverter,
-            )
-            return mod["main"]
-
-        def _golden():
-            in_input = relay.var("input", relay.TensorType([1, 10]), span=_create_span("input"))
-            weight = relay.var(
-                "_param_1", relay.TensorType([10, 10]), span=_create_span("filter_weight/transpose")
-            )
-            bias = relay.var("_param_2", relay.TensorType([10]), span=_create_span("dense_bias"))
-            reshape = _set_span(relay.reshape(in_input, [-1, 10]), "dense")
-            dense = _set_span(relay.nn.dense(reshape, weight, units=10), "dense")
-            bias_add = _set_span(relay.nn.bias_add(dense, bias), "dense")
-            attrs = ir.make_node("DictAttrs", **{"output_tensor_names": ["dense"]})
-            func = relay.Function([in_input, weight, bias], bias_add, attrs=attrs)
-            mod = ir.IRModule.from_expr(func)
-            return mod["main"]
-
-        _verify(_res, _golden)
-
-    def _test_reshape_span():
-        def _res():
-            in_shape = (1, 10)
-            output_shape = (2, 5)
-
-            with tf.Graph().as_default():
-                x = array_ops.placeholder(shape=in_shape, dtype="float32", name="input")
-                tf_model = array_ops.reshape(x, output_shape, "reshape")
-                tflite_model = _tf_to_tflite([x], [tf_model])
-
-            mod, _ = relay.frontend.from_tflite(
-                tflite_model,
-                shape_dict={"input": in_shape},
-                dtype_dict={"input": "float32"},
-                op_converter=relay.frontend.tflite.OperatorConverter,
-            )
-            return mod["main"]
-
-        def _golden():
-            in_input = relay.var("input", relay.TensorType([1, 10]), span=_create_span("input"))
-            reshape = _set_span(relay.reshape(in_input, [2, 5]), "reshape")
-            attrs = ir.make_node("DictAttrs", **{"output_tensor_names": ["reshape"]})
-            func = relay.Function([in_input], reshape, attrs=attrs)
-            mod = ir.IRModule.from_expr(func)
-            return mod["main"]
-
-        _verify(_res, _golden)
-
-    _test_conv2d_bias_add_span()
-    _test_fully_connected_bias_add_span()
-    _test_reshape_span()
-
-
-class TestConv2d:
-    """Import Conv2d operator from TFLite, build with Relay and test."""
-
-    input_shape, kernel_shape, padding = tvm.testing.parameters(
-        ((1, 128, 256, 6), (5, 5, 6, 10), "SAME"),
-        ((1, 128, 256, 6), (5, 5, 6, 10), "VALID"),
-        # conv2d_group cases
-        ((1, 30, 40, 6), (5, 5, 1, 6), "SAME"),
-        ((1, 30, 40, 6), (5, 5, 1, 6), "VALID"),
-    )
-
-    def test_conv2d(self, input_shape: tuple, kernel_shape: tuple, padding: str):
-        dtype = tf.float32
-        kernel_in = np.ones(kernel_shape)
-        with tf.Graph().as_default():
-            x = array_ops.placeholder(shape=input_shape, dtype=dtype.name, name="input")
-            kernel = tf.constant(kernel_in, dtype=dtype, name="filter_weight")
-            out = tf.nn.conv2d(x, kernel, strides=[1, 1, 1, 1], padding=padding, name="conv2d")
-            input_data = np.random.randn(*input_shape).astype(dtype.name)
-            compare_tflite_with_tvm(
-                [input_data],
-                ["input"],
-                [x],
-                [out],
-            )
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/scripts/task_python_frontend.sh b/tests/scripts/task_python_frontend.sh
index 593e8f50c1d0..22d9d9ee22db 100755
--- a/tests/scripts/task_python_frontend.sh
+++ b/tests/scripts/task_python_frontend.sh
@@ -25,33 +25,4 @@ export OMP_NUM_THREADS=1
 
 export TVM_TEST_TARGETS="llvm;cuda"
 
-find . -type f -path "*.pyc" | xargs rm -f
-
-# Rebuild cython
-make cython3
-
-
-echo "Running relay ONNX frontend test..."
-run_pytest cython python-frontend-onnx tests/python/frontend/onnx
-
-echo "Running relay PyTorch frontend test..."
-run_pytest cython python-frontend-pytorch tests/python/frontend/pytorch
-
-echo "Running relay Tensorflow frontend test..."
-# Note: Tensorflow tests often have memory issues, so invoke each one separately
-TENSORFLOW_TESTS=$(./ci/scripts/jenkins/pytest_ids.py --folder tests/python/frontend/tensorflow)
-i=0
-for node_id in $TENSORFLOW_TESTS; do
-    echo "$node_id"
-    run_pytest cython "python-frontend-tensorflow-$i" "$node_id"
-    i=$((i+1))
-done
-
-echo "Running relay DarkNet frontend test..."
-run_pytest cython python-frontend-darknet tests/python/frontend/darknet
-
-echo "Running relay PaddlePaddle frontend test..."
-run_pytest cython python-frontend-paddlepaddle tests/python/frontend/paddlepaddle
-
-echo "Running relay CoreML frontend test..."
-run_pytest cython python-frontend-coreml tests/python/frontend/coreml
+# TODO(Siyuan): Keep this file for passing CI
diff --git a/tests/scripts/task_python_frontend_cpu.sh b/tests/scripts/task_python_frontend_cpu.sh
index aac554bea53a..73f21c3c924b 100755
--- a/tests/scripts/task_python_frontend_cpu.sh
+++ b/tests/scripts/task_python_frontend_cpu.sh
@@ -26,13 +26,4 @@ export OMP_NUM_THREADS=1
 
 export TVM_TEST_TARGETS="llvm"
 
-find . -type f -path "*.pyc" | xargs rm -f
-
-# Rebuild cython
-make cython3
-
-echo "Running relay TFLite frontend test..."
-run_pytest cython python-frontend-tflite tests/python/frontend/tflite
-
-echo "Running relay Keras frontend test..."
-run_pytest cython python-frontend-keras tests/python/frontend/keras
+# TODO(Siyuan): Keep this file for passing CI
diff --git a/tests/scripts/task_python_unittest.sh b/tests/scripts/task_python_unittest.sh
index 5b07b5256ea5..4a13c6ce1ed2 100755
--- a/tests/scripts/task_python_unittest.sh
+++ b/tests/scripts/task_python_unittest.sh
@@ -55,7 +55,6 @@ TEST_FILES=(
 )
 
 for TEST_FILE in ${TEST_FILES[@]}; do
-    run_pytest ctypes ${TEST_FILE}-0, tests/python/${TEST_FILE}
     run_pytest cython ${TEST_FILE}-1, tests/python/${TEST_FILE}
 done
 
diff --git a/tests/scripts/task_python_unittest_gpuonly.sh b/tests/scripts/task_python_unittest_gpuonly.sh
index b478bbdc773d..e68fcba25c91 100755
--- a/tests/scripts/task_python_unittest_gpuonly.sh
+++ b/tests/scripts/task_python_unittest_gpuonly.sh
@@ -33,5 +33,4 @@ export TVM_UNITTEST_TESTSUITE_NAME=python-codegen-vulkan
 
 source tests/scripts/setup-pytest-env.sh
 
-run_pytest ctypes ${TVM_UNITTEST_TESTSUITE_NAME}-0 tests/python/codegen/test_target_codegen_vulkan.py
 run_pytest cython ${TVM_UNITTEST_TESTSUITE_NAME}-1 tests/python/codegen/test_target_codegen_vulkan.py