From 845285774b9d005294e506cdee623aaa4799286f Mon Sep 17 00:00:00 2001
From: Siyuan Feng <hzfengsy@sjtu.edu.cn>
Date: Mon, 3 Feb 2025 18:23:42 +0800
Subject: [PATCH] [CI] Remove legacy frontend tests

The frontend tests have been moved to their own repository to reduce the
size of the main TVM repository and make CI more efficient. This commit
removes all frontend-related test files and updates the CI scripts
accordingly.

Key changes:
- Remove all frontend test files from tests/python/frontend/
- Update task_python_frontend.sh script
- Update conftest.py to remove frontend-specific test configurations
---
 conftest.py                                   |    6 -
 tests/lint/pylint.sh                          |   12 -
 tests/python/frontend/caffe/test_forward.py   | 1166 ---
 .../frontend/caffe2/model_zoo/__init__.py     |   48 -
 .../frontend/caffe2/model_zoo/squeezenet.py   |  135 -
 tests/python/frontend/caffe2/test_forward.py  |  257 -
 tests/python/frontend/caffe2/test_graph.py    |   41 -
 .../frontend/coreml/model_zoo/__init__.py     |   50 -
 tests/python/frontend/coreml/test_forward.py  |  851 --
 tests/python/frontend/darknet/test_forward.py |  537 -
 tests/python/frontend/keras/test_forward.py   |  926 --
 .../frontend/mxnet/model_zoo/__init__.py      |   86 -
 .../python/frontend/mxnet/model_zoo/dcgan.py  |   91 -
 tests/python/frontend/mxnet/model_zoo/dqn.py  |   41 -
 .../frontend/mxnet/model_zoo/inception_v3.py  |  368 -
 tests/python/frontend/mxnet/model_zoo/mlp.py  |   41 -
 .../python/frontend/mxnet/model_zoo/resnet.py |  326 -
 .../frontend/mxnet/model_zoo/squeezenet.py    |   97 -
 tests/python/frontend/mxnet/model_zoo/vgg.py  |  108 -
 tests/python/frontend/mxnet/test_forward.py   | 2369 -----
 tests/python/frontend/mxnet/test_graph.py     |  123 -
 .../frontend/mxnet/test_qnn_ops_utils.py      |  224 -
 tests/python/frontend/oneflow/test_forward.py |  963 --
 .../frontend/oneflow/test_vision_models.py    |  149 -
 tests/python/frontend/onnx/test_forward.py    | 8716 -----------------
 .../frontend/paddlepaddle/test_forward.py     | 2566 -----
 tests/python/frontend/pytorch/qnn_test.py     |  803 --
 tests/python/frontend/pytorch/test_forward.py | 5884 -----------
 .../python/frontend/pytorch/test_fx_quant.py  |   95 -
 tests/python/frontend/pytorch/test_lstm.py    |  372 -
 .../frontend/pytorch/test_object_detection.py |  167 -
 tests/python/frontend/pytorch/test_rnns.py    |  521 -
 .../frontend/pytorch/test_span_naming.py      |  106 -
 .../frontend/tensorflow/test_bn_dynamic.py    |   97 -
 .../frontend/tensorflow/test_control_flow.py  |  473 -
 .../frontend/tensorflow/test_debugging.py     |  106 -
 .../frontend/tensorflow/test_forward.py       | 6100 ------------
 .../python/frontend/tensorflow/test_no_op.py  |   53 -
 tests/python/frontend/tensorflow2/common.py   |  106 -
 .../tensorflow2/test_functional_models.py     |  649 --
 .../tensorflow2/test_sequential_models.py     |  168 -
 tests/python/frontend/test_common.py          |  220 -
 tests/python/frontend/tflite/test_forward.py  | 5722 -----------
 tests/scripts/task_python_frontend.sh         |   31 +-
 tests/scripts/task_python_frontend_cpu.sh     |   11 +-
 tests/scripts/task_python_unittest.sh         |    1 -
 tests/scripts/task_python_unittest_gpuonly.sh |    1 -
 47 files changed, 2 insertions(+), 41981 deletions(-)
 delete mode 100644 tests/python/frontend/caffe/test_forward.py
 delete mode 100644 tests/python/frontend/caffe2/model_zoo/__init__.py
 delete mode 100644 tests/python/frontend/caffe2/model_zoo/squeezenet.py
 delete mode 100644 tests/python/frontend/caffe2/test_forward.py
 delete mode 100644 tests/python/frontend/caffe2/test_graph.py
 delete mode 100644 tests/python/frontend/coreml/model_zoo/__init__.py
 delete mode 100644 tests/python/frontend/coreml/test_forward.py
 delete mode 100644 tests/python/frontend/darknet/test_forward.py
 delete mode 100644 tests/python/frontend/keras/test_forward.py
 delete mode 100644 tests/python/frontend/mxnet/model_zoo/__init__.py
 delete mode 100644 tests/python/frontend/mxnet/model_zoo/dcgan.py
 delete mode 100644 tests/python/frontend/mxnet/model_zoo/dqn.py
 delete mode 100644 tests/python/frontend/mxnet/model_zoo/inception_v3.py
 delete mode 100644 tests/python/frontend/mxnet/model_zoo/mlp.py
 delete mode 100644 tests/python/frontend/mxnet/model_zoo/resnet.py
 delete mode 100644 tests/python/frontend/mxnet/model_zoo/squeezenet.py
 delete mode 100644 tests/python/frontend/mxnet/model_zoo/vgg.py
 delete mode 100644 tests/python/frontend/mxnet/test_forward.py
 delete mode 100644 tests/python/frontend/mxnet/test_graph.py
 delete mode 100644 tests/python/frontend/mxnet/test_qnn_ops_utils.py
 delete mode 100644 tests/python/frontend/oneflow/test_forward.py
 delete mode 100644 tests/python/frontend/oneflow/test_vision_models.py
 delete mode 100644 tests/python/frontend/onnx/test_forward.py
 delete mode 100755 tests/python/frontend/paddlepaddle/test_forward.py
 delete mode 100644 tests/python/frontend/pytorch/qnn_test.py
 delete mode 100644 tests/python/frontend/pytorch/test_forward.py
 delete mode 100644 tests/python/frontend/pytorch/test_fx_quant.py
 delete mode 100644 tests/python/frontend/pytorch/test_lstm.py
 delete mode 100644 tests/python/frontend/pytorch/test_object_detection.py
 delete mode 100644 tests/python/frontend/pytorch/test_rnns.py
 delete mode 100644 tests/python/frontend/pytorch/test_span_naming.py
 delete mode 100644 tests/python/frontend/tensorflow/test_bn_dynamic.py
 delete mode 100644 tests/python/frontend/tensorflow/test_control_flow.py
 delete mode 100644 tests/python/frontend/tensorflow/test_debugging.py
 delete mode 100644 tests/python/frontend/tensorflow/test_forward.py
 delete mode 100644 tests/python/frontend/tensorflow/test_no_op.py
 delete mode 100644 tests/python/frontend/tensorflow2/common.py
 delete mode 100644 tests/python/frontend/tensorflow2/test_functional_models.py
 delete mode 100644 tests/python/frontend/tensorflow2/test_sequential_models.py
 delete mode 100644 tests/python/frontend/test_common.py
 delete mode 100644 tests/python/frontend/tflite/test_forward.py

diff --git a/conftest.py b/conftest.py
index 861abc14b843..88e21f494113 100644
--- a/conftest.py
+++ b/conftest.py
@@ -31,23 +31,17 @@
 # taken from the 20 (arbitrary number) of tests as from
 # https://ci.tlcpack.ai/job/tvm/job/main/2907/testReport
 _slowest_tests = [
-    "tests/python/frontend/tensorflow/test_forward.py::test_forward_broadcast_args",
-    "tests/python/frontend/tensorflow/test_forward.py::test_forward_broadcast_to",
     "tests/python/topi/python/test_topi_conv2d_int8.py::test_conv2d_nchw[int8]",
     "tests/python/topi/python/test_topi_conv2d_int8.py::test_conv2d_nchw[uint8]",
     "tests/python/topi/python/test_topi_upsampling.py::test_upsampling3d",
     "tests/python/topi/python/test_topi_upsampling.py::test_upsampling3d",
     "tests/python/topi/python/test_topi_conv2d_int8.py::test_conv2d_nchw[int8]",
-    "tests/python/frontend/tflite/test_forward.py::test_all_elemwise",
-    "tests/python/frontend/pytorch/test_object_detection.py::test_detection_models",
     "tests/python/topi/python/test_topi_conv2d_int8.py::test_conv2d_nchw[uint8]",
     "tests/python/topi/python/test_topi_conv2d_NCHWc.py::test_conv2d_NCHWc",
     "tests/python/topi/python/test_topi_conv2d_hwnc_tensorcore.py::test_conv2d_hwnc_tensorcore",
     "tests/python/contrib/test_tensorrt.py::test_binary[compile]",
-    "tests/python/frontend/pytorch/test_forward.py::test_segmentation_models",
     "tests/python/topi/python/test_topi_conv2d_NCHWc.py::test_conv2d_NCHWc",
     "tests/python/relay/test_py_converter.py::test_global_recursion",
-    "tests/python/frontend/tensorflow/test_forward.py::test_forward_ptb",
     "tests/python/relay/test_op_level6.py::test_topk",
     "tests/python/topi/python/test_topi_conv2d_winograd.py::test_conv2d_nchw",
     "tests/python/relay/test_py_converter.py::test_global_recursion",
diff --git a/tests/lint/pylint.sh b/tests/lint/pylint.sh
index 90e50dfa9433..4d10b01485a0 100755
--- a/tests/lint/pylint.sh
+++ b/tests/lint/pylint.sh
@@ -41,17 +41,5 @@ python3 -m pylint tests/python/contrib/test_hexagon/conv2d/*.py --rcfile="$(dirn
 python3 -m pylint tests/python/contrib/test_hexagon/topi/*.py --rcfile="$(dirname "$0")"/pylintrc
 python3 -m pylint tests/python/contrib/test_hexagon/metaschedule_e2e/*.py --rcfile="$(dirname "$0")"/pylintrc
 
-# tests/python/frontend tests
-python3 -m pylint tests/python/frontend/caffe/test_forward.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/caffe2/*.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/darknet/test_forward.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/coreml/*.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/keras/test_forward.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/darknet/test_forward.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/oneflow/*.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/tensorflow/test_forward.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/pytorch/test_forward.py --rcfile="$(dirname "$0")"/pylintrc
-python3 -m pylint tests/python/frontend/tflite/test_forward.py --rcfile="$(dirname "$0")"/pylintrc
-
 # tests/python/contrib/test_msc tests
 python3 -m pylint tests/python/contrib/test_msc/*.py --rcfile="$(dirname "$0")"/pylintrc
diff --git a/tests/python/frontend/caffe/test_forward.py b/tests/python/frontend/caffe/test_forward.py
deleted file mode 100644
index d0ba1dfac40b..000000000000
--- a/tests/python/frontend/caffe/test_forward.py
+++ /dev/null
@@ -1,1166 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument, unspecified-encoding
-"""
-Caffe testcases
-====================
-This article is a test script to test Caffe operator with Relay.
-"""
-import os
-import logging
-import numpy as np
-import pytest
-
-from google.protobuf import text_format
-import caffe
-from caffe import layers as L, params as P
-from caffe.proto import caffe_pb2 as pb
-
-import tvm
-import tvm.testing
-from tvm import relay
-from tvm.contrib import graph_executor
-from tvm.contrib.download import download_testdata
-
-os.environ["GLOG_minloglevel"] = "2"
-
-logging.basicConfig(level=logging.ERROR)
-
-CURRENT_DIR = os.path.join(os.path.expanduser("~"), ".tvm_test_data", "caffe_test")
-
-#######################################################################
-# Generic functions for TVM & Caffe
-# ------------------------------------------
-
-
-def _create_dir(d_path):
-    """If the directory is not existed, create it"""
-    if not (os.path.exists(d_path) and os.path.isdir(d_path)):
-        os.makedirs(d_path)
-
-
-def _list_to_str(ll):
-    """Convert list or tuple to str, separated by underline."""
-    if isinstance(ll, (tuple, list)):
-        tmp = [str(i) for i in ll]
-        res = "_".join(tmp)
-    return res
-
-
-def _gen_filename_str(op_name, data_shape, *args, **kwargs):
-    """Combining the filename according to the op_name, shape and other args."""
-    file_dir = os.path.join(CURRENT_DIR, op_name)
-    _create_dir(file_dir)
-    res = op_name + "_"
-    shape_str = _list_to_str(list(data_shape))
-    res += shape_str
-    for arg in args:
-        if isinstance(arg, (tuple, list)):
-            res += "_" + _list_to_str(arg)
-        elif isinstance(arg, (int, float, str)):
-            res += "_" + str(arg)
-    for _, v in kwargs.items():
-        if isinstance(v, (tuple, list)):
-            res += "_" + _list_to_str(v)
-        elif isinstance(v, (int, float, str)):
-            res += "_" + str(v)
-    res = res.replace(".", "_")
-    res = res.replace("-", "_")
-    proto_file = os.path.join(file_dir, res + ".prototxt")
-    blob_file = os.path.join(file_dir, res + ".caffemodel")
-    solver_file = os.path.join(file_dir, res + "_solver.prototxt")
-
-    return (proto_file, blob_file, solver_file)
-
-
-def _save_prototxt(n_netspec, f_path):
-    """Generate .prototxt file according to caffe.NetSpec"""
-    s = n_netspec.to_proto()
-    with open(f_path, "w") as f:
-        f.write(str(s))
-
-
-def _save_solver(solver_file, proto_file, blob_file):
-    """Define a solver proto, you can change the configs."""
-    blob_file_prefix = blob_file.split(".caffemodel")[0]
-    s = pb.SolverParameter()
-    s.train_net = proto_file
-    s.base_lr = 0.01
-    s.momentum = 0.9
-    s.weight_decay = 0.0005
-    s.lr_policy = "inv"
-    s.gamma = 0.0001
-    s.power = 0.75
-    s.display = 1
-    s.max_iter = 100000
-    s.snapshot = 100000
-    s.snapshot_prefix = blob_file_prefix
-
-    with open(solver_file, "w") as f:
-        f.write(str(s))
-
-
-def _save_caffemodel(solver_file, blob_file):
-    """Generate .caffemodel file."""
-    solver = caffe.SGDSolver(solver_file)
-    solver.net.save(blob_file)
-
-
-def _gen_model_files(n_netspec, proto_file, blob_file, solver_file):
-    _save_prototxt(n_netspec, proto_file)
-    _save_solver(solver_file, proto_file, blob_file)
-    _save_caffemodel(solver_file, blob_file)
-
-
-def _siso_op(data, func, *args, **kwargs):
-    """Create single input and single output Caffe op"""
-    n = caffe.NetSpec()
-    n.data = L.Input(input_param={"shape": {"dim": list(data.shape)}})
-    n.output = func(n.data, *args, **kwargs)
-    return n
-
-
-def _miso_op(data_list, func, *args, **kwargs):
-    """Create multi input and single output Caffe op"""
-    n = caffe.NetSpec()
-    if not isinstance(data_list, (tuple, list)):
-        raise TypeError(f"Need tuple or list but get {type(data_list)}")
-    input_list = []
-    for idx, data in enumerate(data_list):
-        n["data" + str(idx)] = L.Input(input_param={"shape": {"dim": list(data.shape)}})
-        input_list.append(n["data" + str(idx)])
-    n.output = func(*input_list, *args, **kwargs)
-    return n
-
-
-def _simo_op(data, func, *args, **kwargs):
-    """Create single input and multi output Caffe op"""
-    n = caffe.NetSpec()
-    n.data = L.Input(input_param={"shape": {"dim": list(data.shape)}})
-    output_list = func(n.data, *args, **kwargs)
-    for idx, out in enumerate(output_list):
-        n["output" + str(idx)] = out
-    return n
-
-
-def _run_caffe(data, proto_file, blob_file):
-    """Run caffe model by Caffe according to .caffemodel and .prototxt"""
-    net = caffe.Net(proto_file, blob_file, caffe.TEST)
-    if isinstance(data, (list, tuple)):
-        for idx, d in enumerate(data):
-            net.blobs["data" + str(idx)].data[...] = d
-    else:
-        net.blobs["data"].data[...] = data
-    out = net.forward()
-
-    caffe_output = []
-    for i in range(len(out.keys())):
-        if "output" + str(i) not in out.keys():
-            caffe_output.clear()
-            return list(out.values())
-        caffe_output.append(out["output" + str(i)])
-    return caffe_output
-
-
-def _run_tvm(data, proto_file, blob_file):
-    """Run caffe model by TVM according to .caffemodel and .prototxt"""
-    init_net = pb.NetParameter()
-    predict_net = pb.NetParameter()
-
-    # load model
-    with open(proto_file, "r") as f:
-        text_format.Merge(f.read(), predict_net)
-    # load blob
-    with open(blob_file, "rb") as f:
-        init_net.ParseFromString(f.read())
-
-    shape_dict = {}
-    dtype_dict = {}
-    if isinstance(data, (tuple, list)):
-        for idx, d in enumerate(data):
-            shape_dict["data" + str(idx)] = d.shape
-            dtype_dict["data" + str(idx)] = "float32"
-    else:
-        shape_dict = {"data": data.shape}
-        dtype_dict = {"data": "float32"}
-
-    mod, params = relay.frontend.from_caffe(init_net, predict_net, shape_dict, dtype_dict)
-
-    target = "llvm"
-
-    dev = tvm.cpu(0)
-    with tvm.transform.PassContext(opt_level=3):
-        lib = relay.build(mod, target=target, params=params)
-    dtype = "float32"
-    m = graph_executor.GraphModule(lib["default"](dev))
-    if isinstance(data, (tuple, list)):
-        for idx, d in enumerate(data):
-            m.set_input("data" + str(idx), tvm.nd.array(d.astype(dtype)))
-    else:
-        m.set_input("data", tvm.nd.array(data.astype(dtype)))
-    # execute
-    m.run()
-    tvm_output = []
-    # get outputs
-    for i in range(m.get_num_outputs()):
-        tvm_output.append(m.get_output(i).numpy())
-    return tvm_output
-
-
-def _compare_caffe_tvm(caffe_out, tvm_out, is_network=False):
-    for i, _ in enumerate(caffe_out):
-        if is_network:
-            caffe_out[i] = caffe_out[i][:1]
-        tvm.testing.assert_allclose(caffe_out[i], tvm_out[i], rtol=1e-5, atol=1e-5)
-
-
-def _test_op(data, func_op, op_name, **kwargs):
-    """Single op testing pipline."""
-    shape_list = []
-    if isinstance(data, (list, tuple)):
-        n = _miso_op(data, func_op, **kwargs)
-        for d in data:
-            shape_list.extend(list(d.shape))
-    else:
-        output_num = 1
-        if "ntop" in kwargs:
-            output_num = kwargs["ntop"]
-        if output_num == 1:
-            n = _siso_op(data, func_op, **kwargs)
-        else:
-            n = _simo_op(data, func_op, **kwargs)
-        shape_list = list(data.shape)
-
-    # obtain the .caffemodel file and .prototxt file
-    (proto_file, blob_file, solver_file) = _gen_filename_str(op_name, shape_list, **kwargs)
-    _gen_model_files(n, proto_file, blob_file, solver_file)
-    # run model in Caffe
-    caffe_out = _run_caffe(data, proto_file, blob_file)
-    # run model in TVM
-    tvm_out = _run_tvm(data, proto_file, blob_file)
-    _compare_caffe_tvm(caffe_out, tvm_out)
-
-
-def _test_network(data, proto_file, blob_file):
-    # run model in Caffe
-    caffe_out = _run_caffe(data, proto_file, blob_file)
-    # run model in TVM
-    tvm_out = _run_tvm(data, proto_file, blob_file)
-    _compare_caffe_tvm(caffe_out, tvm_out, is_network=True)
-
-
-#######################################################################
-# BatchNorm
-# -----------
-
-
-def _test_batchnorm(data, moving_average_fraction=0.999, eps=1e-5):
-    """One iteration of BatchNorm"""
-    _test_op(
-        data, L.BatchNorm, "BatchNorm", moving_average_fraction=moving_average_fraction, eps=eps
-    )
-
-
-def test_forward_BatchNorm():
-    """BatchNorm"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_batchnorm(data)
-    _test_batchnorm(data, moving_average_fraction=0.88, eps=1e-4)
-
-
-#######################################################################
-# Concat
-# -----------
-
-
-def _test_concat(data_list, axis=1):
-    """One iteration of Concat"""
-    _test_op(data_list, L.Concat, "Concat", axis=axis)
-
-
-def test_forward_Concat():
-    """Concat"""
-    _test_concat([np.random.rand(1, 3, 10, 10), np.random.rand(1, 2, 10, 10)], axis=1)
-    _test_concat([np.random.rand(3, 10, 10), np.random.rand(2, 10, 10)], axis=0)
-    _test_concat([np.random.rand(3, 10), np.random.rand(2, 10)], axis=0)
-
-
-#######################################################################
-# Convolution
-# -----------
-
-
-def _test_convolution(data, **kwargs):
-    """One iteration of Convolution"""
-    _test_op(data, L.Convolution, "Convolution", **kwargs)
-
-
-def test_forward_Convolution():
-    """Convolution"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_convolution(
-        data,
-        num_output=20,
-        bias_term=True,
-        pad=0,
-        kernel_size=3,
-        stride=2,
-        dilation=1,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    _test_convolution(
-        data,
-        num_output=20,
-        bias_term=False,
-        pad=[1, 2],
-        kernel_size=3,
-        stride=2,
-        dilation=1,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    _test_convolution(
-        data,
-        num_output=20,
-        bias_term=True,
-        pad=[1, 2],
-        kernel_size=[3, 5],
-        stride=[2, 1],
-        dilation=[1, 2],
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    _test_convolution(
-        np.random.rand(1, 2, 10, 10).astype(np.float32),
-        num_output=20,
-        bias_term=True,
-        pad=[1, 2],
-        kernel_size=[3, 5],
-        stride=[2, 1],
-        dilation=[1, 2],
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-        group=2,
-    )
-    _test_convolution(
-        data,
-        num_output=20,
-        bias_term=True,
-        pad_h=1,
-        pad_w=2,
-        kernel_h=3,
-        kernel_w=5,
-        stride_h=2,
-        stride_w=1,
-        dilation=[1, 2],
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-
-
-#######################################################################
-# Crop
-# -----------
-
-
-def _test_crop(data, **kwargs):
-    """One iteration of Crop"""
-    _test_op(data, L.Crop, "Crop", **kwargs)
-
-
-def test_forward_Crop():
-    """Crop"""
-    _test_crop([np.random.rand(10, 10, 120, 120), np.random.rand(10, 5, 50, 60)])
-    _test_crop([np.random.rand(10, 10, 120, 120), np.random.rand(10, 5, 50, 60)], axis=1)
-    _test_crop([np.random.rand(10, 10, 120, 120), np.random.rand(10, 5, 50, 60)], axis=1, offset=2)
-    _test_crop(
-        [np.random.rand(10, 10, 120, 120), np.random.rand(10, 5, 50, 60)], axis=1, offset=[1, 2, 4]
-    )
-    _test_crop(
-        [np.random.rand(10, 10, 120, 120), np.random.rand(10, 5, 50, 60)], axis=2, offset=[2, 4]
-    )
-    _test_crop([np.random.rand(10, 120, 120), np.random.rand(5, 50, 60)], axis=1, offset=[2, 4])
-    _test_crop([np.random.rand(120, 120), np.random.rand(50, 60)], axis=0, offset=[2, 4])
-
-
-#######################################################################
-# Deconvolution
-# -----------
-
-
-def _test_deconvolution(data, **kwargs):
-    """One iteration of Deconvolution"""
-    _test_op(data, L.Deconvolution, "Deconvolution", **kwargs)
-
-
-def test_forward_Deconvolution():
-    """Deconvolution"""
-    data = np.random.rand(1, 16, 32, 32).astype(np.float32)
-    _test_deconvolution(
-        data,
-        convolution_param=dict(
-            num_output=20,
-            bias_term=True,
-            pad=0,
-            kernel_size=3,
-            stride=2,
-            dilation=1,
-            weight_filler=dict(type="xavier"),
-            bias_filler=dict(type="xavier"),
-        ),
-    )
-    _test_deconvolution(
-        data,
-        convolution_param=dict(
-            num_output=20,
-            bias_term=False,
-            pad=[1, 2],
-            kernel_size=3,
-            stride=2,
-            dilation=1,
-            weight_filler=dict(type="xavier"),
-            bias_filler=dict(type="xavier"),
-        ),
-    )
-    _test_deconvolution(
-        data,
-        convolution_param=dict(
-            num_output=20,
-            bias_term=True,
-            pad_h=1,
-            pad_w=2,
-            kernel_h=3,
-            kernel_w=5,
-            stride_h=2,
-            stride_w=1,
-            dilation=1,
-            weight_filler=dict(type="xavier"),
-            bias_filler=dict(type="xavier"),
-        ),
-    )
-    _test_deconvolution(
-        data,
-        convolution_param=dict(
-            num_output=16,
-            bias_term=False,
-            pad=0,
-            kernel_size=2,
-            stride=2,
-            dilation=1,
-            group=16,
-            weight_filler=dict(type="xavier"),
-            bias_filler=dict(type="xavier"),
-        ),
-    )
-    data = np.random.rand(1, 100, 32, 32).astype(np.float32)
-    _test_deconvolution(
-        data,
-        convolution_param=dict(
-            num_output=100,
-            bias_term=False,
-            pad=0,
-            kernel_size=2,
-            stride=2,
-            dilation=1,
-            group=100,
-            weight_filler=dict(type="xavier"),
-            bias_filler=dict(type="xavier"),
-        ),
-    )
-
-
-#######################################################################
-# Dropout
-# -----------
-
-
-def _test_dropout(data, **kwargs):
-    """One iteration of Dropout"""
-    _test_op(data, L.Dropout, "Dropout", **kwargs)
-
-
-def test_forward_Dropout():
-    """Dropout"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_dropout(data)
-    _test_dropout(data, dropout_ratio=0.7)
-
-
-#######################################################################
-# Eltwise
-# -----------
-
-
-def _test_eltwise(data_list, **kwargs):
-    """One iteration of Eltwise"""
-    _test_op(data_list, L.Eltwise, "Eltwise", **kwargs)
-
-
-def test_forward_Eltwise():
-    """Eltwise"""
-    _test_eltwise(
-        [
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-        ],
-        operation=0,
-    )
-    _test_eltwise(
-        [
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-        ],
-        operation=1,
-    )
-    _test_eltwise(
-        [
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-        ],
-        operation=2,
-    )
-    _test_eltwise(
-        [
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-        ],
-        operation=1,
-        coeff=[0.5, 1],
-    )
-    _test_eltwise(
-        [
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-        ],
-        operation=0,
-    )
-    _test_eltwise(
-        [
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-        ],
-        operation=1,
-    )
-    _test_eltwise(
-        [
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-        ],
-        operation=2,
-    )
-    _test_eltwise(
-        [
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-            np.random.rand(1, 3, 10, 11).astype(np.float32),
-        ],
-        operation=1,
-        coeff=[0.5, 1, 0.2, 1.8, 3.1, 0.1],
-    )
-
-
-#######################################################################
-# Flatten
-# -----------
-
-
-def _test_flatten(data, axis=1):
-    """One iteration of Flatten"""
-    _test_op(data, L.Flatten, "Flatten", axis=axis)
-
-
-def test_forward_Flatten():
-    """Flatten"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_flatten(data)
-    _test_flatten(data, axis=1)
-
-
-#######################################################################
-# Flatten
-# -----------
-
-
-def _test_inner_product(data, **kwargs):
-    """One iteration of InnerProduct"""
-    _test_op(data, L.InnerProduct, "InnerProduct", **kwargs)
-
-
-def test_forward_InnerProduct():
-    """InnerProduct"""
-    data = np.random.rand(1, 3, 10, 10)
-    _test_inner_product(data, num_output=20, bias_term=False, weight_filler=dict(type="xavier"))
-    _test_inner_product(
-        data,
-        num_output=20,
-        bias_term=True,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    _test_inner_product(
-        np.random.rand(20, 10).astype(np.float32),
-        num_output=30,
-        bias_term=True,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-
-
-#######################################################################
-# LRN
-# -----------
-
-
-def _test_lrn(data, local_size=5, alpha=1.0, beta=0.75, k=1.0):
-    """One iteration of LRN"""
-    _test_op(data, L.LRN, "LRN", local_size=local_size, alpha=alpha, beta=beta, k=k)
-
-
-def test_forward_LRN():
-    """LRN"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_lrn(data)
-    _test_lrn(data, local_size=3)
-    _test_lrn(data, local_size=3, alpha=2.0)
-    _test_lrn(
-        data,
-        local_size=3,
-        alpha=2.0,
-        beta=0.5,
-    )
-    _test_lrn(data, local_size=3, alpha=2.0, beta=0.5, k=2.0)
-
-
-#######################################################################
-# Permute
-# -------
-
-
-def _test_permute(data, **kwargs):
-    """One iteration of Permute."""
-    _test_op(data, L.Permute, "Permute", **kwargs)
-
-
-def test_forward_Permute():
-    """Permute"""
-    data = np.random.rand(2, 3, 4).astype(np.float32)
-    _test_permute(data, permute_param={"order": [0, 1, 2]})
-    _test_permute(data, permute_param={"order": [0, 2, 1]})
-    _test_permute(data, permute_param={"order": [1, 0, 2]})
-    _test_permute(data, permute_param={"order": [1, 2, 0]})
-    _test_permute(data, permute_param={"order": [2, 0, 1]})
-    _test_permute(data, permute_param={"order": [2, 1, 0]})
-
-
-#######################################################################
-# Pooling
-# -----------
-
-
-def _test_pooling(data, **kwargs):
-    """One iteration of Pooling."""
-    _test_op(data, L.Pooling, "Pooling", **kwargs)
-
-
-def test_forward_Pooling():
-    """Pooing"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    # MAX Pooling
-    _test_pooling(data, kernel_size=2, stride=2, pad=0, pool=P.Pooling.MAX)
-    _test_pooling(
-        data, kernel_h=2, kernel_w=3, stride_h=2, stride_w=1, pad_h=1, pad_w=2, pool=P.Pooling.MAX
-    )
-    _test_pooling(data, pool=P.Pooling.MAX, global_pooling=True)
-
-    # AVE Pooing
-    _test_pooling(data, kernel_size=2, stride=2, pad=0, pool=P.Pooling.AVE)
-    _test_pooling(
-        data, kernel_h=2, kernel_w=3, stride_h=2, stride_w=1, pad_h=1, pad_w=2, pool=P.Pooling.AVE
-    )
-    _test_pooling(data, pool=P.Pooling.AVE, global_pooling=True)
-
-
-#######################################################################
-# Power
-# -----
-def _test_power(data, **kwargs):
-    """One iteration of Power."""
-    _test_op(data, L.Power, "Power", **kwargs)
-
-
-def test_forward_Power():
-    """Power"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_power(data, power_param={"power": 0.37, "scale": 0.83, "shift": -2.4})
-    _test_power(data, power_param={"power": 0.37, "scale": 0.83, "shift": 0.0})
-    _test_power(data, power_param={"power": 0.0, "scale": 0.83, "shift": -2.4})
-    _test_power(data, power_param={"power": 1.0, "scale": 0.83, "shift": -2.4})
-    _test_power(data, power_param={"power": 2.0, "scale": 0.34, "shift": -2.4})
-    _test_power(data, power_param={"power": 1.0, "scale": 1.0, "shift": 0.0})
-
-
-#######################################################################
-# PReLU
-# -----------
-
-
-def _test_prelu(data, **kwargs):
-    """One iteration of PReLU."""
-    _test_op(data, L.PReLU, "PReLU", **kwargs)
-
-
-def test_forward_PReLU():
-    """PReLU"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_prelu(data, filler=dict(type="constant", value=0.5))
-    _test_prelu(data)
-    _test_prelu(np.random.rand(10, 20).astype(np.float32))
-
-
-#######################################################################
-# ReLU
-# -----------
-
-
-def _test_relu(data, **kwargs):
-    """One iteration of ReLU."""
-    _test_op(data, L.ReLU, "ReLU", **kwargs)
-
-
-def test_forward_ReLU():
-    """ReLU"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_relu(data)
-    _test_relu(np.random.rand(10, 20).astype(np.float32))
-
-
-#######################################################################
-# Reshape
-# -----------
-
-
-def _test_reshape(data, **kwargs):
-    """One iteration of Reshape."""
-    _test_op(data, L.Reshape, "Reshape", **kwargs)
-
-
-def test_forward_Reshape():
-    """Reshape"""
-    data = np.random.rand(1, 8, 6).astype(np.float32)
-    _test_reshape(data, reshape_param={"shape": {"dim": [4, 3, 4]}})
-    _test_reshape(data, reshape_param={"shape": {"dim": [2, 0, 3]}})
-    _test_reshape(data, reshape_param={"shape": {"dim": [2, 0, -1]}})
-    _test_reshape(data, reshape_param={"shape": {"dim": [0, -1]}})
-
-    _test_reshape(data, reshape_param={"shape": {"dim": [2, 3]}, "axis": 2})
-    _test_reshape(data, reshape_param={"shape": {"dim": [4, 3, 4]}, "axis": 1})
-    _test_reshape(data, reshape_param={"shape": {"dim": [4, 3, 4]}, "axis": -3})
-
-    _test_reshape(data, reshape_param={"shape": {"dim": [2, 4]}, "axis": 1, "num_axes": 1})
-    _test_reshape(data, reshape_param={"shape": {"dim": [3, 16]}, "axis": 1, "num_axes": 2})
-
-
-#######################################################################
-# Scale
-# -----------
-
-
-def _test_scale(data, **kwargs):
-    """One iteration of Scale."""
-    _test_op(data, L.Scale, "Scale", **kwargs)
-
-
-def test_forward_Scale():
-    """Scale"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_scale(data, filler=dict(type="xavier"))
-    _test_scale(data, filler=dict(type="xavier"), bias_term=True, bias_filler=dict(type="xavier"))
-
-
-#######################################################################
-# Sigmoid
-# -----------
-
-
-def _test_sigmoid(data, **kwargs):
-    """One iteration of Sigmoid."""
-    _test_op(data, L.Sigmoid, "Sigmoid", **kwargs)
-
-
-def test_forward_Sigmoid():
-    """Sigmoid"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_sigmoid(data)
-
-
-#######################################################################
-# Slice
-# -----------
-
-
-def _test_slice(data, **kwargs):
-    """One iteration of Slice"""
-    _test_op(data, L.Slice, "Slice", **kwargs)
-
-
-def test_forward_Slice():
-    """Slice"""
-    data = np.random.rand(1, 3, 10, 10).astype(np.float32)
-    _test_slice(data, ntop=2, slice_param=dict(axis=1, slice_point=[1]))
-    _test_slice(data, ntop=2, slice_param=dict(axis=-1, slice_point=[1]))
-    _test_slice(data, ntop=3, slice_param=dict(axis=2, slice_point=[1, 6]))
-    _test_slice(data, ntop=3)
-
-
-#######################################################################
-# Softmax
-# -----------
-
-
-def _test_softmax(data, **kwargs):
-    """One iteration of Softmax"""
-    _test_op(data, L.Softmax, "Softmax", **kwargs)
-
-
-def test_forward_Softmax():
-    """Softmax"""
-    _test_softmax(np.random.rand(1, 3, 10, 10).astype(np.float32))
-    _test_softmax(np.random.rand(1, 3, 10, 10).astype(np.float32), axis=2)
-    _test_softmax(np.random.rand(10, 10).astype(np.float32), axis=0)
-    _test_softmax(np.random.rand(2, 10, 10).astype(np.float32), axis=1)
-
-
-#######################################################################
-# TanH
-# -----------
-
-
-def _test_tanh(data, **kwargs):
-    """One iteration of TanH"""
-    _test_op(data, L.TanH, "TanH", **kwargs)
-
-
-def test_forward_TanH():
-    """TanH"""
-    _test_tanh(np.random.rand(1, 3, 10, 10).astype(np.float32))
-    _test_tanh(np.random.rand(3, 10, 10).astype(np.float32))
-    _test_tanh(np.random.rand(10, 10).astype(np.float32))
-    _test_tanh(np.random.rand(10).astype(np.float32))
-
-
-#######################################################################
-# Reduction
-# -----------
-
-
-def _test_reduction(data, **kwargs):
-    """One iteration of Reduction"""
-    _test_op(data, L.Reduction, "Reduction", **kwargs)
-
-
-def test_forward_Reduction():
-    """Reduction"""
-    reduction_op = {"SUM": 1, "ASUM": 2, "SUMSQ": 3, "MEAN": 4}
-    _test_reduction(np.random.rand(10).astype(np.float32), operation=reduction_op["SUM"], axis=0)
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40).astype(np.float32), operation=reduction_op["SUM"], axis=3
-    )
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40).astype(np.float32), operation=reduction_op["SUM"], axis=1
-    )
-    _test_reduction(
-        np.random.rand(10).astype(np.float32), operation=reduction_op["SUM"], axis=0, coeff=0.5
-    )
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40).astype(np.float32),
-        operation=reduction_op["SUM"],
-        axis=3,
-        coeff=5.0,
-    )
-    _test_reduction(np.random.rand(10).astype(np.float32), operation=reduction_op["ASUM"])
-    _test_reduction(
-        np.random.rand(10, 20).astype(np.float32), operation=reduction_op["ASUM"], axis=1
-    )
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40).astype(np.float32), operation=reduction_op["ASUM"], axis=3
-    )
-    _test_reduction(
-        np.random.rand(10).astype(np.float32), operation=reduction_op["ASUM"], axis=0, coeff=0.0
-    )
-    _test_reduction(
-        np.random.rand(10, 20, 30).astype(np.float32),
-        operation=reduction_op["ASUM"],
-        axis=2,
-        coeff=7.0,
-    )
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40, 10).astype(np.float32),
-        operation=reduction_op["ASUM"],
-        axis=3,
-        coeff=1.0,
-    )
-    _test_reduction(np.random.rand(10).astype(np.float32), operation=reduction_op["SUMSQ"], axis=0)
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40).astype(np.float32), operation=reduction_op["SUMSQ"], axis=3
-    )
-    _test_reduction(
-        np.random.rand(10).astype(np.float32), operation=reduction_op["SUMSQ"], axis=0, coeff=0.0
-    )
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40, 50).astype(np.float32),
-        operation=reduction_op["SUMSQ"],
-        axis=4,
-        coeff=2.0,
-    )
-    _test_reduction(np.random.rand(10).astype(np.float32), operation=reduction_op["MEAN"], axis=0)
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40).astype(np.float32), operation=reduction_op["MEAN"], axis=3
-    )
-    _test_reduction(
-        np.random.rand(10).astype(np.float32), operation=reduction_op["MEAN"], axis=0, coeff=0.0
-    )
-    _test_reduction(
-        np.random.rand(10, 20, 30, 40).astype(np.float32),
-        operation=reduction_op["MEAN"],
-        axis=3,
-        coeff=2.0,
-    )
-
-
-#######################################################################
-# Embed
-# -----------
-
-
-def _test_embed(data, **kwargs):
-    """One iteration of Embed"""
-    _test_op(data, L.Embed, "Embed", **kwargs)
-
-
-def test_forward_Embed():
-    """Embed"""
-    k = 20
-    data = list(i for i in range(k))
-    np.random.shuffle(data)
-    # dimension is 1
-    data = np.asarray(data)
-    _test_embed(
-        data,
-        num_output=30,
-        input_dim=k,
-        bias_term=True,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    _test_embed(
-        data,
-        num_output=30,
-        input_dim=k,
-        bias_term=False,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    # dimension is 2
-    data = np.reshape(data, [4, 5])
-    _test_embed(
-        data,
-        num_output=30,
-        input_dim=k,
-        bias_term=True,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    _test_embed(
-        data,
-        num_output=30,
-        input_dim=k,
-        bias_term=False,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    # dimension is 3
-    data = np.reshape(data, [2, 2, 5])
-    _test_embed(
-        data,
-        num_output=30,
-        input_dim=k,
-        bias_term=True,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    _test_embed(
-        data,
-        num_output=30,
-        input_dim=k,
-        bias_term=False,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    # dimension is 4
-    data = np.reshape(data, [2, 2, 5, 1])
-    _test_embed(
-        data,
-        num_output=30,
-        input_dim=k,
-        bias_term=True,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-    _test_embed(
-        data,
-        num_output=30,
-        input_dim=k,
-        bias_term=False,
-        weight_filler=dict(type="xavier"),
-        bias_filler=dict(type="xavier"),
-    )
-
-
-#######################################################################
-# Mobilenetv2
-# -----------
-
-
-def _test_mobilenetv2(data):
-    """One iteration of Mobilenetv2"""
-    mean_val = np.array([103.939, 116.779, 123.68], dtype=np.float32)
-    mean_val = np.reshape(mean_val, (1, 3, 1, 1))
-    mean_val = np.tile(mean_val, (1, 1, 224, 224))
-    data_process = data - mean_val
-    data_process = data_process / 58.8
-    data_process = data_process.astype(np.float32)
-
-    proto_file_url = (
-        "https://github.com/shicai/MobileNet-Caffe/raw/master/mobilenet_v2_deploy.prototxt"
-    )
-    blob_file_url = (
-        "https://github.com/shicai/MobileNet-Caffe/blob/master/mobilenet_v2.caffemodel?raw=true"
-    )
-    proto_file = download_testdata(proto_file_url, "mobilenetv2.prototxt", module="model")
-    blob_file = download_testdata(blob_file_url, "mobilenetv2.caffemodel", module="model")
-    _test_network(data_process, proto_file, blob_file)
-
-
-def test_forward_Mobilenetv2():
-    """Mobilenetv2"""
-    data = np.random.randint(0, 256, size=(1, 3, 224, 224)).astype(np.float32)
-    _test_mobilenetv2(data)
-
-
-#######################################################################
-# Alexnet
-# -----------
-
-
-def _test_alexnet(data):
-    """One iteration of Alexnet"""
-    mean_val = np.array([103.939, 116.779, 123.68], dtype=np.float32)
-    mean_val = np.reshape(mean_val, (1, 3, 1, 1))
-    mean_val = np.tile(mean_val, (1, 1, 227, 227))
-    data_process = data - mean_val
-    data_process = data_process.astype(np.float32)
-
-    proto_file_url = (
-        "https://github.com/BVLC/caffe/raw/master/models/" + "bvlc_alexnet/deploy.prototxt"
-    )
-    blob_file_url = "http://dl.caffe.berkeleyvision.org/bvlc_alexnet.caffemodel"
-    proto_file = download_testdata(proto_file_url, "alexnet.prototxt", module="model")
-    blob_file = download_testdata(blob_file_url, "alexnet.caffemodel", module="model")
-    _test_network(data_process, proto_file, blob_file)
-
-
-@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/13227")
-def test_forward_Alexnet():
-    """Alexnet"""
-    data = np.random.randint(0, 256, size=(1, 3, 227, 227)).astype(np.float32)
-    _test_alexnet(data)
-
-
-#######################################################################
-# Resnet50
-# -----------
-
-
-def _test_resnet50(data):
-    """One iteration of Resnet50"""
-    mean_val = np.array([103.939, 116.779, 123.68], dtype=np.float32)
-    mean_val = np.reshape(mean_val, (1, 3, 1, 1))
-    mean_val = np.tile(mean_val, (1, 1, 224, 224))
-    data_process = data - mean_val
-    data_process = data_process.astype(np.float32)
-
-    proto_file_url = (
-        "https://github.com/fernchen/CaffeModels/raw/master/resnet/ResNet-50-deploy.prototxt"
-    )
-    blob_file_url = (
-        "https://github.com/fernchen/CaffeModels/raw/master/resnet/ResNet-50-model.caffemodel"
-    )
-
-    proto_file = download_testdata(proto_file_url, "resnet50.prototxt", module="model")
-    blob_file = download_testdata(blob_file_url, "resnet50.caffemodel", module="model")
-
-    _test_network(data_process, proto_file, blob_file)
-
-
-def test_forward_Resnet50():
-    """Resnet50"""
-    data = np.random.randint(0, 256, size=(1, 3, 224, 224)).astype(np.float32)
-    _test_resnet50(data)
-
-
-#######################################################################
-# Inceptionv4
-# -----------
-
-
-def _test_inceptionv1(data):
-    """One iteration of Inceptionv4"""
-    mean_val = np.array([103.939, 116.779, 123.68], dtype=np.float32)
-    mean_val = np.reshape(mean_val, (1, 3, 1, 1))
-    mean_val = np.tile(mean_val, (1, 1, 224, 224))
-    data_process = data - mean_val
-    data_process = data_process / 58.8
-    data_process = data_process.astype(np.float32)
-
-    proto_file_url = (
-        "https://github.com/BVLC/caffe/raw/master/models" + "/bvlc_googlenet/deploy.prototxt"
-    )
-    blob_file_url = "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel"
-    proto_file = download_testdata(proto_file_url, "inceptionv1.prototxt", module="model")
-    blob_file = download_testdata(blob_file_url, "inceptionv1.caffemodel", module="model")
-    _test_network(data_process, proto_file, blob_file)
-
-
-@pytest.mark.skip(reason="See issue https://github.com/apache/tvm/issues/13227")
-def test_forward_Inceptionv1():
-    """Inceptionv4"""
-    data = np.random.randint(0, 256, size=(1, 3, 224, 224)).astype(np.float32)
-    _test_inceptionv1(data)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/caffe2/model_zoo/__init__.py b/tests/python/frontend/caffe2/model_zoo/__init__.py
deleted file mode 100644
index 946367f9ed4f..000000000000
--- a/tests/python/frontend/caffe2/model_zoo/__init__.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""Store for caffe2 examples and common models."""
-from __future__ import absolute_import as _abs
-import os
-import sys
-import importlib
-from caffe2.python.models.download import ModelDownloader
-from . import squeezenet
-
-models = [
-    "squeezenet",
-    "resnet50",
-    "vgg19",
-]
-
-mf = ModelDownloader()
-
-
-class Model:
-    def __init__(self, model_name):
-        self.init_net, self.predict_net, self.value_info = mf.get_c2_model(model_name)
-
-
-for model in models:
-    try:
-        locals()["c2_" + model] = importlib.import_module("caffe2.python.models." + model)
-    except ImportError:
-        locals()["c2_" + model] = Model(model)
-
-# squeezenet
-def relay_squeezenet():
-    return squeezenet.get_workload()
diff --git a/tests/python/frontend/caffe2/model_zoo/squeezenet.py b/tests/python/frontend/caffe2/model_zoo/squeezenet.py
deleted file mode 100644
index 06e99567e5a8..000000000000
--- a/tests/python/frontend/caffe2/model_zoo/squeezenet.py
+++ /dev/null
@@ -1,135 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# coding: utf-8
-# pylint: disable=unused-argument
-
-"""
-Symbol of SqueezeNet
-
-Reference:
-Iandola, Forrest N., et al.
-"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and< 0.5 mb model size." (2016).
-"""
-
-from tvm import relay
-from tvm.relay.testing import create_workload
-
-# Helpers
-def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels, prefix=""):
-    net = _make_fire_conv(net, squeeze_channels, 1, 0, f"{prefix}/squeeze1x1")
-
-    left = _make_fire_conv(net, expand1x1_channels, 1, 0, f"{prefix}/expand1x1")
-    right = _make_fire_conv(net, expand3x3_channels, 3, 1, f"{prefix}/expand3x3")
-    # NOTE : Assume NCHW layout here
-    net = relay.concatenate((left, right), axis=1)
-    return net
-
-
-def _make_fire_conv(net, channels, kernel_size, padding=0, prefix=""):
-    net = relay.nn.conv2d(
-        net,
-        relay.var(f"{prefix}_weight"),
-        channels=channels,
-        kernel_size=(kernel_size, kernel_size),
-        padding=(padding, padding),
-    )
-    net = relay.nn.bias_add(net, relay.var(f"{prefix}_bias"))
-    net = relay.nn.relu(net)
-    return net
-
-
-# Net
-def get_net(batch_size, image_shape, num_classes, dtype):
-    """Get symbol of SqueezeNet
-
-    Parameters
-    ----------
-    batch_size : int
-        The batch size used in the model
-
-    image_shape : tuple
-        The input image shape
-
-    num_classes: int
-        The number of classification results
-
-    dtype : str
-        The data type
-
-    """
-    data_shape = (batch_size,) + image_shape
-    net = relay.var("data", shape=data_shape, dtype=dtype)
-    net = relay.nn.conv2d(
-        net,
-        relay.var("conv1_weight"),
-        channels=64,
-        kernel_size=(3, 3),
-        strides=(2, 2),
-        padding=(0, 0),
-    )
-    net = relay.nn.bias_add(net, relay.var("conv1_bias"))
-    net = relay.nn.relu(net)
-    net = relay.nn.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
-    net = _make_fire(net, 16, 64, 64, "fire2")
-    net = _make_fire(net, 16, 64, 64, "fire3")
-    net = relay.nn.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
-    net = _make_fire(net, 32, 128, 128, "fire4")
-    net = _make_fire(net, 32, 128, 128, "fire5")
-    net = relay.nn.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
-    net = _make_fire(net, 48, 192, 192, "fire6")
-    net = _make_fire(net, 48, 192, 192, "fire7")
-    net = _make_fire(net, 64, 256, 256, "fire8")
-    net = _make_fire(net, 64, 256, 256, "fire9")
-    net = relay.nn.dropout(net, rate=0.5)
-    net = relay.nn.conv2d(net, relay.var("conv10_weight"), channels=num_classes, kernel_size=(1, 1))
-    net = relay.nn.bias_add(net, relay.var("conv10_bias"))
-    net = relay.nn.relu(net)
-    net = relay.nn.global_avg_pool2d(net)
-    net = relay.nn.softmax(net, axis=1)
-    args = relay.analysis.free_vars(net)
-    return relay.Function(args, net)
-
-
-def get_workload(batch_size=1, image_shape=(3, 224, 224), num_classes=1000, dtype="float32"):
-    """Get benchmark workload for SqueezeNet
-
-    Parameters
-    ----------
-    batch_size : int, optional
-        The batch size used in the model
-
-    num_classes : int, optional
-        Number of classes
-
-    image_shape : tuple, optional
-        The input image shape
-
-    dtype : str, optional
-        The data type
-
-    Returns
-    -------
-    net : relay.Function
-        The computational graph
-
-    params : dict of str to NDArray
-        The parameters.
-    """
-
-    net = get_net(batch_size, image_shape, num_classes, dtype)
-    return create_workload(net)
diff --git a/tests/python/frontend/caffe2/test_forward.py b/tests/python/frontend/caffe2/test_forward.py
deleted file mode 100644
index 9758d937c254..000000000000
--- a/tests/python/frontend/caffe2/test_forward.py
+++ /dev/null
@@ -1,257 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Caffe2 testcases
-====================
-This article is a test script to test Caffe2 operator with Relay.
-"""
-from collections import namedtuple
-import numpy as np
-
-from caffe2.python import workspace, core
-from caffe2.proto import caffe2_pb2
-from model_zoo import c2_squeezenet, c2_resnet50, c2_vgg19
-import tvm
-from tvm.contrib import graph_executor
-from tvm import relay
-
-import tvm.testing
-
-
-def get_tvm_output(model, input_data, target, device, output_shape, output_dtype="float32"):
-    """Generic function to execute and get tvm output"""
-    # supporting multiple inputs in caffe2 in a bit tricky,
-    # because the input names can appear at the beginning or end of model.predict_net.external_input
-    assert isinstance(input_data, np.ndarray)
-
-    # here we use the first input blob to the first op to get the input name
-    input_names = model.predict_net.op[0].input[0]
-    shape_dict = {input_names: input_data.shape}
-    dtype_dict = {input_names: input_data.dtype}
-    mod, params = relay.frontend.from_caffe2(
-        model.init_net, model.predict_net, shape_dict, dtype_dict
-    )
-    with tvm.transform.PassContext(opt_level=3):
-        lib = relay.build(mod, target, params=params)
-
-    m = graph_executor.GraphModule(lib["default"](device))
-
-    # set inputs
-    m.set_input(input_names, tvm.nd.array(input_data.astype(input_data.dtype)))
-
-    # execute
-    m.run()
-
-    # get outputs
-    if isinstance(output_shape, list) and isinstance(output_dtype, list):
-        tvm_output_list = []
-        for i, s in enumerate(output_shape):
-            tvm_output = m.get_output(i, tvm.nd.empty((s), output_dtype[i]))
-            tvm_output_list.append(tvm_output.numpy())
-        return tvm_output_list
-    else:
-        tvm_output = m.get_output(0, tvm.nd.empty((output_shape), output_dtype))
-        return tvm_output.numpy()
-
-
-def get_caffe2_output(model, x, dtype="float32"):
-    workspace.RunNetOnce(model.init_net)
-
-    input_blob = model.predict_net.op[0].input[0]
-    workspace.FeedBlob(input_blob, x.astype(dtype))
-    workspace.RunNetOnce(model.predict_net)
-
-    output_blob = model.predict_net.external_output[0]
-    c2_output = workspace.FetchBlob(output_blob)
-    return c2_output
-
-
-def verify_caffe2_forward_impl(model, data_shape, out_shape):
-    dtype = "float32"
-    data = np.random.uniform(size=data_shape).astype(dtype)
-    c2_out = get_caffe2_output(model, data, dtype)
-    for target, dev in tvm.testing.enabled_targets():
-        tvm_out = get_tvm_output(model, data, target, dev, out_shape, dtype)
-        tvm.testing.assert_allclose(c2_out, tvm_out, rtol=1e-5, atol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_squeezenet1_1():
-    verify_caffe2_forward_impl(c2_squeezenet, (1, 3, 224, 224), (1, 1000, 1, 1))
-
-
-@tvm.testing.uses_gpu
-def test_forward_resnet50():
-    verify_caffe2_forward_impl(c2_resnet50, (1, 3, 224, 224), (1, 1000))
-
-
-@tvm.testing.uses_gpu
-def test_forward_vgg19():
-    verify_caffe2_forward_impl(c2_vgg19, (1, 3, 224, 224), (1, 1000))
-
-
-Model = namedtuple("Model", ["init_net", "predict_net"])
-
-
-@tvm.testing.uses_gpu
-def test_elementwise_add():
-    """Elewise_add"""
-    data_shape = (1, 16, 9, 9)
-    init_net = caffe2_pb2.NetDef()
-    init_net.name = "test_init_net"
-    init_net.external_output[:] = ["A", "B"]
-    init_net.op.extend(
-        [
-            core.CreateOperator(
-                "GivenTensorFill",
-                [],
-                ["A"],
-                shape=data_shape,
-                values=np.random.uniform(size=data_shape).flatten().tolist(),
-            ),
-            core.CreateOperator(
-                "GivenTensorFill",
-                [],
-                ["B"],
-                shape=data_shape,
-                values=np.random.uniform(size=data_shape).flatten().tolist(),
-            ),
-        ]
-    )
-
-    predict_net = caffe2_pb2.NetDef()
-    predict_net.name = "test_predict_net"
-    predict_net.external_input[:] = ["A", "B"]
-    predict_net.external_output[:] = ["C"]
-    predict_net.op.extend(
-        [
-            core.CreateOperator(
-                "Add",
-                ["A", "B"],
-                ["C"],
-            )
-        ]
-    )
-
-    model = Model(init_net, predict_net)
-    verify_caffe2_forward_impl(model, data_shape, data_shape)
-
-
-@tvm.testing.uses_gpu
-def test_elementwise_add_with_broadcast():
-    """Elewise_add_with_broadcast"""
-    data_shape = (1, 16, 9, 9)
-    init_net = caffe2_pb2.NetDef()
-    init_net.name = "test_init_net"
-    init_net.external_output[:] = ["A", "B"]
-    init_net.op.extend(
-        [
-            core.CreateOperator(
-                "GivenTensorFill",
-                [],
-                ["A"],
-                shape=data_shape,
-                values=np.random.uniform(size=data_shape).flatten().tolist(),
-            ),
-            core.CreateOperator(
-                "GivenTensorFill",
-                [],
-                ["B"],
-                shape=(1,),
-                values=np.random.uniform(size=1).flatten().tolist(),
-            ),
-        ]
-    )
-
-    predict_net = caffe2_pb2.NetDef()
-    predict_net.name = "test_predict_net"
-    predict_net.external_input[:] = ["A", "B"]
-    predict_net.external_output[:] = ["C"]
-    predict_net.op.extend(
-        [
-            core.CreateOperator(
-                "Add",
-                ["A", "B"],
-                ["C"],
-                broadcast=1,
-            )
-        ]
-    )
-
-    model = Model(init_net, predict_net)
-    verify_caffe2_forward_impl(model, data_shape, data_shape)
-
-
-@tvm.testing.uses_gpu
-def test_normalize_yuv():
-    """Normalize_yuv"""
-    data_shape = (1, 3, 96, 96)
-    init_net = caffe2_pb2.NetDef()
-    init_net.name = "test_init_net"
-    init_net.external_output[:] = ["A", "mean", "std"]
-    init_net.op.extend(
-        [
-            core.CreateOperator(
-                "GivenTensorFill",
-                [],
-                ["A"],
-                shape=data_shape,
-                values=np.random.uniform(size=data_shape).flatten().tolist(),
-            ),
-            core.CreateOperator(
-                "GivenTensorFill",
-                [],
-                ["mean"],
-                shape=(
-                    1,
-                    3,
-                ),
-                values=np.random.uniform(size=3).flatten().tolist(),
-            ),
-            core.CreateOperator(
-                "GivenTensorFill",
-                [],
-                ["std"],
-                shape=(
-                    1,
-                    3,
-                ),
-                values=np.random.uniform(size=3).flatten().tolist(),
-            ),
-        ]
-    )
-
-    predict_net = caffe2_pb2.NetDef()
-    predict_net.name = "test_predict_net"
-    predict_net.external_input[:] = ["A", "mean", "std"]
-    predict_net.external_output[:] = ["C"]
-    predict_net.op.extend(
-        [
-            core.CreateOperator(
-                "NormalizePlanarYUV",
-                ["A", "mean", "std"],
-                ["C"],
-            )
-        ]
-    )
-
-    model = Model(init_net, predict_net)
-    verify_caffe2_forward_impl(model, data_shape, data_shape)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/caffe2/test_graph.py b/tests/python/frontend/caffe2/test_graph.py
deleted file mode 100644
index 3bf5beff3fce..000000000000
--- a/tests/python/frontend/caffe2/test_graph.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Test graph equality of caffe2 models."""
-from model_zoo import c2_squeezenet, relay_squeezenet
-import tvm
-from tvm import relay
-from tvm.relay import transform
-
-
-def compare_graph(lhs_mod, rhs_mod):
-    lhs_mod = transform.InferType()(lhs_mod)
-    rhs_mod = transform.InferType()(rhs_mod)
-    tvm.ir.assert_structural_equal(lhs_mod["main"], rhs_mod["main"])
-
-
-def test_squeeze_net():
-    shape_dict = {"data": (1, 3, 224, 224)}
-    dtype_dict = {"data": "float32"}
-    mod, _, = relay.frontend.from_caffe2(
-        c2_squeezenet.init_net, c2_squeezenet.predict_net, shape_dict, dtype_dict
-    )
-    relay_mod, _ = relay_squeezenet()
-    compare_graph(mod, relay_mod)
-
-
-if __name__ == "__main__":
-    test_squeeze_net()
diff --git a/tests/python/frontend/coreml/model_zoo/__init__.py b/tests/python/frontend/coreml/model_zoo/__init__.py
deleted file mode 100644
index ea2f3478fde4..000000000000
--- a/tests/python/frontend/coreml/model_zoo/__init__.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""coreml model zoo for testing purposes."""
-import os
-from PIL import Image
-import numpy as np
-from tvm.contrib.download import download_testdata
-
-
-def get_mobilenet():
-    url = "https://docs-assets.developer.apple.com/coreml/models/MobileNet.mlmodel"
-    dst = "mobilenet.mlmodel"
-    real_dst = download_testdata(url, dst, module="coreml")
-    return os.path.abspath(real_dst)
-
-
-def get_resnet50():
-    url = "https://docs-assets.developer.apple.com/coreml/models/Resnet50.mlmodel"
-    dst = "resnet50.mlmodel"
-    real_dst = download_testdata(url, dst, module="coreml")
-    return os.path.abspath(real_dst)
-
-
-def get_cat_image():
-    """Get cat image"""
-    url = (
-        "https://gist.githubusercontent.com/zhreshold/"
-        + "bcda4716699ac97ea44f791c24310193/raw/fa7ef0e9c9a5daea686d6473a62aacd1a5885849/cat.png"
-    )
-    dst = "cat.png"
-    real_dst = download_testdata(url, dst, module="data")
-    img = Image.open(real_dst).resize((224, 224))
-    # CoreML's standard model image format is BGR
-    img_bgr = np.array(img)[:, :, ::-1]
-    img = np.transpose(img_bgr, (2, 0, 1))[np.newaxis, :]
-    return np.asarray(img)
diff --git a/tests/python/frontend/coreml/test_forward.py b/tests/python/frontend/coreml/test_forward.py
deleted file mode 100644
index 26ddcba6ef41..000000000000
--- a/tests/python/frontend/coreml/test_forward.py
+++ /dev/null
@@ -1,851 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-CoreML testcases
-====================
-This article is a test script to test CoreML operator with Relay.
-"""
-from os import path
-from enum import Enum
-import tempfile
-import numpy as np
-import model_zoo
-import coremltools as cm
-from coremltools.models.neural_network import NeuralNetworkBuilder
-from coremltools.models import datatypes
-from tensorflow import keras
-
-import tvm
-import tvm.topi.testing
-import tvm.testing
-from tvm.contrib import graph_executor
-from tvm.topi.testing import conv2d_nchw_python
-from tvm import relay
-
-
-def get_tvm_output(
-    func, x, params, target, device, out_shape=(1, 1000), input_name="image", dtype="float32"
-):
-    """Generic function to execute and get tvm output"""
-    with tvm.transform.PassContext(opt_level=3):
-        lib = relay.build(func, target, params=params)
-    m = graph_executor.GraphModule(lib["default"](device))
-    # set inputs
-    m.set_input(input_name, tvm.nd.array(x.astype(dtype)))
-    m.run()
-    # get outputs
-    out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
-    return out.numpy()
-
-
-def run_model_checkonly(model_file, model_name="", input_name="image"):
-    model = cm.models.MLModel(model_file)
-    x = model_zoo.get_cat_image()
-    shape_dict = {input_name: x.shape}
-    # Some Relay passes change operators on the fly. Ensuring that we generate
-    # new graph for each target.
-    for target, dev in tvm.testing.enabled_targets():
-        mod, params = relay.frontend.from_coreml(model, shape_dict)
-        tvm_output = get_tvm_output(mod["main"], x, params, target, dev)
-        print(target, dev, model_name, "prediction id: ", np.argmax(tvm_output.flat))
-
-
-@tvm.testing.uses_gpu
-def test_mobilenet_checkonly():
-    model_file = model_zoo.get_mobilenet()
-    run_model_checkonly(model_file, "mobilenet")
-
-
-@tvm.testing.uses_gpu
-def test_resnet50_checkonly():
-    model_file = model_zoo.get_resnet50()
-    run_model_checkonly(model_file, "resnet50")
-
-
-def run_tvm_graph(
-    coreml_model, target, device, input_data, input_name, output_shape, output_dtype="float32"
-):
-    """Generic function to compile on relay and execute on tvm"""
-    if isinstance(input_data, list):
-        shape_dict = {}
-        dtype_dict = {}
-        for i, inp in enumerate(input_name):
-            shape_dict[inp] = input_data[i].shape
-            dtype_dict[inp] = input_data[i].dtype
-    else:
-        shape_dict = {input_name: input_data.shape}
-        dtype_dict = {input_name: input_data.dtype}
-
-    mod, params = relay.frontend.from_coreml(coreml_model, shape_dict)
-    with tvm.transform.PassContext(opt_level=3):
-        lib = relay.build(mod, target, params=params)
-
-    m = graph_executor.GraphModule(lib["default"](device))
-    # set inputs
-    if isinstance(input_data, list):
-        for i, inp in enumerate(input_name):
-            m.set_input(inp, tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
-    else:
-        m.set_input(input_name, tvm.nd.array(input_data.astype(input_data.dtype)))
-
-    # execute
-    m.run()
-    # get outputs
-    if isinstance(output_shape, list) and isinstance(output_dtype, list):
-        tvm_output_list = []
-        for i, s in enumerate(output_shape):
-            tvm_output = m.get_output(i, tvm.nd.empty((s), output_dtype[i]))
-            tvm_output_list.append(tvm_output.numpy())
-        return tvm_output_list
-    else:
-        if not output_shape:
-            tvm_output = m.get_output(0)
-        else:
-            tvm_output = m.get_output(0, tvm.nd.empty((output_shape), output_dtype))
-        return tvm_output.numpy()
-
-
-def verify_add_layer_params(input_dim, alpha=2):
-    """Verify add layer params"""
-    dtype = "float32"
-
-    a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-    a_np2 = np.random.uniform(size=input_dim).astype(dtype)
-
-    b_np = np.add(a_np1, a_np2) + alpha
-    inputs = [("input1", datatypes.Array(*input_dim)), ("input2", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_elementwise(
-        name="Add", alpha=alpha, input_names=["input1", "input2"], output_name="output", mode="ADD"
-    )
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(
-            model, target, dev, [a_np1, a_np2], ["input1", "input2"], b_np.shape, dtype
-        )
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_add_layer_params():
-    verify_add_layer_params((1, 2, 2), 0)
-    verify_add_layer_params((1, 2, 2), 1)
-    verify_add_layer_params((1, 3, 3), 2)
-
-
-def verify_multiply_layer_params(input_dim, alpha):
-    """Verify multiply layer params"""
-    dtype = "float32"
-
-    a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-    a_np2 = np.random.uniform(size=input_dim).astype(dtype)
-
-    b_np = np.multiply(a_np1, a_np2) * alpha
-    inputs = [("input1", datatypes.Array(*input_dim)), ("input2", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_elementwise(
-        name="Mul",
-        alpha=alpha,
-        input_names=["input1", "input2"],
-        output_name="output",
-        mode="MULTIPLY",
-    )
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(
-            model, target, dev, [a_np1, a_np2], ["input1", "input2"], b_np.shape, dtype
-        )
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_multiply_layer_params():
-    verify_multiply_layer_params((1, 2, 2), 0)
-    verify_multiply_layer_params((1, 2, 2), 1)
-    verify_multiply_layer_params((1, 3, 3), 2)
-
-
-def verify_concat_layer_params(input1_dim, input2_dim):
-    """Verify concat layer params"""
-    dtype = "float32"
-
-    a_np1 = np.random.uniform(size=input1_dim).astype(dtype)
-    a_np2 = np.random.uniform(size=input2_dim).astype(dtype)
-
-    b_np = np.concatenate((a_np1, a_np2), axis=1)
-    inputs = [("input1", datatypes.Array(*input1_dim)), ("input2", datatypes.Array(*input2_dim))]
-    output = [("output", datatypes.Array(*b_np.shape))]  # pylint:disable=not-an-iterable
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_elementwise(
-        name="Concate", input_names=["input1", "input2"], output_name="output", mode="CONCAT"
-    )
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(
-            model, target, dev, [a_np1, a_np2], ["input1", "input2"], b_np.shape, dtype
-        )
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_concat_layer_params():
-    verify_concat_layer_params((1, 1, 2, 2), (1, 2, 2, 2))
-    verify_concat_layer_params((1, 2, 4, 4), (1, 3, 4, 4))
-
-
-def _verify_upsample_layer_params(input_dim, scale, mode):
-    dtype = "float32"
-
-    a_np = np.full(input_dim, 1, dtype=dtype)
-
-    if mode == "NN":
-        method = "nearest_neighbor"
-        coord_trans = "asymmetric"
-    else:
-        method = "linear"
-        coord_trans = "align_corners"
-
-    b_np = tvm.topi.testing.resize2d_python(a_np, (scale, scale), "NCHW", method, coord_trans)
-
-    input_data = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(input_data, output)
-    builder.add_upsample(
-        name="Upsample",
-        scaling_factor_h=scale,
-        scaling_factor_w=scale,
-        mode=mode,
-        input_name="input",
-        output_name="output",
-    )
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, a_np, "input", b_np.shape, dtype)
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_upsample_layer_params():
-    """Upsample Layer Params"""
-    _verify_upsample_layer_params((1, 16, 32, 32), 2, "NN")
-    _verify_upsample_layer_params((1, 4, 6, 6), 3, "BILINEAR")
-
-
-def _verify_l2_normalize(input_dim, eps):
-    dtype = "float32"
-
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    b_np = tvm.topi.testing.l2_normalize_python(a_np, eps, 1)
-
-    input_data = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(input_data, output)
-    builder.add_l2_normalize(name="L2", epsilon=eps, input_name="input", output_name="output")
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, a_np, "input", b_np.shape, dtype)
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_l2_normalize():
-    _verify_l2_normalize((1, 3, 20, 20), 0.001)
-
-
-def _verify_lrn(input_dim, size, bias, alpha, beta):
-    dtype = "float32"
-    axis = 1
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    b_np = tvm.topi.testing.lrn_python(a_np, size, axis, bias, alpha, beta)
-
-    input_data = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(input_data, output)
-    builder.add_lrn(
-        name="LRN",
-        input_name="input",
-        output_name="output",
-        alpha=alpha,
-        beta=beta,
-        k=bias,
-        local_size=size,
-    )
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, a_np, "input", b_np.shape, dtype)
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_lrn():
-    _verify_lrn((1, 3, 10, 20), 3, 1.0, 1.0, 0.5)
-
-
-def _verify_average(input_dim1, input_dim2, axis=0):
-    dtype = "float32"
-
-    a_np1 = np.random.uniform(size=input_dim1).astype(dtype)
-    a_np2 = np.random.uniform(size=input_dim2).astype(dtype)
-
-    b_np = np.mean((a_np1, a_np2), axis=axis, dtype=float)
-
-    inputs = [("input1", datatypes.Array(*input_dim1)), ("input2", datatypes.Array(*input_dim2))]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_elementwise(
-        name="MEAN", input_names=["input1", "input2"], output_name="output", mode="AVE"
-    )
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(
-            model, target, dev, [a_np1, a_np2], ["input1", "input2"], b_np.shape, dtype
-        )
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_average():
-    _verify_average((1, 3, 20, 20), (1, 3, 20, 20))
-    # disable tests for now because ValueError: setting an array element with a sequence.
-    # The requested array has an inhomogeneous shape after 1 dimensions. The detected shape
-    # was (2,) + inhomogeneous part.
-    # _verify_average((3, 20, 20), (1, 3, 20, 20))
-    # _verify_average((20, 20), (1, 3, 20, 20))
-
-
-def _verify_max(input_dim):
-    dtype = "float32"
-
-    a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-    a_np2 = np.random.uniform(size=input_dim).astype(dtype)
-    a_np3 = np.random.uniform(size=input_dim).astype(dtype)
-
-    b_np = np.max((a_np1, a_np2, a_np3), axis=0)
-
-    inputs = [
-        ("input1", datatypes.Array(*input_dim)),
-        ("input2", datatypes.Array(*input_dim)),
-        ("input3", datatypes.Array(*input_dim)),
-    ]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_elementwise(
-        name="Max", input_names=["input1", "input2", "input3"], output_name="output", mode="MAX"
-    )
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(
-            model,
-            target,
-            dev,
-            [a_np1, a_np2, a_np3],
-            ["input1", "input2", "input3"],
-            b_np.shape,
-            dtype,
-        )
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_max():
-    _verify_max((1, 3, 20, 20))
-    _verify_max((20, 20))
-
-
-def _verify_min(input_dim):
-    dtype = "float32"
-
-    a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-    a_np2 = np.random.uniform(size=input_dim).astype(dtype)
-    a_np3 = np.random.uniform(size=input_dim).astype(dtype)
-
-    b_np = np.min((a_np1, a_np2, a_np3), axis=0)
-
-    inputs = [
-        ("input1", datatypes.Array(*input_dim)),
-        ("input2", datatypes.Array(*input_dim)),
-        ("input3", datatypes.Array(*input_dim)),
-    ]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_elementwise(
-        name="Min", input_names=["input1", "input2", "input3"], output_name="output", mode="MIN"
-    )
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(
-            model,
-            target,
-            dev,
-            [a_np1, a_np2, a_np3],
-            ["input1", "input2", "input3"],
-            b_np.shape,
-            dtype,
-        )
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_min():
-    _verify_min((1, 3, 20, 20))
-    _verify_min((20, 20))
-
-
-def verify_unary_sqrt(input_dim):
-    """Verify unary sqrt"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    ref_val = np.sqrt(a_np)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_unary(name="sqrt", input_name="input", output_name="output", mode="sqrt")
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def verify_unary_rsqrt(input_dim, epsilon=0):
-    """Verify unary rsqrt"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    ref_val = 1 / np.sqrt(a_np + epsilon)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_unary(
-        name="rsqrt", input_name="input", output_name="output", mode="rsqrt", epsilon=epsilon
-    )
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def verify_unary_inverse(input_dim, epsilon=0):
-    """Verify unary inverse"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    ref_val = 1 / (a_np + epsilon)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_unary(
-        name="inverse", input_name="input", output_name="output", mode="inverse", epsilon=epsilon
-    )
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def verify_unary_power(input_dim, alpha):
-    """Verify unary power"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    ref_val = np.power(a_np, alpha)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_unary(
-        name="power", input_name="input", output_name="output", mode="power", alpha=alpha
-    )
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def verify_unary_exp(input_dim):
-    """Verify unary exp"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    ref_val = np.exp(a_np)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_unary(name="exp", input_name="input", output_name="output", mode="exp")
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def verify_unary_log(input_dim):
-    """Verify unary log"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    ref_val = np.log(a_np)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_unary(name="log", input_name="input", output_name="output", mode="log")
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def verify_unary_abs(input_dim):
-    """Verify unary abs"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(-100.0, 100.0, size=input_dim).astype(dtype)
-    ref_val = np.abs(a_np)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_unary(name="abs", input_name="input", output_name="output", mode="abs")
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def verify_unary_threshold(input_dim, alpha):
-    """Verify unary threshold"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(-100.0, 100.0, size=input_dim).astype(dtype)
-    ref_val = np.maximum(a_np, alpha)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_unary(
-        name="threshold", input_name="input", output_name="output", mode="threshold", alpha=alpha
-    )
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_unary():
-    """All unary"""
-    verify_unary_sqrt((1, 3, 20, 20))
-    verify_unary_rsqrt((1, 3, 20, 20))
-    verify_unary_rsqrt((1, 3, 20, 20), epsilon=1e-6)
-    verify_unary_inverse((1, 3, 20, 20))
-    verify_unary_inverse((1, 3, 20, 20), epsilon=1e-6)
-    verify_unary_power((1, 3, 20, 20), alpha=0.5)
-    verify_unary_power((1, 3, 20, 20), alpha=4)
-    verify_unary_exp((1, 3, 20, 20))
-    verify_unary_log((1, 3, 20, 20))
-    verify_unary_abs((1, 3, 20, 20))
-    verify_unary_threshold((1, 3, 20, 20), alpha=-6.0)
-    verify_unary_threshold((1, 3, 20, 20), alpha=5.0)
-
-
-@tvm.testing.uses_gpu
-def test_forward_reduce():
-    """Reduce"""
-
-    class ReduceAxis(Enum):
-        # pylint: disable=invalid-name
-        CHW = 0
-        HW = 1
-        C = 2
-        H = 3
-        W = 4
-
-    def _verify_reduce(input_dim, mode, axis, ref_func, dtype="float32"):
-        print(input_dim, mode, axis)
-        a_np = np.random.uniform(size=input_dim).astype(dtype)
-
-        # translate to axis from coreml format
-        if axis == ReduceAxis.CHW:
-            np_axis = (-3, -2, -1)
-        elif axis == ReduceAxis.HW:
-            np_axis = (-2, -1)
-        elif axis == ReduceAxis.C:
-            np_axis = -3
-        elif axis == ReduceAxis.H:
-            np_axis = -2
-        elif axis == ReduceAxis.W:
-            np_axis = -1
-
-        if ref_func is np.argmax:
-            ref_val = np.expand_dims(ref_func(a_np, np_axis), np_axis).astype(dtype)
-        else:
-            ref_val = ref_func(a_np, np_axis, keepdims=True)
-
-        inputs = [("input", datatypes.Array(*input_dim))]
-        output = [("output", datatypes.Array(*ref_val.shape))]
-        builder = NeuralNetworkBuilder(inputs, output)
-        builder.add_reduce(
-            name=mode, input_name="input", output_name="output", axis=axis.name, mode=mode
-        )
-
-        model = cm.models.MLModel(builder.spec)
-        for target, dev in tvm.testing.enabled_targets():
-            out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-            tvm.testing.assert_allclose(out, ref_val, rtol=1e-5, atol=1e-5)
-
-    dshapes = [[10, 10], [1, 10, 10], [1, 3, 10, 10]]
-    for dshape in dshapes:
-        for axis in ReduceAxis:
-            if len(dshape) < 3 and axis in [ReduceAxis.CHW, ReduceAxis.C]:
-                # input must have rank at least 3
-                continue
-            _verify_reduce(dshape, "sum", axis, np.sum)
-            _verify_reduce(dshape, "avg", axis, np.mean)
-            _verify_reduce(dshape, "prod", axis, np.prod)
-            _verify_reduce(dshape, "min", axis, np.min)
-            _verify_reduce(dshape, "max", axis, np.max)
-            if axis in [ReduceAxis.C, ReduceAxis.H, ReduceAxis.W]:
-                # For mode ArgMax, axis must be [-1] or [-2] or [-3]
-                _verify_reduce(dshape, "argmax", axis, np.argmax, dtype="int32")
-
-
-def verify_reshape(input_dim, target_shape, mode):
-    """Reshape"""
-    dtype = "float32"
-
-    a_np = np.random.uniform(-100.0, 100.0, size=input_dim).astype(dtype)
-    ref_val = np.reshape(a_np, target_shape)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*ref_val.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_reshape(
-        name="reshape",
-        input_name="input",
-        output_name="output",
-        target_shape=target_shape,
-        mode=mode,
-    )
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input"], ref_val.shape, dtype)
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def test_forward_reshape():
-    for mode in [0, 1]:
-        verify_reshape((20,), (1, 2, 2, 5), mode)
-        verify_reshape((1, 3, 20, 20), (1, 12, 10, 10), mode)
-
-
-def _verify_split(input_dim, out_nums):
-    dtype = "float32"
-
-    a_np = np.random.uniform(-100.0, 100.0, size=input_dim).astype(dtype)
-    ref_val = np.split(a_np, out_nums, axis=-3)
-
-    inputs = [("input", datatypes.Array(*input_dim))]
-
-    output_names = []
-    outputs = []
-    output_shapes = []
-    for i, out in enumerate(ref_val):
-        output_name = "output" + str(i)
-        output_names = output_names + [output_name]
-        outputs = outputs + [(output_name, datatypes.Array(*out.shape))]
-        output_shapes = output_shapes + [out.shape]
-
-    builder = NeuralNetworkBuilder(inputs, outputs)
-    builder.add_split(name="split", input_name="input", output_names=output_names)
-
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(
-            model, target, dev, [a_np], ["input"], output_shapes, [dtype] * len(output_shapes)
-        )
-        tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
-
-
-def test_forward_split():
-    """Split"""
-    _verify_split(
-        (
-            1,
-            4,
-            4,
-            4,
-        ),
-        2,
-    )
-    _verify_split(
-        (
-            1,
-            3,
-            30,
-            20,
-        ),
-        3,
-    )
-
-
-def verify_image_scaler(input_dim, blue_bias=0.0, green_bias=0.0, red_bias=0.0, image_scale=1.0):
-    """Verify image scaler"""
-    dtype = "float32"
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    # make sure it is valid image format CHW.
-    assert len(a_np.shape) == 3 and a_np.shape[0] == 3
-    b_np = np.zeros(a_np.shape, dtype=dtype)
-    b_np[0, :, :] = image_scale * a_np[0, :, :] + blue_bias
-    b_np[1, :, :] = image_scale * a_np[1, :, :] + green_bias
-    b_np[2, :, :] = image_scale * a_np[2, :, :] + red_bias
-    b_np = np.add(a_np, b_np)
-    inputs = [("input1", datatypes.Array(*input_dim)), ("input2", datatypes.Array(*input_dim))]
-    output = [("output", datatypes.Array(*b_np.shape))]
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.set_pre_processing_parameters(
-        image_input_names=["input1"],
-        is_bgr=True,
-        blue_bias=blue_bias,
-        green_bias=green_bias,
-        red_bias=red_bias,
-        image_scale=image_scale,
-    )
-    # add one add layer to make CoreML model format valid
-    # add layer has been tested before.
-    builder.add_elementwise(
-        name="add", input_names=["input1", "input2"], output_name="output", alpha=0, mode="ADD"
-    )
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(
-            model, target, dev, [a_np, a_np], ["input1", "input2"], b_np.shape, dtype
-        )
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_image_scaler():
-    verify_image_scaler((3, 224, 224), image_scale=0.17)
-    verify_image_scaler(
-        (3, 224, 224),
-        blue_bias=-1.7669800519943237,
-        green_bias=-1.985260009765625,
-        red_bias=-2.102560043334961,
-        image_scale=0.379,
-    )
-
-
-def verify_convolution(input_dim, filter_, padding):
-    """Verify convolution"""
-    dtype = "float32"
-    _, c, h, width = input_dim
-    out_c, _, kernel_h, kernel_w = filter_
-    a_np = np.random.uniform(size=input_dim).astype(dtype)
-    w_np = np.random.uniform(size=(out_c, c, kernel_h, kernel_w)).astype(dtype)
-    w_np_cm = np.transpose(w_np, axes=(2, 3, 1, 0))
-    b_np = conv2d_nchw_python(a_np, w_np, [1, 1], padding)
-    inputs = [("input1", datatypes.Array(c, h, width))]
-    output = [("output", datatypes.Array(*b_np.shape))]  # pylint:disable=not-an-iterable
-    builder = NeuralNetworkBuilder(inputs, output)
-    builder.add_convolution(
-        name="conv",
-        kernel_channels=3,
-        output_channels=out_c,
-        height=kernel_h,
-        width=kernel_w,
-        stride_height=1,
-        stride_width=1,
-        border_mode=padding.lower(),
-        groups=1,
-        W=w_np_cm,
-        b=None,
-        has_bias=False,
-        is_deconv=False,
-        input_name="input1",
-        output_name="output",
-    )
-    model = cm.models.MLModel(builder.spec)
-    for target, dev in tvm.testing.enabled_targets():
-        out = run_tvm_graph(model, target, dev, [a_np], ["input1"], output_shape=None)
-        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_convolution():
-    verify_convolution((1, 3, 224, 224), filter_=(32, 3, 3, 3), padding="VALID")
-    verify_convolution((1, 3, 224, 224), filter_=(32, 3, 3, 3), padding="SAME")
-
-
-def test_can_build_keras_to_coreml_to_relay():
-    """Test multiple conversion paths and importing from a saved file."""
-    model = keras.models.Sequential()
-    model.add(
-        keras.layers.Conv2D(
-            filters=6,
-            kernel_size=(1, 1),
-            activation="relu",
-            padding="same",
-            input_shape=(3, 3, 1),
-            data_format="channels_first",
-        )
-    )
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        kmodel_fn = path.join(tmpdir, "c1mdl.h5")
-        model.save(kmodel_fn)
-
-        mdl = cm.convert(
-            kmodel_fn, convert_to="neuralnetwork", minimum_deployment_target=cm.target.macOS11
-        )
-        model_file = path.join(tmpdir, "c1.mlmodel")
-        mdl.save(model_file)
-
-        mdl = cm.models.MLModel(model_file)
-        desc = mdl.get_spec().description
-        iname = desc.input[0].name
-        ishape = desc.input[0].type.multiArrayType.shape
-        shape_dict = {}
-        for i in mdl.get_spec().description.input:
-            iname = i.name
-            ishape = i.type.multiArrayType.shape
-            shape_dict[iname] = ishape
-        mod, params = relay.frontend.from_coreml(mdl, shape_dict)
-
-        with tvm.transform.PassContext(opt_level=3):
-            relay.build(mod, "llvm", params=params)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/darknet/test_forward.py b/tests/python/frontend/darknet/test_forward.py
deleted file mode 100644
index e78e35ff5c7c..000000000000
--- a/tests/python/frontend/darknet/test_forward.py
+++ /dev/null
@@ -1,537 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=unused-argument
-"""
-Test Darknet Models
-===================
-This article is a test script to test darknet models with Relay.
-All the required models and libraries will be downloaded from the internet
-by the script.
-"""
-from cffi import FFI
-import numpy as np
-import tvm
-from tvm.contrib import graph_executor
-from tvm.contrib.download import download_testdata
-
-from tvm.relay.testing.darknet import LAYERTYPE
-from tvm.relay.testing.darknet import __darknetffi__
-from tvm.relay.frontend.darknet import ACTIVATION
-from tvm import relay
-
-REPO_URL = "https://github.com/dmlc/web-data/blob/main/darknet/"
-
-# Lazily initialized
-DARKNET_TEST_IMAGE_PATH = None
-LIB = None
-
-
-def _lib():
-    global LIB
-    lib = "libdarknet2.0.so"
-    url = REPO_URL + "lib/" + lib + "?raw=true"
-    if LIB is None:
-        LIB = __darknetffi__.dlopen(download_testdata(url, lib, module="darknet"))
-
-    return LIB
-
-
-def _darknet_test_image_path():
-    global DARKNET_TEST_IMAGE_PATH
-    if DARKNET_TEST_IMAGE_PATH is None:
-        name = "dog.jpg"
-        url = REPO_URL + "data/" + name + "?raw=true"
-        DARKNET_TEST_IMAGE_PATH = download_testdata(url, name, module="data")
-    return DARKNET_TEST_IMAGE_PATH
-
-
-def astext(program, unify_free_vars=False):
-    """check that program is parsable in text format"""
-    text = program.astext()
-    if isinstance(program, relay.Expr):
-        roundtrip_program = tvm.relay.parse_expr(text)
-    else:
-        roundtrip_program = tvm.relay.fromtext(text)
-
-    tvm.ir.assert_structural_equal(roundtrip_program, program, map_free_vars=True)
-
-
-def _read_memory_buffer(shape, data, dtype="float32"):
-    length = 1
-    for x in shape:
-        length *= x
-    data_np = np.zeros(length, dtype=dtype)
-    for i in range(length):
-        data_np[i] = data[i]
-    return data_np.reshape(shape)
-
-
-def _get_tvm_output(net, data, build_dtype="float32", states=None):
-    """Compute TVM output"""
-    dtype = "float32"
-    mod, params = relay.frontend.from_darknet(net, data.shape, dtype)
-    # verify that from_darknet creates a valid, parsable relay program
-    mod = relay.transform.InferType()(mod)
-    astext(mod)
-
-    target = "llvm"
-    lib = relay.build(mod, target, params=params)
-
-    # Execute on TVM
-    dev = tvm.cpu(0)
-    m = graph_executor.GraphModule(lib["default"](dev))
-    # set inputs
-    m.set_input("data", tvm.nd.array(data.astype(dtype)))
-    if states:
-        for name in states.keys():
-            m.set_input(name, tvm.nd.array(states[name].astype(dtype)))
-    m.run()
-    # get outputs
-    tvm_out = []
-    for i in range(m.get_num_outputs()):
-        tvm_out.append(m.get_output(i).numpy())
-    return tvm_out
-
-
-def _load_net(cfg_url, cfg_name, weights_url, weights_name):
-    cfg_path = download_testdata(cfg_url, cfg_name, module="darknet")
-    weights_path = download_testdata(weights_url, weights_name, module="darknet")
-    net = _lib().load_network(cfg_path.encode("utf-8"), weights_path.encode("utf-8"), 0)
-    return net
-
-
-def verify_darknet_frontend(net, build_dtype="float32"):
-    """Test network with given input image on both darknet and tvm"""
-
-    def get_darknet_output(net, img):
-        _lib().network_predict_image(net, img)
-        out = []
-        for i in range(net.n):
-            layer = net.layers[i]
-            if layer.type == LAYERTYPE.REGION:
-                attributes = np.array(
-                    [
-                        layer.n,
-                        layer.out_c,
-                        layer.out_h,
-                        layer.out_w,
-                        layer.classes,
-                        layer.coords,
-                        layer.background,
-                    ],
-                    dtype=np.int32,
-                )
-                out.insert(0, attributes)
-                out.insert(0, _read_memory_buffer((layer.n * 2,), layer.biases))
-                layer_outshape = (layer.batch, layer.out_c, layer.out_h, layer.out_w)
-                out.insert(0, _read_memory_buffer(layer_outshape, layer.output))
-            elif layer.type == LAYERTYPE.YOLO:
-                attributes = np.array(
-                    [layer.n, layer.out_c, layer.out_h, layer.out_w, layer.classes, layer.total],
-                    dtype=np.int32,
-                )
-                out.insert(0, attributes)
-                out.insert(0, _read_memory_buffer((layer.total * 2,), layer.biases))
-                out.insert(0, _read_memory_buffer((layer.n,), layer.mask, dtype="int32"))
-                layer_outshape = (layer.batch, layer.out_c, layer.out_h, layer.out_w)
-                out.insert(0, _read_memory_buffer(layer_outshape, layer.output))
-            elif i == net.n - 1:
-                if layer.type == LAYERTYPE.CONNECTED:
-                    darknet_outshape = (layer.batch, layer.out_c)
-                elif layer.type in [LAYERTYPE.SOFTMAX]:
-                    darknet_outshape = (layer.batch, layer.outputs)
-                else:
-                    darknet_outshape = (layer.batch, layer.out_c, layer.out_h, layer.out_w)
-                out.insert(0, _read_memory_buffer(darknet_outshape, layer.output))
-        return out
-
-    dtype = "float32"
-
-    img = _lib().letterbox_image(
-        _lib().load_image_color(_darknet_test_image_path().encode("utf-8"), 0, 0), net.w, net.h
-    )
-    darknet_output = get_darknet_output(net, img)
-    batch_size = 1
-    data = np.empty([batch_size, img.c, img.h, img.w], dtype)
-    i = 0
-    for c in range(img.c):
-        for h in range(img.h):
-            for k in range(img.w):
-                data[0][c][h][k] = img.data[i]
-                i = i + 1
-
-    tvm_out = _get_tvm_output(net, data, build_dtype)
-    for tvm_outs, darknet_out in zip(tvm_out, darknet_output):
-        tvm.testing.assert_allclose(darknet_out, tvm_outs, rtol=1e-3, atol=1e-3)
-
-
-def _test_rnn_network(net, states):
-    """Test network with given input data on both darknet and tvm"""
-
-    def get_darknet_network_predict(net, data):
-        return _lib().network_predict(net, data)
-
-    ffi = FFI()
-    np_arr = np.zeros([1, net.inputs], dtype="float32")
-    np_arr[0, 2] = 1
-    cffi_arr = ffi.cast("float*", np_arr.ctypes.data)
-    tvm_out = _get_tvm_output(net, np_arr, states=states)[0]
-    darknet_output = get_darknet_network_predict(net, cffi_arr)
-    darknet_out = np.zeros(net.outputs, dtype="float32")
-    for i in range(net.outputs):
-        darknet_out[i] = darknet_output[i]
-    last_layer = net.layers[net.n - 1]
-    darknet_outshape = (last_layer.batch, last_layer.outputs)
-    darknet_out = darknet_out.reshape(darknet_outshape)
-    tvm.testing.assert_allclose(darknet_out, tvm_out, rtol=1e-4, atol=1e-4)
-
-
-def test_forward_extraction():
-    """test extraction model"""
-    model_name = "extraction"
-    cfg_name = model_name + ".cfg"
-    weights_name = model_name + ".weights"
-    cfg_url = "https://github.com/pjreddie/darknet/blob/master/cfg/" + cfg_name + "?raw=true"
-    weights_url = "http://pjreddie.com/media/files/" + weights_name + "?raw=true"
-    net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_alexnet():
-    """test alexnet model"""
-    model_name = "alexnet"
-    cfg_name = model_name + ".cfg"
-    weights_name = model_name + ".weights"
-    cfg_url = "https://github.com/pjreddie/darknet/blob/master/cfg/" + cfg_name + "?raw=true"
-    weights_url = "http://pjreddie.com/media/files/" + weights_name + "?raw=true"
-    net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_resnet50():
-    """test resnet50 model"""
-    model_name = "resnet50"
-    cfg_name = model_name + ".cfg"
-    weights_name = model_name + ".weights"
-    cfg_url = "https://github.com/pjreddie/darknet/blob/master/cfg/" + cfg_name + "?raw=true"
-    weights_url = "http://pjreddie.com/media/files/" + weights_name + "?raw=true"
-    net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_resnext50():
-    """test resnet50 model"""
-    model_name = "resnext50"
-    cfg_name = model_name + ".cfg"
-    weights_name = model_name + ".weights"
-    cfg_url = "https://github.com/pjreddie/darknet/blob/master/cfg/" + cfg_name + "?raw=true"
-    weights_url = "http://pjreddie.com/media/files/" + weights_name + "?raw=true"
-    net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_yolov2():
-    """test yolov2 model"""
-    model_name = "yolov2"
-    cfg_name = model_name + ".cfg"
-    weights_name = model_name + ".weights"
-    cfg_url = "https://github.com/pjreddie/darknet/blob/master/cfg/" + cfg_name + "?raw=true"
-    weights_url = "http://pjreddie.com/media/files/" + weights_name + "?raw=true"
-    net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
-    build_dtype = {}
-    verify_darknet_frontend(net, build_dtype)
-    _lib().free_network(net)
-
-
-def test_forward_yolov3():
-    """test yolov3 model"""
-    model_name = "yolov3"
-    cfg_name = model_name + ".cfg"
-    weights_name = model_name + ".weights"
-    cfg_url = "https://github.com/pjreddie/darknet/blob/master/cfg/" + cfg_name + "?raw=true"
-    weights_url = "http://pjreddie.com/media/files/" + weights_name + "?raw=true"
-    net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
-    build_dtype = {}
-    verify_darknet_frontend(net, build_dtype)
-    _lib().free_network(net)
-
-
-def test_forward_convolutional():
-    """test convolutional layer"""
-    net = _lib().make_network(1)
-    layer = _lib().make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0)
-    net.layers[0] = layer
-    net.w = net.h = 224
-    _lib().resize_network(net, 224, 224)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_dense():
-    """test fully connected layer"""
-    net = _lib().make_network(1)
-    layer = _lib().make_connected_layer(1, 75, 20, 1, 0, 0)
-    net.layers[0] = layer
-    net.w = net.h = 5
-    _lib().resize_network(net, 5, 5)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_dense_batchnorm():
-    """test fully connected layer with batchnorm"""
-    net = _lib().make_network(1)
-    layer = _lib().make_connected_layer(1, 12, 2, 1, 1, 0)
-    for i in range(5):
-        layer.rolling_mean[i] = np.random.rand(1)
-        layer.rolling_variance[i] = np.random.rand(1) + 0.5
-        layer.scales[i] = np.random.rand(1)
-    net.layers[0] = layer
-    net.w = net.h = 2
-    _lib().resize_network(net, 2, 2)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_maxpooling():
-    """test maxpooling layer"""
-    net = _lib().make_network(1)
-    layer = _lib().make_maxpool_layer(1, 224, 224, 3, 2, 2, 0)
-    net.layers[0] = layer
-    net.w = net.h = 224
-    _lib().resize_network(net, 224, 224)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_avgpooling():
-    """test avgerage pooling layer"""
-    net = _lib().make_network(1)
-    layer = _lib().make_avgpool_layer(1, 224, 224, 3)
-    net.layers[0] = layer
-    net.w = net.h = 224
-    _lib().resize_network(net, 224, 224)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_conv_batch_norm():
-    """test batch normalization layer"""
-    net = _lib().make_network(1)
-    layer = _lib().make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 1, 0, 0, 0)
-    for i in range(32):
-        layer.rolling_mean[i] = np.random.rand(1)
-        layer.rolling_variance[i] = np.random.rand(1) + 0.5
-    net.layers[0] = layer
-    net.w = net.h = 224
-    _lib().resize_network(net, 224, 224)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_shortcut():
-    """test shortcut layer"""
-    net = _lib().make_network(3)
-    layer_1 = _lib().make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0)
-    layer_2 = _lib().make_convolutional_layer(1, 111, 111, 32, 32, 1, 1, 1, 0, 1, 0, 0, 0, 0)
-    layer_3 = _lib().make_shortcut_layer(1, 0, 111, 111, 32, 111, 111, 32)
-    layer_3.activation = ACTIVATION.RELU
-    layer_3.alpha = 1
-    layer_3.beta = 1
-    net.layers[0] = layer_1
-    net.layers[1] = layer_2
-    net.layers[2] = layer_3
-    net.w = net.h = 224
-    _lib().resize_network(net, 224, 224)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_reorg():
-    """test reorg layer"""
-    net = _lib().make_network(2)
-    layer_1 = _lib().make_convolutional_layer(1, 222, 222, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0)
-    layer_2 = _lib().make_reorg_layer(1, 110, 110, 32, 2, 0, 0, 0)
-    net.layers[0] = layer_1
-    net.layers[1] = layer_2
-    net.w = net.h = 222
-    _lib().resize_network(net, 222, 222)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_region():
-    """test region layer"""
-    net = _lib().make_network(2)
-    layer_1 = _lib().make_convolutional_layer(1, 19, 19, 3, 425, 1, 1, 1, 0, 1, 0, 0, 0, 0)
-    layer_2 = _lib().make_region_layer(1, 19, 19, 5, 80, 4)
-    layer_2.softmax = 1
-    net.layers[0] = layer_1
-    net.layers[1] = layer_2
-    net.w = net.h = 19
-    _lib().resize_network(net, 19, 19)
-    build_dtype = {}
-    verify_darknet_frontend(net, build_dtype)
-    _lib().free_network(net)
-
-
-def test_forward_yolo_op():
-    """test yolo layer"""
-    net = _lib().make_network(2)
-    layer_1 = _lib().make_convolutional_layer(1, 224, 224, 3, 14, 1, 3, 2, 0, 1, 0, 0, 0, 0)
-    layer_2 = _lib().make_yolo_layer(1, 111, 111, 2, 9, __darknetffi__.NULL, 2)
-    net.layers[0] = layer_1
-    net.layers[1] = layer_2
-    net.w = net.h = 224
-    _lib().resize_network(net, 224, 224)
-    build_dtype = {}
-    verify_darknet_frontend(net, build_dtype)
-    _lib().free_network(net)
-
-
-def test_forward_upsample():
-    """test upsample layer"""
-    net = _lib().make_network(1)
-    layer = _lib().make_upsample_layer(1, 19, 19, 3, 3)
-    layer.scale = 1
-    net.layers[0] = layer
-    net.w = net.h = 19
-    _lib().resize_network(net, 19, 19)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_l2normalize():
-    """test l2 normalization layer"""
-    net = _lib().make_network(1)
-    layer = _lib().make_l2norm_layer(1, 224 * 224 * 3)
-    layer.c = layer.out_c = 3
-    layer.h = layer.out_h = 224
-    layer.w = layer.out_w = 224
-    net.layers[0] = layer
-    net.w = net.h = 224
-    _lib().resize_network(net, 224, 224)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_elu():
-    """test elu activation layer"""
-    net = _lib().make_network(1)
-    layer_1 = _lib().make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0)
-    layer_1.activation = ACTIVATION.ELU
-    net.layers[0] = layer_1
-    net.w = net.h = 224
-    _lib().resize_network(net, 224, 224)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_softmax():
-    """test softmax layer"""
-    net = _lib().make_network(1)
-    layer_1 = _lib().make_softmax_layer(1, 75, 1)
-    layer_1.temperature = 1
-    net.layers[0] = layer_1
-    net.w = net.h = 5
-    _lib().resize_network(net, net.w, net.h)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_softmax_temperature():
-    """test softmax layer"""
-    net = _lib().make_network(1)
-    layer_1 = _lib().make_softmax_layer(1, 75, 1)
-    layer_1.temperature = 0.8
-    net.layers[0] = layer_1
-    net.w = net.h = 5
-    _lib().resize_network(net, net.w, net.h)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_activation_logistic():
-    """test logistic activation layer"""
-    net = _lib().make_network(1)
-    batch = 1
-    h = 224
-    width = 224
-    c = 3
-    n = 32
-    groups = 1
-    size = 3
-    stride = 2
-    padding = 0
-    activation = ACTIVATION.LOGISTIC
-    batch_normalize = 0
-    binary = 0
-    xnor = 0
-    adam = 0
-    layer_1 = _lib().make_convolutional_layer(
-        batch,
-        h,
-        width,
-        c,
-        n,
-        groups,
-        size,
-        stride,
-        padding,
-        activation,
-        batch_normalize,
-        binary,
-        xnor,
-        adam,
-    )
-    net.layers[0] = layer_1
-    net.w = width
-    net.h = h
-    _lib().resize_network(net, net.w, net.h)
-    verify_darknet_frontend(net)
-    _lib().free_network(net)
-
-
-def test_forward_rnn():
-    """test RNN layer"""
-    net = _lib().make_network(1)
-    batch = 1
-    inputs = 4
-    outputs = 4
-    steps = 1
-    activation = ACTIVATION.RELU
-    batch_normalize = 0
-    adam = 0
-    layer_1 = _lib().make_rnn_layer(
-        batch, inputs, outputs, steps, activation, batch_normalize, adam
-    )
-    net.layers[0] = layer_1
-    net.inputs = inputs
-    net.outputs = outputs
-    net.w = net.h = 0
-    _lib().resize_network(net, net.w, net.h)
-    states = {"rnn0_state": np.zeros([1, net.inputs])}
-    _test_rnn_network(net, states)
-    _lib().free_network(net)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/keras/test_forward.py b/tests/python/frontend/keras/test_forward.py
deleted file mode 100644
index 52505e259d23..000000000000
--- a/tests/python/frontend/keras/test_forward.py
+++ /dev/null
@@ -1,926 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Unit tests for various models and operators"""
-from packaging import version as package_version
-import numpy as np
-
-try:
-    import tensorflow.compat.v1 as tf
-except ImportError:
-    import tensorflow as tf
-
-from tensorflow import keras as tf_keras
-
-# prevent Keras from using up all gpu memory
-import keras
-
-import pytest
-import tvm
-from tvm import relay
-from tvm.contrib import graph_executor
-import tvm.testing
-
-if tf.executing_eagerly():
-    GPUS = tf.config.experimental.list_physical_devices("GPU")
-    for gpu in GPUS:
-        tf.config.experimental.set_memory_growth(gpu, True)
-else:
-    from keras.backend.tensorflow_backend import set_session
-
-    CONFIG = tf.ConfigProto()
-    CONFIG.gpu_options.per_process_gpu_memory_fraction = 0.5
-    set_session(tf.Session(config=CONFIG))
-
-
-def pytest_generate_tests(metafunc):
-    """
-    This function generates the list of tests for pytest, based
-    on scenarios that will change the parameters in which the
-    tests use to run.
-    https://docs.pytest.org/en/latest/example/parametrize.html
-    """
-    idlist = []
-    argvalues = []
-    for scenario in metafunc.cls.scenarios:
-        idlist.append(scenario[0])
-        items = scenario[1].items()
-        argnames = [x[0] for x in items]
-        argvalues.append([x[1] for x in items])
-    metafunc.parametrize(argnames, argvalues, ids=idlist, scope="class")
-
-
-# Scenarios:
-# - classic keras, using keras from "import keras"
-# - tensorflow keras, using keras from "from tensorflow import keras as tf_keras"
-USING_CLASSIC_KERAS = ("keras", {"keras_mod": keras})
-USING_TENSORFLOW_KERAS = ("tf_keras", {"keras_mod": tf_keras})
-
-
-def verify_keras_frontend(keras_model, need_transpose=True, layout="NCHW"):
-    """Generic function to generate and compare Keras and TVM output"""
-    # Keras frontend currently supports tensorflow backend only.
-    assert keras.backend.backend() == "tensorflow"
-
-    if layout != "NCHW":
-        need_transpose = False
-
-    in_shapes = []
-    for layer in keras_model._input_layers:
-        if tf.executing_eagerly():
-            in_shapes.append(tuple(dim if dim is not None else 1 for dim in layer.input.shape))
-        else:
-            in_shapes.append(
-                tuple(dim.value if dim.value is not None else 1 for dim in layer.input.shape)
-            )
-
-    def get_keras_output(in_data):
-        return keras_model.predict(in_data)
-
-    def get_tvm_output(in_data, target, dev, dtype="float32"):
-        shape_dict = {name: x.shape for (name, x) in zip(keras_model.input_names, in_data)}
-        mod, params = relay.frontend.from_keras(keras_model, shape_dict, layout=layout)
-        with tvm.transform.PassContext(opt_level=3):
-            lib = relay.build(mod, target, params=params)
-        m = graph_executor.GraphModule(lib["default"](dev))
-        for name, x in zip(keras_model.input_names, in_data):
-            m.set_input(name, tvm.nd.array(x.astype(dtype)))
-        m.run()
-        return [m.get_output(i).numpy() for i in range(m.get_num_outputs())]
-
-    def to_channels_first(arr):
-        return arr.transpose([0, -1] + list(range(1, arr.ndim - 1)))
-
-    def to_channels_last(arr):
-        return arr.transpose([0] + list(range(2, arr.ndim)) + [1])
-
-    in_data = [np.random.uniform(size=shape, low=-1.0, high=1.0) for shape in in_shapes]
-    keras_out = get_keras_output(in_data)
-    keras_out = keras_out if isinstance(keras_out, list) else [keras_out]
-    for target, dev in tvm.testing.enabled_targets():
-        inputs = [to_channels_first(x) for x in in_data] if need_transpose else in_data
-        tvm_out = get_tvm_output(inputs, target, dev)
-        for kout, tout in zip(keras_out, tvm_out):
-            if need_transpose:
-                tout = to_channels_last(tout)
-            tvm.testing.assert_allclose(kout, tout, rtol=1e-5, atol=1e-5)
-
-
-def get_mobilenet(keras_mod):
-    if hasattr(keras_mod.applications, "MobileNet"):
-        # Keras 2.4.x and older
-        mobilenet_mod = keras_mod.applications.MobileNet
-    else:
-        # Keras 2.6.x and newer
-        mobilenet_mod = keras_mod.applications.mobilenet.MobileNet
-
-    return mobilenet_mod
-
-
-@tvm.testing.uses_gpu
-class TestKeras:
-    """Keras test"""
-
-    scenarios = [USING_CLASSIC_KERAS, USING_TENSORFLOW_KERAS]
-
-    def test_forward_merge(self, keras_mod):
-        """test_forward_merge"""
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        conv2d_x = keras_mod.layers.Conv2D(8, (3, 3), padding="same")(data)
-        conv2d_y = keras_mod.layers.Conv2D(8, (3, 3), padding="same")(conv2d_x)
-        conv2d_z = keras_mod.layers.Conv2D(8, (3, 3), padding="same")(conv2d_y)
-        merge_funcs = [
-            keras_mod.layers.Add(),
-            keras_mod.layers.Subtract(),
-            keras_mod.layers.Multiply(),
-            keras_mod.layers.Maximum(),
-            keras_mod.layers.Minimum(),
-            keras_mod.layers.Average(),
-            keras_mod.layers.Concatenate(),
-        ]
-        for merge_func in merge_funcs:
-            class_name = type(merge_func).__name__
-            if class_name in ("Subtract", "Dot"):
-                out = merge_func([conv2d_x, conv2d_y])
-            else:
-                out = merge_func([conv2d_x, conv2d_y, conv2d_z])
-            keras_model = keras_mod.models.Model(data, out)
-            verify_keras_frontend(keras_model)
-
-    def test_forward_concatenate(self, keras_mod):
-        """test_forward_concatenate"""
-        data1 = keras_mod.layers.Input(shape=(1, 2, 2))
-        data2 = keras_mod.layers.Input(shape=(1, 1, 2))
-        merge_func = keras_mod.layers.Concatenate(axis=2)
-        out = merge_func([data1, data2])
-        keras_model = keras_mod.models.Model([data1, data2], out)
-        verify_keras_frontend(keras_model, layout="NHWC")
-        verify_keras_frontend(keras_model, layout="NCHW")
-        # test default axis (e.g., -1)
-        data1 = keras_mod.layers.Input(shape=(1, 2, 2))
-        data2 = keras_mod.layers.Input(shape=(1, 2, 3))
-        merge_func = keras_mod.layers.Concatenate()
-        out = merge_func([data1, data2])
-        keras_model = keras_mod.models.Model([data1, data2], out)
-        verify_keras_frontend(keras_model, layout="NHWC")
-        verify_keras_frontend(keras_model, layout="NCHW")
-        # test axis at last dimension
-        data1 = keras_mod.layers.Input(shape=(1, 2, 2))
-        data2 = keras_mod.layers.Input(shape=(1, 2, 3))
-        merge_func = keras_mod.layers.Concatenate(axis=3)
-        out = merge_func([data1, data2])
-        keras_model = keras_mod.models.Model([data1, data2], out)
-        verify_keras_frontend(keras_model, layout="NHWC")
-        verify_keras_frontend(keras_model, layout="NCHW")
-
-    def test_forward_merge_dot(self, keras_mod):
-        """test_forward_merge_dot"""
-        data1 = keras_mod.layers.Input(shape=(2, 2))
-        data2 = keras_mod.layers.Input(shape=(2, 2))
-        merge_funcs = [
-            keras_mod.layers.Dot(axes=[1, 2]),
-            keras_mod.layers.Dot(axes=[2, 1]),
-            keras_mod.layers.Dot(axes=[1, 1]),
-            keras_mod.layers.Dot(axes=[2, 2]),
-            keras_mod.layers.Dot(axes=1),
-            keras_mod.layers.Dot(axes=2),
-        ]
-        for merge_func in merge_funcs:
-            out = merge_func([data1, data2])
-            keras_model = keras_mod.models.Model([data1, data2], out)
-            verify_keras_frontend(keras_model)
-
-    def test_forward_activations(self, keras_mod):
-        """test_forward_activations"""
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        act_funcs = [
-            keras_mod.layers.Activation("softmax"),
-            keras_mod.layers.Softmax(),
-            keras_mod.layers.Softmax(axis=-1),
-            keras_mod.layers.Softmax(axis=1),
-            keras_mod.layers.Softmax(axis=2),
-            keras_mod.layers.Softmax(axis=3),
-            keras_mod.layers.Activation("softplus"),
-            keras_mod.layers.Activation("relu"),
-            keras_mod.layers.Activation("softsign"),
-            keras_mod.layers.Activation("hard_sigmoid"),
-            keras_mod.layers.Activation("sigmoid"),
-            keras_mod.layers.Activation("tanh"),
-            keras_mod.layers.Activation("linear"),
-            keras_mod.layers.Activation("selu"),
-            keras_mod.layers.Activation("swish"),
-            keras_mod.layers.ReLU(),
-            keras_mod.layers.ReLU(max_value=6.0),
-            keras_mod.layers.ReLU(max_value=6.0, threshold=0.0),
-            keras_mod.layers.ReLU(max_value=6.0, threshold=1.0),
-            keras_mod.layers.ReLU(max_value=6.0, threshold=1.0, negative_slope=0.0),
-            keras_mod.layers.ReLU(max_value=6.0, threshold=1.0, negative_slope=0.5),
-            keras_mod.layers.ReLU(max_value=6.0, threshold=1.0, negative_slope=1.0),
-            keras_mod.layers.LeakyReLU(alpha=0.3),
-            keras_mod.layers.PReLU(weights=np.random.rand(1, 32, 32, 3)),
-            keras_mod.layers.ELU(alpha=0.5),
-            keras_mod.layers.ThresholdedReLU(theta=0.5),
-        ]
-        for act_func in act_funcs:
-            x = act_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model)
-            verify_keras_frontend(keras_model, need_transpose=False, layout="NHWC")
-        # Test the input dimension = 1
-        data = keras_mod.layers.Input(shape=(11,))
-        act_func = keras_mod.layers.Softmax()
-        x = act_func(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-        verify_keras_frontend(keras_model, need_transpose=False, layout="NHWC")
-
-    def test_forward_activations_except(self, keras_mod):
-        """
-        test invalid attribute alpha=None for LeakyReLU and ELU.
-        after version 2.3.1 in keras, checking was added to reject the invalid api call:
-        LeakyReLU(alpha=None) and ELU(alpha=None),
-        (see issue: https://github.com/tensorflow/tensorflow/pull/47017)
-        Thus, it's necessary to check the keras version to avoid crash at LeakyReLU(alpha=None)
-        and ELU(alpha=None)
-        """
-        if package_version.parse(keras_mod.__version__.split("-tf")[0]) <= package_version.parse(
-            "2.3.1"
-        ):
-            act_funcs = [
-                keras_mod.layers.LeakyReLU(alpha=None),
-                keras_mod.layers.ELU(2, 3, 4),
-                keras_mod.layers.ReLU(threshold=None),
-            ]
-            data = keras_mod.layers.Input(shape=(2, 3, 4))
-            for act_func in act_funcs:
-                layer = act_func(data)
-                keras_model = keras_mod.models.Model(data, layer)
-                with pytest.raises(tvm.error.OpAttributeInvalid):
-                    verify_keras_frontend(keras_model)
-
-    def test_forward_dense(self, keras_mod):
-        """test_forward_dense"""
-        data = keras_mod.layers.Input(shape=(32, 32, 1))
-        x = keras_mod.layers.Flatten()(data)
-        x = keras_mod.layers.Dropout(0.5)(x)
-        x = keras_mod.layers.Dense(10, activation="relu", kernel_initializer="uniform")(x)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-        # RNN dense
-        data = keras_mod.layers.Input(shape=(1, 32))
-        x = keras_mod.layers.Dense(32, activation="relu", kernel_initializer="uniform")(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-        data = keras_mod.layers.Input(shape=(120, 2560), name="image_set")
-        x = keras_mod.layers.Dense(1, activation="linear", name="e")(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-        data = keras_mod.layers.Input(shape=(10, 12, 2560), name="image_set")
-        x = keras_mod.layers.Dense(32, activation="linear", name="e")(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-    def test_forward_permute(self, keras_mod):
-        data = keras_mod.layers.Input(shape=(2, 3, 4))
-        x = keras_mod.layers.Permute([2, 3, 1])(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-    def test_forward_sequential(self, keras_mod):
-        """test_forward_sequential"""
-        keras_model = keras_mod.models.Sequential(
-            [
-                keras_mod.layers.Dense(16, input_dim=32, activation="relu"),
-                keras_mod.layers.Dropout(0.5),
-                keras_mod.layers.Dense(8, activation="relu"),
-                keras_mod.layers.Dropout(0.5),
-                keras_mod.layers.Dense(1, activation="sigmoid"),
-            ]
-        )
-        verify_keras_frontend(keras_model)
-
-    def test_forward_pool(self, keras_mod):
-        """test_forward_pool"""
-        data = keras_mod.layers.Input(shape=(32, 32, 1))
-        # maxpool
-        x = keras_mod.layers.MaxPooling2D((3, 3), strides=(1, 1), padding="same")(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-        # avgpool
-        y = keras_mod.layers.AveragePooling2D((3, 3), strides=(1, 1), padding="same")(data)
-        keras_model = keras_mod.models.Model(data, y)
-        verify_keras_frontend(keras_model)
-        # reject the invalid input shape
-        data = keras_mod.layers.Input(shape=(0, 3, 6, 4))
-        x = keras_mod.layers.GlobalAveragePooling3D()(data)
-        keras_model = keras_mod.models.Model(data, x)
-        with pytest.raises(ValueError):
-            verify_keras_frontend(keras_model)
-
-    def test_forward_conv1d(self, keras_mod):
-        """test_forward_conv1d"""
-        data = keras_mod.layers.Input(shape=(32, 3))
-        conv_funcs = [
-            keras_mod.layers.Conv1D(filters=10, kernel_size=(3,), strides=(2,), padding="same"),
-            keras_mod.layers.Conv1D(
-                filters=10, kernel_size=(3,), dilation_rate=(2,), padding="same"
-            ),
-            keras_mod.layers.Conv1D(filters=1, kernel_size=(3,), padding="valid", use_bias=False),
-            keras_mod.layers.Conv1D(filters=10, kernel_size=(2,), padding="valid"),
-            # Enable when relay conv1dtranspose handles NWC
-            # keras.layers.Conv1DTranspose(filters=10, kernel_size=(3), padding="valid"),
-        ]
-        for conv_func in conv_funcs:
-            x = conv_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, layout="NWC")
-
-    def test_forward_conv(self, keras_mod):
-        """test_forward_conv"""
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        conv_funcs = [
-            keras_mod.layers.Conv2D(filters=10, kernel_size=(3, 3), strides=(2, 2), padding="same"),
-            keras_mod.layers.Conv2D(
-                filters=10, kernel_size=(3, 3), dilation_rate=(2, 2), padding="same"
-            ),
-            keras_mod.layers.Conv2D(filters=1, kernel_size=(3, 3), padding="same"),
-            keras_mod.layers.DepthwiseConv2D(kernel_size=(3, 3), padding="same"),
-            keras_mod.layers.Conv2DTranspose(filters=10, kernel_size=(3, 3), padding="valid"),
-            keras_mod.layers.SeparableConv2D(filters=10, kernel_size=(3, 3), padding="same"),
-            keras_mod.layers.SeparableConv2D(filters=10, kernel_size=(3, 3), dilation_rate=(2, 2)),
-            keras_mod.layers.SeparableConv2D(filters=2, kernel_size=(3, 3), dilation_rate=2),
-        ]
-        for conv_func in conv_funcs:
-            x = conv_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model)
-
-    def test_forward_conv_transpose(self, keras_mod):
-        """test_forward_conv_transpose"""
-        data = keras_mod.layers.Input(shape=(32, 32, 128))
-        conv_funcs = [
-            keras_mod.layers.Conv2DTranspose(filters=64, kernel_size=(2, 2), padding="valid"),
-            keras_mod.layers.Conv2DTranspose(
-                filters=2, kernel_size=(3, 3), strides=(2, 2), output_padding=(1, 1)
-            ),
-        ]
-        for conv_func in conv_funcs:
-            x = conv_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, layout="NHWC")
-
-    def test_forward_batch_norm(self, keras_mod):
-        """test_forward_batch_norm"""
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        batch_norm_funcs = [
-            keras_mod.layers.BatchNormalization(
-                axis=-1,
-                momentum=0.99,
-                epsilon=0.001,
-                center=True,
-                scale=False,
-                beta_initializer="zeros",
-                gamma_initializer="ones",
-                moving_mean_initializer="zeros",
-                moving_variance_initializer="ones",
-            ),
-            keras_mod.layers.BatchNormalization(
-                axis=-1,
-                momentum=0.99,
-                epsilon=0.001,
-                center=True,
-                scale=True,
-                beta_initializer="zeros",
-                gamma_initializer="ones",
-                moving_mean_initializer="zeros",
-                moving_variance_initializer="ones",
-            ),
-            keras_mod.layers.BatchNormalization(
-                axis=-1,
-                momentum=0.99,
-                epsilon=0.001,
-                center=False,
-                scale=True,
-                beta_initializer="zeros",
-                gamma_initializer="ones",
-                moving_mean_initializer="zeros",
-                moving_variance_initializer="ones",
-            ),
-            keras_mod.layers.BatchNormalization(
-                axis=-1,
-                momentum=0.99,
-                epsilon=0.001,
-                center=False,
-                scale=False,
-                beta_initializer="zeros",
-                gamma_initializer="ones",
-                moving_mean_initializer="zeros",
-                moving_variance_initializer="ones",
-            ),
-        ]
-        for batch_norm_func in batch_norm_funcs:
-            x = batch_norm_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model)
-
-    def test_forward_upsample(self, keras_mod, interpolation="nearest"):
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        x = keras_mod.layers.UpSampling2D(size=(3, 3), interpolation=interpolation)(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-        # Height and width are not equal for the attribute size
-        data = keras_mod.layers.Input(shape=(2, 1, 3))
-        x = keras_mod.layers.UpSampling2D(size=(1, 2), interpolation=interpolation)(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-
-    def test_forward_reshape(self, keras_mod):
-        """test_forward_reshape"""
-        # input_shape len is 3, target_shape len is 3
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        x = keras_mod.layers.Reshape(target_shape=(16, 64, 3))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-        # input_shape len is 3, target_shape len is 2
-        data = keras_mod.layers.Input(shape=(32, 8, 3))
-        x = keras_mod.layers.Reshape(target_shape=(256, 3))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-        # input_shape len is 2, target_shape len is 3
-        data = keras_mod.layers.Input(shape=(256, 3))
-        x = keras_mod.layers.Reshape(target_shape=(8, 32, 3))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-        # input_shape len is 2, target_shape len is 1
-        data = keras_mod.layers.Input(shape=(2, 8))
-        x = keras_mod.layers.Reshape(target_shape=(16,))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-        # input_shape len is 1, target_shape len is 2
-        data = keras_mod.layers.Input(shape=(16,))
-        x = keras_mod.layers.Reshape(target_shape=(4, 4))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-        # input_shape len is 2, target_shape len is 2
-        data = keras_mod.layers.Input(shape=(2, 8))
-        x = keras_mod.layers.Reshape(target_shape=(4, 4))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-        # "non-square" target shape
-        data = keras_mod.layers.Input(shape=(15,))
-        x = keras_mod.layers.Reshape(target_shape=(5, 3))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-        # modify channel dim
-        data = keras_mod.layers.Input(shape=(3, 2, 4))
-        x = keras_mod.layers.Reshape(target_shape=(3, 8))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model)
-
-    def test_forward_crop(self, keras_mod):
-        """test_forward_crop"""
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        x = keras_mod.layers.Cropping2D(cropping=((1, 1), (1, 1)))(data)
-        x = keras_mod.layers.Cropping2D(cropping=(1, 1))(x)
-        x = keras_mod.layers.Cropping2D(cropping=1)(x)
-        x = keras_mod.layers.Cropping2D(cropping=((0, 1), (1, 0)))(x)
-        x = keras_mod.layers.Cropping2D(cropping=(1, 0))(x)
-        x = keras_mod.layers.Cropping2D(cropping=0)(x)
-        x = keras_mod.layers.Add()([x, x])
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, layout="NHWC")
-        verify_keras_frontend(keras_model, layout="NHWC")
-
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        x = keras_mod.layers.Cropping2D(cropping=(2, 1))(data)
-        x = keras_mod.layers.Cropping2D(cropping=(1, 2))(x)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, layout="NHWC")
-        verify_keras_frontend(keras_model, layout="NCHW")
-
-    def test_forward_multi_inputs(self, keras_mod):
-        data1 = keras_mod.layers.Input(shape=(32, 32, 3))
-        data2 = keras_mod.layers.Input(shape=(32, 32, 3))
-        x = keras_mod.layers.Conv2D(8, (3, 3), padding="same")(data1)
-        y = keras_mod.layers.Conv2D(8, (3, 3), padding="same")(data2)
-        average_z = keras_mod.layers.Average()([x, y])
-        out = keras_mod.layers.GlobalAveragePooling2D()(average_z)
-        keras_model = keras_mod.models.Model([data1, data2], out)
-        verify_keras_frontend(keras_model)
-
-    def test_forward_multi_outputs(self, keras_mod):
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        x = keras_mod.layers.Conv2D(8, (3, 3), padding="same")(data)
-        x = keras_mod.layers.GlobalAveragePooling2D()(x)
-        y = keras_mod.layers.Conv2D(8, (3, 3), padding="same")(data)
-        y = keras_mod.layers.GlobalAveragePooling2D()(y)
-        keras_model = keras_mod.models.Model(data, [x, y])
-        verify_keras_frontend(keras_model)
-
-    def test_forward_reuse_layers(self, keras_mod):
-        """test_forward_reuse_layers"""
-        # reuse conv2d
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        conv2d = keras_mod.layers.Conv2D(8, (3, 3), padding="same")
-        x = conv2d(data)
-        y = conv2d(data)
-        add_z = keras_mod.layers.Add()([x, y])
-        out = keras_mod.layers.GlobalAveragePooling2D()(add_z)
-        keras_model = keras_mod.models.Model(data, out)
-        verify_keras_frontend(keras_model)
-        # reuse add
-        data = keras_mod.layers.Input(shape=(32, 32, 3))
-        x = keras_mod.layers.Conv2D(8, (3, 3), padding="same")(data)
-        add = keras_mod.layers.Add()
-        x = add([x, x])
-        x = add([x, x])
-        out = keras_mod.layers.GlobalAveragePooling2D()(x)
-        keras_model = keras_mod.models.Model(data, out)
-        verify_keras_frontend(keras_model)
-
-    def test_forward_lstm(self, keras_mod):
-        """test_forward_lstm"""
-        data = keras_mod.layers.Input(shape=(10, 32))
-        rnn_funcs = [
-            keras_mod.layers.LSTM(16),
-            keras_mod.layers.LSTM(16, return_sequences=True),
-            keras_mod.layers.LSTM(16, go_backwards=True),
-            keras_mod.layers.LSTM(16, return_sequences=True, go_backwards=True),
-            keras_mod.layers.LSTM(16, return_sequences=True, use_bias=False),
-        ]
-        for rnn_func in rnn_funcs:
-            x = rnn_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, need_transpose=False)
-
-    def test_forward_rnn(self, keras_mod):
-        """test_forward_rnn"""
-        data = keras_mod.layers.Input(shape=(1, 32))
-        rnn_funcs = [
-            keras_mod.layers.LSTM(
-                units=16, return_state=False, recurrent_activation="sigmoid", activation="tanh"
-            ),
-            keras_mod.layers.LSTM(
-                units=16,
-                return_state=False,
-                recurrent_activation="sigmoid",
-                activation="tanh",
-                use_bias=False,
-            ),
-            keras_mod.layers.SimpleRNN(units=16, return_state=False, activation="tanh"),
-            keras_mod.layers.SimpleRNN(
-                units=16, return_state=False, activation="tanh", use_bias=False
-            ),
-            keras_mod.layers.SimpleRNN(
-                units=16, return_state=False, activation="tanh", go_backwards=True
-            ),
-            keras_mod.layers.GRU(
-                units=16,
-                return_state=False,
-                recurrent_activation="sigmoid",
-                activation="tanh",
-                reset_after=False,
-            ),
-            keras_mod.layers.GRU(
-                units=16,
-                return_state=False,
-                recurrent_activation="sigmoid",
-                activation="tanh",
-                reset_after=False,
-                use_bias=False,
-            ),
-            keras_mod.layers.GRU(
-                units=16,
-                return_state=False,
-                recurrent_activation="sigmoid",
-                activation="tanh",
-                reset_after=False,
-                use_bias=False,
-                go_backwards=True,
-            ),
-        ]
-        for rnn_func in rnn_funcs:
-            x = rnn_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, need_transpose=False)
-
-    def test_forward_vgg16(self, keras_mod, layout="NCHW"):
-        """test_forward_vgg16"""
-        if hasattr(keras_mod.applications, "VGG16"):
-            # Keras 2.4.x and older
-            vgg16_mod = keras_mod.applications.VGG16
-        else:
-            # Keras 2.6.x and newer
-            vgg16_mod = keras_mod.applications.vgg16.VGG16
-
-        keras_model = vgg16_mod(
-            include_top=True, weights="imagenet", input_shape=(224, 224, 3), classes=1000
-        )
-        verify_keras_frontend(keras_model, layout=layout)
-
-    def test_forward_xception(self, keras_mod, layout="NCHW"):
-        """test_forward_vgg16"""
-        if hasattr(keras_mod.applications, "Xception"):
-            # Keras 2.4.x and older
-            xception_mod = keras_mod.applications.Xception
-        else:
-            # Keras 2.6.x and newer
-            xception_mod = keras_mod.applications.xception.Xception
-
-        keras_model = xception_mod(
-            include_top=True, weights="imagenet", input_shape=(299, 299, 3), classes=1000
-        )
-        verify_keras_frontend(keras_model, layout=layout)
-
-    def test_forward_resnet50(self, keras_mod, layout="NCHW"):
-        """test_forward_resnet50"""
-        if hasattr(keras_mod.applications, "ResNet50"):
-            # Keras 2.4.x and older
-            resnet50_mod = keras_mod.applications.ResNet50
-        else:
-            # Keras 2.6.x and newer
-            resnet50_mod = keras_mod.applications.resnet.ResNet50
-
-        keras_model = resnet50_mod(
-            include_top=True, weights="imagenet", input_shape=(224, 224, 3), classes=1000
-        )
-        verify_keras_frontend(keras_model, layout=layout)
-
-    def test_forward_inception_v3(self, keras_mod, layout="NCHW"):
-        """test_forward_inception_v3"""
-        if hasattr(keras_mod.applications, "InceptionV3"):
-            # Keras 2.4.x and older
-            inception_v3_mod = keras_mod.applications.InceptionV3
-        else:
-            # Keras 2.6.x and newer
-            inception_v3_mod = keras_mod.applications.inception_v3.InceptionV3
-
-        keras_model = inception_v3_mod(
-            include_top=True, weights=None, input_shape=(299, 299, 3), classes=1000
-        )
-        verify_keras_frontend(keras_model, layout=layout)
-
-    def test_forward_mobilenet(self, keras_mod, layout="NCHW"):
-        mobilenet_mod = get_mobilenet(keras_mod)
-
-        keras_model = mobilenet_mod(
-            include_top=True, weights="imagenet", input_shape=(224, 224, 3), classes=1000
-        )
-        verify_keras_frontend(keras_model, layout=layout)
-
-    def test_forward_conv3d(self, keras_mod):
-        """test_forward_conv3d"""
-        data = keras_mod.layers.Input(shape=(32, 32, 32, 3))
-        conv_funcs = [
-            keras_mod.layers.Conv3D(
-                filters=10, kernel_size=(3, 3, 3), strides=(2, 2, 2), padding="same"
-            ),
-            keras_mod.layers.Conv3D(
-                filters=10, kernel_size=(3, 3, 3), dilation_rate=(2, 2, 2), padding="same"
-            ),
-            keras_mod.layers.Conv3D(
-                filters=1, kernel_size=(3, 3, 3), padding="valid", use_bias=False
-            ),
-            keras_mod.layers.Conv3D(filters=10, kernel_size=(2, 2, 2), padding="valid"),
-        ]
-        for conv_func in conv_funcs:
-            x = conv_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, layout="NDHWC")
-
-    def test_forward_conv3d_transpose(self, keras_mod):
-        """test_forward_conv3d_transpose"""
-        data = keras_mod.layers.Input(shape=(32, 32, 32, 3))
-        conv_funcs = [
-            keras_mod.layers.Conv3DTranspose(
-                filters=10, kernel_size=(3, 3, 3), strides=(2, 2, 2), padding="same"
-            ),
-            keras_mod.layers.Conv3DTranspose(
-                filters=10, kernel_size=(1, 1, 1), dilation_rate=(1, 1, 1), padding="same"
-            ),
-            keras_mod.layers.Conv3DTranspose(
-                filters=1, kernel_size=(3, 3, 3), padding="valid", use_bias=False
-            ),
-            keras_mod.layers.Conv3DTranspose(filters=10, kernel_size=(2, 2, 2), padding="valid"),
-            keras_mod.layers.Conv3DTranspose(
-                filters=2, kernel_size=(3, 3, 3), strides=(2, 2, 2), output_padding=(1, 1, 1)
-            ),
-        ]
-        for conv_func in conv_funcs:
-            x = conv_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, layout="NDHWC")
-
-    def test_forward_pool3d(self, keras_mod):
-        """test_forward_pool3d"""
-        data = keras_mod.layers.Input(shape=(32, 32, 32, 1))
-        pool_funcs = [  # maxpool
-            keras_mod.layers.MaxPooling3D(pool_size=(2, 2, 2), strides=(1, 1, 1), padding="same"),
-            keras_mod.layers.MaxPooling3D(pool_size=(3, 3, 3), strides=(2, 2, 2), padding="valid"),
-            # avgpool
-            keras_mod.layers.AveragePooling3D(
-                pool_size=(3, 3, 3), strides=(2, 2, 2), padding="same"
-            ),
-            keras_mod.layers.AveragePooling3D(
-                pool_size=(2, 2, 2), strides=(1, 1, 1), padding="valid"
-            ),
-        ]
-        for pool_func in pool_funcs:
-            x = pool_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, layout="NDHWC")
-
-    def test_forward_upsample3d(self, keras_mod):
-        data = keras_mod.layers.Input(shape=(32, 32, 32, 3))
-        x = keras_mod.layers.UpSampling3D(size=(2, 3, 4))(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, layout="NDHWC")
-
-    def test_forward_zero_padding3d(self, keras_mod):
-        """test_forward_zero_padding3d"""
-        data = keras_mod.layers.Input(shape=(32, 32, 32, 3))
-        pad_funcs = [  # Integer
-            keras_mod.layers.ZeroPadding3D(padding=2),
-            # tuple of 3 ints
-            keras_mod.layers.ZeroPadding3D(padding=(1, 2, 3)),
-            # tuple of 3 tuples of 2 ints
-            keras_mod.layers.ZeroPadding3D(padding=((1, 1), (2, 2), (2, 2))),
-            # tuple of 3 tuples of 2 ints different values
-            keras_mod.layers.ZeroPadding3D(padding=((1, 2), (2, 3), (3, 2))),
-        ]
-        for pad_func in pad_funcs:
-            x = pad_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, layout="NDHWC")
-
-    def test_forward_embedding(self, keras_mod):
-        """test_forward_embedding"""
-        data = keras_mod.layers.Input(shape=(2, 4), dtype="int32")
-        x = keras_mod.layers.Embedding(10, 3)(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-        data = keras_mod.layers.Input(shape=(2, 3, 4), dtype="int32")
-        x = keras_mod.layers.Embedding(4, 5)(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-        data = keras_mod.layers.Input(shape=(6, 2, 3, 4), dtype="int32")
-        x = keras_mod.layers.Embedding(4, 5)(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-    def test_forward_repeat_vector(self, keras_mod):
-        """test_forward_repeat_vector"""
-        data = keras_mod.layers.Input(shape=(5,), dtype="float32")
-        x = keras_mod.layers.Dense(6)(data)
-        x = keras_mod.layers.RepeatVector(2)(x)
-
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-        data = keras_mod.layers.Input(shape=(10,), dtype="float32")
-        x = keras_mod.layers.RepeatVector(3)(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-        data = keras_mod.layers.Input(shape=(4,), dtype="float32")
-        x = keras_mod.layers.RepeatVector(1)(data)
-        keras_model = keras_mod.models.Model(data, x)
-        verify_keras_frontend(keras_model, need_transpose=False)
-
-    def test_forward_global_pool3d(self, keras_mod):
-        """test_forward_zero_padding3d"""
-        data = keras_mod.layers.Input(shape=(32, 32, 32, 1))
-        pool_funcs = [  # global maxpool
-            keras_mod.layers.GlobalMaxPooling3D(),
-            # global avgpool
-            keras_mod.layers.GlobalAveragePooling3D(),
-        ]
-        for pool_func in pool_funcs:
-            x = pool_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, layout="NDHWC")
-
-    def test_forward_nested_layers(self, keras_mod):
-        """test_forward_nested_layers"""
-        mobilenet_mod = get_mobilenet(keras_mod)
-
-        sub_model = mobilenet_mod(include_top=False, weights="imagenet", input_shape=(224, 224, 3))
-        keras_model = keras_mod.Sequential(
-            [
-                sub_model,
-                keras_mod.layers.GlobalAveragePooling2D(),
-                keras_mod.layers.Dense(1024, activation="relu"),
-                keras_mod.layers.Dense(2, activation="sigmoid"),
-            ]
-        )
-        verify_keras_frontend(keras_model)
-
-    def test_forward_l2_normalize(self, keras_mod):
-        """test_forward_l2_normalize"""
-        data = keras_mod.layers.Input(shape=(16, 12, 8))
-        k_backend = keras_mod.backend
-        l2_funcs = [
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(v, axis=-2)),
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(x=v, axis=-1)),
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(axis=1, x=v)),
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(v, 2)),
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(v, axis=3)),
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(v, axis=(2, 3))),
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(v, (1, 2))),
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(v, axis=[-2, -1])),
-            keras_mod.layers.Lambda(lambda v: k_backend.l2_normalize(v, [-3, -2])),
-        ]
-        for l2_func in l2_funcs:
-            x = l2_func(data)
-            keras_model = keras_mod.models.Model(data, x)
-            verify_keras_frontend(keras_model, layout="NCHW")
-            verify_keras_frontend(keras_model, layout="NHWC")
-
-    def test_forward_time_distributed(self, keras_mod):
-        """test_forward_time_distributed"""
-        conv2d_inputs = keras_mod.Input(shape=(10, 128, 128, 3))
-        conv_2d_layer = keras_mod.layers.Conv2D(64, (3, 3))
-        conv2d_model = keras_mod.models.Model(
-            conv2d_inputs, keras_mod.layers.TimeDistributed(conv_2d_layer)(conv2d_inputs)
-        )
-        verify_keras_frontend(conv2d_model, layout="NDHWC")
-
-        dense_inputs = keras_mod.Input(shape=(5, 1))
-        dense_layer = keras_mod.layers.Dense(1)
-        dense_model = keras_mod.models.Model(
-            dense_inputs, keras_mod.layers.TimeDistributed(dense_layer)(dense_inputs)
-        )
-        verify_keras_frontend(dense_model, need_transpose=False)
-
-    def test_simplernn_with_infertype(self, keras_mod):
-        """This test case is from https://github.com/apache/tvm/issues/14868"""
-        input_shape = (2, 2, 2)
-        x = keras_mod.layers.Input(shape=input_shape[1:], dtype="float32")
-        layer = keras_mod.layers.SimpleRNN(units=4)
-        y = layer(x)
-        model = keras_mod.models.Model(x, y)
-        mod, _ = relay.frontend.from_keras(model, {model.input_names[0]: input_shape})
-        relay.transform.InferType()(mod)
-
-
-if __name__ == "__main__":
-    for k in [keras, tf_keras]:
-        sut = TestKeras()
-        sut.test_forward_concatenate(keras_mod=k)
-        sut.test_forward_merge_dot(keras_mod=k)
-        sut.test_forward_merge(keras_mod=k)
-        sut.test_forward_activations(keras_mod=k)
-        sut.test_forward_activations_except(keras_mod=k)
-        sut.test_forward_dense(keras_mod=k)
-        sut.test_forward_permute(keras_mod=k)
-        sut.test_forward_sequential(keras_mod=k)
-        sut.test_forward_pool(keras_mod=k)
-        sut.test_forward_conv(keras_mod=k)
-        sut.test_forward_conv1d(keras_mod=k)
-        sut.test_forward_batch_norm(keras_mod=k)
-        sut.test_forward_upsample(keras_mod=k, interpolation="nearest")
-        sut.test_forward_upsample(keras_mod=k, interpolation="bilinear")
-        sut.test_forward_reshape(keras_mod=k)
-        sut.test_forward_crop(keras_mod=k)
-        sut.test_forward_multi_inputs(keras_mod=k)
-        sut.test_forward_multi_outputs(keras_mod=k)
-        sut.test_forward_reuse_layers(keras_mod=k)
-        sut.test_forward_lstm(keras_mod=k)
-        sut.test_forward_rnn(keras_mod=k)
-        sut.test_forward_vgg16(keras_mod=k)
-        sut.test_forward_vgg16(keras_mod=k, layout="NHWC")
-        sut.test_forward_xception(keras_mod=k)
-        sut.test_forward_resnet50(keras_mod=k)
-        sut.test_forward_resnet50(keras_mod=k, layout="NHWC")
-        sut.test_forward_inception_v3(keras_mod=k)
-        sut.test_forward_inception_v3(keras_mod=k, layout="NHWC")
-        sut.test_forward_mobilenet(keras_mod=k)
-        sut.test_forward_mobilenet(keras_mod=k, layout="NHWC")
-        sut.test_forward_conv3d(keras_mod=k)
-        sut.test_forward_conv3d_transpose(keras_mod=k)
-        sut.test_forward_pool3d(keras_mod=k)
-        sut.test_forward_global_pool3d(keras_mod=k)
-        sut.test_forward_upsample3d(keras_mod=k)
-        sut.test_forward_zero_padding3d(keras_mod=k)
-        sut.test_forward_embedding(keras_mod=k)
-        sut.test_forward_repeat_vector(keras_mod=k)
-        sut.test_forward_l2_normalize(keras_mod=k)
-        sut.test_forward_time_distributed(keras_mod=k)
-        sut.test_simplernn_with_infertype(keras_mod=k)
diff --git a/tests/python/frontend/mxnet/model_zoo/__init__.py b/tests/python/frontend/mxnet/model_zoo/__init__.py
deleted file mode 100644
index 2c324a060d25..000000000000
--- a/tests/python/frontend/mxnet/model_zoo/__init__.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""MXNet model zoo for testing purposes."""
-from __future__ import absolute_import
-from . import mlp, vgg, resnet, dqn, inception_v3, squeezenet, dcgan
-import tvm.relay.testing
-
-# mlp
-def mx_mlp():
-    num_class = 10
-    return mlp.get_symbol(num_class)
-
-
-def relay_mlp():
-    num_class = 10
-    return tvm.relay.testing.mlp.get_workload(1, num_class)[0]
-
-
-# vgg
-def mx_vgg(num_layers):
-    num_class = 1000
-    return vgg.get_symbol(num_class, num_layers)
-
-
-def relay_vgg(num_layers):
-    num_class = 1000
-    return tvm.relay.testing.vgg.get_workload(1, num_class, num_layers=num_layers)[0]
-
-
-# resnet
-def mx_resnet(num_layers):
-    num_class = 1000
-    return resnet.get_symbol(num_class, num_layers, "3,224,224")
-
-
-def relay_resnet(num_layers):
-    num_class = 1000
-    return tvm.relay.testing.resnet.get_workload(1, num_class, num_layers=num_layers)[0]
-
-
-# dqn
-mx_dqn = dqn.get_symbol
-
-
-def relay_dqn():
-    return tvm.relay.testing.dqn.get_workload(1)[0]
-
-
-# squeezenet
-def mx_squeezenet(version):
-    return squeezenet.get_symbol(version=version)
-
-
-def relay_squeezenet(version):
-    return tvm.relay.testing.squeezenet.get_workload(1, version=version)[0]
-
-
-# inception
-mx_inception_v3 = inception_v3.get_symbol
-
-
-def relay_inception_v3():
-    return tvm.relay.testing.inception_v3.get_workload(1)[0]
-
-
-# dcgan generator
-mx_dcgan = dcgan.get_symbol
-
-
-def relay_dcgan(batch_size):
-    return tvm.relay.testing.dcgan.get_workload(batch_size=batch_size)[0]
diff --git a/tests/python/frontend/mxnet/model_zoo/dcgan.py b/tests/python/frontend/mxnet/model_zoo/dcgan.py
deleted file mode 100644
index 67c20ccc65c9..000000000000
--- a/tests/python/frontend/mxnet/model_zoo/dcgan.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=unused-argument
-"""
-The MXNet symbol of DCGAN generator
-
-Adopted from:
-https://github.com/tqchen/mxnet-gan/blob/main/mxgan/generator.py
-
-Reference:
-Radford, Alec, Luke Metz, and Soumith Chintala.
-"Unsupervised representation learning with deep convolutional generative adversarial networks."
-arXiv preprint arXiv:1511.06434 (2015).
-"""
-
-import mxnet as mx
-
-
-def deconv2d(data, ishape, oshape, kshape, name, stride=(2, 2)):
-    """a deconv layer that enlarges the feature map"""
-    target_shape = (oshape[-2], oshape[-1])
-    pad_y = (kshape[0] - 1) // 2
-    pad_x = (kshape[1] - 1) // 2
-    adj_y = (target_shape[0] + 2 * pad_y - kshape[0]) % stride[0]
-    adj_x = (target_shape[1] + 2 * pad_x - kshape[1]) % stride[1]
-
-    net = mx.sym.Deconvolution(
-        data,
-        kernel=kshape,
-        stride=stride,
-        pad=(pad_y, pad_x),
-        adj=(adj_y, adj_x),
-        num_filter=oshape[0],
-        no_bias=True,
-        name=name,
-    )
-    return net
-
-
-def deconv2d_bn_relu(data, prefix, **kwargs):
-    """a block of deconv + batch norm + relu"""
-    eps = 1e-5 + 1e-12
-
-    net = deconv2d(data, name="%s_deconv" % prefix, **kwargs)
-    net = mx.sym.BatchNorm(net, eps=eps, name="%s_bn" % prefix)
-    net = mx.sym.Activation(net, name="%s_act" % prefix, act_type="relu")
-    return net
-
-
-def get_symbol(oshape=(3, 64, 64), ngf=128, code=None):
-    """get symbol of dcgan generator"""
-    assert oshape[-1] == 64, "Only support 64x64 image"
-    assert oshape[-2] == 64, "Only support 64x64 image"
-
-    code = mx.sym.Variable("data") if code is None else code
-    net = mx.sym.FullyConnected(
-        code, name="g1", num_hidden=ngf * 8 * 4 * 4, no_bias=True, flatten=False
-    )
-    net = mx.sym.Activation(net, act_type="relu")
-    # 4 x 4
-    net = mx.sym.reshape(net, shape=(-1, ngf * 8, 4, 4))
-    # 8 x 8
-    net = deconv2d_bn_relu(
-        net, ishape=(ngf * 8, 4, 4), oshape=(ngf * 4, 8, 8), kshape=(4, 4), prefix="g2"
-    )
-    # 16x16
-    net = deconv2d_bn_relu(
-        net, ishape=(ngf * 4, 8, 8), oshape=(ngf * 2, 16, 16), kshape=(4, 4), prefix="g3"
-    )
-    # 32x32
-    net = deconv2d_bn_relu(
-        net, ishape=(ngf * 2, 16, 16), oshape=(ngf, 32, 32), kshape=(4, 4), prefix="g4"
-    )
-    # 64x64
-    net = deconv2d(net, ishape=(ngf, 32, 32), oshape=oshape[-3:], kshape=(4, 4), name="g5_deconv")
-    net = mx.sym.Activation(net, act_type="tanh")
-    return net
diff --git a/tests/python/frontend/mxnet/model_zoo/dqn.py b/tests/python/frontend/mxnet/model_zoo/dqn.py
deleted file mode 100644
index df611c701258..000000000000
--- a/tests/python/frontend/mxnet/model_zoo/dqn.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-The mxnet symbol of Nature DQN
-
-Reference:
-Mnih, Volodymyr, et al.
-"Human-level control through deep reinforcement learning."
-Nature 518.7540 (2015): 529.
-"""
-
-import mxnet as mx
-
-
-def get_symbol(num_action=18):
-    data = mx.sym.Variable(name="data")
-    net = mx.sym.Convolution(data, kernel=(8, 8), stride=(4, 4), num_filter=32, name="conv1")
-    net = mx.sym.Activation(net, act_type="relu", name="relu1")
-    net = mx.sym.Convolution(net, kernel=(4, 4), stride=(2, 2), num_filter=64, name="conv2")
-    net = mx.sym.Activation(net, act_type="relu", name="relu2")
-    net = mx.sym.Convolution(net, kernel=(3, 3), stride=(1, 1), num_filter=64, name="conv3")
-    net = mx.sym.Activation(net, act_type="relu", name="relu3")
-    net = mx.sym.FullyConnected(net, num_hidden=512, name="fc4")
-    net = mx.sym.Activation(net, act_type="relu", name="relu4")
-    net = mx.sym.FullyConnected(net, num_hidden=num_action, name="fc5", flatten=False)
-
-    return net
diff --git a/tests/python/frontend/mxnet/model_zoo/inception_v3.py b/tests/python/frontend/mxnet/model_zoo/inception_v3.py
deleted file mode 100644
index 872662a01c10..000000000000
--- a/tests/python/frontend/mxnet/model_zoo/inception_v3.py
+++ /dev/null
@@ -1,368 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Inception V3, suitable for images with around 299 x 299
-
-Reference:
-Szegedy, Christian, et al. "Rethinking the Inception Architecture for Computer Vision." arXiv preprint arXiv:1512.00567 (2015).
-
-Adopted from https://github.com/apache/incubator-mxnet/blob/master/
-             example/image-classification/symbols/inception-v3.py
-"""
-import mxnet as mx
-import numpy as np
-
-
-def Conv(data, num_filter, kernel=(1, 1), stride=(1, 1), pad=(0, 0), name=None, suffix=""):
-    conv = mx.sym.Convolution(
-        data=data,
-        num_filter=num_filter,
-        kernel=kernel,
-        stride=stride,
-        pad=pad,
-        no_bias=True,
-        name="%s%s_conv2d" % (name, suffix),
-    )
-    bn = mx.sym.BatchNorm(data=conv, eps=2e-5, name="%s%s_batchnorm" % (name, suffix))
-    act = mx.sym.Activation(data=bn, act_type="relu", name="%s%s_relu" % (name, suffix))
-    return act
-
-
-def Inception7A(
-    data, num_1x1, num_3x3_red, num_3x3_1, num_3x3_2, num_5x5_red, num_5x5, pool, proj, name
-):
-    tower_1x1 = Conv(data, num_1x1, name=("%s_conv" % name))
-    tower_5x5 = Conv(data, num_5x5_red, name=("%s_tower" % name), suffix="_conv")
-    tower_5x5 = Conv(
-        tower_5x5, num_5x5, kernel=(5, 5), pad=(2, 2), name=("%s_tower" % name), suffix="_conv_1"
-    )
-    tower_3x3 = Conv(data, num_3x3_red, name=("%s_tower_1" % name), suffix="_conv")
-    tower_3x3 = Conv(
-        tower_3x3,
-        num_3x3_1,
-        kernel=(3, 3),
-        pad=(1, 1),
-        name=("%s_tower_1" % name),
-        suffix="_conv_1",
-    )
-    tower_3x3 = Conv(
-        tower_3x3,
-        num_3x3_2,
-        kernel=(3, 3),
-        pad=(1, 1),
-        name=("%s_tower_1" % name),
-        suffix="_conv_2",
-    )
-    pooling = mx.sym.Pooling(
-        data=data,
-        kernel=(3, 3),
-        stride=(1, 1),
-        pad=(1, 1),
-        pool_type=pool,
-        name=("%s_pool_%s_pool" % (pool, name)),
-    )
-    cproj = Conv(pooling, proj, name=("%s_tower_2" % name), suffix="_conv")
-    concat = mx.sym.Concat(
-        *[tower_1x1, tower_5x5, tower_3x3, cproj], name="ch_concat_%s_chconcat" % name
-    )
-    return concat
-
-
-# First Downsample
-def Inception7B(data, num_3x3, num_d3x3_red, num_d3x3_1, num_d3x3_2, pool, name):
-    tower_3x3 = Conv(
-        data, num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=("%s_conv" % name)
-    )
-    tower_d3x3 = Conv(data, num_d3x3_red, name=("%s_tower" % name), suffix="_conv")
-    tower_d3x3 = Conv(
-        tower_d3x3,
-        num_d3x3_1,
-        kernel=(3, 3),
-        pad=(1, 1),
-        stride=(1, 1),
-        name=("%s_tower" % name),
-        suffix="_conv_1",
-    )
-    tower_d3x3 = Conv(
-        tower_d3x3,
-        num_d3x3_2,
-        kernel=(3, 3),
-        pad=(0, 0),
-        stride=(2, 2),
-        name=("%s_tower" % name),
-        suffix="_conv_2",
-    )
-    pooling = mx.sym.Pooling(
-        data=data,
-        kernel=(3, 3),
-        stride=(2, 2),
-        pad=(0, 0),
-        pool_type="max",
-        name=("max_pool_%s_pool" % name),
-    )
-    concat = mx.sym.Concat(*[tower_3x3, tower_d3x3, pooling], name="ch_concat_%s_chconcat" % name)
-    return concat
-
-
-def Inception7C(
-    data,
-    num_1x1,
-    num_d7_red,
-    num_d7_1,
-    num_d7_2,
-    num_q7_red,
-    num_q7_1,
-    num_q7_2,
-    num_q7_3,
-    num_q7_4,
-    pool,
-    proj,
-    name,
-):
-    tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=("%s_conv" % name))
-    tower_d7 = Conv(data=data, num_filter=num_d7_red, name=("%s_tower" % name), suffix="_conv")
-    tower_d7 = Conv(
-        data=tower_d7,
-        num_filter=num_d7_1,
-        kernel=(1, 7),
-        pad=(0, 3),
-        name=("%s_tower" % name),
-        suffix="_conv_1",
-    )
-    tower_d7 = Conv(
-        data=tower_d7,
-        num_filter=num_d7_2,
-        kernel=(7, 1),
-        pad=(3, 0),
-        name=("%s_tower" % name),
-        suffix="_conv_2",
-    )
-    tower_q7 = Conv(data=data, num_filter=num_q7_red, name=("%s_tower_1" % name), suffix="_conv")
-    tower_q7 = Conv(
-        data=tower_q7,
-        num_filter=num_q7_1,
-        kernel=(7, 1),
-        pad=(3, 0),
-        name=("%s_tower_1" % name),
-        suffix="_conv_1",
-    )
-    tower_q7 = Conv(
-        data=tower_q7,
-        num_filter=num_q7_2,
-        kernel=(1, 7),
-        pad=(0, 3),
-        name=("%s_tower_1" % name),
-        suffix="_conv_2",
-    )
-    tower_q7 = Conv(
-        data=tower_q7,
-        num_filter=num_q7_3,
-        kernel=(7, 1),
-        pad=(3, 0),
-        name=("%s_tower_1" % name),
-        suffix="_conv_3",
-    )
-    tower_q7 = Conv(
-        data=tower_q7,
-        num_filter=num_q7_4,
-        kernel=(1, 7),
-        pad=(0, 3),
-        name=("%s_tower_1" % name),
-        suffix="_conv_4",
-    )
-    pooling = mx.sym.Pooling(
-        data=data,
-        kernel=(3, 3),
-        stride=(1, 1),
-        pad=(1, 1),
-        pool_type=pool,
-        name=("%s_pool_%s_pool" % (pool, name)),
-    )
-    cproj = Conv(
-        data=pooling, num_filter=proj, kernel=(1, 1), name=("%s_tower_2" % name), suffix="_conv"
-    )
-    # concat
-    concat = mx.sym.Concat(
-        *[tower_1x1, tower_d7, tower_q7, cproj], name="ch_concat_%s_chconcat" % name
-    )
-    return concat
-
-
-def Inception7D(
-    data, num_3x3_red, num_3x3, num_d7_3x3_red, num_d7_1, num_d7_2, num_d7_3x3, pool, name
-):
-    tower_3x3 = Conv(data=data, num_filter=num_3x3_red, name=("%s_tower" % name), suffix="_conv")
-    tower_3x3 = Conv(
-        data=tower_3x3,
-        num_filter=num_3x3,
-        kernel=(3, 3),
-        pad=(0, 0),
-        stride=(2, 2),
-        name=("%s_tower" % name),
-        suffix="_conv_1",
-    )
-    tower_d7_3x3 = Conv(
-        data=data, num_filter=num_d7_3x3_red, name=("%s_tower_1" % name), suffix="_conv"
-    )
-    tower_d7_3x3 = Conv(
-        data=tower_d7_3x3,
-        num_filter=num_d7_1,
-        kernel=(1, 7),
-        pad=(0, 3),
-        name=("%s_tower_1" % name),
-        suffix="_conv_1",
-    )
-    tower_d7_3x3 = Conv(
-        data=tower_d7_3x3,
-        num_filter=num_d7_2,
-        kernel=(7, 1),
-        pad=(3, 0),
-        name=("%s_tower_1" % name),
-        suffix="_conv_2",
-    )
-    tower_d7_3x3 = Conv(
-        data=tower_d7_3x3,
-        num_filter=num_d7_3x3,
-        kernel=(3, 3),
-        stride=(2, 2),
-        name=("%s_tower_1" % name),
-        suffix="_conv_3",
-    )
-    pooling = mx.sym.Pooling(
-        data=data,
-        kernel=(3, 3),
-        stride=(2, 2),
-        pool_type=pool,
-        name=("%s_pool_%s_pool" % (pool, name)),
-    )
-    # concat
-    concat = mx.sym.Concat(*[tower_3x3, tower_d7_3x3, pooling], name="ch_concat_%s_chconcat" % name)
-    return concat
-
-
-def Inception7E(
-    data,
-    num_1x1,
-    num_d3_red,
-    num_d3_1,
-    num_d3_2,
-    num_3x3_d3_red,
-    num_3x3,
-    num_3x3_d3_1,
-    num_3x3_d3_2,
-    pool,
-    proj,
-    name,
-):
-    tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=("%s_conv" % name))
-    tower_d3 = Conv(data=data, num_filter=num_d3_red, name=("%s_tower" % name), suffix="_conv")
-    tower_d3_a = Conv(
-        data=tower_d3,
-        num_filter=num_d3_1,
-        kernel=(1, 3),
-        pad=(0, 1),
-        name=("%s_tower" % name),
-        suffix="_mixed_conv",
-    )
-    tower_d3_b = Conv(
-        data=tower_d3,
-        num_filter=num_d3_2,
-        kernel=(3, 1),
-        pad=(1, 0),
-        name=("%s_tower" % name),
-        suffix="_mixed_conv_1",
-    )
-    tower_3x3_d3 = Conv(
-        data=data, num_filter=num_3x3_d3_red, name=("%s_tower_1" % name), suffix="_conv"
-    )
-    tower_3x3_d3 = Conv(
-        data=tower_3x3_d3,
-        num_filter=num_3x3,
-        kernel=(3, 3),
-        pad=(1, 1),
-        name=("%s_tower_1" % name),
-        suffix="_conv_1",
-    )
-    tower_3x3_d3_a = Conv(
-        data=tower_3x3_d3,
-        num_filter=num_3x3_d3_1,
-        kernel=(1, 3),
-        pad=(0, 1),
-        name=("%s_tower_1" % name),
-        suffix="_mixed_conv",
-    )
-    tower_3x3_d3_b = Conv(
-        data=tower_3x3_d3,
-        num_filter=num_3x3_d3_2,
-        kernel=(3, 1),
-        pad=(1, 0),
-        name=("%s_tower_1" % name),
-        suffix="_mixed_conv_1",
-    )
-    pooling = mx.sym.Pooling(
-        data=data,
-        kernel=(3, 3),
-        stride=(1, 1),
-        pad=(1, 1),
-        pool_type=pool,
-        name=("%s_pool_%s_pool" % (pool, name)),
-    )
-    cproj = Conv(
-        data=pooling, num_filter=proj, kernel=(1, 1), name=("%s_tower_2" % name), suffix="_conv"
-    )
-    # concat
-    concat = mx.sym.Concat(
-        *[tower_1x1, tower_d3_a, tower_d3_b, tower_3x3_d3_a, tower_3x3_d3_b, cproj],
-        name="ch_concat_%s_chconcat" % name,
-    )
-    return concat
-
-
-def get_symbol(num_classes=1000, **kwargs):
-    data = mx.sym.Variable(name="data")
-    # stage 1
-    conv = Conv(data, 32, kernel=(3, 3), stride=(2, 2), name="conv")
-    conv_1 = Conv(conv, 32, kernel=(3, 3), name="conv_1")
-    conv_2 = Conv(conv_1, 64, kernel=(3, 3), pad=(1, 1), name="conv_2")
-    pool = mx.sym.Pooling(data=conv_2, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool")
-    # stage 2
-    conv_3 = Conv(pool, 80, kernel=(1, 1), name="conv_3")
-    conv_4 = Conv(conv_3, 192, kernel=(3, 3), name="conv_4")
-    pool1 = mx.sym.Pooling(data=conv_4, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool1")
-
-    # # stage 3
-    in3a = Inception7A(pool1, 64, 64, 96, 96, 48, 64, "avg", 32, "mixed")
-    in3b = Inception7A(in3a, 64, 64, 96, 96, 48, 64, "avg", 64, "mixed_1")
-    in3c = Inception7A(in3b, 64, 64, 96, 96, 48, 64, "avg", 64, "mixed_2")
-    in3d = Inception7B(in3c, 384, 64, 96, 96, "max", "mixed_3")
-    # stage 4
-    in4a = Inception7C(in3d, 192, 128, 128, 192, 128, 128, 128, 128, 192, "avg", 192, "mixed_4")
-    in4b = Inception7C(in4a, 192, 160, 160, 192, 160, 160, 160, 160, 192, "avg", 192, "mixed_5")
-    in4c = Inception7C(in4b, 192, 160, 160, 192, 160, 160, 160, 160, 192, "avg", 192, "mixed_6")
-    in4d = Inception7C(in4c, 192, 192, 192, 192, 192, 192, 192, 192, 192, "avg", 192, "mixed_7")
-    in4e = Inception7D(in4d, 192, 320, 192, 192, 192, 192, "max", "mixed_8")
-    # stage 5
-    in5a = Inception7E(in4e, 320, 384, 384, 384, 448, 384, 384, 384, "avg", 192, "mixed_9")
-    in5b = Inception7E(in5a, 320, 384, 384, 384, 448, 384, 384, 384, "max", 192, "mixed_10")
-    # pool
-    pool = mx.sym.Pooling(
-        data=in5b, kernel=(8, 8), stride=(1, 1), pool_type="avg", name="global_pool"
-    )
-    flatten = mx.sym.Flatten(data=pool, name="flatten")
-    fc1 = mx.sym.FullyConnected(data=flatten, num_hidden=num_classes, name="fc1", flatten=False)
-    softmax = mx.sym.SoftmaxOutput(data=fc1, name="softmax")
-    return softmax
diff --git a/tests/python/frontend/mxnet/model_zoo/mlp.py b/tests/python/frontend/mxnet/model_zoo/mlp.py
deleted file mode 100644
index 45f33f991de5..000000000000
--- a/tests/python/frontend/mxnet/model_zoo/mlp.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-a simple multilayer perceptron
-"""
-import mxnet as mx
-
-
-def get_symbol(num_classes=10, **kwargs):
-    data = mx.symbol.Variable("data")
-    data = mx.sym.Flatten(data=data)
-    try:
-        fc1 = mx.symbol.FullyConnected(data=data, name="fc1", num_hidden=128, flatten=False)
-        act1 = mx.symbol.Activation(data=fc1, name="relu1", act_type="relu")
-        fc2 = mx.symbol.FullyConnected(data=act1, name="fc2", num_hidden=64, flatten=False)
-        act2 = mx.symbol.Activation(data=fc2, name="relu2", act_type="relu")
-        fc3 = mx.symbol.FullyConnected(data=act2, name="fc3", num_hidden=num_classes, flatten=False)
-        mlp = mx.symbol.softmax(data=fc3, name="softmax")
-    except:
-        fc1 = mx.symbol.FullyConnected(data=data, name="fc1", num_hidden=128)
-        act1 = mx.symbol.Activation(data=fc1, name="relu1", act_type="relu")
-        fc2 = mx.symbol.FullyConnected(data=act1, name="fc2", num_hidden=64)
-        act2 = mx.symbol.Activation(data=fc2, name="relu2", act_type="relu")
-        fc3 = mx.symbol.FullyConnected(data=act2, name="fc3", num_hidden=num_classes)
-        mlp = mx.symbol.softmax(data=fc3, name="softmax")
-    return mlp
diff --git a/tests/python/frontend/mxnet/model_zoo/resnet.py b/tests/python/frontend/mxnet/model_zoo/resnet.py
deleted file mode 100644
index 00e68958b462..000000000000
--- a/tests/python/frontend/mxnet/model_zoo/resnet.py
+++ /dev/null
@@ -1,326 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py
-Original author Wei Wu
-
-Implemented the following paper:
-
-Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. "Identity Mappings in Deep Residual Networks"
-"""
-import mxnet as mx
-import numpy as np
-
-
-def residual_unit(
-    data,
-    num_filter,
-    stride,
-    dim_match,
-    name,
-    bottle_neck=True,
-    bn_mom=0.9,
-    workspace=256,
-    memonger=False,
-):
-    """Return ResNet Unit symbol for building ResNet
-    Parameters
-    ----------
-    data : str
-        Input data
-    num_filter : int
-        Number of output channels
-    bnf : int
-        Bottle neck channels factor with regard to num_filter
-    stride : tuple
-        Stride used in convolution
-    dim_match : Boolean
-        True means channel number between input and output is the same, otherwise means differ
-    name : str
-        Base name of the operators
-    workspace : int
-        Workspace used in convolution operator
-    """
-    if bottle_neck:
-        bn1 = mx.sym.BatchNorm(
-            data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + "_bn1"
-        )
-        act1 = mx.sym.Activation(data=bn1, act_type="relu", name=name + "_relu1")
-        conv1 = mx.sym.Convolution(
-            data=act1,
-            num_filter=int(num_filter * 0.25),
-            kernel=(1, 1),
-            stride=stride,
-            pad=(0, 0),
-            no_bias=True,
-            workspace=workspace,
-            name=name + "_conv1",
-        )
-        bn2 = mx.sym.BatchNorm(
-            data=conv1, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + "_bn2"
-        )
-        act2 = mx.sym.Activation(data=bn2, act_type="relu", name=name + "_relu2")
-        conv2 = mx.sym.Convolution(
-            data=act2,
-            num_filter=int(num_filter * 0.25),
-            kernel=(3, 3),
-            stride=(1, 1),
-            pad=(1, 1),
-            no_bias=True,
-            workspace=workspace,
-            name=name + "_conv2",
-        )
-        bn3 = mx.sym.BatchNorm(
-            data=conv2, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + "_bn3"
-        )
-        act3 = mx.sym.Activation(data=bn3, act_type="relu", name=name + "_relu3")
-        conv3 = mx.sym.Convolution(
-            data=act3,
-            num_filter=num_filter,
-            kernel=(1, 1),
-            stride=(1, 1),
-            pad=(0, 0),
-            no_bias=True,
-            workspace=workspace,
-            name=name + "_conv3",
-        )
-        if dim_match:
-            shortcut = data
-        else:
-            shortcut = mx.sym.Convolution(
-                data=act1,
-                num_filter=num_filter,
-                kernel=(1, 1),
-                stride=stride,
-                no_bias=True,
-                workspace=workspace,
-                name=name + "_sc",
-            )
-        if memonger:
-            shortcut._set_attr(mirror_stage="True")
-        return conv3 + shortcut
-    else:
-        bn1 = mx.sym.BatchNorm(
-            data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + "_bn1"
-        )
-        act1 = mx.sym.Activation(data=bn1, act_type="relu", name=name + "_relu1")
-        conv1 = mx.sym.Convolution(
-            data=act1,
-            num_filter=num_filter,
-            kernel=(3, 3),
-            stride=stride,
-            pad=(1, 1),
-            no_bias=True,
-            workspace=workspace,
-            name=name + "_conv1",
-        )
-        bn2 = mx.sym.BatchNorm(
-            data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + "_bn2"
-        )
-        act2 = mx.sym.Activation(data=bn2, act_type="relu", name=name + "_relu2")
-        conv2 = mx.sym.Convolution(
-            data=act2,
-            num_filter=num_filter,
-            kernel=(3, 3),
-            stride=(1, 1),
-            pad=(1, 1),
-            no_bias=True,
-            workspace=workspace,
-            name=name + "_conv2",
-        )
-        if dim_match:
-            shortcut = data
-        else:
-            shortcut = mx.sym.Convolution(
-                data=act1,
-                num_filter=num_filter,
-                kernel=(1, 1),
-                stride=stride,
-                no_bias=True,
-                workspace=workspace,
-                name=name + "_sc",
-            )
-        if memonger:
-            shortcut._set_attr(mirror_stage="True")
-        return conv2 + shortcut
-
-
-def resnet(
-    units,
-    num_stages,
-    filter_list,
-    num_classes,
-    image_shape,
-    bottle_neck=True,
-    bn_mom=0.9,
-    workspace=256,
-    dtype="float32",
-    memonger=False,
-):
-    """Return ResNet symbol of
-    Parameters
-    ----------
-    units : list
-        Number of units in each stage
-    num_stages : int
-        Number of stage
-    filter_list : list
-        Channel size of each stage
-    num_classes : int
-        Output size of symbol
-    dataset : str
-        Dataset type, only cifar10 and imagenet supports
-    workspace : int
-        Workspace used in convolution operator
-    dtype : str
-        Precision (float32 or float16)
-    """
-    num_unit = len(units)
-    assert num_unit == num_stages
-    data = mx.sym.Variable(name="data")
-    if dtype == "float32":
-        # data = mx.sym.identity(data=data, name='id')
-        data = data
-    else:
-        if dtype == "float16":
-            data = mx.sym.Cast(data=data, dtype=np.float16)
-    data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name="bn_data")
-    (nchannel, height, width) = image_shape
-    if height <= 32:  # such as cifar10
-        body = mx.sym.Convolution(
-            data=data,
-            num_filter=filter_list[0],
-            kernel=(3, 3),
-            stride=(1, 1),
-            pad=(1, 1),
-            no_bias=True,
-            name="conv0",
-            workspace=workspace,
-        )
-    else:  # often expected to be 224 such as imagenet
-        body = mx.sym.Convolution(
-            data=data,
-            num_filter=filter_list[0],
-            kernel=(7, 7),
-            stride=(2, 2),
-            pad=(3, 3),
-            no_bias=True,
-            name="conv0",
-            workspace=workspace,
-        )
-        body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name="bn0")
-        body = mx.sym.Activation(data=body, act_type="relu", name="relu0")
-        body = mx.sym.Pooling(data=body, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type="max")
-
-    for i in range(num_stages):
-        body = residual_unit(
-            body,
-            filter_list[i + 1],
-            (1 if i == 0 else 2, 1 if i == 0 else 2),
-            False,
-            name="stage%d_unit%d" % (i + 1, 1),
-            bottle_neck=bottle_neck,
-            workspace=workspace,
-            memonger=memonger,
-        )
-        for j in range(units[i] - 1):
-            body = residual_unit(
-                body,
-                filter_list[i + 1],
-                (1, 1),
-                True,
-                name="stage%d_unit%d" % (i + 1, j + 2),
-                bottle_neck=bottle_neck,
-                workspace=workspace,
-                memonger=memonger,
-            )
-    bn1 = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name="bn1")
-    relu1 = mx.sym.Activation(data=bn1, act_type="relu", name="relu1")
-    # Although kernel is not used here when global_pool=True, we should put one
-    pool1 = mx.sym.Pooling(
-        data=relu1, global_pool=True, kernel=(7, 7), pool_type="avg", name="pool1"
-    )
-    flat = mx.sym.Flatten(data=pool1)
-    try:
-        fc1 = mx.sym.FullyConnected(data=flat, num_hidden=num_classes, name="fc1", flatten=False)
-    except:
-        fc1 = mx.sym.FullyConnected(data=flat, num_hidden=num_classes, name="fc1")
-    if dtype == "float16":
-        fc1 = mx.sym.Cast(data=fc1, dtype=np.float32)
-    return mx.sym.softmax(data=fc1, name="softmax")
-
-
-def get_symbol(num_classes, num_layers, image_shape, conv_workspace=256, dtype="float32", **kwargs):
-    """
-    Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
-    Original author Wei Wu
-    """
-    image_shape = [int(l) for l in image_shape.split(",")]
-    (nchannel, height, width) = image_shape
-    if height <= 28:
-        num_stages = 3
-        if (num_layers - 2) % 9 == 0 and num_layers >= 164:
-            per_unit = [(num_layers - 2) // 9]
-            filter_list = [16, 64, 128, 256]
-            bottle_neck = True
-        elif (num_layers - 2) % 6 == 0 and num_layers < 164:
-            per_unit = [(num_layers - 2) // 6]
-            filter_list = [16, 16, 32, 64]
-            bottle_neck = False
-        else:
-            raise ValueError(
-                "no experiments done on num_layers {}, you can do it yourself".format(num_layers)
-            )
-        units = per_unit * num_stages
-    else:
-        if num_layers >= 50:
-            filter_list = [64, 256, 512, 1024, 2048]
-            bottle_neck = True
-        else:
-            filter_list = [64, 64, 128, 256, 512]
-            bottle_neck = False
-        num_stages = 4
-        if num_layers == 18:
-            units = [2, 2, 2, 2]
-        elif num_layers == 34:
-            units = [3, 4, 6, 3]
-        elif num_layers == 50:
-            units = [3, 4, 6, 3]
-        elif num_layers == 101:
-            units = [3, 4, 23, 3]
-        elif num_layers == 152:
-            units = [3, 8, 36, 3]
-        elif num_layers == 200:
-            units = [3, 24, 36, 3]
-        elif num_layers == 269:
-            units = [3, 30, 48, 8]
-        else:
-            raise ValueError(
-                "no experiments done on num_layers {}, you can do it yourself".format(num_layers)
-            )
-
-    return resnet(
-        units=units,
-        num_stages=num_stages,
-        filter_list=filter_list,
-        num_classes=num_classes,
-        image_shape=image_shape,
-        bottle_neck=bottle_neck,
-        workspace=conv_workspace,
-        dtype=dtype,
-    )
diff --git a/tests/python/frontend/mxnet/model_zoo/squeezenet.py b/tests/python/frontend/mxnet/model_zoo/squeezenet.py
deleted file mode 100644
index 146f7fa7e8e6..000000000000
--- a/tests/python/frontend/mxnet/model_zoo/squeezenet.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Symbol of SqueezeNet
-
-Reference:
-Iandola, Forrest N., et al.
-"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and< 0.5 mb model size." (2016).
-"""
-
-import mxnet as mx
-
-# Helpers
-def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels):
-    net = _make_fire_conv(net, squeeze_channels, 1, 0)
-
-    left = _make_fire_conv(net, expand1x1_channels, 1, 0)
-    right = _make_fire_conv(net, expand3x3_channels, 3, 1)
-    # NOTE : Assume NCHW layout here
-    net = mx.sym.concat(left, right, dim=1)
-
-    return net
-
-
-def _make_fire_conv(net, channels, kernel_size, padding=0):
-    net = mx.sym.Convolution(
-        net, num_filter=channels, kernel=(kernel_size, kernel_size), pad=(padding, padding)
-    )
-    net = mx.sym.Activation(net, act_type="relu")
-    return net
-
-
-# Net
-def get_symbol(num_classes=1000, version="1.0", **kwargs):
-    """Get symbol of SqueezeNet
-
-    Parameters
-    ----------
-    num_classes: int
-        The number of classification results
-
-    version : str, optional
-        "1.0" or "1.1" of SqueezeNet
-    """
-    assert version in [
-        "1.0",
-        "1.1",
-    ], "Unsupported SqueezeNet version {version}:" "1.0 or 1.1 expected".format(version=version)
-    net = mx.sym.Variable("data")
-    if version == "1.0":
-        net = mx.sym.Convolution(net, num_filter=96, kernel=(7, 7), stride=(2, 2), pad=(3, 3))
-        net = mx.sym.Activation(net, act_type="relu")
-        net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type="max", stride=(2, 2))
-        net = _make_fire(net, 16, 64, 64)
-        net = _make_fire(net, 16, 64, 64)
-        net = _make_fire(net, 32, 128, 128)
-        net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type="max", stride=(2, 2))
-        net = _make_fire(net, 32, 128, 128)
-        net = _make_fire(net, 48, 192, 192)
-        net = _make_fire(net, 48, 192, 192)
-        net = _make_fire(net, 64, 256, 256)
-        net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type="max", stride=(2, 2))
-        net = _make_fire(net, 64, 256, 256)
-    else:
-        net = mx.sym.Convolution(net, num_filter=64, kernel=(3, 3), stride=(2, 2), pad=(1, 1))
-        net = mx.sym.Activation(net, act_type="relu")
-        net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type="max", stride=(2, 2))
-        net = _make_fire(net, 16, 64, 64)
-        net = _make_fire(net, 16, 64, 64)
-        net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type="max", stride=(2, 2))
-        net = _make_fire(net, 32, 128, 128)
-        net = _make_fire(net, 32, 128, 128)
-        net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type="max", stride=(2, 2))
-        net = _make_fire(net, 48, 192, 192)
-        net = _make_fire(net, 48, 192, 192)
-        net = _make_fire(net, 64, 256, 256)
-        net = _make_fire(net, 64, 256, 256)
-    net = mx.sym.Dropout(net, p=0.5)
-    net = mx.sym.Convolution(net, num_filter=num_classes, kernel=(1, 1))
-    net = mx.sym.Activation(net, act_type="relu")
-    net = mx.sym.Pooling(data=net, global_pool=True, kernel=(13, 13), pool_type="avg")
-    net = mx.sym.flatten(net)
-    return mx.sym.softmax(net)
diff --git a/tests/python/frontend/mxnet/model_zoo/vgg.py b/tests/python/frontend/mxnet/model_zoo/vgg.py
deleted file mode 100644
index 157803446811..000000000000
--- a/tests/python/frontend/mxnet/model_zoo/vgg.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""References:
-
-Simonyan, Karen, and Andrew Zisserman. "Very deep convolutional networks for
-large-scale image recognition." arXiv preprint arXiv:1409.1556 (2014).
-"""
-
-import mxnet as mx
-import numpy as np
-
-
-def get_feature(internel_layer, layers, filters, batch_norm=False, **kwargs):
-    for i, num in enumerate(layers):
-        for j in range(num):
-            internel_layer = mx.sym.Convolution(
-                data=internel_layer,
-                kernel=(3, 3),
-                pad=(1, 1),
-                num_filter=filters[i],
-                name="conv%s_%s" % (i + 1, j + 1),
-            )
-            if batch_norm:
-                internel_layer = mx.symbol.BatchNorm(
-                    data=internel_layer, name="bn%s_%s" % (i + 1, j + 1)
-                )
-            internel_layer = mx.sym.Activation(
-                data=internel_layer, act_type="relu", name="relu%s_%s" % (i + 1, j + 1)
-            )
-        internel_layer = mx.sym.Pooling(
-            data=internel_layer,
-            pool_type="max",
-            kernel=(2, 2),
-            stride=(2, 2),
-            name="pool%s" % (i + 1),
-        )
-    return internel_layer
-
-
-def get_classifier(input_data, num_classes, **kwargs):
-    flatten = mx.sym.Flatten(data=input_data, name="flatten")
-    try:
-        fc6 = mx.sym.FullyConnected(data=flatten, num_hidden=4096, name="fc6", flatten=False)
-        relu6 = mx.sym.Activation(data=fc6, act_type="relu", name="relu6")
-        drop6 = mx.sym.Dropout(data=relu6, p=0.5, name="drop6")
-        fc7 = mx.sym.FullyConnected(data=drop6, num_hidden=4096, name="fc7", flatten=False)
-        relu7 = mx.sym.Activation(data=fc7, act_type="relu", name="relu7")
-        drop7 = mx.sym.Dropout(data=relu7, p=0.5, name="drop7")
-        fc8 = mx.sym.FullyConnected(data=drop7, num_hidden=num_classes, name="fc8", flatten=False)
-    except:
-        fc6 = mx.sym.FullyConnected(data=flatten, num_hidden=4096, name="fc6")
-        relu6 = mx.sym.Activation(data=fc6, act_type="relu", name="relu6")
-        drop6 = mx.sym.Dropout(data=relu6, p=0.5, name="drop6")
-        fc7 = mx.sym.FullyConnected(data=drop6, num_hidden=4096, name="fc7")
-        relu7 = mx.sym.Activation(data=fc7, act_type="relu", name="relu7")
-        drop7 = mx.sym.Dropout(data=relu7, p=0.5, name="drop7")
-        fc8 = mx.sym.FullyConnected(data=drop7, num_hidden=num_classes, name="fc8")
-    return fc8
-
-
-def get_symbol(num_classes, num_layers=11, batch_norm=False, dtype="float32", **kwargs):
-    """
-    Parameters
-    ----------
-    num_classes : int, default 1000
-        Number of classification classes.
-    num_layers : int
-        Number of layers for the variant of densenet. Options are 11, 13, 16, 19.
-    batch_norm : bool, default False
-        Use batch normalization.
-    dtype: str, float32 or float16
-        Data precision.
-    """
-    vgg_spec = {
-        11: ([1, 1, 2, 2, 2], [64, 128, 256, 512, 512]),
-        13: ([2, 2, 2, 2, 2], [64, 128, 256, 512, 512]),
-        16: ([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]),
-        19: ([2, 2, 4, 4, 4], [64, 128, 256, 512, 512]),
-    }
-    if num_layers not in vgg_spec:
-        raise ValueError(
-            "Invalide num_layers {}. Possible choices are 11,13,16,19.".format(num_layers)
-        )
-    layers, filters = vgg_spec[num_layers]
-    data = mx.sym.Variable(name="data")
-    if dtype == "float16":
-        data = mx.sym.Cast(data=data, dtype=np.float16)
-    feature = get_feature(data, layers, filters, batch_norm)
-    classifier = get_classifier(feature, num_classes)
-    if dtype == "float16":
-        classifier = mx.sym.Cast(data=classifier, dtype=np.float32)
-    symbol = mx.sym.softmax(data=classifier, name="softmax")
-    return symbol
diff --git a/tests/python/frontend/mxnet/test_forward.py b/tests/python/frontend/mxnet/test_forward.py
deleted file mode 100644
index cf206a3d5261..000000000000
--- a/tests/python/frontend/mxnet/test_forward.py
+++ /dev/null
@@ -1,2369 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import operator
-import random
-
-import numpy as np
-import pytest
-import tvm
-import tvm.testing
-from tvm import relay, te
-from tvm.contrib import graph_executor
-
-import model_zoo
-import mxnet as mx
-from mxnet import gluon
-from mxnet.gluon.model_zoo import vision
-
-
-def verify_mxnet_frontend_impl(
-    mx_symbol,
-    data_shape=(1, 3, 224, 224),
-    out_shape=(1, 1000),
-    gluon_impl=False,
-    name=None,
-    dtype="float32",
-):
-    """Use name different from test to avoid pytest picking it up"""
-    if gluon_impl:
-
-        def get_gluon_output(name, x):
-            try:
-                net = vision.get_model(name)
-            except RuntimeError:
-                pytest.skip(reason="mxnet downloads no longer supported")
-            net.collect_params().initialize(mx.init.Xavier())
-            net_sym = gluon.nn.SymbolBlock(
-                outputs=net(mx.sym.var("data")),
-                inputs=mx.sym.var("data"),
-                params=net.collect_params(),
-            )
-            out = net_sym(mx.nd.array(x.astype(dtype))).asnumpy()
-            return out, net_sym
-
-    else:
-
-        def get_mxnet_output(symbol, x, dtype="float32"):
-            from collections import namedtuple
-
-            Batch = namedtuple("Batch", ["data"])
-            mod = mx.mod.Module(symbol, label_names=None)
-            mod.bind(data_shapes=[("data", x.shape)], for_training=False)
-            mod.init_params()
-            mod.forward(Batch([mx.nd.array(x.astype(dtype))]))
-            out = mod.get_outputs()[0].asnumpy()
-            args, auxs = mod.get_params()
-            return out, args, auxs
-
-    def get_tvm_output(symbol, x, args, auxs, target, dev, dtype="float32"):
-        shape_dict = {"data": x.shape}
-        if gluon_impl:
-            mod, params = relay.frontend.from_mxnet(symbol, shape_dict)
-        else:
-            mod, params = relay.frontend.from_mxnet(
-                symbol, shape_dict, arg_params=args, aux_params=auxs
-            )
-        with tvm.transform.PassContext(opt_level=3):
-            lib = relay.build(mod, target, params=params)
-        m = graph_executor.GraphModule(lib["default"](dev))
-        # set inputs
-        m.set_input("data", tvm.nd.array(x.astype(dtype)))
-        m.run()
-        # get outputs
-        out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
-        return out.numpy()
-
-    # random input
-    x = np.random.uniform(size=data_shape)
-    if gluon_impl:
-        gluon_out, gluon_sym = get_gluon_output(name, x)
-        for target, dev in tvm.testing.enabled_targets():
-            tvm_out = get_tvm_output(gluon_sym, x, None, None, target, dev, dtype)
-            tvm.testing.assert_allclose(gluon_out, tvm_out, rtol=1e-5, atol=1e-5)
-    else:
-        mx_out, args, auxs = get_mxnet_output(mx_symbol, x, dtype)
-        assert "data" not in args
-        for target, dev in tvm.testing.enabled_targets():
-            tvm_out = get_tvm_output(mx_symbol, x, args, auxs, target, dev, dtype)
-            tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_mlp():
-    mlp = model_zoo.mx_mlp()
-    verify_mxnet_frontend_impl(mlp, data_shape=(1, 1, 28, 28), out_shape=(1, 10))
-
-
-@tvm.testing.uses_gpu
-def test_forward_vgg():
-    for n in [11]:
-        mx_sym = model_zoo.mx_vgg(n)
-        verify_mxnet_frontend_impl(mx_sym)
-
-
-@tvm.testing.uses_gpu
-def test_forward_resnet():
-    for n in [18]:
-        mx_sym = model_zoo.mx_resnet(18)
-        verify_mxnet_frontend_impl(mx_sym)
-
-
-@tvm.testing.uses_gpu
-def test_forward_leaky_relu():
-    data = mx.sym.var("data")
-    data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
-    mx_sym = mx.sym.LeakyReLU(data)
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-    mx_sym = mx.sym.LeakyReLU(data, act_type="leaky")
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-
-@tvm.testing.uses_gpu
-def test_forward_elu():
-    data = mx.sym.var("data")
-    data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
-    mx_sym = mx.sym.LeakyReLU(data, act_type="elu")
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-
-@tvm.testing.uses_gpu
-def test_forward_rrelu():
-    data = mx.sym.var("data")
-    data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
-    mx_sym = mx.sym.LeakyReLU(data, act_type="rrelu", lower_bound=0.3, upper_bound=0.7)
-    verify_mxnet_frontend_impl(mx_sym[0], (1, 3, 100, 100), (1, 6, 100, 100))
-
-
-@tvm.testing.uses_gpu
-def test_forward_prelu():
-    data = mx.sym.var("data")
-    data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
-    mx_sym = mx.sym.LeakyReLU(data, act_type="prelu")
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-
-@tvm.testing.uses_gpu
-def test_forward_gelu():
-    data = mx.sym.var("data")
-    data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
-    mx_sym = mx.sym.LeakyReLU(data, act_type="gelu")
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-
-@tvm.testing.uses_gpu
-def test_forward_softrelu():
-    data = mx.sym.var("data")
-    data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
-    mx_sym = mx.sym.Activation(data, act_type="softrelu")
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-
-@tvm.testing.uses_gpu
-def test_forward_fc_flatten():
-    # test flatten=True option in mxnet 0.11.1
-    data = mx.sym.var("data")
-    try:
-        mx_sym = mx.sym.FullyConnected(data, num_hidden=100, flatten=True)
-        verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 100))
-        mx_sym = mx.sym.FullyConnected(mx.sym.Flatten(data), num_hidden=100, flatten=False)
-        verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 100))
-    except:
-        pass
-
-
-@tvm.testing.uses_gpu
-def test_forward_clip():
-    data = mx.sym.var("data")
-    data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
-    mx_sym = mx.sym.clip(data, a_min=0, a_max=1)
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-
-@tvm.testing.uses_gpu
-def test_forward_split():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.split(data, axis=1, num_outputs=4, squeeze_axis=False)
-    verify_mxnet_frontend_impl(mx_sym, (1, 4, 2, 1), (1, 1, 2, 1))
-
-
-@tvm.testing.uses_gpu
-def test_forward_split_squeeze():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.split(data, axis=1, num_outputs=4, squeeze_axis=True)
-    verify_mxnet_frontend_impl(mx_sym, (1, 4, 2, 1), (1, 2, 1))
-
-
-@tvm.testing.uses_gpu
-def test_forward_expand_dims():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.expand_dims(data, axis=1)
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 1, 3, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_pooling():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.Pooling(data, kernel=(3, 3), pad=(1, 1), pool_type="avg")
-    verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 8, 8))
-
-    mx_sym = mx.sym.Pooling(data, kernel=(3, 3), pad=(1, 1), pool_type="max")
-    verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 8, 8))
-
-
-@tvm.testing.uses_gpu
-def test_forward_pooling3d():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.Pooling(data, kernel=(3, 3, 3), pad=(1, 1, 1), pool_type="avg")
-    verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8, 8), (1, 20, 8, 8, 8))
-
-    mx_sym = mx.sym.Pooling(data, kernel=(3, 3, 3), pad=(1, 1, 1), pool_type="max")
-    verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8, 8), (1, 20, 8, 8, 8))
-
-
-@tvm.testing.uses_gpu
-def test_forward_adaptive_pooling():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.contrib.AdaptiveAvgPooling2D(data, output_size=(1,))
-    verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 1, 1))
-
-    mx_sym = mx.sym.contrib.AdaptiveAvgPooling2D(data, output_size=(3, 3))
-    verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 3, 3))
-
-
-@tvm.testing.uses_gpu
-def test_forward_lrn():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.LRN(data, alpha=2, beta=2, knorm=1, nsize=5)
-    verify_mxnet_frontend_impl(mx_sym, (1, 10, 24, 24), (1, 10, 24, 24))
-
-
-@tvm.testing.uses_gpu
-def test_forward_ones():
-    data = mx.sym.var("data")
-    ones = mx.sym.ones(shape=(2, 3, 4), dtype="float32")
-    mx_sym = mx.sym.elemwise_add(data, ones)
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_zeros():
-    data = mx.sym.var("data")
-    zeros = mx.sym.zeros(shape=(2, 3, 4), dtype="float32")
-    mx_sym = mx.sym.elemwise_add(data, zeros)
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_ones_like():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.ones_like(data, dtype="float32")
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_make_loss():
-    data = mx.sym.var("data")
-    ones = mx.sym.ones(shape=(2, 3, 4), dtype="float32")
-    mx_sym = mx.sym.make_loss((data - ones) ** 2 / 2, dtype="float32")
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_zeros_like():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.zeros_like(data, dtype="float32")
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_argmax():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.argmax(data, axis=1)
-    verify_mxnet_frontend_impl(mx_sym, (5, 3), (5,))
-
-
-@tvm.testing.uses_gpu
-def test_forward_argmin():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.argmin(data, axis=0)
-    verify_mxnet_frontend_impl(mx_sym, (5, 4), (4,))
-
-
-@tvm.testing.uses_gpu
-def test_forward_slice():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.slice(data, begin=(0, 1), end=(2, 4))
-    verify_mxnet_frontend_impl(mx_sym, (3, 4), (2, 3))
-    mx_sym = mx.sym.slice(data, begin=(-1, 1), end=(-3, 4), step=(-1, 2))
-    verify_mxnet_frontend_impl(mx_sym, (3, 4), (2, 2))
-
-
-@tvm.testing.uses_gpu
-def test_forward_where():
-    cond = mx.sym.var("cond")
-    x = mx.sym.var("x")
-    y = mx.sym.var("y")
-    dshape = (2, 2)
-    dtype = "float32"
-    mx_sym = mx.sym.where(cond, x, y)
-    np_cond = np.array([[0, 1], [-1, 0]]).astype(dtype)
-    np_x = np.random.uniform(size=dshape).astype(dtype)
-    np_y = np.random.uniform(size=dshape).astype(dtype)
-    mx_cond = mx.nd.array(np_cond)
-    mx_x = mx.nd.array(np_x)
-    mx_y = mx.nd.array(np_y)
-    shapes = {"cond": dshape, "x": dshape, "y": dshape}
-    mod = mx.mod.Module(mx_sym, label_names=None, data_names=["cond", "x", "y"])
-    mod.bind(data_shapes=shapes.items(), for_training=False)
-    mod.init_params()
-    args, auxs = mod.get_params()
-    mx_out = mx.nd.where(mx_cond, mx_x, mx_y).asnumpy()
-
-    mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, args, auxs)
-    for target, dev in tvm.testing.enabled_targets():
-        for kind in ["graph", "debug"]:
-            op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                np_cond, np_x, np_y
-            )
-            tvm.testing.assert_allclose(op_res.numpy(), mx_out)
-
-
-@tvm.testing.uses_gpu
-def test_forward_arange():
-    def _mx_symbol(F, start, stop, step):
-        if start is None and step is None:
-            sym = F.arange(stop)
-        elif start is None:
-            sym = F.arange(stop, step=step)
-        elif step is None:
-            sym = F.arange(start, stop)
-        else:
-            sym = F.arange(start, stop, step)
-        return sym
-
-    def verify(start, stop, step):
-        ref_res = _mx_symbol(mx.nd, start, stop, step)
-        mx_sym = _mx_symbol(mx.sym, start, stop, step)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(
-                    kind, mod=mod, device=dev, target=target
-                ).evaluate()()
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify(0, 20, None)
-    verify(0, 20, 2)
-    verify(1, 20, None)
-    verify(1, 20, 2)
-    verify(1, 20, 1.5)
-    verify(1, 20.5, None)
-    verify(1, 20, 3)
-    verify(20, 1, -1)
-    verify(20, 1, -1.5)
-
-
-def _mx_symbol(F, op_name, inputs):
-    op = getattr(F, op_name)
-    return op(*inputs)
-
-
-@tvm.testing.uses_gpu
-def test_forward_broadcast_ops():
-    for op in [
-        "broadcast_add",
-        "broadcast_plus",
-        "broadcast_sub",
-        "broadcast_minus",
-        "broadcast_mul",
-        "broadcast_div",
-        "broadcast_mod",
-        "broadcast_maximum",
-        "broadcast_minimum",
-        "broadcast_equal",
-        "broadcast_not_equal",
-        "broadcast_greater",
-        "broadcast_greater_equal",
-        "broadcast_lesser",
-        "broadcast_lesser_equal",
-        "broadcast_power",
-        "broadcast_logical_or",
-        "broadcast_logical_and",
-        "broadcast_logical_xor",
-    ]:
-        a_shape = (3, 4, 5)
-        b_shape = (4, 5)
-        if op == "broadcast_mod":
-            dtype = "int32"
-            a_np = np.random.randint(1, 100, size=a_shape).astype(dtype)
-            b_np = np.random.randint(1, 100, size=b_shape).astype(dtype)
-        else:
-            dtype = "float32"
-            a_np = np.random.uniform(size=a_shape).astype(dtype)
-            b_np = np.random.uniform(size=b_shape).astype(dtype)
-        mx_sym = _mx_symbol(mx.sym, op, [mx.sym.var("a"), mx.sym.var("b")])
-        ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(a_np), mx.nd.array(b_np)])
-        shapes = {"a": a_shape, "b": b_shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    a_np, b_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-
-@tvm.testing.uses_gpu
-def test_forward_elemwise_ops():
-    for op in [
-        "elemwise_add",
-        "elemwise_sub",
-        "elemwise_mul",
-        "elemwise_div",
-        "maximum",
-        "minimum",
-        operator.lt,
-        operator.le,
-        operator.eq,
-        operator.ne,
-        operator.gt,
-        operator.ge,
-    ]:
-        shape = (3, 4, 5)
-        dtype = "float32"
-        a_np = np.random.uniform(size=shape).astype(dtype)
-        b_np = np.random.uniform(size=shape).astype(dtype)
-        if type(op) == str:
-            mx_sym = _mx_symbol(mx.sym, op, [mx.sym.var("a"), mx.sym.var("b")])
-            ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(a_np), mx.nd.array(b_np)])
-        else:
-            mx_sym = op(mx.sym.var("a"), mx.sym.var("b"))
-            ref_res = op(mx.nd.array(a_np), mx.nd.array(b_np))
-        shapes = {"a": shape, "b": shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    a_np, b_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-
-@tvm.testing.uses_gpu
-def test_forward_softmin():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.softmin(data)
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 3, 100, 100))
-
-    mx_sym = mx.sym.softmin(data, axis=2)
-    verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 3, 100, 100))
-
-
-@tvm.testing.uses_gpu
-def test_forward_unary_ops():
-    for op in [
-        "abs",
-        "sqrt",
-        "ceil",
-        "floor",
-        "round",
-        "reciprocal",
-        "trunc",
-        "softsign",
-        "hard_sigmoid",
-        "cos",
-        "sin",
-        "tan",
-        "cosh",
-        "sinh",
-        "tanh",
-        "arccos",
-        "arcsin",
-        "arctan",
-        "arccosh",
-        "arcsinh",
-        "arctanh",
-    ]:
-        shape = (1, 3, 4, 5)
-        dtype = "float32"
-        a_np = np.random.uniform(size=shape).astype(dtype)
-        mx_sym = _mx_symbol(mx.sym, op, [mx.sym.var("a")])
-        ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(a_np)])
-        shapes = {"a": shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    a_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5, atol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_forward_scalar_ops():
-    for op in [
-        operator.add,
-        operator.sub,
-        operator.mul,
-        operator.truediv,
-        operator.pow,
-        operator.lt,
-        operator.le,
-        operator.eq,
-        operator.ne,
-        operator.gt,
-        operator.ge,
-    ]:
-        dtype = "float32"
-        a_shape = (3, 4, 5)
-        a_np = np.random.uniform(size=a_shape).astype(dtype)
-        b_scalar = 2.3
-        mx_sym = op(mx.sym.var("a"), b_scalar)
-        ref_res = op(mx.nd.array(a_np), b_scalar)
-        shapes = {"a": a_shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    a_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-    for op in ["maximum", "minimum"]:
-        dtype = "float32"
-        a_shape = (3, 4, 5)
-        a_np = np.random.uniform(size=a_shape).astype(dtype)
-        b_scalar = 2.3
-        mx_sym = _mx_symbol(mx.sym, op, [mx.sym.var("a"), b_scalar])
-        ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(a_np), b_scalar])
-        shapes = {"a": a_shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    a_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-
-@tvm.testing.uses_gpu
-def test_forward_slice_axis():
-    def verify(shape, axis, begin, end):
-        data_np = np.random.uniform(size=shape).astype("float32")
-        ref_res = mx.nd.slice_axis(mx.nd.array(data_np), axis, begin, end)
-        mx_sym = mx.sym.slice_axis(mx.sym.var("data"), axis, begin, end)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    data_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((3, 4), 0, 1, 2)
-    verify((3, 4), 0, 1, None)
-    verify((3, 4), 1, 0, 2)
-    verify((3, 4), 1, -3, -1)
-    verify((3, 4), -1, -3, -1)
-
-
-@tvm.testing.uses_gpu
-def test_forward_slice_like():
-    def verify(x_shape, y_shape, axes):
-        x_np = np.random.uniform(size=x_shape).astype("float32")
-        y_np = np.random.uniform(size=y_shape).astype("float32")
-        if axes is None:
-            ref_res = mx.nd.slice_like(mx.nd.array(x_np), mx.nd.array(y_np))
-            mx_sym = mx.sym.slice_like(mx.sym.var("x"), mx.sym.var("y"))
-        else:
-            ref_res = mx.nd.slice_like(mx.nd.array(x_np), mx.nd.array(y_np), axes=axes)
-            mx_sym = mx.sym.slice_like(mx.sym.var("x"), mx.sym.var("y"), axes=axes)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": x_shape, "y": y_shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np, y_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((3, 4), (2, 3), None)
-    verify((3, 4), (2, 3), (0, 1))
-    verify((3, 4), (2, 3), (0))
-    verify((3, 4), (2, 3), (-1))
-
-
-@tvm.testing.uses_gpu
-def test_forward_sequence_reverse():
-    def verify(shape, seq_lengths, use_seq_lengths, seq_axis):
-        data_np = np.random.uniform(size=shape).astype("float32")
-
-        ref_res_args = [mx.nd.array(data_np), None, use_seq_lengths, seq_axis]
-        mx_sym_args = [mx.sym.var("data"), None, use_seq_lengths, seq_axis]
-        from_mxnet_args = [{"data": shape}, {"data": "float32"}]
-        in_data = [data_np]
-
-        if use_seq_lengths and seq_lengths:
-            seq_lengths_np = np.array(seq_lengths).astype("int32")
-            ref_res_args[1] = mx.nd.array(seq_lengths_np)
-            mx_sym_args[1] = mx.sym.var("seq_lengths")
-            from_mxnet_args[0].update({"seq_lengths": seq_lengths_np.shape})
-            from_mxnet_args[1].update({"seq_lengths": "int32"})
-            in_data.append(seq_lengths_np)
-
-        ref_res = mx.nd.SequenceReverse(*ref_res_args)
-        mx_sym = mx.sym.SequenceReverse(*mx_sym_args)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, *from_mxnet_args)
-
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    *in_data
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((3, 4), [1, 2, 3, 1], True, 0)
-    verify((3, 4), None, False, 0)
-    verify((3, 5, 5, 6), [1, 2, 3, 1, 3], True, 0)
-    # MXNet accepts axis value as 0 only
-    # verify((3, 4, 5, 6), None, False, 2)
-
-
-@tvm.testing.uses_gpu
-def test_forward_l2_normalize():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.L2Normalization(data, mode="channel")
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4, 5), (2, 3, 4, 5))
-
-    mx_sym = mx.sym.L2Normalization(data, mode="instance")
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4, 5), (2, 3, 4, 5))
-
-    mx_sym = mx.sym.L2Normalization(data, mode="spatial")
-    verify_mxnet_frontend_impl(mx_sym, (2, 3, 4, 5), (2, 3, 4, 5))
-
-
-@tvm.testing.uses_gpu
-def test_forward_logistic_regression_output():
-    data_shape = (1, 10)
-    dtype = "float32"
-    data_np = np.random.uniform(size=data_shape).astype(dtype)
-    label_np = np.random.uniform(size=data_shape).astype(dtype)
-    mx_sym = mx.symbol.LogisticRegressionOutput(mx.sym.var("data"), mx.sym.var("label"))
-    ref_res = mx.nd.LogisticRegressionOutput(mx.nd.array(data_np), mx.nd.array(label_np))
-    shapes = {"data": data_shape}
-    mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-    for target, dev in tvm.testing.enabled_targets():
-        for kind in ["graph", "debug"]:
-            op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                data_np
-            )
-            tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-
-@tvm.testing.uses_gpu
-def test_forward_dot():
-    def verify(a_shape, b_shape, transpose_b=False):
-        dtype = "float32"
-        a_np = np.random.uniform(size=a_shape).astype(dtype)
-        b_np = np.random.uniform(size=b_shape).astype(dtype)
-        mx_sym = mx.symbol.dot(mx.sym.var("a"), mx.sym.var("b"), transpose_b=transpose_b)
-        ref_res = mx.nd.dot(mx.nd.array(a_np), mx.nd.array(b_np), transpose_b=transpose_b)
-        shapes = {"a": a_shape, "b": b_shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    a_np, b_np
-                )
-                tvm.testing.assert_allclose(
-                    op_res.numpy(), ref_res.asnumpy(), rtol=1e-05, atol=1e-05
-                )
-
-    verify((1, 256), (256, 1))
-    verify((1, 256), (1, 256), transpose_b=True)
-    verify((5,), (5,))
-    verify((3,), (3, 5))
-    verify((3,), (5, 3), transpose_b=True)
-    verify((3,), (3, 5, 3, 5))
-    verify((3,), (5, 5, 3, 3), transpose_b=True)
-    verify((10, 1), (1,))
-    verify((1, 1), (4, 3, 2, 1), transpose_b=True)
-    verify((4, 3, 2, 1), (1,))
-    verify((1, 2, 3, 4), (1, 4), transpose_b=True)
-    verify((4, 1, 1), (1, 2, 3))
-    verify((1, 1, 4), (2, 3, 4), transpose_b=True)
-
-
-@tvm.testing.uses_gpu
-def test_forward_shape_array():
-    def verify(shape):
-        x_np = np.random.uniform(size=shape).astype("float32")
-        ref_res = mx.nd.shape_array(mx.nd.array(x_np))
-        mx_sym = mx.sym.shape_array(mx.sym.var("x"))
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((1,))
-    verify((3, 4, 5))
-    verify((3, 4, 5, 6))
-
-
-@tvm.testing.uses_gpu
-def test_forward_squeeze():
-    def verify(shape, axis):
-        x_np = np.random.uniform(size=shape).astype("float32")
-        if axis is None:
-            ref_res = mx.nd.squeeze(mx.nd.array(x_np))
-            mx_sym = mx.sym.squeeze(mx.sym.var("x"))
-        else:
-            ref_res = mx.nd.squeeze(mx.nd.array(x_np), axis=axis)
-            mx_sym = mx.sym.squeeze(mx.sym.var("x"), axis=axis)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((1, 3, 1), None)
-    verify((1, 3, 1), 0)
-    verify((1, 3, 1), 2)
-    verify((1, 3, 1), (0, 2))
-
-
-@tvm.testing.uses_gpu
-def test_forward_broadcast_axis():
-    def verify(shape, axis, size):
-        x_np = np.random.uniform(size=shape).astype("float32")
-        for op in ["broadcast_axis", "broadcast_axes"]:
-            mx_sym = _mx_symbol(mx.sym, op, [mx.sym.var("x"), axis, size])
-            ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(x_np), axis, size])
-            mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-            for target, dev in tvm.testing.enabled_targets():
-                for kind in ["graph", "debug"]:
-                    op_res = relay.create_executor(
-                        kind, mod=mod, device=dev, target=target
-                    ).evaluate()(x_np)
-                    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((1, 2, 1), 2, 3)
-    verify((1, 2, 1), (0, 2), (2, 3))
-
-
-@tvm.testing.uses_gpu
-def test_forward_broadcast_to():
-    def verify(input_shape, shape):
-        x_np = np.random.uniform(size=input_shape).astype("float32")
-        ref_res = mx.nd.broadcast_to(mx.nd.array(x_np), shape=shape)
-        mx_sym = mx.sym.broadcast_to(mx.sym.var("x"), shape=shape)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": input_shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((1, 2, 3), (3, 2, 3))
-    verify((4, 1, 32, 32), (4, 8, 32, 32))
-
-
-@tvm.testing.uses_gpu
-def test_forward_broadcast_like():
-    def verify(input_shape, like_shape):
-        x_np = np.random.uniform(size=input_shape).astype("float32")
-        y_np = np.random.uniform(size=like_shape).astype("float32")
-        ref_res = mx.nd.broadcast_like(mx.nd.array(x_np), mx.nd.array(y_np))
-        mx_sym = mx.sym.broadcast_like(mx.sym.var("x"), mx.sym.var("y"))
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": input_shape, "y": like_shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np, y_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((1, 2, 3), (3, 2, 3))
-    verify((4, 1, 32, 32), (4, 8, 32, 32))
-
-
-@tvm.testing.uses_gpu
-def test_forward_logical_not():
-    a_shape = (3, 4, 5)
-    dtype = "float32"
-    a_np = np.random.uniform(size=a_shape).astype(dtype)
-    mx_sym = mx.sym.logical_not(mx.sym.var("a"))
-    ref_res = mx.nd.logical_not(mx.nd.array(a_np))
-    shapes = {"a": a_shape}
-    mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-    for target, dev in tvm.testing.enabled_targets():
-        for kind in ["graph", "debug"]:
-            op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                a_np
-            )
-            tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-
-@tvm.testing.uses_gpu
-def test_forward_full():
-    def verify(val, shape, dtype):
-        dev = mx.cpu()
-        ref_res = mx.nd.full(shape, val, dtype=dtype)
-        mx_sym = mx.sym.full(shape, val, dtype=dtype)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {})
-        for target, dev in tvm.testing.enabled_targets():
-            # Skip testing graph executor because this op will be optimized out
-            # by constant folding.
-            for kind in ["debug"]:
-                op_res = relay.create_executor(
-                    kind, mod=mod, device=dev, target=target
-                ).evaluate()()
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify(2, (3, 4), "float32")
-    verify(2, (3, 4), "int32")
-    verify(3.5, (1, 3, 4), "float32")
-
-
-@tvm.testing.uses_gpu
-def test_forward_embedding():
-    def verify(data_shape, weight_shape):
-        in_dim, out_dim = weight_shape
-        x_np = np.random.randint(0, weight_shape[0], size=data_shape).astype("float32")
-        w_np = np.random.uniform(size=weight_shape).astype("float32")
-        ref_res = mx.nd.Embedding(
-            mx.nd.array(x_np), mx.nd.array(w_np), input_dim=in_dim, output_dim=out_dim
-        )
-        mx_sym = mx.sym.Embedding(
-            mx.sym.var("x"), mx.sym.var("w"), input_dim=in_dim, output_dim=out_dim
-        )
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": data_shape, "w": weight_shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x=x_np, w=w_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((2, 2), (4, 5))
-    verify((2, 3, 4), (4, 5))
-
-
-@tvm.testing.uses_gpu
-def test_forward_smooth_l1():
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.smooth_l1(data)
-    verify_mxnet_frontend_impl(mx_sym, (3, 4), (3, 4))
-    mx_sym = mx.sym.smooth_l1(data, scalar=1.0)
-    verify_mxnet_frontend_impl(mx_sym, (3, 4), (3, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_take():
-    def verify(shape, indices_src, axis, mode="clip"):
-        x_np = np.random.uniform(size=shape).astype("float32")
-        indices_np = np.array(indices_src, dtype="float32")
-        ref_res = mx.nd.take(mx.nd.array(x_np), mx.nd.array(indices_np), axis, mode)
-        mx_sym = mx.sym.take(mx.sym.var("x"), mx.sym.var("y"), axis, mode)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape, "y": indices_np.shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np, indices_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((2, 2), [[[1, 0], [0, 1]]], 0)
-    verify((2, 2), [[[1, 0], [0, 1]]], 1)
-    verify((4, 3, 5, 6), [[2, 1, 0, 0]], -2)
-    verify((3, 4), [-1, 5], 0)
-    verify((3, 4), [-1, 5], 0, mode="wrap")
-    verify((3, 4), [-1, 5], 1)
-    verify((3, 4), [-1, 5], 1, mode="wrap")
-
-
-@tvm.testing.uses_gpu
-def test_forward_gather_nd():
-    def verify(xshape, yshape, y_data, error=False):
-        x_data = np.random.uniform(size=xshape).astype("float32")
-        ref_res = mx.nd.gather_nd(mx.nd.array(x_data), mx.nd.array(y_data))
-        mx_sym = mx.sym.gather_nd(mx.sym.var("x_data"), mx.sym.var("y_data"))
-        mod, _ = relay.frontend.from_mxnet(
-            mx_sym, {"x_data": xshape, "y_data": yshape}, {"x_data": "float32", "y_data": "int32"}
-        )
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_data, y_data
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((2, 2), (2, 3), [[1, 1, 0], [0, 1, 0]])
-    verify((2, 2, 2), (2, 2), [[0, 1], [1, 0]])
-    verify((3, 2, 2), (2, 2), [[0, 1], [1, 0]])
-    verify((3, 2), (2, 2, 3), [[[0, 1, 2], [2, 0, 1]], [[0, 0, 0], [1, 1, 1]]])
-    verify((1, 4), (1, 1), [[0]])
-
-
-@tvm.testing.uses_gpu
-def test_forward_bilinear_resize():
-    # add tests including scale_height and scale_width when mxnet is updated to version 1.5
-    data = mx.sym.var("data")
-    mx_sym = mx.sym.contrib.BilinearResize2D(data, height=5, width=10)
-    verify_mxnet_frontend_impl(mx_sym, (1, 2, 3, 4), (1, 2, 5, 10))
-
-
-@tvm.testing.uses_gpu
-def test_forward_grid_generator():
-    def verify(shape, transform_type, target_shape):
-        x = np.random.uniform(size=shape).astype("float32")
-        ref_res = mx.nd.GridGenerator(mx.nd.array(x), transform_type, target_shape)
-        mx_sym = mx.sym.GridGenerator(mx.sym.var("x"), transform_type, target_shape)
-        shape_dict = {"x": x.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5, atol=1e-5)
-
-    verify((4, 6), "affine", (16, 32))
-    verify((4, 2, 16, 16), "warp", None)
-    verify((1, 2, 16, 16), "warp", None)
-
-
-@tvm.testing.uses_gpu
-def test_forward_bilinear_sampler():
-    def verify(data_shape, grid_shape):
-        data = np.random.uniform(size=data_shape).astype("float32")
-        grid = np.random.uniform(low=-1.5, high=1.5, size=grid_shape).astype("float32")
-        ref_res = mx.nd.BilinearSampler(mx.nd.array(data), mx.nd.array(grid))
-        mx_sym = mx.sym.BilinearSampler(mx.sym.var("data"), mx.sym.var("grid"))
-        shape_dict = {"data": data.shape, "grid": grid.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    data, grid
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5, atol=1e-5)
-
-    verify((4, 4, 16, 32), (4, 2, 8, 8))
-    verify((4, 4, 16, 32), (4, 2, 32, 32))
-
-
-@tvm.testing.uses_gpu
-def test_forward_rnn_layer():
-    def verify(
-        mode,
-        seq_len,
-        input_size,
-        hidden_size,
-        num_layers,
-        batch=1,
-        init_states=True,
-        bidirectional=False,
-    ):
-        if mode == "rnn":
-            layer = gluon.rnn.RNN(hidden_size, num_layers, bidirectional=bidirectional)
-        elif mode == "gru":
-            layer = gluon.rnn.GRU(hidden_size, num_layers, bidirectional=bidirectional)
-        else:  # mode == "lstm"
-            layer = gluon.rnn.LSTM(hidden_size, num_layers, bidirectional=bidirectional)
-        num_states = 2 if mode == "lstm" else 1
-        layer.initialize()
-        layer.hybridize()
-
-        dtype = "float32"
-        directions = 2 if bidirectional else 1
-        data_np = np.random.uniform(size=(seq_len, batch, input_size)).astype(dtype)
-        data_mx = mx.nd.array(data_np)
-
-        if init_states:
-            shape_dict = {"data0": data_np.shape}
-            inputs = {"data0": data_np}
-            state_shape = (num_layers * directions, batch, hidden_size)
-            states_np = []
-            states_mx = []
-            for i in range(num_states):
-                s = np.random.uniform(size=state_shape).astype(dtype)
-                states_np.append(s)
-                states_mx.append(mx.nd.array(s))
-                shape_dict["data%s" % (i + 1)] = s.shape
-                inputs["data%s" % (i + 1)] = s
-            mx_out, mx_states = layer(data_mx, states_mx)
-            mx_res = [mx_out] + mx_states
-        else:
-            shape_dict = {"data": data_np.shape}
-            inputs = {"data": data_np}
-            mx_res = layer(data_mx)
-
-        mx_sym = layer._cached_graph[1]
-        mx_params = {}
-        for name, param in layer.collect_params().items():
-            mx_params[name] = param._reduce()
-
-        mod, params = relay.frontend.from_mxnet(mx_sym, shape=shape_dict, arg_params=mx_params)
-        for target, dev in tvm.testing.enabled_targets():
-            # only test graph executor because debug runtime is too slow
-            for kind in ["graph"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    **inputs, **params
-                )
-                if init_states:
-                    assert len(op_res) == len(mx_res)
-                    for i, val in enumerate(op_res):
-                        tvm.testing.assert_allclose(val.numpy(), mx_res[i].asnumpy(), rtol=1e-3)
-                else:
-                    tvm.testing.assert_allclose(op_res.numpy(), mx_res.asnumpy(), rtol=1e-3)
-
-    for mode in ["rnn", "gru", "lstm"]:
-        verify(mode, 1, 64, 64, 1)
-        verify(mode, 10, 64, 64, 2)
-        verify(mode, 10, 64, 32, 2)
-        verify(mode, 10, 64, 32, 2, batch=2)
-        verify(mode, 10, 32, 64, 1, bidirectional=True)
-        # The following two codeblocks need to be fixed for mxnet 1.5
-        # verify(mode, 10, 64, 64, 3, init_states=False)
-        # verify(mode, 10, 64, 64, 3, batch=2, bidirectional=True, init_states=False)
-
-
-@tvm.testing.uses_gpu
-def test_forward_Crop():
-    def verify(xshape, yshape, offset=None):
-        x_data = np.random.uniform(size=xshape).astype("float32")
-        y_data = np.random.uniform(size=yshape).astype("float32")
-        if offset is None:
-            mx_sym = mx.sym.Crop(mx.sym.var("x"), mx.sym.var("y"))
-            ref_res = mx.nd.Crop(mx.nd.array(x_data), mx.nd.array(y_data))
-        else:
-            mx_sym = mx.sym.Crop(mx.sym.var("x"), mx.sym.var("y"), offset=offset)
-            ref_res = mx.nd.Crop(mx.nd.array(x_data), mx.nd.array(y_data), offset=offset)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": xshape, "y": yshape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                func = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()
-                if offset is None or offset == (0, 0):
-                    op_res = func(x_data, y_data)
-                else:
-                    op_res = func(x_data)
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((1, 3, 40, 40), (1, 3, 20, 20))
-    verify((1, 3, 40, 40), (1, 3, 20, 20), (0, 0))
-    verify((1, 3, 40, 40), (1, 3, 20, 20), (10, 10))
-    verify((5, 32, 40, 40), (5, 32, 25, 25))
-    verify((5, 32, 40, 40), (5, 32, 25, 25), (5, 5))
-
-
-@tvm.testing.uses_gpu
-def test_forward_argsort():
-    def verify(shape, axis, is_ascend, dtype="float32"):
-        x_np = np.random.uniform(size=shape).astype("float32")
-        ref_res = mx.nd.argsort(mx.nd.array(x_np), axis=axis, is_ascend=is_ascend, dtype=dtype)
-        mx_sym = mx.sym.argsort(mx.sym.var("x"), axis=axis, is_ascend=is_ascend, dtype=dtype)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((2, 3, 4), axis=0, is_ascend=False)
-    verify((1, 4, 6), axis=1, is_ascend=True)
-    verify((3, 5, 6), axis=-3, is_ascend=False, dtype="int32")
-
-
-@tvm.testing.uses_gpu
-def test_forward_topk():
-    def verify(shape, k, axis, ret_type, is_ascend=None, dtype="float32"):
-        x_np = np.random.uniform(size=shape).astype("float32")
-        if is_ascend is None:
-            ref_res = mx.nd.topk(mx.nd.array(x_np), k=k, axis=axis, ret_typ=ret_type, dtype=dtype)
-            mx_sym = mx.sym.topk(mx.sym.var("x"), k=k, axis=axis, ret_typ=ret_type, dtype=dtype)
-        else:
-            ref_res = mx.nd.topk(
-                mx.nd.array(x_np),
-                k=k,
-                axis=axis,
-                ret_typ=ret_type,
-                is_ascend=is_ascend,
-                dtype=dtype,
-            )
-            mx_sym = mx.sym.topk(
-                mx.sym.var("x"), k=k, axis=axis, ret_typ=ret_type, is_ascend=is_ascend, dtype=dtype
-            )
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np
-                )
-                if isinstance(ref_res, list):
-                    assert len(op_res) == len(ref_res)
-                    for i, t in enumerate(op_res):
-                        tvm.testing.assert_allclose(t.numpy(), ref_res[i].asnumpy())
-                else:
-                    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((3, 4), k=1, axis=0, ret_type="both")
-    verify((3, 4), k=1, axis=-1, ret_type="indices")
-    verify((3, 5, 6), k=2, axis=2, ret_type="value", is_ascend=False)
-    verify((3, 5, 6), k=2, axis=1, ret_type="value", is_ascend=True)
-    verify((3, 5, 6), k=0, axis=2, ret_type="both", dtype="int32")
-
-
-@tvm.testing.uses_gpu
-def test_forward_sequence_mask():
-    def verify(shape, use_sequence_length, value, axis, dtype, itype):
-        data_np = np.random.uniform(size=shape).astype(dtype)
-        valid_length_np = np.random.randint(0, shape[axis], size=shape[1 - axis]).astype(itype)
-        if use_sequence_length:
-            ref_res = mx.nd.SequenceMask(
-                mx.nd.array(data_np, dtype=dtype),
-                sequence_length=mx.nd.array(valid_length_np, dtype=itype),
-                use_sequence_length=use_sequence_length,
-                value=value,
-                axis=axis,
-            )
-            mx_sym = mx.sym.SequenceMask(
-                mx.sym.var("data"),
-                sequence_length=mx.sym.var("valid_length"),
-                use_sequence_length=use_sequence_length,
-                value=value,
-                axis=axis,
-            )
-            mod, _ = relay.frontend.from_mxnet(
-                mx_sym,
-                {"data": shape, "valid_length": valid_length_np.shape},
-                dtype={"data": dtype, "valid_length": itype},
-            )
-        else:
-            ref_res = mx.nd.SequenceMask(
-                mx.nd.array(data_np, dtype=dtype),
-                use_sequence_length=use_sequence_length,
-                value=value,
-                axis=axis,
-            )
-            mx_sym = mx.sym.SequenceMask(
-                mx.sym.var("data"), use_sequence_length=use_sequence_length, value=value, axis=axis
-            )
-            mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": shape}, dtype={"data": dtype})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                if use_sequence_length is False and kind == "graph":
-                    # Disable the test for 'graph' when it's identity.
-                    continue
-                func = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()
-                if use_sequence_length:
-                    op_res = func(data_np, valid_length_np)
-                else:
-                    op_res = func(data_np)
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((5, 10), True, 0.0, 0, "float32", "float32")
-    verify((5, 4, 3), True, 1.0, 1, "float32", "float32")
-    verify((5, 4, 3), False, 1.0, 1, "float64", "float64")
-    verify((5, 4, 3, 2), True, 1.0, 0, "float32", "float32")
-
-
-@tvm.testing.uses_gpu
-def test_forward_contrib_div_sqrt_dim():
-    def verify(shape):
-        x_np = np.random.uniform(size=shape).astype("float32")
-        ref_res = mx.nd.contrib.div_sqrt_dim(mx.nd.array(x_np))
-        mx_sym = mx.sym.contrib.div_sqrt_dim(mx.sym.var("x"))
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify((3, 4))
-    verify((3, 4, 5))
-
-
-@tvm.testing.uses_gpu
-def test_forward_batch_norm():
-    def verify(shape, axis=1, fix_gamma=False):
-        x = np.random.uniform(size=shape).astype("float32")
-        gamma = np.random.uniform(size=(shape[axis])).astype("float32")
-        beta = np.random.uniform(size=(shape[axis])).astype("float32")
-        moving_mean = np.random.uniform(size=(shape[axis])).astype("float32")
-        moving_var = np.abs(np.random.uniform(size=(shape[axis])).astype("float32")) + 0.5
-        ref_res = mx.nd.BatchNorm(
-            mx.nd.array(x),
-            mx.nd.array(gamma),
-            mx.nd.array(beta),
-            mx.nd.array(moving_mean),
-            mx.nd.array(moving_var),
-            axis=axis,
-            use_global_stats=True,
-            fix_gamma=fix_gamma,
-        )
-        mx_sym = mx.sym.BatchNorm(
-            mx.sym.var("x"),
-            mx.sym.var("gamma"),
-            mx.sym.var("beta"),
-            mx.sym.var("mean"),
-            mx.sym.var("var"),
-            axis=axis,
-            use_global_stats=True,
-            fix_gamma=fix_gamma,
-        )
-
-        shape_dict = {
-            "x": x.shape,
-            "gamma": gamma.shape,
-            "beta": beta.shape,
-            "mean": moving_mean.shape,
-            "var": moving_var.shape,
-        }
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        # print(mod)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x, gamma, beta, moving_mean, moving_var
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3)
-
-    verify((2, 3, 4, 5))
-    verify((2, 3, 4, 5), axis=0)
-    verify((2, 3, 4, 5), axis=-1)
-    verify((2, 3, 4, 5), fix_gamma=True)
-
-
-@tvm.testing.uses_gpu
-def test_forward_instance_norm():
-    def verify(shape, axis=1, epsilon=1e-5):
-        x = np.random.uniform(size=shape).astype("float32")
-        gamma = np.random.uniform(size=(shape[axis])).astype("float32")
-        beta = np.random.uniform(size=(shape[axis])).astype("float32")
-        ref_res = mx.nd.InstanceNorm(mx.nd.array(x), mx.nd.array(gamma), mx.nd.array(beta), epsilon)
-        mx_sym = mx.sym.InstanceNorm(
-            mx.sym.var("x"), mx.sym.var("gamma"), mx.sym.var("beta"), epsilon
-        )
-        shape_dict = {"x": x.shape, "gamma": gamma.shape, "beta": beta.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x, gamma, beta
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=2e-5, atol=1e-5)
-
-    verify((2, 3, 4, 5))
-    verify((32, 64, 80, 64))
-    verify((8, 6, 5))
-    verify((8, 7, 6, 5, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_layer_norm():
-    def verify(shape, axis=-1):
-        x = np.random.uniform(size=shape).astype("float32")
-        gamma = np.random.uniform(size=(shape[axis])).astype("float32")
-        beta = np.random.uniform(size=(shape[axis])).astype("float32")
-        ref_res = mx.nd.LayerNorm(mx.nd.array(x), mx.nd.array(gamma), mx.nd.array(beta), axis=axis)
-        mx_sym = mx.sym.LayerNorm(
-            mx.sym.var("x"), mx.sym.var("gamma"), mx.sym.var("beta"), axis=axis
-        )
-        shape_dict = {"x": x.shape, "gamma": gamma.shape, "beta": beta.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x, gamma, beta
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify((2, 5))
-    verify((2, 5), axis=0)
-    verify((2, 5, 6))
-
-
-@tvm.testing.uses_gpu
-def test_forward_group_norm():
-    def verify(shape, num_groups=1):
-        x = np.random.uniform(size=shape).astype("float32")
-        gamma = np.random.uniform(size=(shape[1])).astype("float32")
-        beta = np.random.uniform(size=(shape[1])).astype("float32")
-        ref_res = mx.nd.GroupNorm(
-            data=mx.nd.array(x),
-            gamma=mx.nd.array(gamma),
-            beta=mx.nd.array(beta),
-            num_groups=num_groups,
-        )
-        mx_sym = mx.sym.GroupNorm(
-            mx.sym.var("x"), mx.sym.var("gamma"), mx.sym.var("beta"), num_groups=num_groups
-        )
-        shape_dict = {"x": x.shape, "gamma": gamma.shape, "beta": beta.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x, gamma, beta
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify((1, 4, 2), num_groups=4)
-    # TODO(trevmorr): MXNet GroupNorm implementation is bugged for cases when num_groups != num_channels
-    # https://github.com/apache/incubator-mxnet/pull/18199
-    # verify((1, 4, 2, 3), num_groups=2)
-    # verify((1, 4, 2, 3))
-
-
-@tvm.testing.uses_gpu
-def test_forward_one_hot():
-    def verify(indices_shape, depth, on_value, off_value, dtype):
-        x = np.random.randint(0, 5, size=indices_shape)
-        ref_res = mx.nd.one_hot(mx.nd.array(x), depth, on_value, off_value, dtype)
-        mx_sym = mx.sym.one_hot(mx.sym.var("x"), depth, on_value, off_value, dtype)
-        shape_dict = {"x": x.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x.astype("float32")
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify((3,), 3, 1, 0, "int32")
-    verify((3,), 3, 1.0, 0.0, "float32")
-    verify((2, 2), 5, 2, -2, "int32")
-    verify((2, 2), 5, 0.5, -0.5, "float32")
-    verify((3, 2, 4, 5), 6, 1, 0, "int32")
-    verify((3, 2, 4, 5), 6, 1.0, 0.0, "float32")
-
-
-@tvm.testing.uses_gpu
-def test_forward_pad():
-    def verify(data_shape, out_shape, mode, pad_width, constant_value=0.0):
-        data = mx.sym.var("data")
-        mx_sym = mx.sym.pad(data, mode=mode, pad_width=pad_width, constant_value=constant_value)
-        verify_mxnet_frontend_impl(mx_sym, data_shape=data_shape, out_shape=out_shape)
-
-    verify(
-        data_shape=(1, 1, 3, 5),
-        out_shape=(1, 1, 6, 12),
-        mode="constant",
-        pad_width=(0, 0, 0, 0, 1, 2, 3, 4),
-    )
-    verify(
-        data_shape=(1, 1, 3, 5),
-        out_shape=(1, 1, 6, 12),
-        mode="constant",
-        pad_width=(0, 0, 0, 0, 1, 2, 3, 4),
-        constant_value=3.0,
-    )
-    verify(
-        data_shape=(1, 1, 3, 5),
-        out_shape=(1, 1, 6, 12),
-        mode="edge",
-        pad_width=(0, 0, 0, 0, 1, 2, 3, 4),
-    )
-    verify(
-        data_shape=(1, 1, 3, 5),
-        out_shape=(1, 1, 6, 12),
-        mode="reflect",
-        pad_width=(0, 0, 0, 0, 1, 2, 3, 4),
-    )
-    verify(
-        data_shape=(1, 1, 3, 5, 7),
-        out_shape=(1, 1, 6, 12, 18),
-        mode="constant",
-        pad_width=(0, 0, 0, 0, 1, 2, 3, 4, 5, 6),
-    )
-    verify(
-        data_shape=(1, 1, 3, 5, 7),
-        out_shape=(1, 1, 6, 12, 18),
-        mode="constant",
-        pad_width=(0, 0, 0, 0, 1, 2, 3, 4, 5, 6),
-        constant_value=3.0,
-    )
-    verify(
-        data_shape=(1, 1, 3, 5, 7),
-        out_shape=(1, 1, 6, 12, 18),
-        mode="edge",
-        pad_width=(0, 0, 0, 0, 1, 2, 3, 4, 5, 6),
-    )
-    verify(
-        data_shape=(1, 1, 3, 5, 7),
-        out_shape=(1, 1, 6, 12, 18),
-        mode="reflect",
-        pad_width=(0, 0, 0, 0, 1, 2, 3, 4, 5, 6),
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_slice():
-    def verify(data_shape, out_shape, begin, end):
-        data = mx.sym.var("data")
-        mx_sym = mx.sym.slice(data, begin=begin, end=end)
-        verify_mxnet_frontend_impl(mx_sym, data_shape=data_shape, out_shape=out_shape)
-
-    verify(data_shape=(1, 1, 10), out_shape=(1, 1, 8), begin=(0, 0, 2), end=(1, 1, 10))
-    verify(
-        data_shape=(1, 1, 10), out_shape=(1, 1, 8), begin=(None, None, 2), end=(None, None, None)
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_convolution():
-    def verify(data_shape, kernel_size, stride, pad, num_filter, is_depthwise=False):
-        if is_depthwise:
-            groups = data_shape[1]
-            weight_shape = (
-                data_shape[1],
-                num_filter // groups,
-            ) + kernel_size
-        else:
-            groups = 1
-            weight_shape = (
-                num_filter,
-                data_shape[1],
-            ) + kernel_size
-        x = np.random.uniform(size=data_shape).astype("float32")
-        weight = np.random.uniform(size=weight_shape).astype("float32")
-        bias = np.random.uniform(size=num_filter).astype("float32")
-        ref_res = mx.nd.Convolution(
-            data=mx.nd.array(x),
-            weight=mx.nd.array(weight),
-            bias=mx.nd.array(bias),
-            kernel=kernel_size,
-            stride=stride,
-            pad=pad,
-            num_filter=num_filter,
-            num_group=groups,
-        )
-        mx_sym = mx.sym.Convolution(
-            mx.sym.var("x"),
-            mx.sym.var("weight"),
-            mx.sym.var("bias"),
-            kernel=kernel_size,
-            stride=stride,
-            pad=pad,
-            num_filter=num_filter,
-            num_group=groups,
-        )
-        shape_dict = {"x": x.shape, "weight": weight.shape, "bias": bias.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x, weight, bias
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3)
-
-    verify(data_shape=(1, 1, 1024 * 16), kernel_size=(17,), stride=(2,), pad=(8,), num_filter=4)
-    verify(data_shape=(20, 1, 1024 * 16), kernel_size=(17,), stride=(2,), pad=(8,), num_filter=4)
-    verify(data_shape=(1, 8, 1024 * 16), kernel_size=(17,), stride=(2,), pad=(8,), num_filter=4)
-    verify(data_shape=(20, 8, 1024 * 16), kernel_size=(17,), stride=(2,), pad=(8,), num_filter=4)
-    verify(data_shape=(1, 1, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
-    verify(data_shape=(20, 1, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
-    verify(data_shape=(1, 8, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
-    verify(data_shape=(20, 8, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
-    verify(
-        data_shape=(1, 8, 32, 32),
-        kernel_size=(3, 3),
-        stride=(1, 1),
-        pad=(1, 1),
-        num_filter=8,
-        is_depthwise=True,
-    )
-    verify(
-        data_shape=(1, 1, 16, 16, 16),
-        kernel_size=(3, 3, 3),
-        stride=(1, 1, 1),
-        pad=(1, 1, 1),
-        num_filter=2,
-    )
-    verify(
-        data_shape=(20, 1, 16, 16, 16),
-        kernel_size=(3, 3, 3),
-        stride=(1, 1, 1),
-        pad=(1, 1, 1),
-        num_filter=2,
-    )
-    verify(
-        data_shape=(1, 8, 16, 16, 16),
-        kernel_size=(3, 3, 3),
-        stride=(2, 2, 2),
-        pad=(1, 1, 1),
-        num_filter=2,
-    )
-    verify(
-        data_shape=(20, 8, 16, 16, 16),
-        kernel_size=(3, 3, 3),
-        stride=(1, 1, 1),
-        pad=(1, 1, 1),
-        num_filter=2,
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_deconvolution():
-    def verify(data_shape, kernel_size, stride, pad, num_filter):
-        weight_shape = (data_shape[1], num_filter) + kernel_size
-        x = np.random.uniform(size=data_shape).astype("float32")
-        weight = np.random.uniform(size=weight_shape).astype("float32")
-        bias = np.random.uniform(size=num_filter).astype("float32")
-        ref_res = mx.nd.Deconvolution(
-            data=mx.nd.array(x),
-            weight=mx.nd.array(weight),
-            bias=mx.nd.array(bias),
-            kernel=kernel_size,
-            stride=stride,
-            pad=pad,
-            num_filter=num_filter,
-            no_bias=False,
-        )
-        mx_sym = mx.sym.Deconvolution(
-            mx.sym.var("x"),
-            mx.sym.var("weight"),
-            mx.sym.var("bias"),
-            kernel=kernel_size,
-            stride=stride,
-            pad=pad,
-            num_filter=num_filter,
-            no_bias=False,
-        )
-        shape_dict = {"x": x.shape, "weight": weight.shape, "bias": bias.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x, weight, bias
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify(data_shape=(1, 1, 1024 * 16), kernel_size=(17,), stride=(2,), pad=(8,), num_filter=4)
-    verify(data_shape=(20, 1, 1024 * 16), kernel_size=(17,), stride=(2,), pad=(8,), num_filter=4)
-    verify(data_shape=(1, 8, 1024 * 16), kernel_size=(17,), stride=(2,), pad=(8,), num_filter=4)
-    verify(data_shape=(20, 8, 1024 * 16), kernel_size=(17,), stride=(2,), pad=(8,), num_filter=4)
-    verify(data_shape=(1, 1, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
-    verify(data_shape=(20, 1, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
-    verify(data_shape=(1, 8, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
-    verify(data_shape=(20, 8, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
-
-
-@tvm.testing.uses_gpu
-def test_forward_cond():
-    def verify(a_np, b_np):
-        a_nd, b_nd = mx.nd.array(a_np), mx.nd.array(b_np)
-        pred = a_nd * b_nd < 5
-        then_func = lambda: (a_nd + 5) * (b_nd + 5)
-        else_func = lambda: (a_nd - 5) * (b_nd - 5)
-        ref_res = mx.nd.contrib.cond(pred, then_func, else_func)
-
-        a_sym, b_sym = mx.sym.var("a"), mx.sym.var("b")
-        pred = a_sym * b_sym < 5
-        then_func = lambda: (a_sym + 5) * (b_sym + 5)
-        else_func = lambda: (a_sym - 5) * (b_sym - 5)
-        mx_sym = mx.sym.contrib.cond(pred, then_func, else_func)
-
-        shape_dict = {"a": a_np.shape, "b": b_np.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["debug", "vm"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    a_np, b_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3)
-
-    verify(np.asarray([1.0], "float32"), np.asarray([2.0], "float32"))
-    verify(np.asarray([4.0], "float32"), np.asarray([3.0], "float32"))
-
-
-@tvm.testing.uses_gpu
-def test_forward_amp_cast():
-    def verify(from_dtype, to_dtype):
-        from_np = np.random.uniform(size=(1, 3, 18)).astype(from_dtype)
-        x_var = mx.sym.var("x", dtype=from_dtype)
-        mx_sym = mx.sym.amp_cast(x_var, dtype=to_dtype)
-        shape_dict = {"x": (1, 3, 18)}
-        dtype_dict = {"x": from_dtype}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "vm", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    from_np
-                )
-                assert op_res.dtype == to_dtype, op_res.dtype
-                tvm.testing.assert_allclose(op_res.numpy(), from_np.astype(to_dtype))
-
-    verify("float32", "float16")
-    verify("float16", "float32")
-
-
-@tvm.testing.uses_gpu
-def test_forward_amp_multicast():
-    def verify(dtypes, cast_narrow, expected_dtype):
-        x_nps = [np.random.uniform(size=(1, 3, 18)).astype(dtype) for dtype in dtypes]
-        x_vars = [mx.sym.var(str(i), dtype=dtype) for i, dtype in enumerate(dtypes)]
-        mx_sym = mx.sym.amp_multicast(*x_vars, cast_narrow=cast_narrow, num_outputs=len(dtypes))
-        shape_dict = {}
-        dtype_dict = {}
-        for i, dtype in enumerate(dtypes):
-            shape_dict[str(i)] = (1, 3, 18)
-            dtype_dict[str(i)] = dtype
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "vm", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    *x_nps
-                )
-                for i, res in enumerate(op_res):
-                    assert res.dtype == expected_dtype, res.dtype
-                    tvm.testing.assert_allclose(res.numpy(), x_nps[i].astype(expected_dtype))
-
-    verify(["float32", "float16"], False, "float32")
-    verify(["float32", "float16"], True, "float16")
-    verify(["float32", "float32"], False, "float32")
-    verify(["float32", "float32"], True, "float32")
-    verify(["float16", "float16"], False, "float16")
-    verify(["float16", "float16"], True, "float16")
-
-
-@tvm.testing.uses_gpu
-def test_forward_unravel_index():
-    def verify(x, shape, dtype):
-        a_np = np.array(x).astype(dtype)
-        mx_sym = _mx_symbol(mx.sym, "unravel_index", [mx.sym.var("a"), shape])
-        ref_res = _mx_symbol(mx.nd, "unravel_index", [mx.nd.array(a_np), shape])
-        shapes = {"a": a_np.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "vm", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    a_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    for dtype in ["int32", "int64"]:
-        verify([0, 1, 2, 3], [2, 2], dtype)
-        verify([144, 13, 45], [6, 7, 10, 2], dtype)
-        verify([456], [6, 7, 10, 2], dtype)
-
-    # In below example, 5 is out of bound for array of size 4.
-    # MXNet implementation provides different result than TVM
-    # TVM implementation is inline with Tensorflow
-    # Ideally error should be thrown just like Numpy
-    # verify([0, 1, 2, 5], [2, 2], dtype)
-
-
-@tvm.testing.uses_gpu
-def test_forward_swap_axis():
-    def _verify_swap_axis(in_shape, out_shape, dim1, dim2):
-        data = mx.sym.var("data")
-        mx_sym = mx.sym.swapaxes(data, dim1, dim2)
-        verify_mxnet_frontend_impl(mx_sym, in_shape, out_shape)
-
-    _verify_swap_axis((4, 5), (5, 4), 0, 1)
-    _verify_swap_axis((2, 4, 4, 5), (2, 5, 4, 4), 1, 3)
-    # MXNet errors out when dim1 == dim2
-    # _verify_swap_axis((4, 5), (5, 4), 0, 0)
-
-
-@tvm.testing.uses_gpu
-def test_forward_depth_to_space():
-    def verify(shape, blocksize=2):
-        x = np.random.uniform(size=shape).astype("float32")
-        ref_res = mx.nd.depth_to_space(mx.nd.array(x), blocksize)
-        mx_sym = mx.sym.depth_to_space(mx.sym.var("x"), blocksize)
-        shape_dict = {
-            "x": x.shape,
-        }
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify((1, 18, 3, 3), 3)
-
-
-@tvm.testing.uses_gpu
-def test_forward_space_to_depth():
-    def verify(shape, blocksize=2):
-        x = np.random.uniform(size=shape).astype("float32")
-        ref_res = mx.nd.space_to_depth(mx.nd.array(x), blocksize)
-        mx_sym = mx.sym.space_to_depth(mx.sym.var("x"), blocksize)
-        shape_dict = {
-            "x": x.shape,
-        }
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify((1, 1, 9, 9), 3)
-
-
-@tvm.testing.uses_gpu
-def test_forward_correlation():
-    def verify(data_shape, kernel_size, max_displacement, stride1, stride2, pad_size, is_multiply):
-        data1 = np.random.uniform(size=data_shape).astype("float32")
-        data2 = np.random.uniform(size=data_shape).astype("float32")
-        ref_res = mx.nd.Correlation(
-            data1=mx.nd.array(data1),
-            data2=mx.nd.array(data2),
-            kernel_size=kernel_size,
-            max_displacement=max_displacement,
-            stride1=stride1,
-            stride2=stride2,
-            pad_size=pad_size,
-            is_multiply=is_multiply,
-        )
-        mx_sym = mx.sym.Correlation(
-            data1=mx.sym.var("data1"),
-            data2=mx.sym.var("data2"),
-            kernel_size=kernel_size,
-            max_displacement=max_displacement,
-            stride1=stride1,
-            stride2=stride2,
-            pad_size=pad_size,
-            is_multiply=is_multiply,
-        )
-        shape_dict = {"data1": data1.shape, "data2": data2.shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    data1, data2
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify(
-        (1, 3, 10, 10),
-        kernel_size=1,
-        max_displacement=4,
-        stride1=1,
-        stride2=1,
-        pad_size=4,
-        is_multiply=False,
-    )
-    verify(
-        (5, 1, 15, 15),
-        kernel_size=1,
-        max_displacement=5,
-        stride1=1,
-        stride2=1,
-        pad_size=5,
-        is_multiply=False,
-    )
-    verify(
-        (5, 1, 15, 15),
-        kernel_size=1,
-        max_displacement=5,
-        stride1=1,
-        stride2=1,
-        pad_size=5,
-        is_multiply=True,
-    )
-    verify(
-        (5, 1, 15, 15),
-        kernel_size=1,
-        max_displacement=10,
-        stride1=1,
-        stride2=2,
-        pad_size=10,
-        is_multiply=True,
-    )
-    verify(
-        (5, 1, 4, 4),
-        kernel_size=3,
-        max_displacement=1,
-        stride1=1,
-        stride2=1,
-        pad_size=2,
-        is_multiply=True,
-    )
-    verify(
-        (5, 1, 4, 4),
-        kernel_size=3,
-        max_displacement=1,
-        stride1=2,
-        stride2=1,
-        pad_size=2,
-        is_multiply=True,
-    )
-    verify(
-        (5, 1, 4, 4),
-        kernel_size=3,
-        max_displacement=1,
-        stride1=2,
-        stride2=1,
-        pad_size=2,
-        is_multiply=False,
-    )
-    verify(
-        (5, 1, 6, 4),
-        kernel_size=3,
-        max_displacement=1,
-        stride1=2,
-        stride2=1,
-        pad_size=2,
-        is_multiply=False,
-    )
-    verify(
-        (5, 1, 11, 11),
-        kernel_size=5,
-        max_displacement=1,
-        stride1=1,
-        stride2=1,
-        pad_size=2,
-        is_multiply=False,
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_arange_like():
-    def verify(data_shape, start=None, step=None, axis=None):
-        attrs = {}
-        if start is not None:
-            attrs["start"] = start
-        if step is not None:
-            attrs["step"] = step
-        if axis is not None:
-            attrs["axis"] = axis
-        data = mx.sym.var("data")
-        data_np = np.random.uniform(size=data_shape).astype("float32")
-        ref_res = mx.nd.contrib.arange_like(mx.nd.array(data_np), **attrs)
-
-        mx_sym = mx.sym.contrib.arange_like(data, **attrs)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph"]:
-                op_res = relay.create_executor(
-                    kind, mod=mod, device=dev, target=target
-                ).evaluate()()
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy())
-
-    verify(data_shape=(3,), start=0.0, step=1.0)
-    verify(data_shape=(3, 4, 5), start=0.0, step=1.0)
-    verify(data_shape=(3, 4, 5), start=0.0, step=1.0, axis=-1)
-    verify(data_shape=(3, 4, 5), start=2.0, step=3.0, axis=1)
-
-
-@tvm.testing.uses_gpu
-def test_forward_interleaved_matmul_selfatt_qk():
-    def verify(batch, seq_length, num_heads, head_dim):
-        data_shape = (seq_length, batch, num_heads * head_dim * 3)
-        data = mx.sym.var("data")
-        data_np = np.random.uniform(size=data_shape).astype("float32")
-        ref_res = mx.nd.contrib.interleaved_matmul_selfatt_qk(mx.nd.array(data_np), heads=num_heads)
-
-        mx_sym = mx.sym.contrib.interleaved_matmul_selfatt_qk(data, heads=num_heads)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    data_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-    verify(1, 10, 3, 16)
-    verify(3, 10, 6, 8)
-
-
-@tvm.testing.uses_gpu
-def test_forward_interleaved_matmul_selfatt_valatt():
-    def verify(batch, seq_length, num_heads, head_dim):
-        data_shape = (seq_length, batch, num_heads * head_dim * 3)
-        weight_shape = (batch * num_heads, seq_length, seq_length)
-        data = mx.sym.var("data")
-        weight = mx.sym.var("weight")
-        data_np = np.random.uniform(size=data_shape).astype("float32")
-        weight_np = np.random.uniform(size=weight_shape).astype("float32")
-        ref_res = mx.nd.contrib.interleaved_matmul_selfatt_valatt(
-            mx.nd.array(data_np), mx.nd.array(weight_np), heads=num_heads
-        )
-
-        mx_sym = mx.sym.contrib.interleaved_matmul_selfatt_valatt(data, weight, heads=num_heads)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape, "weight": weight_shape})
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    data=data_np, weight=weight_np
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-    verify(1, 10, 4, 16)
-    verify(3, 10, 6, 8)
-
-
-@tvm.testing.uses_gpu
-def test_forward_box_nms():
-    def verify(
-        data_shape,
-        overlap_thresh=0.5,
-        valid_thresh=0,
-        topk=1,
-        coord_start=2,
-        score_index=1,
-        id_index=0,
-        force_suppress=False,
-        in_format="corner",
-    ):
-        dtype = "float32"
-        data = np.random.uniform(low=0, high=1, size=data_shape).astype(dtype)
-        ref_res = mx.nd.contrib.box_nms(
-            mx.nd.array(data),
-            overlap_thresh=overlap_thresh,
-            valid_thresh=valid_thresh,
-            topk=topk,
-            coord_start=coord_start,
-            score_index=score_index,
-            id_index=id_index,
-            force_suppress=force_suppress,
-            background_id=-1,
-            in_format=in_format,
-            out_format=in_format,
-        )
-        mx_sym = mx.sym.contrib.box_nms(
-            mx.sym.var("data"),
-            overlap_thresh=overlap_thresh,
-            valid_thresh=valid_thresh,
-            topk=topk,
-            coord_start=coord_start,
-            score_index=score_index,
-            id_index=id_index,
-            force_suppress=force_suppress,
-            background_id=-1,
-            in_format=in_format,
-            out_format=in_format,
-        )
-        shape_dict = {"data": data_shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            if tvm.contrib.thrust.can_use_thrust(
-                tvm.target.Target(target + " -libs=thrust"), "tvm.contrib.thrust.sort"
-            ):
-                target += " -libs=thrust"
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    data
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify((1, 10, 6))
-    # No valid boxes
-    verify((1, 10, 6), valid_thresh=1)
-
-
-@tvm.testing.uses_gpu
-def test_forward_box_decode():
-    def verify(data_shape, anchor_shape, stds=[1, 1, 1, 1], clip=-1, in_format="corner"):
-        dtype = "float32"
-        data = np.random.uniform(low=-2, high=2, size=data_shape).astype(dtype)
-        anchors = np.random.uniform(low=-2, high=2, size=anchor_shape).astype(dtype)
-        ref_res = mx.nd.contrib.box_decode(
-            mx.nd.array(data),
-            mx.nd.array(anchors),
-            stds[0],
-            stds[1],
-            stds[2],
-            stds[3],
-            clip,
-            in_format,
-        )
-        mx_sym = mx.sym.contrib.box_decode(
-            mx.sym.var("data"),
-            mx.sym.var("anchors"),
-            stds[0],
-            stds[1],
-            stds[2],
-            stds[3],
-            clip,
-            in_format,
-        )
-        shape_dict = {"data": data_shape, "anchors": anchor_shape}
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    data, anchors
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify((1, 10, 4), (1, 10, 4))
-    verify((4, 10, 4), (1, 10, 4))
-    verify((1, 10, 4), (1, 10, 4), stds=[2, 3, 0.5, 1.5])
-    verify((1, 10, 4), (1, 10, 4), clip=1)
-    verify((1, 10, 4), (1, 10, 4), in_format="center")
-
-
-@tvm.testing.uses_gpu
-def test_forward_softmax():
-    def verify(data_shape, axis, use_length, length):
-        dtype = "float32"
-        x = np.random.uniform(low=-100, high=100, size=data_shape).astype(dtype)
-        if use_length:
-            ref_res = mx.nd.softmax(
-                data=mx.nd.array(x),
-                length=mx.nd.array(length, dtype="int32"),
-                axis=axis,
-                use_length=use_length,
-            )
-            mx_sym = mx.symbol.softmax(
-                data=mx.sym.var("data"),
-                length=mx.sym.var("length"),
-                axis=axis,
-                use_length=use_length,
-            )
-            shape_dict = {"data": data_shape, "length": (length.shape)}
-            dtype_dict = {"data": dtype, "length": "int32"}
-            mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype_dict)
-        else:
-            ref_res = mx.nd.softmax(data=mx.nd.array(x), axis=axis)
-            mx_sym = mx.symbol.softmax(data=mx.sym.var("data"), axis=axis)
-            shape_dict = {"data": data_shape}
-            mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-
-        for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                func = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()
-                if use_length:
-                    op_res = func(x, length)
-                else:
-                    op_res = func(x)
-
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-5)
-
-    verify((2, 3, 5), -1, False, None)
-    verify((2, 3, 5), 2, False, None)
-    verify((2, 3), -1, True, np.array([2, 1]).astype("int32"))
-    verify((2, 3, 4), -1, True, np.array([[3, 4, 2], [2, 1, 1]]).astype("int32"))
-    verify((2, 3, 4), 2, True, np.array([[3, 4, 2], [1, 2, 1]]).astype("int32"))
-
-
-@pytest.mark.skipif(not hasattr(mx.sym.np, "pad"), reason="mx.sym.np.pad hasn't been publish yet")
-@pytest.mark.parametrize(
-    "data_shape, pad_width",
-    [
-        ((1, 1, 3, 5), ((0, 0), (0, 0), (1, 2), (3, 4))),
-        ((1, 1, 3, 5, 7), ((0, 0), (0, 0), (1, 2), (3, 4), (5, 6))),
-    ],
-)
-@pytest.mark.parametrize("mode", ["constant", "edge", "reflect"])
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32"])
-@pytest.mark.parametrize("constant_value", [0.0, 3.0])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_npi_pad(data_shape, pad_width, mode, dtype, constant_value, target, dev, kind):
-    data_np = np.random.uniform(size=data_shape).astype(dtype)
-    data = mx.sym.var("data")
-    if mode == "constant":
-        ref_res = np.pad(data_np, mode=mode, pad_width=pad_width, constant_values=constant_value)
-        mx_sym = mx.sym.np.pad(
-            data.as_np_ndarray(), mode=mode, pad_width=pad_width, constant_values=constant_value
-        )
-    else:
-        ref_res = np.pad(data_np, mode=mode, pad_width=pad_width)
-        mx_sym = mx.sym.np.pad(data.as_np_ndarray(), mode=mode, pad_width=pad_width)
-    mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape}, dtype=dtype)
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(data_np)
-    tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
-
-
-@pytest.mark.skipif(
-    not hasattr(mx.sym.np, "pad"), reason="test'll abort with Mxnet 1.x, skip for now"
-)
-@pytest.mark.parametrize("data_shape", [(2, 2, 2), (2, 7, 2)])
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32", "bool"])
-@pytest.mark.parametrize("axes", [(1, 0, 2), None])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_npi_transpose(data_shape, axes, dtype, target, dev, kind):
-    data_np = np.random.uniform(size=data_shape).astype(dtype)
-    data = mx.sym.var("data")
-    ref_res = mx.np.transpose(mx.np.array(data_np), axes=axes)
-    mx_sym = mx.sym.np.transpose(data.as_np_ndarray(), axes=axes)
-    mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape}, dtype=dtype)
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(data_np)
-    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.parametrize(
-    "data_shape1, data_shape2, axis",
-    [
-        ((2, 2), (2, 2), 1),
-        ((2, 4), (2, 3), 1),
-        ((1, 3, 2), (1, 3, 5), 2),
-        ((1, 3, 3), (1, 3, 3), 1),
-        ((1, 3), (1, 3), 0),
-    ],
-)
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32"])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_npi_concatenate(data_shape1, data_shape2, axis, dtype, target, dev, kind):
-    data_np1 = np.random.uniform(size=data_shape1).astype(dtype)
-    data_np2 = np.random.uniform(size=data_shape2).astype(dtype)
-    data1 = mx.sym.var("data1")
-    data2 = mx.sym.var("data2")
-    ref_res = mx.np.concatenate([mx.np.array(data_np1), mx.np.array(data_np2)], axis=axis)
-    mx_sym = mx.sym.np.concatenate([data1.as_np_ndarray(), data2.as_np_ndarray()], axis=axis)
-    mod, _ = relay.frontend.from_mxnet(
-        mx_sym, shape={"data1": data_shape1, "data2": data_shape2}, dtype=dtype
-    )
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-        data_np1, data_np2
-    )
-    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.parametrize(
-    "data_shape1, data_shape2, axis",
-    [
-        ((3,), (3,), 0),
-        ((3,), (3,), -1),
-        ((1, 3, 2), (1, 3, 2), 2),
-        ((1, 3, 3), (1, 3, 3), 1),
-        ((1, 3), (1, 3), 0),
-    ],
-)
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32"])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_npi_stack(data_shape1, data_shape2, axis, dtype, target, dev, kind):
-    data_np1 = np.random.uniform(size=data_shape1).astype(dtype)
-    data_np2 = np.random.uniform(size=data_shape2).astype(dtype)
-    data1 = mx.sym.var("data1")
-    data2 = mx.sym.var("data2")
-    ref_res = mx.np.stack([mx.np.array(data_np1), mx.np.array(data_np2)], axis=axis)
-    mx_sym = mx.sym.np.stack([data1.as_np_ndarray(), data2.as_np_ndarray()], axis=axis)
-    mod, _ = relay.frontend.from_mxnet(
-        mx_sym, shape={"data1": data_shape1, "data2": data_shape2}, dtype=dtype
-    )
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-        data_np1, data_np2
-    )
-    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.parametrize("data_shape", [(2, 2, 2), (2, 7, 2), (2, 2, 2, 1, 2, 3, 1), (1, 8)])
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32", "bool"])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_np_copy(data_shape, dtype, target, dev, kind):
-    data_np = np.random.uniform(size=data_shape).astype(dtype)
-    data = mx.sym.var("data")
-    ref_res = mx.np.copy(mx.np.array(data_np))
-    mx_sym = mx.sym.np.copy(data.as_np_ndarray())
-    mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape}, dtype=dtype)
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(data_np)
-    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32", "bool"])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-@pytest.mark.parametrize(
-    "data_shape,out_shape,reverse",
-    [
-        ((2, 3, 8), (-2, -2, 2, -1), False),
-        ((8, 3, 3, 3, 4, 4), (-6, 2, -1, -4), False),
-        ((8, 3, 3, 3, 4, 4), (-5, -4), False),
-        ((1, 8, 3, 3, 3, 4, 4), (-3, -5, -4), False),
-        ((8, 1, 3, 4), (-2, -3, -1), False),
-        ((8, 3, 3, 3, 3, 8), (-4, -5), True),
-        ((8, 3, 2, 4, 8), (-4, -1, 2, -6), True),
-        ((3, 2, 4, 8, 1, 1), (-4, -1, 2, -6, -5, -3), True),
-        ((2, 4, 1, 8), (-4, -3, -1, 2, -6), True),
-    ],
-)
-def test_forward_npx_reshape(data_shape, out_shape, dtype, target, reverse, dev, kind):
-    data_np = np.random.uniform(size=data_shape).astype(dtype)
-    data = mx.sym.var("data")
-    ref_res = mx.npx.reshape(mx.np.array(data_np), newshape=out_shape, reverse=reverse)
-    mx_sym = mx.sym.npx.reshape(data.as_np_ndarray(), newshape=out_shape, reverse=reverse)
-    mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape}, dtype=dtype)
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(data_np)
-    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.parametrize(
-    "data_shape", [(2, 2, 2), (2, 7, 2), (2, 2, 2, 1, 2, 3, 1), (1, 8), (2, 2), (1, 3)]
-)
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32"])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_npi_binary(data_shape, dtype, target, dev, kind):
-    ref_ops = [mx.np.power, mx.np.multiply, mx.np.add, mx.np.subtract, mx.np.less]
-    mx_ops = [
-        mx.sym.np.power,
-        mx.sym.np.multiply,
-        mx.sym.np.add,
-        mx.sym.np.subtract,
-        mx.sym.np.less,
-    ]
-    for i in range(len(ref_ops)):
-        ref_op = ref_ops[i]
-        mx_op = mx_ops[i]
-        # mx.np.power only support float type
-        if ref_op == mx.np.power and dtype not in ["float64", "float32"]:
-            continue
-        data_np1 = np.random.uniform(size=data_shape).astype(dtype)
-        data_np2 = np.random.uniform(size=data_shape).astype(dtype)
-        data1 = mx.sym.var("lhs")
-        data2 = mx.sym.var("rhs")
-        ref_res = ref_op(mx.np.array(data_np1), mx.np.array(data_np2))
-        mx_sym = mx_op(data1.as_np_ndarray(), data2.as_np_ndarray())
-        mod, _ = relay.frontend.from_mxnet(
-            mx_sym, shape={"lhs": data_shape, "rhs": data_shape}, dtype=dtype
-        )
-        op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-            data_np1, data_np2
-        )
-        tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.parametrize(
-    "data_shape", [(2, 2, 2), (2, 7, 2), (2, 2, 2, 1, 2, 3, 1), (1, 8), (2, 2), (1, 3)]
-)
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32"])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("scalar", [1.0, 2.0, 3.0, 4.0])
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_npi_binary_scalar(data_shape, dtype, scalar, target, dev, kind):
-    ref_ops = [mx.np.power, mx.np.multiply, mx.np.add, mx.np.subtract, mx.np.true_divide]
-    mx_ops = [
-        mx.sym.np.power,
-        mx.sym.np.multiply,
-        mx.sym.np.add,
-        mx.sym.np.subtract,
-        mx.sym.np.true_divide,
-    ]
-    for i in range(len(ref_ops)):
-        ref_op = ref_ops[i]
-        mx_op = mx_ops[i]
-        # mx.np.power only support float type
-        if ref_op == mx.np.power and dtype not in ["float64", "float32"]:
-            continue
-        data_np1 = np.random.uniform(size=data_shape).astype(dtype)
-        data1 = mx.sym.var("lhs")
-        ref_res = ref_op(mx.np.array(data_np1), scalar)
-        mx_sym = mx_op(data1.as_np_ndarray(), scalar)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape={"lhs": data_shape}, dtype=dtype)
-        op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-            data_np1
-        )
-        tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.parametrize(
-    "data_shape", [(2, 2, 2), (2, 7, 2), (2, 2, 2, 1, 2, 3, 1), (1, 8), (2, 2), (1, 3)]
-)
-@pytest.mark.parametrize("dtype", ["float64", "float32"])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_npi_tanh(data_shape, dtype, target, dev, kind):
-    data_np1 = np.random.uniform(size=data_shape).astype(dtype)
-    data1 = mx.sym.var("data")
-    ref_res = mx.np.tanh(mx.np.array(data_np1))
-    mx_sym = mx.sym.np.tanh(data1.as_np_ndarray())
-    mod, _ = relay.frontend.from_mxnet(mx_sym, shape={"data": data_shape}, dtype=dtype)
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(data_np1)
-    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.skipif(not hasattr(mx.np, "where"), reason="mx.np.where hasn't been publish yet")
-@pytest.mark.parametrize(
-    "data_shape,cond_shape",
-    [[(2, 2, 2), (2, 2, 2)], [(2, 7, 2), (7, 2)], [(2, 2), (1, 2)], [(1, 3), (3, 3)]],
-)
-@pytest.mark.parametrize("data_dtype", ["float64", "float32", "int64", "int32", "bool"])
-@pytest.mark.parametrize("cond_dtype", ["float64", "float32", "int64", "int32", "bool"])
-@pytest.mark.parametrize("scalar", [1.0, 2.0])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-def test_forward_npi_where_rscalar(
-    data_shape, cond_shape, data_dtype, cond_dtype, scalar, target, dev, kind
-):
-    if data_dtype == "bool":
-        scalar = scalar == 0.0
-    cond_np = np.random.uniform(size=cond_shape).astype(cond_dtype)
-    data_np = np.random.uniform(size=data_shape).astype(data_dtype)
-    cond = mx.sym.var("condition")
-    data = mx.sym.var("x")
-    ref_res = mx.np.where(mx.np.array(cond_np), mx.np.array(data_np), scalar)
-    mx_sym = mx.sym.np.where(cond.as_np_ndarray(), data.as_np_ndarray(), scalar)
-    dtypeDic = {}
-    dtypeDic["condition"] = cond_dtype
-    dtypeDic["x"] = data_dtype
-    mod, _ = relay.frontend.from_mxnet(
-        mx_sym, shape={"condition": cond_shape, "x": data_shape}, dtype=dtypeDic
-    )
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-        cond_np, data_np
-    )
-    tvm.testing.assert_allclose(op_res.numpy(), ref_res.asnumpy(), rtol=1e-5)
-
-
-@pytest.mark.parametrize("dtype", ["float64", "float32", "int64", "int32", "bool"])
-@tvm.testing.parametrize_targets
-@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
-@pytest.mark.parametrize(
-    "data_shape, axis, indices_or_sections, squeeze_axis",
-    [
-        ((3, 2, 1), 1, 2, False),
-        ((3, 2, 1), 0, 3, False),
-        ((3, 2, 1), 0, 3, True),
-        ((3, 2, 1), 0, (1, 2), False),
-    ],
-)
-def test_forward_split_v2(
-    data_shape, axis, dtype, indices_or_sections, squeeze_axis, target, dev, kind
-):
-    data_np = np.random.uniform(size=data_shape).astype(dtype)
-    data = mx.sym.var("data")
-    ref_res = mx.ndarray.split_v2(
-        mx.nd.array(data_np), indices_or_sections, axis=axis, squeeze_axis=squeeze_axis
-    )
-    mx_sym = mx.sym.split_v2(
-        data.as_nd_ndarray(), indices_or_sections, axis=axis, squeeze_axis=squeeze_axis
-    )
-    mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape}, dtype=dtype)
-    op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(data_np)
-    op_res_ = []
-    for arr in op_res:
-        op_res_.append(arr.numpy().tolist())
-    ref_res_ = []
-    for arr in ref_res:
-        ref_res_.append(arr.asnumpy().tolist())
-    tvm.testing.assert_allclose(op_res_, ref_res_, rtol=1e-5)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/mxnet/test_graph.py b/tests/python/frontend/mxnet/test_graph.py
deleted file mode 100644
index 63ce763f1725..000000000000
--- a/tests/python/frontend/mxnet/test_graph.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import mxnet as mx
-
-import tvm
-from tvm import te
-from tvm import relay
-from tvm.relay import transform
-import model_zoo
-
-
-def compare_graph(lhs_mod, rhs_mod):
-    lhs_mod = transform.InferType()(lhs_mod)
-    rhs_mod = transform.InferType()(rhs_mod)
-    tvm.ir.assert_structural_equal(lhs_mod["main"], rhs_mod["main"])
-
-
-def test_mlp():
-    shape = {"data": (1, 1, 28, 28)}
-    mx_fun = model_zoo.mx_mlp()
-    mod, _ = relay.frontend.from_mxnet(mx_fun, shape=shape)
-    relay_fun = model_zoo.relay_mlp()
-    compare_graph(mod, relay_fun)
-
-
-def test_vgg():
-    shape = {"data": (1, 3, 224, 224)}
-    for n in [11, 13, 16, 19]:
-        mx_sym = model_zoo.mx_vgg(n)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape=shape)
-        relay_mod = model_zoo.relay_vgg(n)
-        compare_graph(mod, relay_mod)
-
-
-def test_resnet():
-    shape = {"data": (1, 3, 224, 224)}
-    for n in [18, 34, 50, 101]:
-        mx_sym = model_zoo.mx_resnet(n)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape=shape)
-        relay_mod = model_zoo.relay_resnet(n)
-        compare_graph(mod, relay_mod)
-
-
-def test_squeezenet():
-    shape = {"data": (1, 3, 224, 224)}
-    for version in ["1.0", "1.1"]:
-        mx_sym = model_zoo.mx_squeezenet(version)
-        mod, _ = relay.frontend.from_mxnet(mx_sym, shape)
-        relay_mod = model_zoo.relay_squeezenet(version)
-        compare_graph(mod, relay_mod)
-
-
-def test_inception_v3():
-    shape = {"data": (1, 3, 299, 299)}
-    mx_sym = model_zoo.mx_inception_v3()
-    mod, _ = relay.frontend.from_mxnet(mx_sym, shape)
-    relay_mod = model_zoo.relay_inception_v3()
-    compare_graph(mod, relay_mod)
-
-
-def test_dqn():
-    shape = {"data": (1, 4, 84, 84)}
-    mx_sym = model_zoo.mx_dqn()
-    mod, _ = relay.frontend.from_mxnet(mx_sym, shape)
-    relay_mod = model_zoo.relay_dqn()
-    compare_graph(mod, relay_mod)
-
-
-def test_dcgan():
-    shape = {"data": (2, 100)}
-    mx_sym = model_zoo.mx_dcgan()
-    mod, _ = relay.frontend.from_mxnet(mx_sym, shape)
-    relay_mod = model_zoo.relay_dcgan(batch_size=2)
-    compare_graph(mod, relay_mod)
-
-
-def test_multi_outputs():
-    xshape = (10, 27)
-    yshape = (10, 9)
-
-    def mx_compose(F, **kwargs):
-        x = F.sym.Variable("x")
-        y = F.sym.Variable("y")
-        z = F.sym.split(x, **kwargs)
-        return F.sym.broadcast_sub(F.sym.broadcast_add(z[0], z[2]), y)
-
-    def relay_compose(F, **kwargs):
-        x = F.var("x", shape=xshape)
-        y = F.var("y", shape=yshape)
-        z = F.split(x, **kwargs)
-        z = F.subtract(F.add(z[0], z[2]), y)
-        func = relay.Function(relay.analysis.free_vars(z), z)
-        return tvm.IRModule.from_expr(func)
-
-    mx_sym = mx_compose(mx, num_outputs=3, axis=1)
-    mod, _ = relay.frontend.from_mxnet(mx_sym, shape={"x": xshape, "y": yshape})
-    relay_mod = relay_compose(relay, indices_or_sections=3, axis=1)
-    compare_graph(mod, relay_mod)
-
-
-if __name__ == "__main__":
-    test_mlp()
-    test_resnet()
-    test_vgg()
-    test_multi_outputs()
-    test_dqn()
-    test_dcgan()
-    test_squeezenet()
-    test_inception_v3()
diff --git a/tests/python/frontend/mxnet/test_qnn_ops_utils.py b/tests/python/frontend/mxnet/test_qnn_ops_utils.py
deleted file mode 100644
index adbb0a74558b..000000000000
--- a/tests/python/frontend/mxnet/test_qnn_ops_utils.py
+++ /dev/null
@@ -1,224 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import numpy as np
-import tvm
-from tvm import relay
-from tvm.contrib import graph_executor
-from tvm.relay.frontend.mxnet_qnn_op_utils import (
-    dequantize_mxnet_min_max,
-    quantize_mxnet_min_max,
-    get_mkldnn_int8_scale,
-    get_mkldnn_uint8_scale,
-    quantize_conv_bias_mkldnn_from_var,
-)
-
-
-def test_mkldnn_dequantize():
-    def dequantize_test_driver(in_dtype, quant_args, in_data, verify_output_data):
-        shape = in_data.shape
-        input_data = relay.var("input_data", shape=shape, dtype=in_dtype)
-        min_range = quant_args["min_range"]
-        max_range = quant_args["max_range"]
-        dequantized_output = dequantize_mxnet_min_max(
-            input_data, min_range=min_range, max_range=max_range, in_dtype=in_dtype
-        )
-        mod = relay.Function(relay.analysis.free_vars(dequantized_output), dequantized_output)
-        mod = tvm.IRModule.from_expr(mod)
-        with tvm.transform.PassContext(opt_level=3):
-            graph, lib, params = relay.build(mod, "llvm", params=None)
-            rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
-            rt_mod.set_input(input_data=in_data)
-            rt_mod.set_input(**params)
-            rt_mod.run()
-            res = rt_mod.get_output(0).numpy()
-            assert np.allclose(res, verify_output_data)
-            assert res.dtype == np.float32
-
-    def test_uint8_to_float32():
-        data = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]).astype("uint8").reshape((2, 5))
-        output = (
-            np.array(
-                [
-                    0.0,
-                    0.25048923,
-                    0.50097847,
-                    0.7514677,
-                    1.0019569,
-                    62.8728,
-                    63.123287,
-                    63.373775,
-                    63.624268,
-                    63.874756,
-                ]
-            )
-            .astype("float32")
-            .reshape((2, 5))
-        )
-        quant_args = {"min_range": -63.5, "max_range": 64}
-        dequantize_test_driver(
-            in_dtype="uint8", quant_args=quant_args, in_data=data, verify_output_data=output
-        )
-
-    def test_int8_to_float32():
-        data = (
-            np.array([-126, -125, -124, -123, -122, 123, 124, 125, 126, 127])
-            .astype("int8")
-            .reshape((2, 5))
-        )
-        output = (
-            np.array(
-                [
-                    -63.247063,
-                    -62.745102,
-                    -62.24314,
-                    -61.74118,
-                    -61.23922,
-                    61.74118,
-                    62.24314,
-                    62.745102,
-                    63.247063,
-                    63.749023,
-                ]
-            )
-            .astype("float32")
-            .reshape((2, 5))
-        )
-        dequantize_args = {"min_range": -63.5, "max_range": 64}
-        dequantize_test_driver(
-            in_dtype="int8", quant_args=dequantize_args, in_data=data, verify_output_data=output
-        )
-
-    test_uint8_to_float32()
-    test_int8_to_float32()
-
-
-def test_mkldnn_quantize():
-    def quantize_test_driver(out_dtype, quant_args, in_data, verify_output_data):
-        shape = in_data.shape
-        input_data = relay.var("input_data", shape=shape, dtype="float32")
-        min_range = quant_args["min_range"]
-        max_range = quant_args["max_range"]
-        quantized_output, _, _ = quantize_mxnet_min_max(
-            input_data, min_range=min_range, max_range=max_range, out_dtype=out_dtype
-        )
-        mod = relay.Function(relay.analysis.free_vars(quantized_output), quantized_output)
-        mod = tvm.IRModule.from_expr(mod)
-        with tvm.transform.PassContext(opt_level=3):
-            graph, lib, params = relay.build(mod, "llvm", params=None)
-            rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
-            rt_mod.set_input(input_data=in_data)
-            rt_mod.set_input(**params)
-            rt_mod.run()
-            res = rt_mod.get_output(0).numpy()
-            assert np.allclose(res, verify_output_data)
-            assert res.dtype == verify_output_data.dtype
-
-    def test_float32_to_uint8():
-        data = (
-            np.array(
-                [
-                    0.0,
-                    0.25048923,
-                    0.50097847,
-                    0.7514677,
-                    1.0019569,
-                    62.8728,
-                    63.123287,
-                    63.373775,
-                    63.624268,
-                    63.874756,
-                ]
-            )
-            .astype("float32")
-            .reshape((2, 5))
-        )
-        output = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]).astype("uint8").reshape((2, 5))
-
-        quant_args = {"min_range": -63.5, "max_range": 64}
-        quantize_test_driver(
-            out_dtype="uint8", quant_args=quant_args, in_data=data, verify_output_data=output
-        )
-
-    def test_float32_to_int8():
-        data = (
-            np.array(
-                [
-                    -63.247063,
-                    -62.745102,
-                    -62.24314,
-                    -61.74118,
-                    -61.23922,
-                    61.74118,
-                    62.24314,
-                    62.745102,
-                    63.247063,
-                    63.749023,
-                ]
-            )
-            .astype("float32")
-            .reshape((2, 5))
-        )
-        output = (
-            np.array([-126, -125, -124, -123, -122, 123, 124, 125, 126, 127])
-            .astype("int8")
-            .reshape((2, 5))
-        )
-
-        quant_args = {"min_range": -63.5, "max_range": 64}
-        quantize_test_driver(
-            out_dtype="int8", quant_args=quant_args, in_data=data, verify_output_data=output
-        )
-
-    test_float32_to_uint8()
-    test_float32_to_int8()
-
-
-def test_get_mkldnn_int8_scale():
-    range_min = -3.904039
-    range_max = 3.904039
-    expected = 0.03061991354976495
-    output = get_mkldnn_int8_scale(range_max=range_max, range_min=range_min)
-    assert np.allclose(output, expected)
-
-
-def test_get_mkldnn_uint8_scale():
-    range_min = 0.0
-    range_max = 55.77269
-    expected = 0.21828841189047482
-    output = get_mkldnn_uint8_scale(range_max=range_max, range_min=range_min)
-    assert np.allclose(output, expected)
-
-
-def test_quantize_conv_bias_mkldnn_from_var():
-    bias_var = relay.var("bias", shape=(3,), dtype="float32")
-    bias_scale = tvm.nd.array(np.array([0.5, 0.6, 0.7]))
-    output = quantize_conv_bias_mkldnn_from_var(bias_var, bias_scale)
-    assert isinstance(output, tvm.relay.expr.Call)
-    attrs = output.attrs
-    assert attrs.axis == 0
-    assert attrs.out_dtype == "int32"
-    assert output.op.name == "qnn.quantize"
-    assert output.args[1].data == bias_scale
-
-
-if __name__ == "__main__":
-    test_mkldnn_dequantize()
-    test_mkldnn_quantize()
-    test_get_mkldnn_int8_scale()
-    test_get_mkldnn_uint8_scale()
-    test_quantize_conv_bias_mkldnn_from_var()
diff --git a/tests/python/frontend/oneflow/test_forward.py b/tests/python/frontend/oneflow/test_forward.py
deleted file mode 100644
index fda5f1b723c7..000000000000
--- a/tests/python/frontend/oneflow/test_forward.py
+++ /dev/null
@@ -1,963 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=arguments-differ, unused-argument
-"""Unit tests for various models and operators"""
-import os
-
-import numpy as np
-import oneflow as flow
-from packaging import version as package_version
-import tvm
-import tvm.testing
-import tvm.topi.testing
-from tvm import relay
-
-MODEL_HOME = "test_model"
-
-
-def mkdir(path):
-    # init
-    path = path.strip()
-    path = path.rstrip("\\")
-
-    if not os.path.exists(path):
-        os.makedirs(path)
-    else:
-        print(f"{path} is already here")
-
-
-def rmdir(path):
-    for root, dirs, files in os.walk(path, topdown=False):
-        for name in files:
-            os.remove(os.path.join(root, name))
-        for name in dirs:
-            os.rmdir(os.path.join(root, name))
-    os.removedirs(path)
-
-
-def assert_shape(out1, out2):
-    if out1.shape != out2.shape:
-        msg = "Output shapes {} and {} don't match"
-        raise AssertionError(msg.format(out1.shape, out2.shape))
-
-
-class OneFlowGraph(flow.nn.Graph):
-    def __init__(self, module):
-        super().__init__()
-        self.m = module
-
-    def build(self, x):
-        out = self.m(x)
-        return out
-
-
-class OneFlowGraphV2(flow.nn.Graph):
-    def __init__(self, module):
-        super().__init__()
-        self.m = module
-
-    def build(self, input_1, input_2, input_3):
-        out = self.m(input_1, input_2, input_3)
-        return out
-
-
-class OneFlowGraphV3(flow.nn.Graph):
-    def __init__(self, module):
-        super().__init__()
-        self.m = module
-
-    def build(self, input_1, input_2):
-        out = self.m(input_1, input_2)
-        return out
-
-
-def get_oneflow_output(model, inputs):
-    flow_output = model(inputs)
-    return flow_output.numpy()
-
-
-def get_oneflow_concat_output(model, input1, input2, input3):
-    flow_output = model(input1, input2, input3).numpy()
-    return flow_output
-
-
-def get_oneflow_elementwise_output(model, input1, input2):
-    return model(input1, input2).numpy()
-
-
-def get_tvm_output(graph, model_path, inputs: flow.tensor, target="llvm", dtype="float32"):
-    """Generic function to execute and get tvm output"""
-    inputs_numpy = inputs.numpy()
-    if target == "llvm":
-        device = tvm.cpu(0)
-    elif target == "cuda":
-        device = tvm.cuda(0)
-
-    mod, params = relay.frontend.from_oneflow(graph, model_path)
-    with tvm.transform.PassContext(opt_level=10):
-        intrp = relay.build_module.create_executor("graph", mod, device, target)
-    tvm_output = intrp.evaluate()(tvm.nd.array(inputs_numpy.astype(dtype)), **params).numpy()
-    return tvm_output
-
-
-def get_tvm_concat_output(
-    graph,
-    model_path,
-    input1: flow.tensor,
-    input2: flow.tensor,
-    input3: flow.tensor,
-    target="llvm",
-    dtype="float32",
-):
-    """Generic function to execute and get tvm concat output"""
-    input1_numpy = input1.numpy()
-    input2_numpy = input2.numpy()
-    input3_numpy = input3.numpy()
-    if target == "llvm":
-        device = tvm.cpu(0)
-    elif target == "cuda":
-        device = tvm.cuda(0)
-
-    mod, params = relay.frontend.from_oneflow(graph, model_path)
-    with tvm.transform.PassContext(opt_level=10):
-        intrp = relay.build_module.create_executor("graph", mod, device, target)
-    tvm_output = intrp.evaluate()(
-        tvm.nd.array(input1_numpy.astype(dtype)),
-        tvm.nd.array(input2_numpy.astype(dtype)),
-        tvm.nd.array(input3_numpy.astype(dtype)),
-        **params,
-    ).numpy()
-    return tvm_output
-
-
-def get_tvm_elementwise_output(
-    graph,
-    model_path,
-    input1: flow.tensor,
-    input2: flow.tensor,
-    target="llvm",
-    dtype="float32",
-):
-    """Generic function to execute and get tvm elementwise output"""
-    input1_numpy = input1.numpy()
-    input2_numpy = input2.numpy()
-    if target == "llvm":
-        device = tvm.cpu(0)
-    elif target == "cuda":
-        device = tvm.cuda(0)
-
-    mod, params = relay.frontend.from_oneflow(graph, model_path)
-    with tvm.transform.PassContext(opt_level=10):
-        intrp = relay.build_module.create_executor("graph", mod, device, target)
-    tvm_output = intrp.evaluate()(
-        tvm.nd.array(input1_numpy.astype(dtype)),
-        tvm.nd.array(input2_numpy.astype(dtype)),
-        **params,
-    ).numpy()
-    return tvm_output
-
-
-def verify_conv(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs=flow.tensor(
-        np.random.rand(1, 3, 224, 224),
-        dtype=flow.float32,
-    ),
-    device="llvm",
-):
-    """verify_conv"""
-    if device == "cuda":
-        model.to(device)
-        inputs = inputs.to(device)
-
-    graph = OneFlowGraph(model)
-    graph._compile(inputs)
-
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_output(graph, inputs)
-    out_tvm = get_tvm_output(graph, MODEL_HOME, inputs, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-def verify_pool(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs=flow.tensor(
-        np.random.rand(1, 3, 224, 224),
-        dtype=flow.float32,
-    ),
-    device="llvm",
-):
-    """verify_pool"""
-    if device == "cuda":
-        model.to(device)
-        inputs = inputs.to(device)
-
-    graph = OneFlowGraph(model)
-    graph._compile(inputs)
-
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_output(graph, inputs)
-    out_tvm = get_tvm_output(graph, MODEL_HOME, inputs, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-def verify_normalization(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs=flow.tensor(
-        np.random.rand(1, 3, 224, 224),
-        dtype=flow.float32,
-    ),
-    device="llvm",
-):
-    """verify_normalization"""
-    if device == "cuda":
-        model.to(device)
-        inputs = inputs.to(device)
-
-    graph = OneFlowGraph(model)
-    graph._compile(inputs)
-
-    # write params
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_output(graph, inputs)
-    out_tvm = get_tvm_output(graph, MODEL_HOME, inputs, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-def verify_upsample(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs=flow.tensor(
-        np.random.rand(1, 3, 50, 50),
-        dtype=flow.float32,
-    ),
-    device="llvm",
-):
-    """verify_upsample"""
-    if device == "cuda":
-        model.to(device)
-        inputs = inputs.to(device)
-
-    graph = OneFlowGraph(model)
-    graph._compile(inputs)
-
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_output(graph, inputs)
-    out_tvm = get_tvm_output(graph, MODEL_HOME, inputs, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-def verify_convtran(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs=flow.tensor(
-        np.random.rand(1, 3, 50, 50),
-        dtype=flow.float32,
-    ),
-    device="llvm",
-):
-    """verify_convtran"""
-    if device == "cuda":
-        model.to(device)
-        inputs = inputs.to(device)
-
-    graph = OneFlowGraph(model)
-    graph._compile(inputs)
-
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_output(graph, inputs)
-    out_tvm = get_tvm_output(graph, MODEL_HOME, inputs, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-def verify_activation(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs=flow.tensor(
-        np.random.rand(10, 10),
-        dtype=flow.float32,
-    ),
-    device="llvm",
-):
-    """verify_activation"""
-    if device == "cuda":
-        model.to(device)
-        inputs = inputs.to(device)
-
-    graph = OneFlowGraph(model)
-    graph._compile(inputs)
-
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_output(graph, inputs)
-    out_tvm = get_tvm_output(graph, MODEL_HOME, inputs, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-def verify_math(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs=flow.tensor(
-        np.random.rand(100, 1),
-        dtype=flow.float32,
-    ),
-    device="llvm",
-):
-    """verify_math"""
-    if device == "cuda":
-        model.to(device)
-        inputs = inputs.to(device)
-
-    graph = OneFlowGraph(model)
-    graph._compile(inputs)
-
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_output(graph, inputs)
-    out_tvm = get_tvm_output(graph, MODEL_HOME, inputs, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-def verify_matmul(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs1=flow.tensor(np.random.randn(2, 5), dtype=flow.float32),
-    inputs2=flow.tensor(np.random.randn(5, 2), dtype=flow.float32),
-    device="llvm",
-):
-    """verify_matmul"""
-    if device == "cuda":
-        model.to(device)
-        inputs1 = inputs1.to(device)
-        inputs2 = inputs2.to(device)
-
-    graph = OneFlowGraphV3(model)
-    graph._compile(inputs1, inputs2)
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_elementwise_output(graph, inputs1, inputs2)
-    out_tvm = get_tvm_elementwise_output(graph, MODEL_HOME, inputs1, inputs2, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-def verify_concat(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs1=flow.tensor(np.random.randn(2, 5, 5, 4), dtype=flow.float32),
-    inputs2=flow.tensor(np.random.randn(2, 5, 5, 2), dtype=flow.float32),
-    inputs3=flow.tensor(np.random.randn(2, 5, 5, 3), dtype=flow.float32),
-    device="llvm",
-):
-    """verify_concat"""
-    if device == "cuda":
-        model.to(device)
-        inputs1 = inputs1.to(device)
-        inputs2 = inputs2.to(device)
-        inputs3 = inputs3.to(device)
-
-    graph = OneFlowGraphV2(model)
-    graph._compile(inputs1, inputs2, inputs3)
-
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_concat_output(graph, inputs1, inputs2, inputs3)
-    out_tvm = get_tvm_concat_output(graph, MODEL_HOME, inputs1, inputs2, inputs3, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-# defs/nn
-@tvm.testing.uses_gpu
-def test_conv2d():
-    """Conv2d"""
-
-    class Conv2dModel(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = flow.nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
-
-        def forward(self, x):
-            x = self.conv(x)
-            return x
-
-    if os.path.exists(MODEL_HOME):
-        rmdir(MODEL_HOME)
-
-    model = Conv2dModel()
-    model.eval()
-
-    for device in ["llvm"]:
-        verify_conv(model, device=device)
-
-
-@tvm.testing.uses_gpu
-def test_pool2d():
-    """Pool2d"""
-
-    class MaxPool2dModel(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.pool = flow.nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-
-        def forward(self, x):
-            x = self.pool(x)
-            return x
-
-    class AvgPool2dModel(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.pool = flow.nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
-
-        def forward(self, x):
-            x = self.pool(x)
-            return x
-
-    class AdaptiveAvgPool2dModel(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.pool = flow.nn.AdaptiveAvgPool2d((None, 7))
-
-        def forward(self, x):
-            x = self.pool(x)
-            return x
-
-    if os.path.exists(MODEL_HOME):
-        rmdir(MODEL_HOME)
-
-    model1 = MaxPool2dModel().eval()
-    model2 = AvgPool2dModel().eval()
-    model3 = AdaptiveAvgPool2dModel().eval()
-
-    for device in ["llvm"]:
-        verify_pool(model1, device=device)
-        verify_pool(model2, device=device)
-        verify_pool(model3, device=device)
-
-
-@tvm.testing.uses_gpu
-def test_normalization():
-    """Normalization"""
-
-    class BatchNorm2dModel(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.normalization = flow.nn.BatchNorm2d(3)
-
-        def forward(self, x):
-            x = self.normalization(x)
-            return x
-
-    if os.path.exists(MODEL_HOME):
-        rmdir(MODEL_HOME)
-
-    model = BatchNorm2dModel().eval()
-
-    for device in ["llvm"]:
-        verify_normalization(model, device=device)
-
-
-@tvm.testing.uses_gpu
-def test_upsample():
-    """Upsample"""
-
-    class UpsampleModel(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.upsample = flow.nn.Upsample(scale_factor=2.0, mode="nearest")
-
-        def forward(self, x):
-            x = self.upsample(x)
-            return x
-
-    class UpsampleBiliModel(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.upsample = flow.nn.UpsamplingBilinear2d(scale_factor=2.0)
-
-        def forward(self, x):
-            x = self.upsample(x)
-            return x
-
-    if os.path.exists(MODEL_HOME):
-        rmdir(MODEL_HOME)
-
-    model1 = UpsampleModel().eval()
-    model2 = UpsampleBiliModel().eval()
-
-    for device in ["llvm"]:
-        verify_upsample(model1, device=device)
-        verify_upsample(model2, device=device)
-
-
-@tvm.testing.uses_gpu
-def test_convtran():
-    """ConvTran"""
-
-    class ConvTranModel(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.convtran = flow.nn.ConvTranspose2d(3, 4, (3, 5), stride=(2, 1), padding=(4, 2))
-
-        def forward(self, x):
-            x = self.convtran(x)
-            return x
-
-    if os.path.exists(MODEL_HOME):
-        rmdir(MODEL_HOME)
-
-    model = ConvTranModel().eval()
-
-    for device in ["llvm"]:
-        verify_convtran(model, device=device)
-
-
-@tvm.testing.uses_gpu
-def test_activation():
-    """Activation"""
-
-    class Softmax(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.Softmax()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class Softplus(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.Softplus()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class Softsign(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.Softsign()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class Tanh(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.Tanh()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class ReLU(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.ReLU()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class ReLU6(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.ReLU6()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class PReLU(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.PReLU()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class SELU(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.SELU()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class SiLU(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.SiLU()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class LeakyReLU(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.LeakyReLU(0.1)
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class GELU(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.GELU()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class HardTanh(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.Hardtanh()
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    class TensorSoftmax(flow.nn.Module):
-        def forward(self, x):
-            x = x.softmax(dim=-1)
-            return x
-
-    class Threshold(flow.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.active = flow.nn.Threshold(0.5, 0.2)
-
-        def forward(self, x):
-            x = self.active(x)
-            return x
-
-    if os.path.exists(MODEL_HOME):
-        rmdir(MODEL_HOME)
-
-    model1 = Softmax().eval()
-    model2 = Softplus().eval()  # pylint: disable=unused-variable
-    model3 = Softsign().eval()
-    model4 = Tanh().eval()
-    model5 = ReLU().eval()
-    model6 = ReLU6().eval()
-    model7 = PReLU().eval()
-    model8 = SELU().eval()
-    model9 = SiLU().eval()
-    model10 = LeakyReLU().eval()
-    model11 = GELU().eval()
-    model12 = HardTanh().eval()
-    model13 = TensorSoftmax().eval()
-
-    for device in ["llvm"]:
-        verify_activation(model1, device=device)
-        verify_activation(model2, device=device)
-        verify_activation(model3, device=device)
-        verify_activation(model4, device=device)
-        verify_activation(model5, device=device)
-        verify_activation(model6, device=device)
-        verify_activation(model7, device=device)
-        verify_activation(model8, device=device)
-        verify_activation(model9, device=device)
-        verify_activation(model10, device=device)
-        verify_activation(model11, device=device)
-        verify_activation(model12, device=device)
-        verify_activation(
-            model13,
-            device=device,
-            inputs=flow.tensor(np.random.rand(1, 12, 197, 197).astype(np.float32)),
-        )
-
-    # Threshold was introduced in the version 0.8.0 of oneflow
-    if package_version.parse(flow.__version__) >= package_version.parse("0.8.0"):
-        model14 = Threshold().eval()
-        verify_activation(model14, device="llvm")
-
-
-@tvm.testing.uses_gpu
-def test_math():
-    """Math"""
-
-    class Sigmoid(flow.nn.Module):
-        def forward(self, x):
-            return flow.sigmoid(x)
-
-    class Sign(flow.nn.Module):
-        def forward(self, x):
-            return flow.sign(x)
-
-    class Reciprocal(flow.nn.Module):
-        def forward(self, x):
-            return flow.reciprocal(x)
-
-    class Pow(flow.nn.Module):
-        def forward(self, x):
-            return flow.pow(x, 2.0)
-
-    class Log(flow.nn.Module):
-        def forward(self, x):
-            return flow.log(x)
-
-    class Log2(flow.nn.Module):
-        def forward(self, x):
-            return flow.log1p(x)
-
-    class Exp(flow.nn.Module):
-        def forward(self, x):
-            return flow.exp(x)
-
-    class Exp2(flow.nn.Module):
-        def forward(self, x):
-            return flow.expm1(x)
-
-    class Variance(flow.nn.Module):
-        def forward(self, x):
-            return flow.var(x, 1, unbiased=False, keepdim=True)
-
-    model1 = Sigmoid().eval()
-    model2 = Sign().eval()
-    model3 = Log().eval()
-    model4 = Log2().eval()
-    model5 = Exp().eval()
-    model6 = Exp2().eval()
-    model7 = Reciprocal().eval()
-    model8 = Pow().eval()
-    model9 = Variance().eval()
-
-    for device in ["llvm"]:
-        verify_math(model1, device=device)
-        verify_math(model2, device=device)
-        verify_math(model3, device=device)
-        verify_math(model4, device=device)
-        verify_math(model5, device=device)
-        verify_math(model6, device=device)
-        verify_math(model7, device=device)
-        verify_math(model8, device=device)
-        verify_math(model9, device=device)
-
-
-@tvm.testing.uses_gpu
-def test_slice():
-    """Slice"""
-
-    class Slice(flow.nn.Module):
-        def forward(self, x):
-            tup_list = [[None, None, None], [0, 5, 2], [0, 6, 3]]
-            out = flow.slice(x, slice_tup_list=tup_list)
-            return out
-
-    model = Slice().eval()
-
-    for device in ["llvm"]:
-        verify_math(
-            model, device=device, inputs=flow.tensor(np.random.randn(3, 6, 9).astype(np.float32))
-        )
-
-
-@tvm.testing.uses_gpu
-def test_concat():
-    """Concat"""
-
-    class Concat(flow.nn.Module):
-        def forward(self, input_1, input_2, input_3):
-            out = flow.cat([input_1, input_2, input_3], dim=-1)
-            return out
-
-    model = Concat().eval()
-
-    for device in ["llvm"]:
-        verify_concat(model, device=device)
-
-
-@tvm.testing.uses_gpu
-def test_add_constant():
-    """ConstantAdd"""
-
-    class ConstantAdd(flow.nn.Module):
-        def forward(self, x):
-            out = flow.add(1.0, x)
-            return out
-
-    model = ConstantAdd().eval()
-
-    for device in ["llvm"]:
-        verify_math(
-            model, device=device, inputs=flow.tensor(np.random.randn(3, 6, 9).astype(np.float32))
-        )
-
-
-@tvm.testing.uses_gpu
-def test_logical():
-    class LogicalGreater(flow.nn.Module):
-        def forward(self, x):
-            return x > 1.0
-
-    model1 = LogicalGreater().eval()
-
-    for device in ["llvm"]:
-        verify_math(
-            model1, device=device, inputs=flow.tensor(np.random.randn(3, 6, 9).astype(np.float32))
-        )
-
-
-@tvm.testing.uses_gpu
-def test_expand():
-    class Expand(flow.nn.Module):
-        def forward(self, x):
-            return x.expand(2, -1, -1)
-
-    model1 = Expand().eval()
-
-    for device in ["llvm"]:
-        verify_math(
-            model1, device=device, inputs=flow.tensor(np.random.randn(1, 6, 9).astype(np.float32))
-        )
-
-
-@tvm.testing.uses_gpu
-def test_matmul():
-    """MatMul"""
-
-    class MatMul(flow.nn.Module):
-        def forward(self, x, y):
-            return flow._C.matmul(x, y)
-
-    class MatMulTranspose(flow.nn.Module):
-        def forward(self, x, y):
-            return flow._C.matmul(x, y, transpose_b=True)
-
-    class BatchMatMul(flow.nn.Module):
-        def forward(self, x, y):
-            return flow._C.batch_matmul(x, y)
-
-    class BroadCastMatMul(flow.nn.Module):
-        def forward(self, x, y):
-            return flow._C.matmul(x, y)
-
-    model1 = MatMul().eval()
-    model2 = MatMulTranspose().eval()
-    model3 = BatchMatMul().eval()
-    model4 = BroadCastMatMul().eval()
-
-    for device in ["llvm"]:
-        verify_matmul(
-            model1,
-            device=device,
-            inputs1=flow.tensor(np.random.randn(2, 3).astype(np.float32)),
-            inputs2=flow.tensor(np.random.randn(3, 3).astype(np.float32)),
-        )
-        verify_matmul(
-            model2,
-            device=device,
-            inputs1=flow.tensor(np.random.randn(1, 2).astype(np.float32)),
-            inputs2=flow.tensor(np.random.randn(3, 2).astype(np.float32)),
-        )
-        verify_matmul(
-            model3,
-            device=device,
-            inputs1=flow.tensor(np.random.randn(2, 1, 2).astype(np.float32)),
-            inputs2=flow.tensor(np.random.randn(2, 2, 3).astype(np.float32)),
-        )
-        verify_matmul(
-            model4,
-            device=device,
-            inputs1=flow.tensor(np.random.randn(3, 8, 8, 16).astype(np.float32)),
-            inputs2=flow.tensor(np.random.randn(16, 8).astype(np.float32)),
-        )
-
-
-if __name__ == "__main__":
-    test_conv2d()
-    test_pool2d()
-    test_normalization()
-    test_upsample()
-    test_convtran()
-    test_activation()
-    test_math()
-    test_slice()
-    test_concat()
-    test_add_constant()
-    test_logical()
-    test_expand()
-    test_matmul()
-    rmdir("log")
diff --git a/tests/python/frontend/oneflow/test_vision_models.py b/tests/python/frontend/oneflow/test_vision_models.py
deleted file mode 100644
index 03478dc41e33..000000000000
--- a/tests/python/frontend/oneflow/test_vision_models.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name
-# pylint: disable=arguments-differ, unused-argument
-"""Unit tests for various models and operators"""
-import os
-
-import numpy as np
-import oneflow as flow
-from flowvision.models.alexnet import alexnet
-from flowvision.models.squeezenet import squeezenet1_0
-from flowvision.models.shufflenet_v2 import shufflenet_v2_x0_5
-from flowvision.models.mobilenet import mobilenet_v2
-from flowvision.models.ghostnet import ghostnet
-from flowvision.models.vision_transformer import vit_base_patch16_224
-import tvm
-import tvm.testing
-import tvm.topi.testing
-from tvm import relay
-
-MODEL_HOME = "test_model"
-
-
-def mkdir(path):
-    # init
-    path = path.strip()
-    path = path.rstrip("\\")
-
-    if not os.path.exists(path):
-        os.makedirs(path)
-    else:
-        print(f"{path} is already here")
-
-
-def rmdir(path):
-    for root, dirs, files in os.walk(path, topdown=False):
-        for name in files:
-            os.remove(os.path.join(root, name))
-        for name in dirs:
-            os.rmdir(os.path.join(root, name))
-    os.removedirs(path)
-
-
-def assert_shape(out1, out2):
-    if out1.shape != out2.shape:
-        msg = "Output shapes {} and {} don't match"
-        raise AssertionError(msg.format(out1.shape, out2.shape))
-
-
-class OneFlowGraph(flow.nn.Graph):
-    def __init__(self, module):
-        super().__init__()
-        self.m = module
-
-    def build(self, x):
-        out = self.m(x)
-        return out
-
-
-def get_oneflow_output(model, inputs):
-    flow_output = model(inputs)
-    return flow_output.numpy()
-
-
-def get_tvm_output(graph, model_path, inputs: flow.tensor, target="llvm", dtype="float32"):
-    """Generic function to execute and get tvm output"""
-    inputs_numpy = inputs.numpy()
-    if target == "llvm":
-        device = tvm.cpu(0)
-    elif target == "cuda":
-        device = tvm.cuda(0)
-
-    mod, params = relay.frontend.from_oneflow(graph, model_path)
-    with tvm.transform.PassContext(opt_level=10):
-        intrp = relay.build_module.create_executor("graph", mod, device, target)
-    tvm_output = intrp.evaluate()(tvm.nd.array(inputs_numpy.astype(dtype)), **params).numpy()
-    return tvm_output
-
-
-def verify_model(
-    model,
-    name="",
-    rtol=1e-5,
-    atol=1e-5,
-    inputs=flow.tensor(
-        np.random.rand(1, 3, 224, 224),
-        dtype=flow.float32,
-    ),
-    device="llvm",
-):
-    """Generic function to generate and compare oneflow and TVM output"""
-    if device == "cuda":
-        model.to(device)
-        inputs = inputs.to(device)
-
-    graph = OneFlowGraph(model)
-    graph._compile(inputs)
-
-    mkdir(MODEL_HOME)
-    flow.save(model.state_dict(), MODEL_HOME)
-
-    out_flow = get_oneflow_output(graph, inputs)
-    out_tvm = get_tvm_output(graph, MODEL_HOME, inputs, target=device)
-    rmdir(MODEL_HOME)
-
-    assert_shape(out_flow, out_tvm)
-    tvm.testing.assert_allclose(out_flow, out_tvm, rtol=rtol, atol=atol)
-
-
-@tvm.testing.uses_gpu
-def test_vision_models():
-    """Vision models test"""
-
-    if os.path.exists(MODEL_HOME):
-        rmdir(MODEL_HOME)
-
-    vision_alexnet = alexnet().eval()
-    vision_squeezenet = squeezenet1_0().eval()
-    vision_shufflenet = shufflenet_v2_x0_5().eval()
-    vision_mobilenetv2 = mobilenet_v2().eval()
-    vision_ghostnet = ghostnet().eval()
-    vision_vit = vit_base_patch16_224().eval()
-
-    for device in ["llvm"]:
-        verify_model(vision_alexnet, device=device)
-        verify_model(vision_squeezenet, device=device)
-        verify_model(vision_shufflenet, device=device)
-        verify_model(vision_mobilenetv2, device=device)
-        verify_model(vision_ghostnet, device=device)
-        verify_model(vision_vit, device=device)
-
-
-if __name__ == "__main__":
-    test_vision_models()
-    rmdir("log")
diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py
deleted file mode 100644
index a81352bb679f..000000000000
--- a/tests/python/frontend/onnx/test_forward.py
+++ /dev/null
@@ -1,8716 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=unused-argument
-"""
-ONNX testcases
-================
-This article is a test script to test ONNX operator with Relay.
-"""
-import glob
-import os
-import platform
-import re
-import copy
-import tempfile
-import pytest
-import scipy
-import numpy as np
-
-import tvm
-import tvm.testing
-import tvm.topi.testing
-from tvm import relay
-from tvm.contrib import graph_executor, utils
-from tvm.relay.frontend.common import infer_type
-from tvm.relay.build_module import bind_params_by_name
-from relay.utils.tag_span import _create_span, _set_span, _verify_structural_equal_with_span
-
-import onnx
-import onnxruntime.backend
-from onnx import TensorProto, helper, mapping, numpy_helper
-from onnxruntime.quantization import CalibrationDataReader, quantize_static
-
-import torch
-import torchvision
-from torch.nn import Linear, Module, Sequential
-
-
-def get_input_data_shape_dict(graph_def, input_data):
-    """Get input data shape"""
-    if isinstance(input_data, list):
-        input_names = {}
-        shape_dict = {}
-        for i, _ in enumerate(input_data):
-            input_names[i] = graph_def.graph.input[i].name
-            input_ = input_data[i]
-
-            if input_ is None or not hasattr(input_, "shape") or input_.shape == ():
-                # Skip adding input shape data when the input data is None;
-                # This is to enable optional arguments for onnx operators.
-                continue
-
-            elif isinstance(input_, list):
-                shape_dict[input_names[i]] = (len(input_),)
-
-            else:
-                shape_dict[input_names[i]] = input_.shape
-
-    else:
-        input_names = graph_def.graph.input[0].name
-        shape_dict = {input_names: input_data.shape}
-
-    return input_names, shape_dict
-
-
-def get_tvm_output_with_vm(
-    graph_def,
-    input_data,
-    target,
-    dev,
-    opset=None,
-    freeze_params=False,
-    convert_config=None,
-    validate_structural_equal=True,
-):
-    """Generic function to execute and get tvm output with vm executor"""
-    if not isinstance(input_data, list):
-        input_data = [input_data]
-    _, shape_dict = get_input_data_shape_dict(graph_def, input_data)
-
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_onnx(
-            graph_def,
-            shape_dict,
-            opset=opset,
-            freeze_params=freeze_params,
-            convert_config=convert_config,
-        )
-        # handle the bfloat16 so we explicitly allocate
-        # bfloat16 arrays as input
-        for i, param in enumerate(mod["main"].params):
-            if param.type_annotation.dtype == "bfloat16":
-                input_data[i] = tvm.nd.empty(input_data[i].shape, "bfloat16").copyfrom(
-                    input_data[i]
-                )
-
-    if validate_structural_equal:
-        with tvm.testing.enable_span_filling():
-            mod_with_span, _ = relay.frontend.from_onnx(
-                graph_def,
-                shape_dict,
-                opset=opset,
-                freeze_params=freeze_params,
-                convert_config=convert_config,
-            )
-        tvm.ir.assert_structural_equal(mod, mod_with_span)
-
-    result = relay.create_executor("vm", mod=mod, device=dev, target=target).evaluate()(
-        *input_data, **params
-    )
-    if isinstance(result, tvm.runtime.NDArray):
-        return result.numpy()
-    return [r.numpy() for r in result]
-
-
-def get_tvm_output(
-    graph_def,
-    input_data,
-    target,
-    dev,
-    output_shape=None,
-    output_dtype="float32",
-    opset=None,
-    opt_level=1,
-    convert_config=None,
-):
-    """Generic function to execute and get tvm output"""
-    # TODO: Resolve the issues and remove the following lines
-    input_names, shape_dict = get_input_data_shape_dict(graph_def, input_data)
-
-    mod, params = relay.frontend.from_onnx(
-        graph_def, shape_dict, opset=opset, convert_config=convert_config
-    )
-
-    with tvm.transform.PassContext(opt_level=opt_level):
-        graph, lib, params = relay.build(mod, target, params=params)
-
-    m = graph_executor.create(graph, lib, dev)
-    # set inputs
-    if isinstance(input_data, list):
-        for i, _ in enumerate(input_names):
-            # Its possible for some onnx inputs to not be needed in the tvm
-            # module, confirm its present before setting.
-            # pylint: disable=unnecessary-list-index-lookup
-            m.set_input(input_names[i], tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
-    else:
-        m.set_input(input_names, tvm.nd.array(input_data.astype(input_data.dtype)))
-
-    m.set_input(**params)
-    # execute
-    m.run()
-    # get outputs
-    if isinstance(output_shape, list):
-        tvm_output_list = []
-        for i, _ in enumerate(output_shape):
-            tvm_output = m.get_output(i)
-            tvm_output_list.append(tvm_output.numpy())
-        return tvm_output_list
-    else:
-        tvm_output = m.get_output(0)
-        return tvm_output.numpy()
-
-
-def get_onnxruntime_output(model, inputs):
-    """Generic function to generate onnxruntime output"""
-    rep = onnxruntime.backend.prepare(model.SerializeToString(), "CPU")
-    if isinstance(inputs, list) and len(inputs) == 1:
-        inp = inputs[0]
-    else:
-        inp = inputs
-    output = rep.run(inp)
-    # Unpack output if there's only a single value.
-    if len(output) == 1:
-        output = output[0]
-    return output
-
-
-def verify_with_ort_with_inputs(
-    model,
-    inputs,
-    out_shape=None,
-    target=None,
-    dev=None,
-    use_vm=False,
-    opset=None,
-    freeze_params=False,
-    dtype="float32",
-    rtol=1e-5,
-    atol=1e-5,
-    apply_softmax=False,
-    opt_level=1,
-    convert_config=None,
-):
-    """verify_with_ort_with_inputs"""
-    if opset is not None:
-        model.opset_import[0].version = opset
-
-    ort_out = get_onnxruntime_output(model, inputs)
-    if use_vm:
-        tvm_out = get_tvm_output_with_vm(
-            model,
-            inputs,
-            target,
-            dev,
-            opset=opset,
-            freeze_params=freeze_params,
-            convert_config=convert_config,
-        )
-    else:
-        tvm_out = get_tvm_output(
-            model,
-            inputs,
-            target,
-            dev,
-            out_shape,
-            dtype,
-            opset=opset,
-            opt_level=opt_level,
-            convert_config=convert_config,
-        )
-
-    if not isinstance(tvm_out, list):
-        tvm_out = [tvm_out]
-    if not isinstance(ort_out, list):
-        ort_out = [ort_out]
-    for tvm_val, ort_val in zip(tvm_out, ort_out):
-        if apply_softmax:
-            ort_val = scipy.special.softmax(ort_val)
-            tvm_val = scipy.special.softmax(tvm_val)
-        tvm.testing.assert_allclose(ort_val, tvm_val, rtol=rtol, atol=atol)
-        assert ort_val.dtype == tvm_val.dtype
-
-
-def verify_with_ort(
-    model,
-    input_shapes,
-    out_shape=None,
-    target=None,
-    dev=None,
-    use_vm=False,
-    opset=None,
-    freeze_params=False,
-    dtype="float32",
-    rtol=1e-5,
-    atol=1e-5,
-):
-    """verify_with_ort"""
-    inputs = [np.random.uniform(size=ishape).astype(dtype) for ishape in input_shapes]
-    verify_with_ort_with_inputs(
-        model,
-        inputs,
-        out_shape=out_shape,
-        target=target,
-        dev=dev,
-        use_vm=use_vm,
-        opset=opset,
-        freeze_params=freeze_params,
-        dtype=dtype,
-        rtol=rtol,
-        atol=atol,
-    )
-
-
-def quantize_and_verify_with_ort(
-    onnx_model, input_names, input_shapes, target, dev, rtol=1e-5, atol=1e-5
-):
-    """quantize_and_verify_with_ort"""
-    input_arrays = [np.random.random(shape).astype("float32") for shape in input_shapes]
-
-    class RandomDataReader(CalibrationDataReader):
-        # pylint: disable=missing-class-docstring
-        def __init__(self, n=10):
-            input_dict = dict(zip(input_names, input_shapes))
-            self.data = iter(
-                [
-                    {
-                        name: np.random.random(shape).astype("float32")
-                        for name, shape in input_dict.items()
-                    }
-                    for _ in range(n)
-                ]
-            )
-
-        def get_next(self):
-            return next(self.data, None)
-
-    t_dir = tvm.contrib.utils.tempdir()
-    model_fp32 = os.path.join(t_dir.temp_dir, "model.onnx")
-    onnx.save_model(onnx_model, model_fp32)
-    model_quant = os.path.join(t_dir.temp_dir, "model.quant.onnx")
-    _ = quantize_static(  # pylint: disable=assignment-from-no-return
-        model_fp32, model_quant, RandomDataReader()
-    )
-    # opt_level=1 will cause error with qnn lowering
-    model = onnx.load(model_quant)
-    verify_with_ort_with_inputs(
-        model, input_arrays, opt_level=2, target=target, dev=dev, use_vm=True, rtol=rtol, atol=atol
-    )
-
-
-def make_constant_node(name, data_type, dims, vals):
-    return helper.make_node(
-        "Constant",
-        inputs=[],
-        outputs=[name],
-        value=helper.make_tensor(name=name, data_type=data_type, dims=dims, vals=vals),
-    )
-
-
-def is_version_greater_than(ver):
-    return "".join(re.findall(r"(\d+\.)(\d+\.)(\d)", onnx.__version__)[0]) > "".join(
-        re.findall(r"(\d+\.)(\d+\.)(\d)", ver)[0]
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_reshape(target, dev):
-    """test_reshape"""
-    in_shape = (4, 3, 3, 4)
-    ref_shape = (6, 2, 4, 3)
-
-    ref_array = np.array(ref_shape)
-    ref_node = onnx.helper.make_node(
-        "Constant",
-        inputs=[],
-        outputs=["ref_in"],
-        value=onnx.helper.make_tensor(
-            name="const_tensor",
-            data_type=onnx.TensorProto.INT32,
-            dims=ref_array.shape,
-            vals=ref_array.flatten().astype(int),
-        ),
-    )
-    reshape_node = helper.make_node("Reshape", ["in", "ref_in"], ["out"])
-
-    graph = helper.make_graph(
-        [ref_node, reshape_node],
-        "reshape_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(ref_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="reshape_test")
-
-    x = np.random.uniform(size=in_shape).astype("int32")
-    tvm_out = get_tvm_output(model, x, target, dev, ref_shape, "float32")
-    tvm.testing.assert_allclose(ref_shape, tvm_out.shape)
-
-
-@tvm.testing.parametrize_targets
-def test_double_reshape(target, dev):
-    """test_double_reshape"""
-    in_shape = (4, 3, 3, 4)
-    ref_shape = (6, 2, 4, 3)
-
-    ref_array = np.array(ref_shape)
-    ref_node = onnx.helper.make_node(
-        "Constant",
-        inputs=[],
-        outputs=["ref_in"],
-        value=onnx.helper.make_tensor(
-            name="const_tensor",
-            data_type=onnx.TensorProto.INT32,
-            dims=ref_array.shape,
-            vals=ref_array.flatten().astype(int),
-        ),
-    )
-    reshape_node1 = helper.make_node("Reshape", ["in", "ref_in"], ["out1"])
-    reshape_node2 = helper.make_node("Reshape", ["in", "ref_in"], ["out2"])
-    add_node = helper.make_node("Add", ["out1", "out2"], ["out"])
-
-    graph = helper.make_graph(
-        [ref_node, reshape_node1, reshape_node2, add_node],
-        "reshape_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(ref_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="reshape_test")
-
-    x = np.random.uniform(size=in_shape).astype("int32")
-    tvm_out = get_tvm_output(model, x, target, dev, ref_shape, "float32")
-    tvm.testing.assert_allclose(ref_shape, tvm_out.shape)
-
-
-@tvm.testing.parametrize_targets
-def test_expand(target, dev):
-    """test_expand"""
-
-    def _test_expand(name, data, shape, ref_data, dtype="int32"):
-        shape_array = np.array(shape)
-        if dtype == "int32":
-            shape_node = onnx.helper.make_node(
-                "Constant",
-                inputs=[],
-                outputs=["shape"],
-                value=onnx.helper.make_tensor(
-                    name="const_tensor",
-                    data_type=onnx.TensorProto.INT32,
-                    dims=shape_array.shape,
-                    vals=shape_array.flatten().astype("int32"),
-                ),
-            )
-        elif dtype == "int64":
-            shape_node = onnx.helper.make_node(
-                "Constant",
-                inputs=[],
-                outputs=["shape"],
-                value=onnx.helper.make_tensor(
-                    name="const_tensor",
-                    data_type=onnx.TensorProto.INT64,
-                    dims=shape_array.shape,
-                    vals=shape_array.flatten().astype("int64"),
-                ),
-            )
-        else:
-            raise TypeError("Invalid dtype")
-        expand_node = helper.make_node("Expand", ["in", "shape"], ["out"])
-
-        graph = helper.make_graph(
-            [shape_node, expand_node],
-            "expand_test",
-            inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(data.shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(ref_data.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name=name)
-
-        tvm_out = get_tvm_output_with_vm(model, data, target, dev, freeze_params=True)
-        tvm.testing.assert_allclose(ref_data, tvm_out)
-
-    in_shape = (3, 1)
-    shape = (3, 4)
-    data = np.random.uniform(size=in_shape).astype(np.float32)
-    ref_data = np.tile(data, 4)
-    _test_expand("expand_with_dim_unchanged_test", data, shape, ref_data, "int32")
-    _test_expand("expand_with_dim_unchanged_test", data, shape, ref_data, "int64")
-
-    in_shape = (3, 1)
-    shape = (2, 1, 6)
-    data = np.random.uniform(size=in_shape).astype(np.float32)
-    ref_data = data * np.ones(shape, dtype=np.float32)
-    _test_expand("expand_larger_target_shape_test", data, shape, ref_data, "int32")
-    _test_expand("expand_larger_target_shape_test", data, shape, ref_data, "int64")
-
-    in_shape = (1, 1)
-    shape = (3,)
-    data = np.random.uniform(size=in_shape).astype(np.float32)
-    ref_data = data * np.ones(shape, dtype=np.float32)
-    _test_expand("expand_smaller_target_shape_test", data, shape, ref_data, "int32")
-    _test_expand("expand_smaller_target_shape_test", data, shape, ref_data, "int64")
-
-
-@tvm.testing.parametrize_targets
-def test_depth_to_space(target, dev):
-    """test_depth_to_space"""
-
-    def verify_depth_to_space(inshape, outshape, mode, block_size):
-        node = onnx.helper.make_node(
-            "DepthToSpace", inputs=["x"], outputs=["y"], blocksize=block_size
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "depth_to_space_test",
-            inputs=[helper.make_tensor_value_info("x", TensorProto.FLOAT, list(inshape))],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(outshape))],
-        )
-
-        model = helper.make_model(graph, producer_name="depth_to_space_test")
-
-        verify_with_ort(model, [inshape], [outshape], target, dev)
-
-    # current onnx.checker use OpSet-1 version of DepthToSpace, which doesn't have a mode argument.
-    # TO-DO, we can add mode argument to test CRD mode and DCR mode
-    # in the future when we update to a newer onnx version.
-    verify_depth_to_space((1, 8, 2, 3), (1, 2, 4, 6), mode="CRD", block_size=2)
-
-
-@tvm.testing.parametrize_targets
-def test_space_to_depth(target, dev):
-    """test_space_to_depth"""
-
-    def verify_space_to_depth(inshape, outshape, block_size):
-        node = onnx.helper.make_node(
-            "SpaceToDepth", inputs=["x"], outputs=["y"], blocksize=block_size
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "space_to_depth_test",
-            inputs=[helper.make_tensor_value_info("x", TensorProto.FLOAT, list(inshape))],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(outshape))],
-        )
-
-        model = helper.make_model(graph, producer_name="space_to_depth_test")
-
-        verify_with_ort(model, [inshape], [outshape], target, dev)
-
-    verify_space_to_depth((1, 1, 4, 6), (1, 4, 2, 3), 2)
-
-
-@tvm.testing.parametrize_targets
-def test_shape(target, dev):
-    """test_shape"""
-    in_shape = (4, 3, 3, 4)
-    ref_shape = (6, 2, 4, 3)
-
-    ref_array = np.array(ref_shape)
-    ref_node = onnx.helper.make_node(
-        "Constant",
-        inputs=[],
-        outputs=["ref_in"],
-        value=onnx.helper.make_tensor(
-            name="const_tensor",
-            data_type=onnx.TensorProto.INT32,
-            dims=ref_array.shape,
-            vals=ref_array.flatten().astype(int),
-        ),
-    )
-    reshape_node = helper.make_node("Reshape", ["in", "ref_in"], ["out"])
-
-    shape_node = helper.make_node("Shape", ["out"], ["final_out"])
-
-    graph = helper.make_graph(
-        [ref_node, reshape_node, shape_node],
-        "shape_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("final_out", TensorProto.FLOAT, list(ref_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="shape_test")
-
-    x = np.random.uniform(size=in_shape).astype("int32")
-    tvm_out = get_tvm_output(model, x, target, dev, ref_shape, "int32")
-    tvm.testing.assert_allclose(ref_shape, tvm_out)
-
-
-@tvm.testing.parametrize_targets
-def test_power(target, dev):
-    """test_power"""
-
-    def _test_power_iteration(x_shape, y_shape):
-        if isinstance(y_shape, int):
-            y_shape = [y_shape]
-
-        x = np.random.uniform(size=x_shape).astype(np.float32)
-        y = np.random.uniform(size=y_shape).astype(np.float32)
-
-        np_res = np.power(x, y).astype(np.float32)
-
-        res = helper.make_node("Pow", ["x", "y"], ["out"])
-
-        graph = helper.make_graph(
-            [res],
-            "power_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape)),
-                helper.make_tensor_value_info("y", TensorProto.FLOAT, list(y_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(np_res.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="power_test")
-
-        tvm_out = get_tvm_output(model, [x, y], target, dev, np_res.shape)
-        tvm.testing.assert_allclose(np_res, tvm_out, rtol=1e-5, atol=1e-5)
-
-    _test_power_iteration((1, 3), (1))
-    _test_power_iteration((2, 3), (2, 3))
-    _test_power_iteration((2, 3), (1, 3))
-
-
-@tvm.testing.parametrize_targets
-def test_range(target, dev):
-    """test_range"""
-
-    def verify_range(start, limit, delta, dtype):
-        dtype_map = {
-            "float32": TensorProto.FLOAT,
-            "int32": TensorProto.INT32,
-            "int64": TensorProto.INT64,
-        }
-        dtype_onnx = dtype_map[dtype]
-        y = helper.make_node("Range", ["start", "limit", "delta"], ["output"])
-        graph = helper.make_graph(
-            [y],
-            "range_test",
-            inputs=[
-                helper.make_tensor_value_info("start", dtype_onnx, []),
-                helper.make_tensor_value_info("limit", dtype_onnx, []),
-                helper.make_tensor_value_info("delta", dtype_onnx, []),
-            ],
-            outputs=[
-                helper.make_tensor_value_info(
-                    "output", dtype_onnx, np.arange(start, limit, delta).shape
-                )
-            ],
-        )
-        model = helper.make_model(graph, producer_name="range_test")
-        inputs = [np.array(x).astype(dtype) for x in [start, limit, delta]]
-        verify_with_ort_with_inputs(model, inputs, target=target, dev=dev, use_vm=True)
-
-    for t in ["float32", "int32", "int64"]:
-        verify_range(0, 10, 1, t)
-        verify_range(2, 8, 2, t)
-        verify_range(-3, 6, 4, t)
-        verify_range(-2, -7, -1, t)
-
-
-@tvm.testing.parametrize_targets
-def test_squeeze(target, dev):
-    """test_squeeze"""
-
-    def test_squeeze_once(in_shape, out_shape, axes=None):
-        y = helper.make_node("Squeeze", ["in"], ["out"], axes=axes)
-
-        graph = helper.make_graph(
-            [y],
-            "squeeze_test",
-            inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="squeeze_test")
-        x = np.random.uniform(size=in_shape).astype("float32")
-        verify_with_ort_with_inputs(model, [x], [out_shape], target=target, dev=dev, opset=11)
-
-    test_squeeze_once((1, 3, 1, 3, 1, 1), (3, 3), [0, 2, 4, 5])
-    test_squeeze_once((1, 3, 1, 3, 1, 1), (3, 3))  # empty axis.
-    test_squeeze_once((), ())  # scalar testing.
-
-
-@tvm.testing.parametrize_targets
-def test_flatten(target, dev):
-    """test_flatten"""
-
-    def verify_flatten(in_shape, axis, ref_shape):
-        flatten_node = helper.make_node("Flatten", ["in"], ["out"], axis=axis)
-
-        graph = helper.make_graph(
-            [flatten_node],
-            "flatten_test",
-            inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(ref_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="flatten_test")
-        verify_with_ort(model, [in_shape], target=target, dev=dev)
-
-    verify_flatten((1, 3, 4, 4), 1, (1, 48))
-    verify_flatten((1,), 1, (1, 1))
-
-
-@tvm.testing.parametrize_targets
-def test_unsqueeze(target, dev):
-    """test_unsqueeze"""
-    in_shape = (3, 3)
-    axis = (0, 3, 4)
-    out_shape = (1, 3, 3, 1, 1)
-    y = helper.make_node("Unsqueeze", ["in"], ["out"], axes=list(axis))
-
-    graph = helper.make_graph(
-        [y],
-        "squeeze_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="squeeze_test")
-    verify_with_ort(model, [in_shape], target=target, dev=dev, opset=11)
-
-
-@tvm.testing.parametrize_targets
-def test_unsqueeze_with_neg_axes(target, dev):
-    def verify_unsqueeze_with_neg_axes(opset=11):
-        in_shape = (2, 3, 4)
-        axis = (-2, -1)
-        out_shape = (2, 3, 4, 1, 1)
-        if opset < 13:
-            y = helper.make_node("Unsqueeze", ["in"], ["out"], axes=list(axis))
-            nodes = [y]
-        else:
-            axes = np.array(list(axis)).astype(np.int64)
-            axes = helper.make_node(
-                "Constant",
-                inputs=[],
-                outputs=["axes"],
-                value=onnx.helper.make_tensor(
-                    name="const_axes",
-                    data_type=onnx.TensorProto.INT64,
-                    dims=axes.shape,
-                    vals=axes.flatten().astype(int),
-                ),
-            )
-            y = helper.make_node("Unsqueeze", ["in", "axes"], ["out"])
-            nodes = [axes, y]
-
-        graph = helper.make_graph(
-            nodes,
-            "squeeze_test",
-            inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="squeeze_test")
-        verify_with_ort(model, [in_shape], target=target, dev=dev, opset=opset)
-
-    verify_unsqueeze_with_neg_axes()
-    verify_unsqueeze_with_neg_axes(opset=13)
-
-
-@tvm.testing.parametrize_targets
-def test_gather(target, dev):
-    """test_gather"""
-
-    def verify_gather(in_shape, indices, axis, dtype):
-        x = np.random.uniform(size=in_shape).astype(dtype)
-        indices = np.array(indices, dtype="int64")
-        out_np = np.take(x, indices, axis=axis)
-
-        y = helper.make_node("Gather", ["in", "indices"], ["out"], axis=axis)
-
-        graph = helper.make_graph(
-            [y],
-            "gather_test",
-            inputs=[
-                helper.make_tensor_value_info(
-                    "in", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], list(in_shape)
-                ),
-                helper.make_tensor_value_info("indices", TensorProto.INT64, list(indices.shape)),
-            ],
-            outputs=[
-                helper.make_tensor_value_info(
-                    "out", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], list(out_np.shape)
-                )
-            ],
-        )
-        model = helper.make_model(graph, producer_name="gather_test")
-        verify_with_ort_with_inputs(model, [x, indices], target=target, dev=dev, dtype=dtype)
-
-    verify_gather((4,), [1], 0, "int32")
-    verify_gather((1, 4), [0], 0, "int32")
-    verify_gather((4,), [[[1, 0], [0, 1]]], 0, "float32")
-    verify_gather((2, 2), [[[1, 0], [0, 1]]], 1, "int32")
-    verify_gather((3, 3, 3), [[[1, 0]]], -1, "int32")
-    verify_gather((4, 3, 5, 6), [[2, 1, 0, 0]], 0, "float32")
-
-
-@tvm.testing.parametrize_targets
-def test_dynamic_gather(target, dev):
-    """test_dynamic_gather"""
-    dtype = "float32"
-    in_shape = [2, 2]
-    indices = 1
-    axis = 1
-    x = np.random.uniform(size=in_shape).astype(dtype)
-    indices = np.array(indices, dtype="int64")
-    out_np = np.take(x, indices, axis=axis)
-
-    indices = helper.make_node(
-        "Constant",
-        inputs=[],
-        outputs=["indices"],
-        value=onnx.helper.make_tensor(
-            name="const_indices",
-            data_type=onnx.TensorProto.INT64,
-            dims=[],
-            vals=[1],
-        ),
-    )
-    y = helper.make_node("Gather", ["in", "indices"], ["out"], axis=axis)
-
-    graph = helper.make_graph(
-        [indices, y],
-        "gather_test",
-        inputs=[
-            helper.make_tensor_value_info(
-                "in", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], ["?", "?"]
-            ),
-        ],
-        outputs=[
-            helper.make_tensor_value_info(
-                "out", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], ["?"] * len(out_np.shape)
-            )
-        ],
-    )
-    model = helper.make_model(graph, producer_name="dynamic_gather_test")
-
-    mod, params = relay.frontend.from_onnx(model)
-
-    result = relay.create_executor("vm", mod=mod, device=dev, target=target).evaluate()(x, **params)
-    tvm.testing.assert_allclose(out_np, result.numpy(), rtol=1e-5, atol=1e-5)
-
-
-@tvm.testing.parametrize_targets
-def test_gatherelements(target, dev):
-    """test_gatherelements"""
-
-    def verify_gatherelements(in_shape, indices, axis):
-        x = np.random.uniform(size=in_shape).astype("float32")
-        indices = np.array(indices, dtype="int32")
-
-        y = helper.make_node("GatherElements", ["data", "indices"], ["output"], axis=axis)
-        graph = helper.make_graph(
-            [y],
-            "gather_elements_test",
-            inputs=[
-                helper.make_tensor_value_info("data", TensorProto.FLOAT, list(in_shape)),
-                helper.make_tensor_value_info("indices", TensorProto.INT32, list(indices.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("output", TensorProto.FLOAT, list(in_shape))],
-        )
-        model = helper.make_model(graph, producer_name="gather_elements_test")
-
-        verify_with_ort_with_inputs(model, [x, indices], target=target, dev=dev)
-
-    verify_gatherelements((4,), [3, 0, 2, 1], 0)
-    verify_gatherelements((2, 2), [[1, 0], [0, 1]], 0)
-    verify_gatherelements((2, 2), [[0, 0], [1, 0]], 1)
-    verify_gatherelements((2, 2), [[1, 0], [0, 1]], 1)
-
-    indices = [
-        [[1, 0, 0], [1, 0, 1], [0, 1, 1]],
-        [[1, 1, 1], [1, 2, 1], [1, 0, 1]],
-        [[1, 2, 1], [1, 2, 1], [1, 2, 1]],
-    ]
-
-    verify_gatherelements((3, 3, 3), indices, 2)
-
-
-@tvm.testing.parametrize_targets
-def test_scatter(target, dev):
-    """test_scatter"""
-
-    def verify_scatter(in_shape, indices, axis):
-        x = np.random.uniform(size=in_shape).astype("float32")
-        indices = np.array(indices, dtype="int32")
-        updates = np.random.uniform(size=indices.shape).astype("float32")
-
-        y = helper.make_node("Scatter", ["data", "indices", "updates"], ["output"], axis=axis)
-
-        graph = helper.make_graph(
-            [y],
-            "scatter_test",
-            inputs=[
-                helper.make_tensor_value_info("data", TensorProto.FLOAT, list(in_shape)),
-                helper.make_tensor_value_info("indices", TensorProto.INT32, list(indices.shape)),
-                helper.make_tensor_value_info("updates", TensorProto.FLOAT, list(indices.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("output", TensorProto.FLOAT, list(in_shape))],
-        )
-        model = helper.make_model(graph, producer_name="scatter_test")
-        # Scatter operator has been supported from version 9 and
-        # deprecated since version 11 of the default ONNX operator set
-        verify_with_ort_with_inputs(model, [x, indices, updates], target=target, dev=dev, opset=9)
-
-    verify_scatter((4,), [1], 0)
-    verify_scatter((1, 4), [[0]], 0)
-    verify_scatter((4,), [2, 3], 0)
-    verify_scatter((2, 2), [[1, 0], [0, 1]], 1)
-    verify_scatter((3, 3, 3), [[[-1, -3]]], -1)
-    verify_scatter((4, 3, 5, 6), [[[[2, 1, 0, 0]]]], 0)
-
-
-@tvm.testing.parametrize_targets
-def test_scatter_elements(target, dev):
-    """test_scatter_elements"""
-
-    def verify_scatter_elements(in_shape, indices, axis=0, reduction="update"):
-        x = np.random.uniform(size=in_shape).astype("float32")
-        indices = np.array(indices, dtype="int32")
-        updates = np.random.uniform(size=indices.shape).astype("float32")
-
-        scatter_elements_node = helper.make_node(
-            "ScatterElements",
-            ["data", "indices", "updates"],
-            ["output"],
-            axis=axis,
-            reduction=reduction,
-        )
-
-        graph = helper.make_graph(
-            [scatter_elements_node],
-            "scatter_elements_test",
-            inputs=[
-                helper.make_tensor_value_info("data", TensorProto.FLOAT, list(in_shape)),
-                helper.make_tensor_value_info("indices", TensorProto.INT32, list(indices.shape)),
-                helper.make_tensor_value_info("updates", TensorProto.FLOAT, list(indices.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("output", TensorProto.FLOAT, list(in_shape))],
-        )
-        model = helper.make_model(graph, producer_name="scatter_elements_test")
-        verify_with_ort_with_inputs(model, [x, indices, updates], target=target, dev=dev)
-
-    # Usual scatter for 1d input
-    verify_scatter_elements((4,), [2, 3])
-    # Usual scatter with specified positive axis
-    verify_scatter_elements((2, 2), [[1, 0], [0, 1]], 1)
-    # Usual scatter for 3d input with spicified negative indices and axis
-    verify_scatter_elements((3, 3, 3), [[[-1, -3]]], -1)
-    # Usual scatter for 4d input
-    verify_scatter_elements((4, 3, 5, 6), [[[[2, 1, 0, 0]]]])
-    # Scatter elements with addition reduction of duplicates
-    verify_scatter_elements(
-        (3, 3, 3),
-        [[[0, 2, 1], [1, 1, 1], [2, 1, 0]], [[0, 2, 1], [1, 1, 1], [2, 1, 0]]],
-        0,
-        "add",
-    )
-    # Scatter elements with reduction and specified axis
-    verify_scatter_elements((3, 3, 3), [[[2, 2, 2], [1, 1, 1], [0, 0, 0]]], 2, "add")
-    # Scatter elements with multiplication reduction of duplicates
-    verify_scatter_elements(
-        (3, 3, 3),
-        [[[0, 2, 1], [1, 1, 1], [2, 1, 0]], [[0, 2, 1], [1, 1, 1], [2, 1, 0]]],
-        0,
-        "mul",
-    )
-    # TODO(vvchernov): min and max options are supported from 18 version, but CI supports 17 only
-    # # Scatter elements with min reduction of duplicates
-    # verify_scatter_elements(
-    #     (3, 3, 3),
-    #     [[[0, 2, 1], [1, 1, 1], [2, 1, 0]], [[0, 2, 1], [1, 1, 1], [2, 1, 0]]],
-    #     0,
-    #     "min",
-    # )
-    # # Scatter elements with max reduction of duplicates
-    # verify_scatter_elements(
-    #     (3, 3, 3),
-    #     [[[0, 2, 1], [1, 1, 1], [2, 1, 0]], [[0, 2, 1], [1, 1, 1], [2, 1, 0]]],
-    #     0,
-    #     "max",
-    # )
-
-
-@tvm.testing.parametrize_targets
-def test_slice(target, dev):
-    """test_slice"""
-
-    def _test_slice_iteration_v1(indata, outdata, starts, ends, axes=None):
-        if axes:
-            y = helper.make_node("Slice", ["in"], ["out"], axes=axes, starts=starts, ends=ends)
-        else:
-            y = helper.make_node("Slice", ["in"], ["out"], starts=starts, ends=ends)
-
-        graph = helper.make_graph(
-            [y],
-            "slice_test",
-            inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(indata.shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(outdata.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="slice_test")
-        verify_with_ort_with_inputs(
-            model, [indata], [outdata.shape], opset=1, target=target, dev=dev
-        )
-
-    def _test_slice_iteration_v10(indata, outdata, **attrs):
-        starts = attrs["starts"]
-        ends = attrs["ends"]
-        axes = None if "axes" not in attrs else attrs["axes"]
-        steps = None if "steps" not in attrs else attrs["steps"]
-        starts = np.asarray(starts)
-        ends = np.asarray(ends)
-        inputs = [
-            helper.make_tensor_value_info("data", TensorProto.FLOAT, list(indata.shape)),
-            helper.make_tensor_value_info("starts", TensorProto.INT64, list(starts.shape)),
-            helper.make_tensor_value_info("ends", TensorProto.INT64, list(ends.shape)),
-        ]
-        initializer = [
-            helper.make_tensor("starts", TensorProto.INT64, list(starts.shape), starts),
-            helper.make_tensor("ends", TensorProto.INT64, list(ends.shape), ends),
-        ]
-        nodes = []
-
-        if "add_noop_to_input_attrs" in attrs:
-
-            def add_noop_to_input_attr(attr_name, attr):
-                output_name = attr_name + "_output"
-
-                ref_shape = list(np.array(attr).shape)
-                ref_shape.insert(0, 1)
-                ref_shape = tuple(ref_shape)
-                ref_array = np.array(ref_shape)
-                ref_node = onnx.helper.make_node(
-                    "Constant",
-                    inputs=[],
-                    outputs=["ref_in_" + attr_name],
-                    value=onnx.helper.make_tensor(
-                        name="const_tensor__1_" + attr_name,
-                        data_type=onnx.TensorProto.INT64,
-                        dims=ref_array.shape,
-                        vals=ref_array.flatten().astype(int),
-                    ),
-                )
-                in_shape = np.array(attr).shape
-                in_array = np.array(in_shape)
-                ref_node2 = onnx.helper.make_node(
-                    "Constant",
-                    inputs=[],
-                    outputs=["input_shape_" + attr_name],
-                    value=onnx.helper.make_tensor(
-                        name="const_tensor__2_" + attr_name,
-                        data_type=onnx.TensorProto.INT64,
-                        dims=in_array.shape,
-                        vals=in_array.flatten().astype(int),
-                    ),
-                )
-
-                reshape1_node = helper.make_node(
-                    "Reshape", [attr_name, "ref_in_" + attr_name], ["reshape_" + attr_name]
-                )
-                reshape2_node = helper.make_node(
-                    "Reshape", ["reshape_" + attr_name, "input_shape_" + attr_name], [output_name]
-                )
-                return [ref_node, ref_node2, reshape1_node, reshape2_node]
-
-        slice_inputs = []
-        for attr_name in ["starts", "ends", "axes", "steps"]:
-            if attr_name not in attrs:
-                continue
-            if "add_noop_to_input_attrs" in attrs and attr_name in attrs["add_noop_to_input_attrs"]:
-                nodes.extend(add_noop_to_input_attr(attr_name, attrs[attr_name]))
-                slice_inputs.append(attr_name + "_output")
-            else:
-                slice_inputs.append(attr_name)
-
-        if axes:
-            axes = np.asarray(axes)
-            inputs.append(
-                helper.make_tensor_value_info("axes", TensorProto.INT64, list(axes.shape))
-            )
-            initializer.append(
-                helper.make_tensor("axes", TensorProto.INT64, list(axes.shape), axes)
-            )
-
-        if steps:
-            assert axes is not None and len(axes) == len(steps)
-            steps = np.asarray(steps)
-            inputs.append(
-                helper.make_tensor_value_info("steps", TensorProto.INT64, list(axes.shape))
-            )
-            initializer.append(
-                helper.make_tensor("steps", TensorProto.INT64, list(steps.shape), steps)
-            )
-
-        y = helper.make_node("Slice", ["data", *slice_inputs], ["out"])
-
-        nodes.append(y)
-        graph = helper.make_graph(
-            nodes,
-            "slice_test",
-            inputs=inputs,
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(outdata.shape))],
-            initializer=initializer,
-        )
-        model = helper.make_model(graph, producer_name="slice_test")
-        verify_with_ort_with_inputs(
-            model, [indata], opset=10, freeze_params=True, use_vm=True, target=target, dev=dev
-        )
-
-    x = np.random.randn(20, 10, 5).astype(np.float32)
-    _test_slice_iteration_v1(x, x[0:3, 0:10], starts=(0, 0), ends=(3, 10), axes=(0, 1))
-    _test_slice_iteration_v1(x, x[0:3, 0:10], starts=(0, 0), ends=(10, 3), axes=(1, 0))
-    _test_slice_iteration_v1(x, x[:, :, 3:4], starts=(0, 0, 3), ends=(20, 10, 4))
-    _test_slice_iteration_v1(x, x[:, 1:1000], starts=(1,), ends=(1000,), axes=(1,))
-    _test_slice_iteration_v1(x, x[:, 0:-1], starts=(0,), ends=(-1,), axes=(1,))
-    _test_slice_iteration_v10(x, x[0:3, 0:10], starts=(0, 0), ends=(3, 10), axes=(0, 1))
-    _test_slice_iteration_v10(x, x[0:3, 0:10], starts=(0, 0), ends=(10, 3), axes=(1, 0))
-    _test_slice_iteration_v10(x, x[:, :, 3:4], starts=(0, 0, 3), ends=(20, 10, 4))
-    _test_slice_iteration_v10(x, x[:, 1:1000], starts=(1,), ends=(1000,), axes=(1,))
-    _test_slice_iteration_v10(x, x[:, 0:-1], starts=(0,), ends=(-1,), axes=(1,))
-    _test_slice_iteration_v10(x, x[:, 0:-1], starts=(0,), ends=(-1,), axes=(-1,))
-    _test_slice_iteration_v10(
-        x,
-        x[0:3, 0:10],
-        starts=(0, 0),
-        ends=(3, 10),
-        axes=(0, 1),
-        add_noop_to_input_attrs=["starts"],
-    )
-    _test_slice_iteration_v10(
-        x, x[:, :, 3:4], starts=(0, 0, 3), ends=(20, 10, 4), add_noop_to_input_attrs=["ends"]
-    )
-    _test_slice_iteration_v10(
-        x, x[:, 1:1000], starts=(1,), ends=(1000,), axes=(1,), add_noop_to_input_attrs=["axes"]
-    )
-    _test_slice_iteration_v10(
-        x,
-        x[:, 0:-1],
-        starts=(0,),
-        ends=(-1,),
-        axes=(1,),
-        add_noop_to_input_attrs=["starts", "ends"],
-    )
-    _test_slice_iteration_v10(
-        x,
-        x[0:3, 0:10],
-        starts=(0, 0),
-        ends=(3, 10),
-        axes=(0, 1),
-        add_noop_to_input_attrs=["ends", "axes"],
-    )
-    _test_slice_iteration_v10(
-        x,
-        x[:, :, 3:4],
-        starts=(0, 0, 3),
-        ends=(20, 10, 4),
-        add_noop_to_input_attrs=["starts", "axes"],
-    )
-    _test_slice_iteration_v10(
-        x,
-        x[:, 1:1000],
-        starts=(1,),
-        ends=(1000,),
-        axes=(1,),
-        add_noop_to_input_attrs=["starts", "ends", "axes"],
-    )
-    x = np.random.randn(1, 1, 1, 128).astype(np.float32)
-    _test_slice_iteration_v10(
-        x, x, starts=(0, 0), ends=(9223372036854775807, 9223372036854775807), axes=(0, 3)
-    )
-
-    x = np.random.randn(4, 4).astype(np.float32)
-    _test_slice_iteration_v10(
-        x, x[:, 1::2], starts=(1,), ends=(9223372036854775807,), axes=(1,), steps=(2,)
-    )
-    _test_slice_iteration_v10(
-        x,
-        x[0::1, 1::2],
-        starts=(0, 1),
-        ends=(4, 4),
-        axes=(0, 1),
-        steps=(1, 2),
-    )
-
-
-def _test_onnx_op_elementwise(
-    target, dev, inshape, outfunc, npargs, dtype, opname, kwargs, opset=None, verify=True
-):
-    indata = np.random.uniform(-1, 1, size=inshape).astype(dtype)
-    outdata = outfunc(indata, **npargs)
-
-    y = helper.make_node(opname, ["in"], ["out"], **kwargs)
-
-    ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-
-    graph = helper.make_graph(
-        [y],
-        opname + "_test",
-        inputs=[helper.make_tensor_value_info("in", ONNX_DTYPE, list(indata.shape))],
-        outputs=[helper.make_tensor_value_info("out", ONNX_DTYPE, list(outdata.shape))],
-    )
-
-    model = helper.make_model(graph, producer_name=opname + "_test")
-    if verify:
-        verify_with_ort_with_inputs(
-            model, [indata], [outdata.shape], opset=opset, dtype=dtype, target=target, dev=dev
-        )
-    else:
-        get_tvm_output(
-            model,
-            [indata],
-            target,
-            dev,
-            [outdata.shape],
-            dtype,
-            opset=opset,
-            opt_level=3,
-        )
-
-
-@tvm.testing.parametrize_targets
-def test_floor(target, dev):
-    _test_onnx_op_elementwise(target, dev, (2, 4, 5, 6), np.floor, {}, "float32", "Floor", {})
-
-
-@tvm.testing.parametrize_targets
-def test_ceil(target, dev):
-    _test_onnx_op_elementwise(target, dev, (2, 4, 5, 6), np.ceil, {}, "float32", "Ceil", {})
-
-
-@tvm.testing.parametrize_targets
-def test_clip(target, dev):
-    """test_clip"""
-    _test_onnx_op_elementwise(
-        target,
-        dev,
-        (2, 4, 5, 6),
-        np.clip,
-        {"a_min": -1.0, "a_max": 1.0},
-        "float32",
-        "Clip",
-        {"min": -1.0, "max": 1.0},
-        opset=6,
-    )
-
-    _test_onnx_op_elementwise(
-        target,
-        dev,
-        (2, 4, 5, 6),
-        np.clip,
-        {"a_min": -np.inf, "a_max": 1.0},
-        "float32",
-        "Clip",
-        {"max": 1.0},
-        opset=6,
-    )
-
-    _test_onnx_op_elementwise(
-        target,
-        dev,
-        (2, 4, 5, 6),
-        np.clip,
-        {"a_min": -1.0, "a_max": np.inf},
-        "float32",
-        "Clip",
-        {"min": -1.0},
-        opset=6,
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_clip_min_max_as_inputs(target, dev):
-    """test_clip_min_max_as_inputs"""
-    input_shape = (2, 4, 5, 6)
-    nodes = [
-        make_constant_node("min", onnx.TensorProto.FLOAT, (), [0.0]),
-        make_constant_node("max", onnx.TensorProto.FLOAT, (), [6.0]),
-    ]
-    input_names = ["in", "min", "max"]
-    nodes.append(helper.make_node("Clip", inputs=input_names, outputs=["out"]))
-    graph = helper.make_graph(
-        nodes,
-        "clip_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(input_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(input_shape))],
-    )
-    model = helper.make_model(graph, producer_name="clip_test")
-
-    verify_with_ort(model, [input_shape], out_shape=[input_shape], target=target, dev=dev)
-
-
-@tvm.testing.parametrize_targets
-def test_round(target, dev):
-    _test_onnx_op_elementwise(target, dev, (2, 4, 5, 6), np.round, {}, "float32", "Round", {})
-    _test_onnx_op_elementwise(
-        target, dev, (2, 4, 5, 6), np.round, {}, "float64", "Round", {}, verify=False
-    )  # TODO: enable verification once ORT supports float64
-
-
-def _test_finite_ops(target, dev, inshape, outfunc, npargs, dtype, opname, kwargs):
-    indata = np.random.choice(a=[np.nan, np.inf, -np.inf, 0.5, 1.0, 0], size=inshape).astype(dtype)
-
-    outdata = outfunc(indata, **npargs)
-    y = helper.make_node(opname, ["in"], ["out"], **kwargs)
-
-    graph = helper.make_graph(
-        [y],
-        opname + "_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(indata.shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.BOOL, list(outdata.shape))],
-    )
-
-    model = helper.make_model(graph, producer_name=opname + "_test")
-    verify_with_ort_with_inputs(
-        model, [indata], [outdata.shape], dtype=dtype, target=target, dev=dev
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_isinf(target, dev):
-    _test_finite_ops(target, dev, (2, 4, 5, 6), np.isinf, {}, "float32", "IsInf", {})
-
-
-@tvm.testing.parametrize_targets
-def test_isnan(target, dev):
-    """test_isnan"""
-    _test_finite_ops(target, dev, (2, 4, 5, 6), np.isnan, {}, "float32", "IsNaN", {})
-
-
-@tvm.testing.parametrize_targets
-def test_gather_nd(target, dev):
-    """test_gather_nd"""
-
-    def verify_gather_nd(in_shape, indices, out_shape, dtype="float32", batch_dims=0, opset=11):
-        x = np.random.uniform(size=in_shape).astype(dtype)
-        indices = np.array(indices, dtype="int64")
-
-        y = helper.make_node("GatherND", ["in", "indices"], ["out"])
-
-        if opset >= 12:
-            batch_dims_attr = helper.make_attribute("batch_dims", batch_dims)
-            y.attribute.append(batch_dims_attr)
-
-        graph = helper.make_graph(
-            [y],
-            "gather_test",
-            inputs=[
-                helper.make_tensor_value_info(
-                    "in", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], list(in_shape)
-                ),
-                helper.make_tensor_value_info("indices", TensorProto.INT64, list(indices.shape)),
-            ],
-            outputs=[
-                helper.make_tensor_value_info(
-                    "out", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], list(out_shape)
-                )
-            ],
-        )
-        model = helper.make_model(graph, producer_name="gather_test")
-        verify_with_ort_with_inputs(
-            model, [x, indices], [out_shape], opset=opset, target=target, dev=dev
-        )
-
-    verify_gather_nd([2, 2], [[0, 0], [1, 1]], [2], "int32")
-    verify_gather_nd([2, 2], [[1], [0]], [2, 2])
-    verify_gather_nd([2, 2, 2], [[0, 1], [1, 0]], [2, 2])
-    verify_gather_nd([2, 2, 2], [[[0, 1]], [[1, 0]]], [2, 1, 2])
-
-    if is_version_greater_than("1.6.0"):
-        verify_gather_nd([2, 2, 2], [[1], [0]], [2, 2], batch_dims=1, opset=12)
-        verify_gather_nd(
-            (3, 2, 2, 3, 4),
-            np.random.randint(low=0, high=2, size=(3, 2, 3), dtype="int64"),
-            (3, 2),
-            batch_dims=2,
-            opset=12,
-        )
-
-
-@tvm.testing.parametrize_targets
-def test_onehot(target, dev):
-    """test_onehot"""
-    indices_shape = [10]
-    indices_array = np.random.randint(low=0, high=9, size=indices_shape, dtype="int32")
-    depth = 10
-    values = np.asarray([0, 1]).astype("int32")
-    out_np = np.eye(depth)[indices_array.reshape(-1)]
-
-    onehot_node = helper.make_node("OneHot", ["indices", "depth", "values"], ["out"])
-
-    graph = helper.make_graph(
-        [onehot_node],
-        "onehot_test",
-        inputs=[
-            helper.make_tensor_value_info("indices", TensorProto.INT32, indices_shape),
-            helper.make_tensor_value_info("depth", TensorProto.INT32, [1]),
-            helper.make_tensor_value_info("values", TensorProto.INT32, values.shape),
-        ],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.INT32, out_np.shape)],
-    )
-
-    model = helper.make_model(graph, producer_name="onehot_test")
-
-    # TODO(jwfromm): Replace test against np with test against onnxrt once we update versions.
-    tvm_out = get_tvm_output_with_vm(
-        model, [indices_array, np.array([depth]).astype("int32"), values], target, dev
-    )
-    tvm.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5)
-
-
-@tvm.testing.parametrize_targets
-def test_gemm(target, dev):
-    """test_gemm"""
-
-    def verify_gemm(a_shape, b_shape, c_shape=None, freeze_params=False, dtype="float32"):
-        out_shape = [a_shape[0], b_shape[1]]
-        a_array = np.random.uniform(size=a_shape).astype(dtype)
-        b_array = np.random.uniform(size=b_shape).astype(dtype)
-        input_names = ["a", "b"]
-        ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-        input_nodes = [
-            helper.make_tensor_value_info("a", ONNX_DTYPE, list(a_shape)),
-            helper.make_tensor_value_info("b", ONNX_DTYPE, list(b_shape)),
-        ]
-        input_values = [a_array, b_array]
-        if c_shape is not None:
-            c_array = np.random.uniform(size=c_shape).astype(dtype)
-            input_names.append("c")
-            input_nodes.append(helper.make_tensor_value_info("c", ONNX_DTYPE, list(c_shape)))
-            input_values.append(c_array)
-
-        gemm_node = helper.make_node("Gemm", input_names, ["out"])
-
-        graph = helper.make_graph(
-            [gemm_node],
-            "gemm_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("out", ONNX_DTYPE, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="gemm_test")
-        atol = 1e-5
-        rtol = 1e-5
-        if dtype == "float16":
-            atol = 1e-3
-            rtol = 1e-3
-        verify_with_ort_with_inputs(
-            model,
-            input_values,
-            freeze_params=freeze_params,
-            dtype=dtype,
-            atol=atol,
-            rtol=rtol,
-            target=target,
-            dev=dev,
-        )
-
-    verify_gemm(a_shape=(4, 3), b_shape=(3, 4))
-    verify_gemm(a_shape=(4, 3), b_shape=(3, 4), c_shape=(4,))
-    verify_gemm(a_shape=(4, 3), b_shape=(3, 4), c_shape=(4,), freeze_params=True)
-    verify_gemm(a_shape=(4, 3), b_shape=(3, 4), c_shape=(4,), freeze_params=True, dtype="float16")
-
-
-@tvm.testing.parametrize_targets
-def test_matmul(target, dev):
-    """test_matmul"""
-
-    def test_one_matmul(a_shape, b_shape):
-        out_shape = np.matmul(np.zeros(a_shape), np.zeros(b_shape)).shape
-
-        a_array = np.random.uniform(size=a_shape).astype("float32")
-        b_array = np.random.uniform(size=b_shape).astype("float32")
-
-        mul_node = helper.make_node("MatMul", ["a", "b"], ["out"])
-
-        graph = helper.make_graph(
-            [mul_node],
-            "matmul_test",
-            inputs=[
-                helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape)),
-                helper.make_tensor_value_info("b", TensorProto.FLOAT, list(b_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="matmul_test")
-        verify_with_ort_with_inputs(model, [a_array, b_array], target=target, dev=dev)
-
-    test_one_matmul((4, 3), (3, 4))
-    test_one_matmul((3,), (3, 1))
-    test_one_matmul((1, 3), (3,))
-    test_one_matmul((3,), (3,))
-
-
-@tvm.testing.parametrize_targets
-def test_batch_matmul(target, dev):
-    """test_batch_matmul"""
-
-    def verify_batch_matmul(a_shape, b_shape, out_shape, convert_config=None):
-        a_array = np.random.uniform(size=a_shape).astype("float32")
-        b_array = np.random.uniform(size=b_shape).astype("float32")
-
-        mul_node = helper.make_node("MatMul", ["a", "b"], ["out"])
-
-        graph = helper.make_graph(
-            [mul_node],
-            "matmul_test",
-            inputs=[
-                helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape)),
-                helper.make_tensor_value_info("b", TensorProto.FLOAT, list(b_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, out_shape)],
-        )
-
-        model = helper.make_model(graph, producer_name="matmul_test")
-        verify_with_ort_with_inputs(
-            model,
-            [a_array, b_array],
-            use_vm=True,
-            target=target,
-            dev=dev,
-            convert_config=convert_config,
-        )
-
-    verify_batch_matmul((2, 3, 4, 3), (2, 3, 3, 4), (2, 3, 4, 4))
-    verify_batch_matmul((2, 4, 3), (3, 4), (2, 4, 4))
-    verify_batch_matmul((2, 3, 4, 3), (3, 4), (2, 3, 4, 4))
-    # Test implicit broadcasting.
-    verify_batch_matmul((5,), (5, 5, 4), (5, 4))
-    verify_batch_matmul((5, 4, 5), (5,), (5, 4))
-    verify_batch_matmul((4, 3), (2, 3, 4), (2, 4, 4))
-    verify_batch_matmul((2, 4, 3), (1, 3, 4), (2, 4, 4))
-    verify_batch_matmul((1, 4, 3), (2, 3, 4), (2, 4, 4))
-    verify_batch_matmul((4, 32, 16), (16, 32), (4, 32, 32))
-    verify_batch_matmul((4, 32, 16, 32), (32, 16), (4, 32, 16, 16))
-    verify_batch_matmul((4, 32, 16, 32), (1, 32, 32, 16), (4, 32, 16, 16))
-    verify_batch_matmul((4, 1, 16, 32), (1, 32, 32, 16), (4, 32, 16, 16))
-    # Test transb=False
-    verify_batch_matmul(
-        (2, 3, 4, 3),
-        (2, 3, 3, 4),
-        (2, 3, 4, 4),
-        convert_config={"use_nt_batch_matmul": False},
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_use_nt_batch_matmul(target, dev):
-    """test_use_nt_batch_matmul"""
-    a_shape = (2, 3, 4)
-    b_shape = (2, 4, 3)
-    out_shape = [2, 3, 3]
-    a_array = np.random.uniform(size=a_shape).astype("float32")
-    b_array = np.random.uniform(size=b_shape).astype("float32")
-
-    for use_nt_batch_matmul in [True, False]:
-        mul_node = helper.make_node("MatMul", ["a", "b"], ["out"])
-
-        graph = helper.make_graph(
-            [mul_node],
-            "matmul_test",
-            inputs=[
-                helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape)),
-                helper.make_tensor_value_info("b", TensorProto.FLOAT, list(b_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="matmul_test")
-        _, shape_dict = get_input_data_shape_dict(model, [a_array, b_array])
-
-        mod, _ = relay.frontend.from_onnx(
-            model, shape_dict, convert_config={"use_nt_batch_matmul": use_nt_batch_matmul}
-        )
-        has_transpose_op = "transpose" in str(mod)
-        # use_nt_batch_matmul implies, TVM converts qualified onnx `matmul`
-        # to `transpose(weight) + nn.batch_matmul_NT`, otherwise to `nn.batch_matmul`
-        assert has_transpose_op == use_nt_batch_matmul
-
-
-@tvm.testing.parametrize_targets
-def test_matmulinteger16(target, dev):
-    """test_matmulinteger16"""
-
-    def verify_matmulinteger16(a_shape, b_shape, out_shape):
-        a_dtype = "int16"
-        b_dtype = "int16"
-        low = np.iinfo(np.int16).min
-        high = np.iinfo(np.int16).max
-
-        a_proto = TensorProto.INT16
-        b_proto = TensorProto.INT16
-        out_proto = TensorProto.INT32
-        a_array = np.random.randint(low, high, size=a_shape).astype(a_dtype)
-        b_array = np.random.randint(low, high, size=b_shape).astype(b_dtype)
-
-        mul_node = helper.make_node("MatMulInteger16", ["a", "b"], ["out"], domain="com.microsoft")
-
-        graph = helper.make_graph(
-            [mul_node],
-            "matmuli16_test",
-            inputs=[
-                helper.make_tensor_value_info("a", a_proto, list(a_shape)),
-                helper.make_tensor_value_info("b", b_proto, list(b_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", out_proto, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="matmuli16_test")
-        verify_with_ort_with_inputs(model, [a_array, b_array], target=target, dev=dev)
-
-    # 2D computation to verify matmul op
-    verify_matmulinteger16((4, 3), (3, 4), (4, 4))
-    verify_matmulinteger16((5, 7), (7, 8), (5, 8))
-    # Verify 3D matmul using batch_matmul op
-    verify_matmulinteger16((2, 4, 3), (1, 3, 4), (2, 4, 4))
-    verify_matmulinteger16((1, 4, 3), (2, 3, 4), (2, 4, 4))
-    # Test implicit broadcasting
-    verify_matmulinteger16((2, 3, 5, 3), (2, 3, 3, 5), (2, 3, 5, 5))
-    verify_matmulinteger16((2, 7, 3), (3, 7), (2, 7, 7))
-    verify_matmulinteger16((2, 3, 4, 3), (3, 4), (2, 3, 4, 4))
-
-
-def verify_simple_dynamic_model(a_shape, b_shape, target, dev):
-    """verify_simple_dynamic_model"""
-
-    def verify_model(model, a_shape, b_shape):
-        a_array = np.random.uniform(size=a_shape).astype("float32")
-        b_array = np.random.uniform(size=b_shape).astype("float32")
-        # matmul
-        out_np = np.matmul(a_array, b_array)
-        # relu
-        out_np[out_np < 0] = 0
-
-        tvm_out = model(a_array, b_array).numpy()
-        tvm.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5)
-
-    mul_node = helper.make_node("MatMul", ["a", "b"], ["out"])
-    relu_node = helper.make_node("Relu", ["out"], ["relu"])
-
-    a_array = np.random.uniform(size=a_shape).astype("float32")
-    b_array = np.random.uniform(size=b_shape).astype("float32")
-    # matmul
-    out_np = np.matmul(a_array, b_array)
-
-    graph = helper.make_graph(
-        [mul_node, relu_node],
-        "matmul_test",
-        inputs=[
-            helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape)),
-            helper.make_tensor_value_info("b", TensorProto.FLOAT, list(b_shape)),
-        ],
-        outputs=[helper.make_tensor_value_info("relu", TensorProto.FLOAT, list(out_np.shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="matmul_test")
-
-    a_anys = [relay.Any()] * len(a_shape)
-    b_anys = [relay.Any()] * len(b_shape)
-
-    mod, _ = relay.frontend.from_onnx(model, {"a": a_anys, "b": b_anys})
-    model = relay.create_executor("vm", mod=mod, device=dev, target=target).evaluate()
-    verify_model(model, a_shape, b_shape)
-    verify_model(model, [a * 2 for a in a_shape], [b * 2 for b in b_shape])
-    verify_model(model, [a * 3 for a in a_shape], [b * 3 for b in b_shape])
-
-
-# TODO(mbrookhart, electriclilies): Add CUDA as a target once batch matmul is fixed
-@tvm.testing.parametrize_targets("llvm")
-def test_batch_matmul_dynamic_model(target, dev):
-    verify_simple_dynamic_model((2, 3, 4, 3), (2, 3, 3, 4), target, dev)
-    verify_simple_dynamic_model((2, 4, 3), (3, 4), target, dev)
-    verify_simple_dynamic_model((2, 3, 4, 3), (3, 4), target, dev)
-
-
-@tvm.testing.parametrize_targets
-def test_lrn(target, dev):
-    """test_lrn"""
-
-    def verify_lrn(shape, nsize, dtype, alpha=None, beta=None, bias=None):
-        in_array = np.random.uniform(size=shape).astype(dtype)
-
-        if alpha is None and beta is None and bias is None:
-            alpha = 0.0001
-            beta = 0.75
-            bias = 1.0
-            node = onnx.helper.make_node("LRN", inputs=["in"], outputs=["out"], size=nsize)
-        else:
-            node = onnx.helper.make_node(
-                "LRN", inputs=["in"], outputs=["out"], alpha=alpha, beta=beta, bias=bias, size=nsize
-            )
-
-        graph = helper.make_graph(
-            [node],
-            "lrn_test",
-            inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(shape))],
-        )
-        model = helper.make_model(graph, producer_name="lrn_test")
-        verify_with_ort_with_inputs(model, [in_array], target=target, dev=dev)
-
-    verify_lrn((5, 5, 5, 5), 3, "float32")
-    verify_lrn((5, 5, 5, 5), 3, "float32", alpha=0.0002, beta=0.5, bias=2.0)
-
-
-@tvm.testing.parametrize_targets
-def test_instance_norm(target, dev):
-    """test_instance_norm"""
-
-    def verify_instance_norm(shape, axis=1):
-        x = np.random.randn(*shape).astype(np.float32)
-        gamma = np.random.randn(shape[1]).astype(np.float32)
-        beta = np.random.randn(shape[1]).astype(np.float32)
-        epsilon = 1e-5
-
-        node = onnx.helper.make_node(
-            "InstanceNormalization",
-            inputs=["x", "gamma", "beta"],
-            outputs=["y"],
-            epsilon=epsilon,
-        )
-        graph = helper.make_graph(
-            [node],
-            "instance_norm_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(shape)),
-                helper.make_tensor_value_info("gamma", TensorProto.FLOAT, (shape[1],)),
-                helper.make_tensor_value_info("beta", TensorProto.FLOAT, (shape[1],)),
-            ],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(shape))],
-        )
-        model = helper.make_model(graph, producer_name="instance_norm_test")
-        verify_with_ort_with_inputs(
-            model, [x, gamma, beta], out_shape=[shape], target=target, dev=dev
-        )
-
-    verify_instance_norm((2, 3, 4, 5))
-    verify_instance_norm((32, 64, 80, 64))
-    verify_instance_norm((8, 6, 5))
-    verify_instance_norm((8, 7, 6, 5, 4))
-
-
-@tvm.testing.parametrize_targets
-def test_upsample_nearest(target, dev):
-    """test_upsample_nearest"""
-    scale = 2
-    in_shape = (1, 1, 3, 3)
-    out_shape = (1, 1, 3 * scale, 3 * scale)
-    y = helper.make_node("Upsample", ["in"], ["out"], mode="nearest", scales=[1.0, 1.0, 2.0, 2.0])
-
-    in_array = np.random.uniform(size=in_shape).astype(np.float32)
-
-    graph = helper.make_graph(
-        [y],
-        "upsample_nearest_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="upsample_nearest_test")
-    verify_with_ort_with_inputs(model, [in_array], [out_shape], opset=7, target=target, dev=dev)
-
-
-@tvm.testing.parametrize_targets
-def test_upsample_nearest_default(target, dev):
-    """test_upsample_nearest_default"""
-    scale = 2
-    in_shape = (1, 1, 3, 3)
-    out_shape = (1, 1, 3 * scale, 3 * scale)
-    y = helper.make_node("Upsample", ["in"], ["out"], scales=[1.0, 1.0, 2.0, 2.0])
-
-    in_array = np.random.uniform(size=in_shape).astype(np.float32)
-
-    graph = helper.make_graph(
-        [y],
-        "upsample_nearest_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="upsample_nearest_test")
-    verify_with_ort_with_inputs(model, [in_array], [out_shape], opset=7, target=target, dev=dev)
-
-
-@tvm.testing.parametrize_targets
-def test_upsample3d_nearest(target, dev):
-    """test_upsample3d_nearest"""
-    scale = 2
-    in_shape = (1, 1, 3, 3, 3)
-    out_shape = (1, 1, 3 * scale, 3 * scale, 3 * scale)
-    y = helper.make_node(
-        "Upsample", ["in"], ["out"], mode="nearest", scales=[1.0, 1.0, 2.0, 2.0, 2.0]
-    )
-
-    in_array = np.random.uniform(size=in_shape).astype(np.float32)
-
-    graph = helper.make_graph(
-        [y],
-        "upsample_nearest_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="upsample_nearest_test")
-    # Upsample is deprecated after opset 9
-    verify_with_ort_with_inputs(model, [in_array], [out_shape], opset=7, target=target, dev=dev)
-
-
-@tvm.testing.parametrize_targets
-def test_upsample_bilinear(target, dev):
-    """test_upsample_bilinear"""
-    scale = 2
-    in_shape = (1, 1, 3, 3)
-    out_shape = (1, 1, 3 * scale, 3 * scale)
-    y = helper.make_node("Upsample", ["in"], ["out"], mode="linear", scales=[1.0, 1.0, 2.0, 2.0])
-
-    in_array = np.random.uniform(size=in_shape).astype(np.float32)
-
-    graph = helper.make_graph(
-        [y],
-        "upsample_bilinear_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="upsample_bilinear_test")
-    verify_with_ort_with_inputs(model, [in_array], [out_shape], opset=7, target=target, dev=dev)
-
-
-@tvm.testing.parametrize_targets
-def test_upsample3d_trilinear(target, dev):
-    """test_upsample3d_trilinear"""
-    scale = 2
-    in_shape = (1, 1, 3, 3, 3)
-    out_shape = (1, 1, 3 * scale, 3 * scale, 3 * scale)
-    y = helper.make_node("Upsample", ["in", "scales"], ["out"], mode="linear")
-    scales = [1.0, 1.0, 2.0, 2.0, 2.0]
-    in_array = np.random.uniform(size=in_shape).astype(np.float32)
-    out_array = tvm.topi.testing.resize3d_python(
-        in_array,
-        (scale, scale, scale),
-        "NCDHW",
-        "linear",
-        coordinate_transformation_mode="asymmetric",
-    )
-
-    ref_array = np.array(scales)
-    ref_node = helper.make_node(
-        "Constant",
-        inputs=[],
-        outputs=["scales"],
-        value=onnx.helper.make_tensor(
-            name="const_tensor",
-            data_type=TensorProto.FLOAT,
-            dims=ref_array.shape,
-            vals=ref_array.flatten().astype(float),
-        ),
-    )
-
-    graph = helper.make_graph(
-        [ref_node, y],
-        "upsample_trilinear_test",
-        inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
-        outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="upsample_trilinear_test")
-    # TODO(jwfromm): Trilinear upsampling not supported in 1.0.0 onnxruntime.
-    # Replace topi comparison with verify_with_ort once we update.
-    tvm_out = get_tvm_output(model, in_array, target, dev, out_shape, "float32")
-    tvm.testing.assert_allclose(out_array, tvm_out, rtol=1e-5, atol=1e-5)
-
-
-# TODO: Fix softmax with dynamic input on cuda and enable this test
-@tvm.testing.known_failing_targets("cuda")
-@tvm.testing.parametrize_targets
-def test_softmax(target, dev):
-    """test_softmax"""
-
-    def verify_softmax(inshape, axis, opset=None, dynamic=False):
-        opname = "Softmax"
-        outshape = inshape
-        node_list = []
-        input_node_list = [helper.make_tensor_value_info("in", TensorProto.FLOAT, list(inshape))]
-        output_node_list = [helper.make_tensor_value_info("out", TensorProto.FLOAT, list(outshape))]
-        input_list = [np.random.uniform(size=inshape).astype(np.float32)]
-        softmax_inputs = ["in"]
-
-        if dynamic:
-            input_node_list.append(
-                helper.make_tensor_value_info("shape", TensorProto.INT64, [len(inshape)])
-            )
-            input_list.append(np.asarray(inshape))
-            reshape_node = helper.make_node("Reshape", ["in", "shape"], ["dynamic_in"])
-            softmax_inputs[0] = "dynamic_in"
-            node_list += [reshape_node]
-
-        y = helper.make_node(opname, softmax_inputs, ["out"])
-        if axis is not None:
-            axis_attr = helper.make_attribute("axis", axis)
-            y.attribute.append(axis_attr)
-        node_list.append(y)
-
-        graph = helper.make_graph(
-            node_list,
-            opname + "_test",
-            inputs=input_node_list,
-            outputs=output_node_list,
-        )
-
-        model = helper.make_model(graph, producer_name=opname + "_test")
-        verify_with_ort_with_inputs(
-            model, input_list, use_vm=True, opset=opset, target=target, dev=dev
-        )
-
-    verify_softmax((1, 10), None)
-    verify_softmax((1, 10), 1)
-    verify_softmax((1, 2, 3, 10), 0)
-    verify_softmax((1, 2, 3, 10), 2)
-    verify_softmax((1, 2, 3, 4, 10), 3)
-    verify_softmax((1, 2, 3, 4, 10), 4)
-    verify_softmax((1, 10), -1, dynamic=True)
-    verify_softmax((1, 2, 3, 10), -1, dynamic=True)
-    verify_softmax((1, 10), -1, opset=8, dynamic=True)
-    verify_softmax((1, 2, 3, 10), -1, opset=8, dynamic=True)
-
-
-@tvm.testing.parametrize_targets
-def test_forward_min(target, dev):
-    """test_forward_min"""
-
-    def verify_min(input_dim):
-        dtype = "float32"
-
-        a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-        a_np2 = np.random.uniform(size=input_dim).astype(dtype)
-        a_np3 = np.random.uniform(size=input_dim).astype(dtype)
-
-        min_node = helper.make_node("Min", ["a_np1", "a_np2", "a_np3"], ["out"])
-
-        graph = helper.make_graph(
-            [min_node],
-            "Min_test",
-            inputs=[
-                helper.make_tensor_value_info("a_np1", TensorProto.FLOAT, list(input_dim)),
-                helper.make_tensor_value_info("a_np2", TensorProto.FLOAT, list(input_dim)),
-                helper.make_tensor_value_info("a_np3", TensorProto.FLOAT, list(input_dim)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(input_dim))],
-        )
-
-        model = helper.make_model(graph, producer_name="Min_test")
-        verify_with_ort_with_inputs(model, [a_np1, a_np2, a_np3], target=target, dev=dev)
-
-    verify_min((1, 3, 20, 20))
-    verify_min((20, 20))
-
-
-@tvm.testing.parametrize_targets
-def test_forward_max(target, dev):
-    """test_forward_max"""
-
-    def verify_max(input_dim):
-        dtype = "float32"
-
-        a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-        a_np2 = np.random.uniform(size=input_dim).astype(dtype)
-        a_np3 = np.random.uniform(size=input_dim).astype(dtype)
-
-        max_node = helper.make_node("Max", ["a_np1", "a_np2", "a_np3"], ["out"])
-
-        graph = helper.make_graph(
-            [max_node],
-            "Max_test",
-            inputs=[
-                helper.make_tensor_value_info("a_np1", TensorProto.FLOAT, list(input_dim)),
-                helper.make_tensor_value_info("a_np2", TensorProto.FLOAT, list(input_dim)),
-                helper.make_tensor_value_info("a_np3", TensorProto.FLOAT, list(input_dim)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(input_dim))],
-        )
-
-        model = helper.make_model(graph, producer_name="Max_test")
-        verify_with_ort_with_inputs(model, [a_np1, a_np2, a_np3], target=target, dev=dev)
-
-    verify_max((1, 3, 20, 20))
-    verify_max((20, 20))
-
-
-@tvm.testing.parametrize_targets
-def test_forward_mean(target, dev):
-    """test_forward_mean"""
-
-    def verify_mean(input_dim):
-        dtype = "float32"
-
-        a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-        a_np2 = np.random.uniform(size=input_dim).astype(dtype)
-        a_np3 = np.random.uniform(size=input_dim).astype(dtype)
-
-        mean_node = helper.make_node("Mean", ["a_np1", "a_np2", "a_np3"], ["out"])
-
-        graph = helper.make_graph(
-            [mean_node],
-            "Mean_test",
-            inputs=[
-                helper.make_tensor_value_info("a_np1", TensorProto.FLOAT, list(input_dim)),
-                helper.make_tensor_value_info("a_np2", TensorProto.FLOAT, list(input_dim)),
-                helper.make_tensor_value_info("a_np3", TensorProto.FLOAT, list(input_dim)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(input_dim))],
-        )
-
-        model = helper.make_model(graph, producer_name="Mean_test")
-        verify_with_ort_with_inputs(model, [a_np1, a_np2, a_np3], target=target, dev=dev)
-
-    verify_mean((1, 3, 20, 20))
-    verify_mean((20, 20))
-
-
-@tvm.testing.parametrize_targets
-def test_forward_hardsigmoid(target, dev):
-    """test_forward_hardsigmoid"""
-
-    def verify_hardsigmoid(input_dim, alpha, beta):
-        dtype = "float32"
-
-        a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-
-        hardsigmoid_node = helper.make_node(
-            "HardSigmoid", ["a_np1"], ["out"], alpha=alpha, beta=beta
-        )
-
-        graph = helper.make_graph(
-            [hardsigmoid_node],
-            "HardSigmoid_test",
-            inputs=[helper.make_tensor_value_info("a_np1", TensorProto.FLOAT, list(input_dim))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(input_dim))],
-        )
-
-        model = helper.make_model(graph, producer_name="HardSigmoid_test")
-        verify_with_ort_with_inputs(model, [a_np1], target=target, dev=dev)
-
-    verify_hardsigmoid((1, 3, 20, 20), 0.5, 0.6)
-    verify_hardsigmoid((20, 20), 0.3, 0.4)
-
-
-# TODO (mbrookhart, electriclilies) Fix argmin on GPU and enable this test
-@tvm.testing.known_failing_targets("cuda")
-@tvm.testing.parametrize_targets
-def test_forward_arg_min_max(target, dev):
-    """test_forward_arg_min_max"""
-
-    def verify_argreduce(input_dim, op_name, axis=None, keepdims=None):
-        a_np1 = np.random.uniform(-10, 10, input_dim).astype(np.int32)
-        out_shape = list(a_np1.shape)
-        def_axis = axis if axis is not None else 0
-        if keepdims == 1 or keepdims is None:
-            out_shape[def_axis] = 1
-        else:
-            out_shape.pop(def_axis)
-
-        node = onnx.helper.make_node(op_name, inputs=["a_np1"], outputs=["out"])
-
-        if keepdims is not None:
-            keepdims_attr = helper.make_attribute("keepdims", keepdims)
-            node.attribute.append(keepdims_attr)
-        if axis is not None:
-            axis_attr = helper.make_attribute("axis", axis)
-            node.attribute.append(axis_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "argreduce_test",
-            inputs=[helper.make_tensor_value_info("a_np1", TensorProto.INT32, list(a_np1.shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.INT64, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="argreduce_test")
-        verify_with_ort_with_inputs(model, [a_np1], target=target, dev=dev)
-
-    # Verify argmin and argmax
-    verify_argreduce([3, 4, 4], "ArgMin")
-    verify_argreduce([3, 4, 4], "ArgMax")
-    verify_argreduce([3, 4, 4], "ArgMin", axis=1)
-    verify_argreduce([3, 4, 4], "ArgMax", axis=0)
-    verify_argreduce([3, 4, 4], "ArgMin", keepdims=0)
-    verify_argreduce([3, 4, 4], "ArgMax", keepdims=1)
-    for axis in [None, 0, 1, 2]:
-        for keepdims in [None, True, False]:
-            verify_argreduce([3, 4, 4], "ArgMin", axis, keepdims)
-            verify_argreduce([3, 4, 4], "ArgMax", axis, keepdims)
-
-
-@tvm.testing.parametrize_targets
-def test_constantofshape(target, dev):
-    """test_constantofshape"""
-
-    def verify_constantofshape(input_dim, value, dtype):
-        fill_node = helper.make_node(
-            "ConstantOfShape",
-            ["input"],
-            ["output"],
-            value=helper.make_tensor(
-                "value", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], (1,), (value,)
-            ),
-        )
-
-        inputs = [helper.make_tensor_value_info("input", TensorProto.INT64, [len(input_dim)])]
-
-        graph = helper.make_graph(
-            [fill_node],
-            "fill_test",
-            inputs,
-            outputs=[
-                helper.make_tensor_value_info(
-                    "output", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], input_dim
-                )
-            ],
-        )
-
-        model = helper.make_model(graph, producer_name="fill_test")
-        input_np = np.array(input_dim).astype("int64")
-        verify_with_ort_with_inputs(model, [input_np], use_vm=True, target=target, dev=dev)
-
-    verify_constantofshape((2, 3, 4, 5), 10, "float32")
-    verify_constantofshape((3, 3), 0, "int32")
-    verify_constantofshape((1, 2, 3), -1, "float32")
-
-
-@tvm.testing.parametrize_targets
-def test_pad(target, dev):
-    """test_pad"""
-
-    def verify_pad(indata, pads, mode="constant", value=0.0):
-        indata = np.array(indata).astype(np.float32)
-        #  numpy expect result
-        len_dim = len(pads) // 2
-        np_pads = [(pads[i], pads[i + len_dim]) for i in range(len_dim)]
-        #  onnx graph
-        if mode in ["edge", "reflect"]:
-            outdata = np.pad(indata, pad_width=np_pads, mode=mode)
-            node = helper.make_node(
-                "Pad",
-                inputs=["input"],
-                outputs=["output"],
-                mode=mode,
-                pads=pads,
-            )
-        else:
-            outdata = np.pad(indata, pad_width=np_pads, mode="constant", constant_values=value)
-            node = helper.make_node(
-                "Pad", inputs=["input"], outputs=["output"], mode="constant", pads=pads, value=value
-            )
-        graph = helper.make_graph(
-            [node],
-            "pad_test",
-            inputs=[helper.make_tensor_value_info("input", TensorProto.FLOAT, list(indata.shape))],
-            outputs=[
-                helper.make_tensor_value_info("output", TensorProto.FLOAT, list(outdata.shape))
-            ],
-        )
-        model = helper.make_model(graph, producer_name="pad_test")
-        verify_with_ort_with_inputs(
-            model, [indata], [outdata.shape], dtype="float32", opset=2, target=target, dev=dev
-        )
-
-    def verify_pad_v11(indata, pads, mode="constant", value=0.0):
-        indata = np.array(indata).astype(np.float32)
-        #  numpy expect result
-        len_dim = len(pads) // 2
-        np_pads = [(pads[i], pads[i + len_dim]) for i in range(len_dim)]
-        pads = np.array(pads)
-        #  onnx graph
-        if mode in ["edge", "reflect"]:
-            inputs = [indata]
-            outdata = np.pad(indata, pad_width=np_pads, mode=mode)
-            node = helper.make_node("Pad", inputs=["input", "pads"], outputs=["output"], mode=mode)
-            graph = helper.make_graph(
-                [node],
-                "pad_test",
-                inputs=[
-                    helper.make_tensor_value_info("input", TensorProto.FLOAT, list(indata.shape)),
-                    helper.make_tensor_value_info("pads", TensorProto.INT64, (len(pads),)),
-                ],
-                initializer=[helper.make_tensor("pads", TensorProto.INT64, (len(pads),), pads)],
-                outputs=[
-                    helper.make_tensor_value_info("output", TensorProto.FLOAT, list(outdata.shape))
-                ],
-            )
-        else:
-            inputs = [indata]
-            outdata = np.pad(indata, pad_width=np_pads, mode="constant", constant_values=value)
-            node = helper.make_node(
-                "Pad",
-                inputs=["input", "pads", "constant_value"],
-                outputs=["output"],
-                mode="constant",
-            )
-            graph = helper.make_graph(
-                [node],
-                "pad_test",
-                inputs=[
-                    helper.make_tensor_value_info("input", TensorProto.FLOAT, list(indata.shape)),
-                    helper.make_tensor_value_info("pads", TensorProto.INT64, (len(pads),)),
-                    helper.make_tensor_value_info("constant_value", TensorProto.FLOAT, (1,)),
-                ],
-                initializer=[
-                    helper.make_tensor("pads", TensorProto.INT64, (len(pads),), pads),
-                    helper.make_tensor("constant_value", TensorProto.FLOAT, (1,), [value]),
-                ],
-                outputs=[
-                    helper.make_tensor_value_info("output", TensorProto.FLOAT, list(outdata.shape))
-                ],
-            )
-        model = helper.make_model(graph, producer_name="pad_test")
-        verify_with_ort_with_inputs(model, inputs, opset=11, use_vm=True, target=target, dev=dev)
-
-    verify_pad(np.random.randn(2, 2).astype(np.float32), [0, 1, 0, 0], "constant", 0.0)
-    verify_pad(np.random.randn(2, 3).astype(np.float32), [1, 0, 0, 1], "constant", 0.0)
-    verify_pad(np.random.randn(3, 2).astype(np.float32), [0, 0, 1, 0], "constant", 5.0)
-    verify_pad(np.random.randn(1, 3, 4, 5).astype(np.float32), [0, 0, 1, 1, 0, 0, 1, 1], "edge")
-    verify_pad(np.random.randn(1, 3, 4, 5).astype(np.float32), [0, 0, 1, 1, 0, 0, 1, 1], "reflect")
-
-    verify_pad_v11(np.random.randn(2, 2).astype(np.float32), [0, 1, 0, 0], "constant", 0.0)
-    verify_pad_v11(np.random.randn(2, 3).astype(np.float32), [1, 0, 0, 1], "constant", 0.0)
-    verify_pad_v11(np.random.randn(3, 2).astype(np.float32), [0, 0, 1, 0], "constant", 5.0)
-    verify_pad_v11(np.random.randn(1, 3, 4, 5).astype(np.float32), [0, 0, 1, 1, 0, 0, 1, 1], "edge")
-    verify_pad_v11(
-        np.random.randn(1, 3, 4, 5).astype(np.float32), [0, 0, 1, 1, 0, 0, 1, 1], "reflect"
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_all_reduce_funcs(target, dev):
-    """test_all_reduce_funcs"""
-
-    def verify_reduce_func(func, data, axis, keepdims):
-        inshape = data.shape
-        outshape = np.sum(data, axis=axis, keepdims=keepdims == 1).shape
-
-        if axis:
-            node = onnx.helper.make_node(
-                func, inputs=["x"], outputs=["y"], axes=axis, keepdims=keepdims
-            )
-        else:
-            node = onnx.helper.make_node(func, inputs=["x"], outputs=["y"], keepdims=keepdims)
-
-        graph = helper.make_graph(
-            [node],
-            "reduce_test",
-            inputs=[helper.make_tensor_value_info("x", TensorProto.FLOAT, list(inshape))],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(outshape))],
-        )
-
-        model = helper.make_model(graph, producer_name="reduce_test")
-
-        verify_with_ort_with_inputs(
-            model,
-            [data],
-            [outshape],
-            opset=11,
-            target=target,
-            dev=dev,
-            rtol=1e-4,
-            atol=1e-4,
-        )
-
-    funcs = [
-        "ReduceMax",
-        "ReduceMean",
-        "ReduceMin",
-        "ReduceProd",
-        "ReduceSum",
-        "ReduceSumSquare",
-        "ReduceLogSum",
-        "ReduceLogSumExp",
-        "ReduceL1",
-        "ReduceL2",
-    ]
-
-    for func in funcs:
-        verify_reduce_func(func, np.array(1.0).astype(np.float32), axis=None, keepdims=False)
-
-        for keepdims in [True, False]:
-            verify_reduce_func(
-                func, np.random.randn(3, 2, 2).astype(np.float32), axis=None, keepdims=keepdims
-            )
-
-            verify_reduce_func(
-                func, np.random.randn(3, 2, 3).astype(np.float32), axis=None, keepdims=keepdims
-            )
-
-            verify_reduce_func(
-                func, np.random.randn(3, 3, 3).astype(np.float32), axis=(1,), keepdims=keepdims
-            )
-
-            verify_reduce_func(
-                func, np.random.randn(3, 3, 3, 1).astype(np.float32), axis=(1, 2), keepdims=keepdims
-            )
-
-            verify_reduce_func(
-                func, np.random.randn(3, 3, 3, 1).astype(np.float32), axis=(1,), keepdims=keepdims
-            )
-
-            verify_reduce_func(
-                func, np.random.randn(1, 3, 4, 1).astype(np.float32), axis=(1,), keepdims=keepdims
-            )
-
-
-@tvm.testing.parametrize_targets
-def test_split(target, dev):
-    """test_split"""
-
-    def verify_split(indata, outdatas, split, axis=0, pass_split=True, opset=11):
-        indata = np.array(indata).astype(np.float32)
-        outdatas = [np.array(o).astype(np.float32) for o in outdatas]
-        inputs = [helper.make_tensor_value_info("input", TensorProto.FLOAT, list(indata.shape))]
-        input_names = ["input"]
-        initializer = []
-
-        if split:
-            split_index = range(len(split))
-        else:
-            split_index = range(len(outdatas))
-
-        if pass_split:
-            if opset >= 13:
-                input_names.append("split")
-                np_split = np.array(split).astype(np.int64)
-                inputs.append(
-                    helper.make_tensor_value_info("split", TensorProto.INT64, list(np_split.shape))
-                )
-                # TODO(mbrookhart): Support dynamic split, edit this test case to remove split from
-                # the initializer and add it back to the input data
-                indata = [indata]  # , np_split]
-                initializer.append(
-                    helper.make_tensor("split", TensorProto.INT64, list(np_split.shape), np_split)
-                )
-        node = helper.make_node(
-            "Split",
-            inputs=input_names,
-            outputs=[f"output_{i}" for i in range(len(split_index))],
-            axis=axis,
-        )
-
-        if pass_split and opset < 13:
-            split_attr = helper.make_attribute("split", split)
-            node.attribute.append(split_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "split_test",
-            inputs=inputs,
-            initializer=initializer,
-            outputs=[
-                helper.make_tensor_value_info(
-                    f"output_{i}", TensorProto.FLOAT, list(outdatas[i].shape)
-                )
-                for i in range(len(split_index))
-            ],
-        )
-        model = helper.make_model(graph, producer_name="split_test")
-        verify_with_ort_with_inputs(
-            model,
-            indata,
-            out_shape=list(range(len(split_index))),
-            opset=opset,
-            target=target,
-            dev=dev,
-            use_vm=True,
-            freeze_params=(opset >= 13),
-        )
-
-    # 1D
-    verify_split([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], [2, 2, 2], 0)
-    verify_split(
-        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], [2, 2, 2], 0, False
-    )
-    verify_split([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], [[1.0, 2.0], [3.0], [4.0, 5.0, 6.0]], [2, 1, 3], 0)
-    verify_split(
-        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], [[1.0, 2.0], [3.0], [4.0, 5.0, 6.0]], [2, 1, 3], 0, opset=13
-    )
-    # 2D
-    verify_split(
-        [[1.0, 2.0, 3.0, 4.0], [7.0, 8.0, 9.0, 10.0]],
-        [[[1.0, 2.0], [7.0, 8.0]], [[3.0, 4.0], [9.0, 10.0]]],
-        [2, 2],
-        1,
-    )
-    verify_split(
-        [[1.0, 2.0, 3.0, 4.0], [7.0, 8.0, 9.0, 10.0]],
-        [[[1.0, 2.0], [7.0, 8.0]], [[3.0, 4.0], [9.0, 10.0]]],
-        [2, 2],
-        1,
-        opset=13,
-    )
-    # Split evenly (unstack)
-    verify_split([1, 2, 3], [[1], [2], [3]], False, 0, False)
-    # Split a single value to a single value
-    verify_split([1], [[1]], [1], pass_split=True)
-    # Test that the default case modifies nothing when split list has length one
-    verify_split([[1.0, 2.0]], [[1.0, 2.0]], [2], 1)
-    verify_split([[1.0, 2.0]], [[1.0, 2.0]], [1], 0)
-
-
-@tvm.testing.parametrize_targets
-def test_binary_ops(target, dev):
-    """test_binary_ops"""
-    in_shape = (1, 2, 3, 3)
-    dtype = "float32"
-    out_shape = in_shape
-
-    def verify_binary_ops(op, x, y, out_type="float32"):
-        out = helper.make_node(op, ["in1", "in2"], ["out"])
-        graph = helper.make_graph(
-            [out],
-            "_test",
-            inputs=[
-                helper.make_tensor_value_info("in1", TensorProto.FLOAT, x.shape),
-                helper.make_tensor_value_info("in2", TensorProto.FLOAT, y.shape),
-            ],
-            outputs=[
-                helper.make_tensor_value_info(
-                    "out", mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(out_type)], list(out_shape)
-                )
-            ],
-        )
-        model = helper.make_model(graph, producer_name="_test")
-        verify_with_ort_with_inputs(model, [x, y], target=target, dev=dev)
-
-    x = np.random.uniform(size=in_shape).astype(dtype)
-    y = np.random.uniform(size=in_shape).astype(dtype)
-    z_array = np.random.uniform(size=(3,)).astype(dtype)
-    verify_binary_ops("Add", x, y)
-    verify_binary_ops("Add", x, z_array)
-    verify_binary_ops("Sub", x, y)
-    verify_binary_ops("Sub", x, z_array)
-    verify_binary_ops("Mul", x, y)
-    verify_binary_ops("Mul", x, z_array)
-    verify_binary_ops("Div", x, y)
-    verify_binary_ops("Div", x, z_array)
-    verify_binary_ops("Sum", x, y)
-    verify_binary_ops("Sum", x, z_array)
-    verify_binary_ops("Greater", x, y, "bool")
-    verify_binary_ops("Greater", x, z_array, "bool")
-    verify_binary_ops("GreaterOrEqual", x, y, "bool")
-    verify_binary_ops("GreaterOrEqual", x, z_array, "bool")
-    verify_binary_ops("Less", x, y, "bool")
-    verify_binary_ops("Less", x, z_array, "bool")
-    verify_binary_ops("LessOrEqual", x, y, "bool")
-    verify_binary_ops("LessOrEqual", x, z_array, "bool")
-    verify_binary_ops("Equal", x, y, "bool")
-    verify_binary_ops("Equal", x, z_array, "bool")
-
-
-@tvm.testing.parametrize_targets
-def test_unary_ops(target, dev):
-    """test_unary_ops"""
-    in_shape = (1, 2, 3, 3)
-    _ = "float32"
-    out_shape = in_shape
-
-    def verify_unary_ops(op, x, rtol=1e-5, atol=1e-5, dtype="float32"):
-        x = x.astype(dtype)
-        ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-        out = helper.make_node(op, ["in1"], ["out"])
-        graph = helper.make_graph(
-            [out],
-            "_test",
-            inputs=[
-                helper.make_tensor_value_info("in1", ONNX_DTYPE, list(in_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", ONNX_DTYPE, list(out_shape))],
-        )
-        model = helper.make_model(graph, producer_name="_test")
-        verify_with_ort_with_inputs(model, [x], rtol=rtol, atol=atol, target=target, dev=dev)
-
-    x = np.random.uniform(size=in_shape)
-    verify_unary_ops("Neg", x)
-    verify_unary_ops("Abs", x)
-    verify_unary_ops("Reciprocal", x)
-    verify_unary_ops("Reciprocal", x, dtype="float16")
-    verify_unary_ops("Sqrt", x)
-    verify_unary_ops("Relu", x)
-    verify_unary_ops("Exp", x)
-    verify_unary_ops("Log", x)
-    verify_unary_ops("Log", x)
-    verify_unary_ops("Acos", x)
-    verify_unary_ops("Acosh", x)
-    verify_unary_ops("Asin", x)
-    verify_unary_ops("Asinh", x)
-    verify_unary_ops("Atan", x)
-    verify_unary_ops("Atanh", x)
-    verify_unary_ops("Cos", x)
-    verify_unary_ops("Cosh", x)
-    verify_unary_ops("Sin", x)
-    verify_unary_ops("Sinh", x)
-    verify_unary_ops("Tan", x)
-    verify_unary_ops("Tanh", x)
-    verify_unary_ops("Sigmoid", x)
-    verify_unary_ops("Softsign", x)
-
-
-@tvm.testing.parametrize_targets
-def test_leaky_relu(target, dev):
-    def leaky_relu_x(x, alpha):
-        return np.where(x >= 0, x, x * alpha)
-
-    _test_onnx_op_elementwise(
-        target,
-        dev,
-        (2, 4, 5, 6),
-        leaky_relu_x,
-        {"alpha": 0.25},
-        "float32",
-        "LeakyRelu",
-        {"alpha": 0.25},
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_elu(target, dev):
-    def elu_x(x, alpha):
-        return np.where(x > 0, x, alpha * (np.exp(x) - 1.0))
-
-    _test_onnx_op_elementwise(
-        target, dev, (2, 4, 5, 6), elu_x, {"alpha": 0.25}, "float32", "Elu", {"alpha": 0.25}
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_selu(target, dev):
-    def selu_x(x, alpha, gamma):
-        return gamma * np.where(x > 0, x, alpha * (np.exp(x) - 1.0))
-
-    _test_onnx_op_elementwise(
-        target,
-        dev,
-        (2, 4, 5, 6),
-        selu_x,
-        {"alpha": 0.25, "gamma": 0.3},
-        "float32",
-        "Selu",
-        {"alpha": 0.25, "gamma": 0.3},
-    )
-
-
-@pytest.mark.skip("Currently ONNX Runtime in CI does not support domain version of 18")
-@tvm.testing.parametrize_targets
-def test_mish(target, dev):
-    def mish_x(x):
-        return x * np.tanh(np.log1p(np.exp(x)))
-
-    _test_onnx_op_elementwise(target, dev, (2, 4, 5, 6), mish_x, {}, "float64", "Mish", {})
-
-
-@tvm.testing.parametrize_targets
-def test_prelu(target, dev):
-    """test_prelu"""
-
-    def verify_prelu(x_shape, a_shape):
-        node = helper.make_node("PRelu", inputs=["X", "slope"], outputs=["Y"])
-
-        graph = helper.make_graph(
-            [node],
-            "prelu_test",
-            inputs=[
-                helper.make_tensor_value_info("X", TensorProto.FLOAT, list(x_shape)),
-                helper.make_tensor_value_info("slope", TensorProto.FLOAT, list(a_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(x_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="prelu_test")
-
-        verify_with_ort(
-            model,
-            [x_shape, a_shape],
-            out_shape=[list(x_shape)],
-            use_vm=True,
-            target=target,
-            dev=dev,
-        )
-
-    verify_prelu([3, 4, 5, 6], [1, 4, 1, 1])
-    verify_prelu([1, 8, 5, 6], [1, 8, 1, 1])
-    verify_prelu([2, 12, 16, 16], [1, 12, 1, 1])
-    verify_prelu([2, 12, 16, 16], [1])  # Test alpha broadcasting.
-    verify_prelu([3, 1], [3, 1])  # Test non NCHW workload.
-
-
-@tvm.testing.parametrize_targets
-def test_thresholded_relu(target, dev):
-    def thresholded_relu_x(x, alpha):
-        out_np = np.clip(x, alpha, np.inf)
-        out_np[out_np == alpha] = 0
-        return out_np
-
-    _test_onnx_op_elementwise(
-        target,
-        dev,
-        (2, 4, 5, 6),
-        thresholded_relu_x,
-        {"alpha": 0.25},
-        "float32",
-        "ThresholdedRelu",
-        {"alpha": 0.25},
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_logsoftmax(target, dev):
-    _test_onnx_op_elementwise(
-        target,
-        dev,
-        (1, 4),
-        tvm.topi.testing.log_softmax_python,
-        {},
-        "float32",
-        "LogSoftmax",
-        {"axis": 1},
-    )
-
-
-def check_torch_conversion(model, input_size, target, dev):
-    dummy_input = torch.randn(*input_size)
-    file_name = f"{model.__name__}.onnx"
-    # Set verbose=True for more output
-    torch.onnx.export(model(), dummy_input, file_name, export_params=True, verbose=False)
-    onnx_model = onnx.load(file_name)
-    input_data = np.random.uniform(size=input_size).astype("float32")
-    verify_with_ort_with_inputs(
-        onnx_model, [input_data], apply_softmax=True, target=target, dev=dev
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_resnet(target, dev):
-    check_torch_conversion(torchvision.models.resnet18, (1, 3, 224, 224), target, dev)
-    # check_torch_conversion(torchvision.models.resnet101, (1,3,224,224))
-
-
-# def test_alexnet():
-# Torch's ONNX export does not support the adaptive pooling used by AlexNet?
-# check_torch_conversion(torchvision.models.alexnet, (1,3,224,224))
-
-# Torch's ONNX export does not support the adaptive pooling used by vgg16?
-# def test_vgg16():
-#     check_torch_conversion(torchvision.models.vgg16, (1,3,224,224))
-
-# TODO(@jroesch): Update Torch + ONNX to support this import.
-# def test_squeezenet():
-#     # Torch's ONNX export does not support the max pooling used by Squezenet
-#     check_torch_conversion(torchvision.models.squeezenet1_0, (1,3,224,224))
-
-
-@tvm.testing.parametrize_targets
-def test_densenet(target, dev):
-    check_torch_conversion(torchvision.models.densenet161, (1, 3, 224, 224), target, dev)
-
-
-@tvm.testing.parametrize_targets
-def test_inception(target, dev):
-    check_torch_conversion(torchvision.models.inception_v3, (1, 3, 224, 224), target, dev)
-
-
-# TODO(@jroesch): Update Torch + ONNX to support this import.
-# def test_googlenet():
-#     check_torch_conversion(torchvision.models.googlenet, (1,3,224,224))
-
-# TODO(@jroesch): Update Torch + ONNX to support this import.
-# def test_shufflenetv2():
-#     check_torch_conversion(torchvision.models.shufflenetv2, (1,3,224,224))
-
-
-@tvm.testing.parametrize_targets
-def test_sign(target, dev):
-    def sign_x(x):
-        return np.sign(x)
-
-    _test_onnx_op_elementwise(target, dev, (3, 4, 5, 6), sign_x, {}, "float32", "Sign", {})
-
-
-@tvm.testing.parametrize_targets
-def test_not(target, dev):
-    """test_not"""
-
-    def verify_not(indata, dtype):
-        x = indata.astype(dtype)
-
-        node = helper.make_node(
-            "Not",
-            inputs=["in"],
-            outputs=["out"],
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "not_test",
-            inputs=[helper.make_tensor_value_info("in", TensorProto.BOOL, list(x.shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.BOOL, list(x.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="not_test")
-        verify_with_ort_with_inputs(model, [x], target=target, dev=dev)
-
-    # 2d
-    verify_not(indata=(np.random.randn(3, 4) > 0), dtype=bool)
-    # 3d
-    verify_not(indata=(np.random.randn(3, 4, 5) > 0), dtype=bool)
-    # 4d
-    verify_not(indata=(np.random.randn(3, 4, 5, 6) > 0), dtype=bool)
-
-
-@tvm.testing.parametrize_targets
-def test_and(target, dev):
-    """test_and"""
-
-    def verify_and(indata, dtype):
-        x = indata[0].astype(dtype)
-        y = indata[1].astype(dtype)
-        outdata = np.logical_and(x, y)
-
-        node = helper.make_node(
-            "And",
-            inputs=["in1", "in2"],
-            outputs=["out"],
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "and_test",
-            inputs=[
-                helper.make_tensor_value_info("in1", TensorProto.BOOL, list(x.shape)),
-                helper.make_tensor_value_info("in2", TensorProto.BOOL, list(y.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.BOOL, list(outdata.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="and_test")
-        verify_with_ort_with_inputs(model, [x, y], [outdata.shape], target=target, dev=dev)
-
-    # 2d
-    x = np.random.randn(3, 4) > 0
-    y = np.random.randn(3, 4) > 0
-    verify_and(indata=[x, y], dtype=bool)
-
-    # 3d
-    x = np.random.randn(3, 4, 5) > 0
-    y = np.random.randn(3, 4, 5) > 0
-    verify_and(indata=[x, y], dtype=bool)
-
-    # 4d
-    x = np.random.randn(3, 4, 5, 6) > 0
-    y = np.random.randn(3, 4, 5, 6) > 0
-    verify_and(indata=[x, y], dtype=bool)
-
-    # 3d vs 1d
-    x = np.random.randn(3, 4, 5) > 0
-    y = np.random.randn(5) > 0
-    verify_and(indata=[x, y], dtype=bool)
-
-    # 3d vs 2d
-    x = np.random.randn(3, 4, 5) > 0
-    y = np.random.randn(4, 5) > 0
-    verify_and(indata=[x, y], dtype=bool)
-
-
-@tvm.testing.parametrize_targets
-def test_tile(target, dev):
-    """test_tile"""
-
-    def verify_tile_v6(indata, repeats, outdata):
-        node = helper.make_node("Tile", inputs=["input", "repeats"], outputs=["out"])
-        graph = helper.make_graph(
-            [node],
-            "tile_test",
-            inputs=[
-                helper.make_tensor_value_info("input", TensorProto.FLOAT, list(indata.shape)),
-                helper.make_tensor_value_info("repeats", TensorProto.INT64, list(repeats.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(outdata.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="tile_test")
-        verify_with_ort_with_inputs(
-            model, [indata, repeats], use_vm=True, opset=6, target=target, dev=dev
-        )
-
-    x = np.random.rand(2, 3, 4, 5).astype(np.float32)
-    repeats = np.random.randint(low=1, high=10, size=(np.ndim(x),)).astype(np.int64)
-    z_array = np.tile(x, repeats)
-    verify_tile_v6(x, repeats, z_array)
-
-
-@tvm.testing.parametrize_targets
-def test_erf(target, dev):
-    """test_erf"""
-
-    def verify_erf(indata, outdata):
-        node = helper.make_node("Erf", inputs=["in"], outputs=["out"])
-        graph = helper.make_graph(
-            [node],
-            "erf_test",
-            inputs=[helper.make_tensor_value_info("in", TensorProto.FLOAT, list(indata.shape))],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(outdata.shape))],
-        )
-        model = helper.make_model(graph, producer_name="erf_test")
-        verify_with_ort_with_inputs(model, [indata], [outdata.shape], target=target, dev=dev)
-
-    x = np.random.rand(2, 3, 4, 6).astype(np.float32)
-    z_array = scipy.special.erf(x)
-    verify_erf(x, z_array)
-
-
-@tvm.testing.parametrize_targets
-def test_where(target, dev):
-    """test_where"""
-
-    def verify_where(condition, x, y, dtype, outdata, dynamic=False):
-        node_list = []
-        where_inputs = ["condition", "x", "y"]
-        if dynamic:
-            shape_node = helper.make_node("Shape", ["x"], ["shape"])
-            reshape_node = helper.make_node("Reshape", ["x", "shape"], ["X"])
-            where_inputs[1] = "X"
-            node_list += [shape_node, reshape_node]
-        node = helper.make_node("Where", inputs=where_inputs, outputs=["out"])
-        node_list.append(node)
-        graph = helper.make_graph(
-            node_list,
-            "where_test",
-            inputs=[
-                helper.make_tensor_value_info("condition", TensorProto.BOOL, list(condition.shape)),
-                helper.make_tensor_value_info("x", dtype, list(x.shape)),
-                helper.make_tensor_value_info("y", dtype, list(y.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", dtype, list(outdata.shape))],
-        )
-        model = helper.make_model(graph, producer_name="where_test")
-        verify_with_ort_with_inputs(
-            model, [condition, x, y], [outdata.shape], use_vm=True, target=target, dev=dev
-        )
-
-    condition = np.array([[1, 0], [1, 1]], dtype=bool)
-    x = np.array([[1, 2], [3, 4]], dtype=np.int64)
-    y = np.array([[9, 8], [7, 6]], dtype=np.int64)
-    outdata = np.where(condition, x, y)
-    verify_where(condition, x, y, TensorProto.INT64, outdata)
-
-    x = np.array([[1, 2], [3, 4]], dtype=np.float32)
-    y = np.array([[9, 8], [7, 6]], dtype=np.float32)
-    outdata = np.where(condition, x, y)
-    verify_where(condition, x, y, TensorProto.FLOAT, outdata)
-
-    x = np.array(1, dtype=np.float32)
-    y = np.array([2], dtype=np.float32)
-    outdata = np.where(condition, x, y)
-    verify_where(condition, x, y, TensorProto.FLOAT, outdata)
-
-    x = np.array([2], dtype=np.float32)
-    y = np.array(1, dtype=np.float32)
-    outdata = np.where(condition, x, y)
-    verify_where(condition, x, y, TensorProto.FLOAT, outdata)
-
-    condition = np.array(1, dtype=bool)
-    x = np.array([[1, 2], [3, 4]], dtype=np.float32)
-    y = np.array([[5, 6], [7, 8]], dtype=np.float32)
-    outdata = np.where(condition, x, y)
-    verify_where(condition, x, y, TensorProto.FLOAT, outdata)
-
-    x = np.array([[1, 2], [3, 4]], dtype=np.float32)
-    y = np.array([[1], [7]], dtype=np.float32)
-    outdata = np.where(condition, x, y)
-    verify_where(condition, x, y, TensorProto.FLOAT, outdata)
-    verify_where(condition, x, y, TensorProto.FLOAT, outdata, dynamic=True)
-
-    condition = np.random.uniform(size=(3, 1)) < 0.5
-    x = np.random.uniform(size=2).astype(np.float32)
-    y = np.random.uniform(size=2).astype(np.float32)
-    outdata = np.where(condition, x, y)
-    verify_where(condition, x, y, TensorProto.FLOAT, outdata)
-
-
-@tvm.testing.parametrize_targets
-def test_or(target, dev):
-    """test_or"""
-
-    def verify_or(indata, dtype):
-        x = indata[0].astype(dtype)
-        y = indata[1].astype(dtype)
-        outdata = np.logical_or(x, y)
-
-        node = helper.make_node(
-            "Or",
-            inputs=["in1", "in2"],
-            outputs=["out"],
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "or_test",
-            inputs=[
-                helper.make_tensor_value_info("in1", TensorProto.BOOL, list(x.shape)),
-                helper.make_tensor_value_info("in2", TensorProto.BOOL, list(y.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.BOOL, list(outdata.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="or_test")
-        verify_with_ort_with_inputs(model, [x, y], [outdata.shape], target=target, dev=dev)
-
-    # 2d
-    x = np.random.randn(3, 4) > 0
-    y = np.random.randn(3, 4) > 0
-    verify_or(indata=[x, y], dtype=bool)
-
-    # 3d
-    x = np.random.randn(3, 4, 5) > 0
-    y = np.random.randn(3, 4, 5) > 0
-    verify_or(indata=[x, y], dtype=bool)
-
-    # 4d
-    x = np.random.randn(3, 4, 5, 6) > 0
-    y = np.random.randn(3, 4, 5, 6) > 0
-    verify_or(indata=[x, y], dtype=bool)
-
-    # 3d vs 1d
-    x = np.random.randn(3, 4, 5) > 0
-    y = np.random.randn(5) > 0
-    verify_or(indata=[x, y], dtype=bool)
-
-    # 3d vs 2d
-    x = np.random.randn(3, 4, 5) > 0
-    y = np.random.randn(4, 5) > 0
-    verify_or(indata=[x, y], dtype=bool)
-
-
-@tvm.testing.parametrize_targets
-def test_batch_norm(target, dev):
-    """test_batch_norm"""
-
-    def verify_batch_norm(in_shape):
-        batchnorm = onnx.helper.make_node(
-            "BatchNormalization", inputs=["x", "scale", "B", "mean", "var"], outputs=["Y"]
-        )
-
-        graph = helper.make_graph(
-            [batchnorm],
-            "batchnorm_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(in_shape)),
-                helper.make_tensor_value_info("scale", TensorProto.FLOAT, [in_shape[1]]),
-                helper.make_tensor_value_info("B", TensorProto.FLOAT, [in_shape[1]]),
-                helper.make_tensor_value_info("mean", TensorProto.FLOAT, [in_shape[1]]),
-                helper.make_tensor_value_info("var", TensorProto.FLOAT, [in_shape[1]]),
-            ],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(in_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="batchnorm_test")
-        # X, scale, b, mean, var
-        inshapes = [in_shape, in_shape[1], in_shape[1], in_shape[1], in_shape[1]]
-        verify_with_ort(model, inshapes, out_shape=[in_shape], target=target, dev=dev)
-
-    verify_batch_norm([1, 3, 224, 224])
-    verify_batch_norm([1, 3, 24, 24])
-    verify_batch_norm([16, 3, 24, 24])
-    verify_batch_norm([16, 16, 24, 24])
-    verify_batch_norm([16, 16, 10, 10])
-
-
-@tvm.testing.parametrize_targets
-def test_batch_norm_dynamic_subgraph(target, dev):
-    """test_batch_norm_dynamic_subgraph"""
-
-    def verify_batch_norm_dynamic_subgraph(in_shape, o_shape):
-
-        batchnorm = onnx.helper.make_node(
-            "BatchNormalization", inputs=["x", "scale", "B", "mean", "var"], outputs=["Y"]
-        )
-
-        shape_node = helper.make_node("Shape", ["Y"], ["shape"])
-        reshape_node = helper.make_node("Reshape", ["in", "shape"], ["out"])
-        graph = helper.make_graph(
-            [batchnorm, shape_node, reshape_node],
-            "batchnorm_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(in_shape)),
-                helper.make_tensor_value_info("in", TensorProto.FLOAT, list(o_shape)),
-                helper.make_tensor_value_info("scale", TensorProto.FLOAT, [in_shape[1]]),
-                helper.make_tensor_value_info("B", TensorProto.FLOAT, [in_shape[1]]),
-                helper.make_tensor_value_info("mean", TensorProto.FLOAT, [in_shape[1]]),
-                helper.make_tensor_value_info("var", TensorProto.FLOAT, [in_shape[1]]),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, list(in_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="batchnorm_test")
-
-        # X, inp, scale, b, mean, var
-        inshapes = [in_shape, o_shape, in_shape[1], in_shape[1], in_shape[1], in_shape[1]]
-        verify_with_ort(model, inshapes, out_shape=[in_shape], use_vm=True, target=target, dev=dev)
-
-    verify_batch_norm_dynamic_subgraph([16, 16, 10, 10], [160, 160])
-
-
-@tvm.testing.parametrize_targets
-def test_conv(target, dev):
-    """test_conv"""
-
-    def verify_conv(
-        x_shape,
-        w_shape,
-        y_shape,
-        padding,
-        kernel_shape,
-        strides,
-        dilations,
-        group=1,
-        auto_pad="NOTSET",
-        unset_pad=False,
-    ):
-        if unset_pad:
-            node = helper.make_node(
-                "Conv",
-                inputs=["x", "W"],
-                outputs=["y"],
-                kernel_shape=kernel_shape,
-                # Default values for other attributes:
-                strides=strides,
-                dilations=dilations,
-                group=group,
-            )
-        elif padding is None:
-            ## autopadding with unset default attributes
-            kwargs = {}
-            if not all(list(s == 1 for s in strides)):
-                kwargs["strides"] = strides
-            if not all(list(d == 1 for d in dilations)):
-                kwargs["dilations"] = dilations
-
-            node = helper.make_node(
-                "Conv",
-                inputs=["x", "W"],
-                outputs=["y"],
-                # Default values for other attributes:
-                auto_pad=auto_pad,
-                group=group,
-                **kwargs,
-            )
-        else:
-            node = helper.make_node(
-                "Conv",
-                inputs=["x", "W"],
-                outputs=["y"],
-                kernel_shape=kernel_shape,
-                # Default values for other attributes:
-                strides=strides,
-                dilations=dilations,
-                group=group,
-                pads=padding,
-            )
-
-        graph = helper.make_graph(
-            [node],
-            "conv_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape)),
-                helper.make_tensor_value_info("W", TensorProto.FLOAT, list(w_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(y_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="conv_test")
-
-        verify_with_ort(
-            model,
-            [x_shape, w_shape],
-            [y_shape],
-            use_vm=True,
-            target=target,
-            dev=dev,
-        )
-
-    def repeat(num, dims):
-        return tuple(num for _ in range(dims))
-
-    for dims in [1, 2, 3]:
-        # Convolution with padding
-        verify_conv(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            (1, 1) + repeat(5, dims),
-            2 * repeat(1, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-        )
-        # Convolution with asymmetric padding
-        verify_conv(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            (1, 1) + repeat(4, dims),
-            repeat(0, dims) + repeat(1, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-        )
-        # Convolution without padding
-        verify_conv(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            (1, 1) + repeat(3, dims),
-            2 * repeat(0, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-        )
-        # Convolution with autopadding
-        verify_conv(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            (1, 1) + repeat(5, dims),
-            None,
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-            auto_pad="SAME_UPPER",
-        )
-        # Convolution with valid autopadding
-        verify_conv(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            (1, 1) + repeat(3, dims),
-            None,
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-            auto_pad="VALID",
-        )
-        # Convolution with unset padding
-        verify_conv(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            (1, 1) + repeat(3, dims),
-            2 * repeat(0, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-            True,
-        )
-        # Convolution with non uniform stride
-        verify_conv(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            (1, 1) + repeat(3, dims),
-            None,
-            repeat(3, dims),
-            repeat(2, dims),
-            repeat(1, dims),
-            auto_pad="SAME_UPPER",
-        )
-        # Convolution with dilation
-        verify_conv(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            (1, 1) + repeat(5, dims),
-            2 * repeat(2, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(2, dims),
-        )
-
-    # TODO(jwfromm): Merge with other tests once group_conv3d is supported.
-    for dims in [1, 2, 3]:
-        # Group Convolution
-        verify_conv(
-            (1, 8) + repeat(5, dims),
-            (8, 1) + repeat(3, dims),
-            (1, 8) + repeat(5, dims),
-            2 * repeat(1, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-            group=8,
-        )
-
-        verify_conv(
-            (1, 12) + repeat(5, dims),
-            (30, 4) + repeat(3, dims),
-            (1, 30) + repeat(5, dims),
-            2 * repeat(1, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-            group=3,
-        )
-
-
-@tvm.testing.parametrize_targets
-def test_convtranspose(target, dev):
-    """test_convtranspose"""
-
-    def verify_convtranspose_with_output_shape(
-        x_shape,
-        w_shape,
-        output_shape,
-        kernel_shape,
-        strides,
-        dilations,
-        auto_pad="SAME_UPPER",
-        group=1,
-    ):
-        node = helper.make_node(
-            "ConvTranspose",
-            inputs=["x", "W"],
-            outputs=["y"],
-            kernel_shape=kernel_shape,
-            # Default values for other attributes:
-            strides=strides,
-            dilations=dilations,
-            output_shape=output_shape,
-            auto_pad=auto_pad,
-        )
-
-        if group is not None:
-            group_attr = helper.make_attribute("group", group)
-            node.attribute.append(group_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "ConvTranspose_with_output_shape_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape)),
-                helper.make_tensor_value_info("W", TensorProto.FLOAT, list(w_shape)),
-            ],
-            outputs=[
-                helper.make_tensor_value_info("y", TensorProto.FLOAT, [1, 1] + list(output_shape))
-            ],
-        )
-
-        model = helper.make_model(graph, producer_name="convtranspose_output_shape_test")
-
-        verify_with_ort(model, [x_shape, w_shape], use_vm=True, target=target, dev=dev)
-
-    def verify_convtranspose_with_padding(
-        x_shape,
-        w_shape,
-        padding,
-        kernel_shape,
-        strides,
-        dilations,
-        auto_pad="NOTSET",
-        unset_pad=False,
-        group=1,
-    ):
-        node = helper.make_node(
-            "ConvTranspose",
-            inputs=["x", "W"],
-            outputs=["y"],
-            kernel_shape=kernel_shape,
-            # Default values for other attributes:
-            strides=strides,
-            dilations=dilations,
-        )
-        if not unset_pad:
-            if padding is None:
-                pad_attr = helper.make_attribute("auto_pad", auto_pad)
-            else:
-                pad_attr = helper.make_attribute("pads", padding)
-            node.attribute.append(pad_attr)
-
-        if group is not None:
-            group_attr = helper.make_attribute("group", group)
-            node.attribute.append(group_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "convtranspose_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape)),
-                helper.make_tensor_value_info("W", TensorProto.FLOAT, list(w_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, ["?"] * len(x_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="convtranspose_pad_test")
-
-        verify_with_ort(model, [x_shape, w_shape], use_vm=True, target=target, dev=dev)
-
-    def verify_convtranspose(x_shape, w_shape, y_shape, p, group=1):
-        node = onnx.helper.make_node(
-            "ConvTranspose",
-            inputs=["x", "W"],
-            outputs=["y"],
-            strides=[3, 2],
-            kernel_shape=[3, 3],
-            pads=p,
-        )
-
-        if group is not None:
-            group_attr = helper.make_attribute("group", group)
-            node.attribute.append(group_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "verify_convtranspose_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape)),
-                helper.make_tensor_value_info("W", TensorProto.FLOAT, list(w_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(y_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="convtranspose_test")
-        verify_with_ort(model, [x_shape, w_shape], y_shape, opset=11, target=target, dev=dev)
-
-    # Convolution Transpose with padding
-    # (1, 1, 3, 3) input tensor
-    # (1, 2, 3, 3) tensor for convolution weights
-    # (1, 2, 7, 3) output tensor
-    # [1, 2, 1, 2] list for pads
-    verify_convtranspose((1, 1, 3, 3), (1, 2, 3, 3), (1, 2, 7, 3), [1, 2, 1, 2])
-    # Test undefined groups.
-    verify_convtranspose((1, 1, 3, 3), (1, 2, 3, 3), (1, 2, 7, 3), [1, 2, 1, 2], group=None)
-
-    if "llvm" in target:
-        # GPU does not support groups != 1 for convtranspose, so only test llvm
-        # Test depthwise-convolution
-        verify_convtranspose((1, 10, 3, 3), (10, 1, 3, 3), (1, 10, 7, 3), [1, 2, 1, 2], group=10)
-
-        # Test grouped-convolution
-        verify_convtranspose((1, 10, 3, 3), (10, 1, 3, 3), (1, 5, 7, 3), [1, 2, 1, 2], group=5)
-
-    def repeat(num, dims):
-        return tuple(num for _ in range(dims))
-
-    # Once onnxruntime update is complete
-    for dims in [1, 2, 3]:
-        # Convolution with padding
-        verify_convtranspose_with_padding(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            2 * repeat(1, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-        )
-        # Convolution without padding
-        verify_convtranspose_with_padding(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            2 * repeat(0, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-        )
-        # Convolution with unset padding
-        verify_convtranspose_with_padding(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            2 * repeat(0, dims),
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-            True,
-        )
-        # Convolution with autopadding
-        verify_convtranspose_with_padding(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            None,
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-            auto_pad="SAME_UPPER",
-        )
-        # Convolution with valid autopadding
-        verify_convtranspose_with_padding(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            None,
-            repeat(3, dims),
-            repeat(1, dims),
-            repeat(1, dims),
-            auto_pad="VALID",
-        )
-        # Convolution with non uniform stride
-        verify_convtranspose_with_padding(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            None,
-            repeat(3, dims),
-            repeat(2, dims),
-            repeat(1, dims),
-            auto_pad="SAME_UPPER",
-        )
-        # Convolution with default stride
-        verify_convtranspose_with_padding(
-            (1, 1) + repeat(5, dims),
-            (1, 1) + repeat(3, dims),
-            2 * repeat(1, dims),
-            repeat(3, dims),
-            None,
-            repeat(1, dims),
-        )
-        # Convolution with dilation
-        # TODO(mbrookhart): Relay doesn't currently support convtranspose with dilation
-        # verify_convtranspose_with_padding(
-        #     (1, 1) + repeat(5, D),
-        #     (1, 1) + repeat(3, D),
-        #     2 * repeat(2, D),
-        #     repeat(3, D),
-        #     repeat(1, D),
-        #     repeat(2, D),
-        # )
-
-    # Convolution with output_shape
-    for dims in [1, 2, 3]:
-        for num in range(60, 66):
-            verify_convtranspose_with_output_shape(
-                (1, 1) + repeat(32, dims),
-                (1, 1) + repeat(4, dims),
-                repeat(num, dims),
-                repeat(4, dims),
-                repeat(2, dims),
-                repeat(1, dims),
-            )
-
-            verify_convtranspose_with_output_shape(
-                (1, 1) + repeat(32, dims),
-                (1, 1) + repeat(4, dims),
-                repeat(num, dims),
-                repeat(4, dims),
-                repeat(2, dims),
-                repeat(1, dims),
-                auto_pad="SAME_LOWER",
-            )
-
-            verify_convtranspose_with_output_shape(
-                (1, 1) + repeat(32, dims),
-                (1, 2) + repeat(4, dims),
-                repeat(num, dims),
-                repeat(4, dims),
-                repeat(2, dims),
-                repeat(1, dims),
-                auto_pad="SAME_UPPER",
-            )
-
-    verify_convtranspose_with_output_shape(
-        (1, 1, 3, 3),
-        (1, 2, 3, 3),
-        (6, 6),
-        (3, 3),
-        (2, 2),
-        (1, 1),
-        auto_pad="SAME_UPPER",
-    )
-
-    verify_convtranspose_with_output_shape(
-        (1, 1, 3, 3),
-        (1, 2, 3, 3),
-        (6, 6),
-        (3, 3),
-        (2, 2),
-        (1, 1),
-        auto_pad="SAME_LOWER",
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_unsqueeze_constant(target, dev):
-    """test_unsqueeze_constant"""
-
-    class Flatten(Module):
-        def forward(self, input_):
-            return input_.view(input_.size(0), -1)
-
-    with tempfile.NamedTemporaryFile() as f:
-        file_name = f.name
-        input_size = (1, 16, 32, 32)
-        dummy_input = torch.randn(*input_size)
-        layer = Sequential(Flatten(), Linear(16 * 32 * 32, 64))
-        torch.onnx.export(layer, dummy_input, file_name, export_params=True)
-
-        onnx_model = onnx.load(file_name)
-        relay.frontend.from_onnx(onnx_model, {"onnx::Reshape_0": input_size})
-
-
-@tvm.testing.parametrize_targets
-def test_pooling(target, dev):
-    """test_pooling"""
-
-    def verify_pooling(x_shape, kernel_shape, strides, pads, out_shape, mode, auto_pad="NOTSET"):
-        _ = np.random.uniform(size=x_shape).astype("float32")
-
-        if mode == "max":
-            node_type = "MaxPool"
-        elif mode == "average":
-            node_type = "AveragePool"
-        else:
-            raise ValueError(f"Pool method {mode} is not supported.")
-
-        pool_node = helper.make_node(
-            node_type, inputs=["x"], outputs=["y"], kernel_shape=kernel_shape, strides=strides
-        )
-
-        if pads is None:
-            pad_attr = helper.make_attribute("auto_pad", auto_pad)
-        else:
-            pad_attr = helper.make_attribute("pads", pads)
-        pool_node.attribute.append(pad_attr)
-
-        if mode == "max":
-            storage_attr = helper.make_attribute("storage_order", 0)
-            pool_node.attribute.append(storage_attr)
-
-        graph = helper.make_graph(
-            [pool_node],
-            "pooling_test",
-            inputs=[helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape))],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="pooling_test")
-        verify_with_ort(
-            model,
-            [x_shape],
-            [out_shape],
-            use_vm=False,
-            target=target,
-            dev=dev,
-        )
-
-    for mode in ["max", "average"]:
-        # Pool1D
-        verify_pooling(
-            x_shape=[1, 1, 32],
-            kernel_shape=[3],
-            strides=[1],
-            pads=[1, 1],
-            out_shape=[1, 1, 32],
-            mode=mode,
-        )
-        # Pool2D
-        verify_pooling(
-            x_shape=[1, 1, 32, 32],
-            kernel_shape=[3, 3],
-            strides=[1, 1],
-            pads=[1, 1, 1, 1],
-            out_shape=[1, 1, 32, 32],
-            mode=mode,
-        )
-
-        # Pool1D with stride
-        verify_pooling(
-            x_shape=[1, 1, 32],
-            kernel_shape=[3],
-            strides=[2],
-            pads=[1, 1],
-            out_shape=[1, 1, 16],
-            mode=mode,
-        )
-        # Pool2D with stride
-        verify_pooling(
-            x_shape=[1, 1, 32, 32],
-            kernel_shape=[3, 3],
-            strides=[2, 2],
-            pads=[1, 1, 1, 1],
-            out_shape=[1, 1, 16, 16],
-            mode=mode,
-        )
-
-        # Pool1D with stride and autopadding
-        verify_pooling(
-            x_shape=[1, 1, 32],
-            kernel_shape=[3],
-            strides=[2],
-            pads=None,
-            out_shape=[1, 1, 16],
-            mode=mode,
-            auto_pad="SAME_UPPER",
-        )
-        # Pool2D with stride and autopadding
-        verify_pooling(
-            x_shape=[1, 1, 32, 32],
-            kernel_shape=[3, 3],
-            strides=[2, 2],
-            pads=None,
-            out_shape=[1, 1, 16, 16],
-            mode=mode,
-            auto_pad="SAME_UPPER",
-        )
-
-        # Pool3D with stride
-        verify_pooling(
-            x_shape=[1, 1, 32, 32, 32],
-            kernel_shape=[3, 3, 3],
-            strides=[2, 2, 2],
-            pads=[1, 1, 1, 1, 1, 1],
-            out_shape=[1, 1, 16, 16, 16],
-            mode=mode,
-        )
-
-        # Pool3D with stride and autopadding
-        verify_pooling(
-            x_shape=[1, 1, 32, 32, 32],
-            kernel_shape=[3, 3, 3],
-            strides=[2, 2, 2],
-            pads=None,
-            out_shape=[1, 1, 16, 16, 16],
-            mode=mode,
-            auto_pad="SAME_UPPER",
-        )
-
-
-@tvm.testing.parametrize_targets
-def test_global_pooling(target, dev):
-    """test_global_pooling"""
-
-    def verify_global_pooling(x_shape, mode):
-        out_shape = x_shape[:2] + [1] * (len(x_shape) - 2)
-
-        if mode == "max":
-            node_type = "GlobalMaxPool"
-        elif mode == "average":
-            node_type = "GlobalAveragePool"
-        else:
-            raise ValueError(f"Pool method {mode} is not supported.")
-
-        pool_node = helper.make_node(node_type, inputs=["x"], outputs=["y"])
-
-        graph = helper.make_graph(
-            [pool_node],
-            "global_pooling_test",
-            inputs=[helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape))],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="global_pooling_test")
-        verify_with_ort(
-            model,
-            [x_shape],
-            [out_shape],
-            use_vm=False,
-            target=target,
-            dev=dev,
-        )
-
-    # Test each pooling mode across all N-D inputs.
-    for mode in ["average", "max"]:
-        # 1D Pooling (NCW)
-        verify_global_pooling([1, 8, 8], mode)
-        verify_global_pooling([4, 1, 4], mode)
-        # 2D Pooling (NCHW)
-        verify_global_pooling([1, 8, 8, 8], mode)
-        verify_global_pooling([4, 1, 6, 4], mode)
-        # 3D Pooling (NCDHW)
-        verify_global_pooling([1, 8, 6, 8, 8], mode)
-        verify_global_pooling([4, 1, 2, 6, 4], mode)
-
-
-@pytest.mark.skip("flaky")
-@tvm.testing.parametrize_targets
-def test_qlinear_average_pool(target, dev):
-    """test_qlinear_average_pool"""
-
-    def verify_qlinear_average_pool(
-        x_shape, kernel_shape, strides, pads, out_shape, auto_pad="NOTSET"
-    ):
-        input_nodes = [
-            helper.make_tensor_value_info("X", TensorProto.FLOAT, list(x_shape)),
-        ]
-
-        output_nodes = [
-            helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(out_shape)),
-        ]
-
-        input_names = ["X"]
-
-        node = helper.make_node(
-            "AveragePool",
-            inputs=input_names,
-            outputs=["Y"],
-            kernel_shape=kernel_shape,
-            strides=strides,
-        )
-
-        if pads is None:
-            pad_attr = helper.make_attribute("auto_pad", auto_pad)
-        else:
-            pad_attr = helper.make_attribute("pads", pads)
-        node.attribute.append(pad_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "qlinear_average_pool_test",
-            inputs=input_nodes,
-            outputs=output_nodes,
-        )
-
-        model = helper.make_model(graph, producer_name="qlinear_average_pool_Test")
-        quantize_and_verify_with_ort(model, input_names, [x_shape], target, dev)
-
-    # Pool1D
-    verify_qlinear_average_pool(
-        x_shape=[1, 1, 32],
-        kernel_shape=[3],
-        strides=[1],
-        pads=[1, 1],
-        out_shape=[1, 1, 32],
-    )
-    # Pool2D
-    verify_qlinear_average_pool(
-        x_shape=[1, 1, 32, 32],
-        kernel_shape=[3, 3],
-        strides=[1, 1],
-        pads=[1, 1, 1, 1],
-        out_shape=[1, 1, 32, 32],
-    )
-
-    # Pool1D with stride
-    verify_qlinear_average_pool(
-        x_shape=[1, 1, 32],
-        kernel_shape=[3],
-        strides=[2],
-        pads=[1, 1],
-        out_shape=[1, 1, 16],
-    )
-    # Pool2D with stride
-    verify_qlinear_average_pool(
-        x_shape=[1, 1, 32, 32],
-        kernel_shape=[3, 3],
-        strides=[2, 2],
-        pads=[1, 1, 1, 1],
-        out_shape=[1, 1, 16, 16],
-    )
-
-    # Pool1D with stride and autopadding
-    verify_qlinear_average_pool(
-        x_shape=[1, 1, 32],
-        kernel_shape=[3],
-        strides=[2],
-        pads=None,
-        out_shape=[1, 1, 16],
-        auto_pad="SAME_UPPER",
-    )
-    # Pool2D with stride and autopadding
-    verify_qlinear_average_pool(
-        x_shape=[1, 1, 32, 32],
-        kernel_shape=[3, 3],
-        strides=[2, 2],
-        pads=None,
-        out_shape=[1, 1, 16, 16],
-        auto_pad="SAME_UPPER",
-    )
-
-    # Pool3D with stride
-    verify_qlinear_average_pool(
-        x_shape=[1, 1, 32, 32, 32],
-        kernel_shape=[3, 3, 3],
-        strides=[2, 2, 2],
-        pads=[1, 1, 1, 1, 1, 1],
-        out_shape=[1, 1, 16, 16, 16],
-    )
-
-    # Pool3D with stride and autopadding
-    verify_qlinear_average_pool(
-        x_shape=[1, 1, 32, 32, 32],
-        kernel_shape=[3, 3, 3],
-        strides=[2, 2, 2],
-        pads=None,
-        out_shape=[1, 1, 16, 16, 16],
-        auto_pad="SAME_UPPER",
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_qlinear_global_average_pool(target, dev):
-    """test_qlinear_global_average_pool"""
-
-    def verify_qlinear_global_average_pool(x_shape):
-        out_shape = x_shape[:2] + [1] * (len(x_shape) - 2)
-
-        node_type = "GlobalAveragePool"
-
-        input_names = ["X"]
-
-        pool_node = helper.make_node(node_type, inputs=input_names, outputs=["Y"])
-
-        graph = helper.make_graph(
-            [pool_node],
-            "qlinear_global_average_pool_test",
-            inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, list(x_shape))],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="qlinear_global_average_pool_test")
-        quantize_and_verify_with_ort(model, input_names, [x_shape], target, dev)
-
-    # 1D Pooling (NCW)
-    verify_qlinear_global_average_pool([1, 8, 8])
-    verify_qlinear_global_average_pool([4, 1, 4])
-
-    # 2D Pooling (NCHW)
-    verify_qlinear_global_average_pool([1, 8, 8, 8])
-    verify_qlinear_global_average_pool([4, 1, 6, 4])
-
-    # 3D Pooling (NCDHW)
-    verify_qlinear_global_average_pool([1, 8, 6, 8, 8])
-    verify_qlinear_global_average_pool([4, 1, 2, 6, 4])
-
-
-@tvm.testing.parametrize_targets
-def test_mod(target, dev):
-    """test_mod"""
-
-    def verify_mod(x_shape, y_shape, fmod, out_shape, dtype="float32"):
-        x_np = np.random.uniform(-100.0, 100.0, x_shape).astype(dtype)
-        y_np = np.random.uniform(-100.0, 100.0, y_shape).astype(dtype)
-        y_np = np.where(y_np == 0, 1, y_np)  # remove 0's to avoid division by zero error
-
-        mod_node = helper.make_node("Mod", inputs=["x", "y"], outputs=["z"], fmod=fmod)
-
-        onnx_dtype = TensorProto.FLOAT if dtype == "float32" else TensorProto.INT32
-        graph = helper.make_graph(
-            [mod_node],
-            "mod_test",
-            inputs=[
-                helper.make_tensor_value_info("x", onnx_dtype, list(x_shape)),
-                helper.make_tensor_value_info("y", onnx_dtype, list(y_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("z", onnx_dtype, list(out_shape))],
-        )
-        model = helper.make_model(graph, producer_name="mod_test")
-        verify_with_ort_with_inputs(model, [x_np, y_np], [out_shape], target=target, dev=dev)
-
-    # Mod
-    verify_mod(
-        x_shape=[1, 32, 32], y_shape=[1, 1, 32], fmod=0, out_shape=(1, 32, 32), dtype="int32"
-    )
-    verify_mod(
-        x_shape=[1, 32, 32, 32],
-        y_shape=[1, 32, 32, 32],
-        fmod=0,
-        out_shape=(1, 32, 32, 32),
-        dtype="int32",
-    )
-
-    # fmod
-    verify_mod(
-        x_shape=[1, 32, 32], y_shape=[1, 32, 32], fmod=1, out_shape=(1, 32, 32), dtype="int32"
-    )
-    verify_mod(x_shape=[1, 1, 32, 32], y_shape=[1, 32, 32, 32], fmod=1, out_shape=(1, 32, 32, 32))
-    verify_mod(x_shape=[1, 32, 32, 32], y_shape=[1, 1, 32, 32], fmod=1, out_shape=(1, 32, 32, 32))
-    verify_mod(
-        x_shape=[1, 32, 32, 32],
-        y_shape=[1, 32, 32, 32],
-        fmod=1,
-        out_shape=(1, 32, 32, 32),
-        dtype="int32",
-    )
-    verify_mod(x_shape=[1, 32, 32, 32], y_shape=[1, 32, 32, 32], fmod=1, out_shape=(1, 32, 32, 32))
-
-
-@tvm.testing.parametrize_targets
-def test_xor(target, dev):
-    """test_xor"""
-
-    def verify_xor(x_shape, y_shape):
-        x_np = np.random.choice(a=[False, True], size=x_shape).astype("bool")
-        y_np = np.random.choice(a=[False, True], size=y_shape).astype("bool")
-
-        np_out = np.logical_xor(x_np, y_np)
-        out_shape = np_out.shape
-
-        xor_node = helper.make_node("Xor", inputs=["x", "y"], outputs=["z"])
-
-        onnx_dtype = TensorProto.BOOL
-        graph = helper.make_graph(
-            [xor_node],
-            "xor_test",
-            inputs=[
-                helper.make_tensor_value_info("x", onnx_dtype, list(x_shape)),
-                helper.make_tensor_value_info("y", onnx_dtype, list(y_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("z", onnx_dtype, list(out_shape))],
-        )
-        model = helper.make_model(graph, producer_name="xor_test")
-        verify_with_ort_with_inputs(model, [x_np, y_np], [out_shape], target=target, dev=dev)
-
-    # XOR
-    verify_xor(x_shape=[1, 32, 32], y_shape=[1, 32, 32])
-
-    # Xor broadcast
-    verify_xor(x_shape=[1, 32, 32], y_shape=[1, 1, 32])
-
-
-@tvm.testing.parametrize_targets
-def test_max_roi_pool(target, dev):
-    """test_max_roi_pool"""
-
-    def verify_max_roi_pool(x_shape, rois_shape, pooled_shape, spatial_scale, out_shape):
-        if spatial_scale is None:
-            pool_node = helper.make_node(
-                "MaxRoiPool", inputs=["x", "rois"], outputs=["y"], pooled_shape=pooled_shape
-            )
-        else:
-            pool_node = helper.make_node(
-                "MaxRoiPool",
-                inputs=["x", "rois"],
-                outputs=["y"],
-                pooled_shape=pooled_shape,
-                spatial_scale=spatial_scale,
-            )
-
-        graph = helper.make_graph(
-            [pool_node],
-            "pool_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape)),
-                helper.make_tensor_value_info("rois", TensorProto.FLOAT, list(rois_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="pool_test")
-        verify_with_ort(model, [x_shape, rois_shape], [out_shape], target=target, dev=dev)
-
-    verify_max_roi_pool(
-        x_shape=[1, 3, 6, 6],
-        rois_shape=[3, 5],
-        pooled_shape=[1, 1],
-        spatial_scale=None,
-        out_shape=[3, 3, 1, 1],
-    )
-
-    verify_max_roi_pool(
-        x_shape=[1, 3, 10, 10],
-        rois_shape=[4, 5],
-        pooled_shape=[2, 2],
-        spatial_scale=2.0,
-        out_shape=[4, 3, 2, 2],
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_lppool(target, dev):
-    """test_lppool"""
-
-    def verify_lppool(x_shape, kernel_shape, p, strides, pads, out_shape, auto_pad="NOTSET"):
-        kwargs = {}
-        if p is not None:
-            kwargs["p"] = p
-        if pads is None:
-            pool_node = helper.make_node(
-                "LpPool",
-                inputs=["x"],
-                outputs=["y"],
-                kernel_shape=kernel_shape,
-                auto_pad=auto_pad,
-                strides=strides,
-                **kwargs,
-            )
-        else:
-            pool_node = helper.make_node(
-                "LpPool",
-                inputs=["x"],
-                outputs=["y"],
-                kernel_shape=kernel_shape,
-                pads=pads,
-                strides=strides,
-                **kwargs,
-            )
-
-        graph = helper.make_graph(
-            [pool_node],
-            "lppool_test",
-            inputs=[helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape))],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(out_shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="lppool_test")
-        verify_with_ort(
-            model,
-            [x_shape],
-            [out_shape],
-            use_vm=True,
-            target=target,
-            dev=dev,
-        )
-
-    # Pool1D
-    verify_lppool(
-        x_shape=[1, 1, 32], kernel_shape=[3], p=2, strides=[1], pads=[1, 1], out_shape=[1, 1, 32]
-    )
-
-    # Pool2D
-    verify_lppool(
-        x_shape=[1, 1, 32, 32],
-        kernel_shape=[3, 3],
-        p=2,
-        strides=[1, 1],
-        pads=[1, 1, 1, 1],
-        out_shape=[1, 1, 32, 32],
-    )
-
-    # Pool1D with stride
-    verify_lppool(
-        x_shape=[1, 1, 32], kernel_shape=[3], p=2, strides=[2], pads=[1, 1], out_shape=[1, 1, 16]
-    )
-
-    # Pool2D with stride
-    verify_lppool(
-        x_shape=[1, 1, 32, 32],
-        kernel_shape=[3, 3],
-        p=2,
-        strides=[2, 2],
-        pads=[1, 1, 1, 1],
-        out_shape=[1, 1, 16, 16],
-    )
-
-    # Pool1D with stride and autopadding
-    verify_lppool(
-        x_shape=[1, 1, 32],
-        kernel_shape=[3],
-        p=2,
-        strides=[2],
-        pads=None,
-        out_shape=[1, 1, 16],
-        auto_pad="SAME_UPPER",
-    )
-
-    # Pool2D with stride and autopadding
-    verify_lppool(
-        x_shape=[1, 1, 32, 32],
-        kernel_shape=[3, 3],
-        p=2,
-        strides=[2, 2],
-        pads=None,
-        out_shape=[1, 1, 16, 16],
-        auto_pad="SAME_UPPER",
-    )
-
-    # Pool2D with empty stride
-    verify_lppool(
-        x_shape=[1, 3, 32, 32],
-        kernel_shape=[2, 2],
-        p=4,
-        strides=None,
-        pads=None,
-        out_shape=[1, 3, 32, 32],
-        auto_pad="SAME_LOWER",
-    )
-
-    # Pool3D with stride
-    verify_lppool(
-        x_shape=[1, 1, 32, 32, 32],
-        kernel_shape=[3, 3, 3],
-        p=2,
-        strides=[2, 2, 2],
-        pads=[1, 1, 1, 1, 1, 1],
-        out_shape=[1, 1, 16, 16, 16],
-    )
-
-    # Pool3D with stride and autopadding
-    verify_lppool(
-        x_shape=[1, 1, 32, 32, 32],
-        kernel_shape=[3, 3, 3],
-        p=2,
-        strides=[2, 2, 2],
-        pads=None,
-        out_shape=[1, 1, 16, 16, 16],
-        auto_pad="SAME_UPPER",
-    )
-    # Pool2D with empty p
-    verify_lppool(
-        x_shape=[1, 1, 32, 32],
-        kernel_shape=[3, 3],
-        p=None,
-        strides=[1, 1],
-        pads=[1, 1, 1, 1],
-        out_shape=[1, 1, 32, 32],
-    )
-
-
-def verify_global_lppool(x_shape, p, out_shape, target, dev):
-    """verify_global_lppool"""
-    pool_node = helper.make_node(
-        "GlobalLpPool",
-        inputs=["x"],
-        outputs=["y"],
-        p=p,
-    )
-
-    graph = helper.make_graph(
-        [pool_node],
-        "global_lppool_test",
-        inputs=[helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x_shape))],
-        outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(out_shape))],
-    )
-
-    model = helper.make_model(graph, producer_name="global_lppool_test")
-    verify_with_ort(model, [x_shape], out_shape, use_vm=True, target=target, dev=dev)
-
-
-@tvm.testing.parametrize_targets
-def test_global_lppool(target, dev):
-    """test_global_lppool"""
-    # LpPool1D
-    verify_global_lppool(x_shape=[1, 15, 16], p=2, out_shape=[1, 15, 1], target=target, dev=dev)
-
-    # LpPool2D
-    verify_global_lppool(
-        x_shape=[1, 15, 32, 32], p=2, out_shape=[1, 15, 1, 1], target=target, dev=dev
-    )
-
-    # LpPool2D
-    verify_global_lppool(
-        x_shape=[1, 15, 32, 32], p=3, out_shape=[1, 15, 1, 1], target=target, dev=dev
-    )
-
-    # LpPool3D
-    verify_global_lppool(
-        x_shape=[1, 15, 3, 32, 32], p=2, out_shape=[1, 15, 1, 1, 1], target=target, dev=dev
-    )
-
-
-def verify_rnn(
-    seq_length,
-    batch_size,
-    input_size,
-    hidden_size,
-    rnn_type="LSTM",
-    use_bias=False,
-    activations=None,
-    alphas=None,
-    betas=None,
-    use_initial_state=False,
-    use_peep=False,
-    linear_before_reset=False,
-    directions=1,
-    layout=0,
-    rtol=1e-5,
-    atol=1e-5,
-    target=None,
-    dev=None,
-    use_sequence_lens=False,
-):
-    """verify_rnn"""
-    if rnn_type == "RNN":
-        multiplier = 1
-    elif rnn_type == "LSTM":
-        multiplier = 4
-    elif rnn_type == "GRU":
-        multiplier = 3
-    else:
-        raise NotImplementedError(f"{rnn_type} RNNs not yet supported.")
-
-    if directions not in [1, 2]:
-        raise ValueError(f"Direction should be either 1 or 2 (for bidirectional LSTMs)")
-
-    def get_inputs():
-        input_names = []
-        input_values = []
-        input_tensors = []
-
-        def register(np_arr, name, shape=None):
-            input_values.append(np_arr)
-            input_names.append(name)
-
-            # Map of numpy dtypes to the protobuf equivalent
-            dtype_map = {
-                "float32": TensorProto.FLOAT,
-                "int32": TensorProto.INT32,
-                "int8": TensorProto.INT8,
-            }
-
-            if np_arr.dtype.name not in dtype_map:
-                raise ValueError(f"Unknown dtype we don't know how to handle {np.dtype.name}")
-            if shape is None:
-                shape = list(np_arr.shape)
-            proto_type = dtype_map[np_arr.dtype.name]
-            input_tensors.append(helper.make_tensor_value_info(name, proto_type, shape))
-
-        if layout == 1:
-            x_np = np.random.uniform(size=(batch_size, seq_length, input_size)).astype("float32")
-        else:
-            x_np = np.random.uniform(size=(seq_length, batch_size, input_size)).astype("float32")
-        w_np = np.random.uniform(size=(directions, multiplier * hidden_size, input_size)).astype(
-            "float32"
-        )
-        r_np = np.random.uniform(size=(directions, multiplier * hidden_size, hidden_size)).astype(
-            "float32"
-        )
-        register(x_np, "X")
-        register(w_np, "W")
-        register(r_np, "R")
-
-        if use_bias:
-            b_np = np.random.uniform(size=(directions, multiplier * 2 * hidden_size)).astype(
-                "float32"
-            )
-            register(b_np, "B")
-
-        if use_sequence_lens:
-            sequence_np = np.random.uniform(0, seq_length, size=(batch_size)).astype("int32")
-            register(sequence_np, "sequence_lens")
-
-        if use_initial_state:
-            assert use_bias is True, "Initial states must have bias specified."
-
-            if not use_sequence_lens:
-                sequence_np = np.repeat(seq_length, batch_size).astype("int32")
-                register(sequence_np, "sequence_lens")
-
-            if layout == 1:
-                initial_h_np = np.random.uniform(size=(batch_size, directions, hidden_size)).astype(
-                    "float32"
-                )
-            else:
-                initial_h_np = np.random.uniform(size=(directions, batch_size, hidden_size)).astype(
-                    "float32"
-                )
-            register(initial_h_np, "initial_h")
-
-            if rnn_type == "LSTM":
-                if layout == 1:
-                    initial_c_np = np.random.uniform(
-                        size=(batch_size, directions, hidden_size)
-                    ).astype("float32")
-                else:
-                    initial_c_np = np.random.uniform(
-                        size=(directions, batch_size, hidden_size)
-                    ).astype("float32")
-                register(initial_c_np, "initial_c")
-
-        if use_peep and rnn_type == "LSTM":
-            assert use_initial_state is True, "Peepholes require initial state to be specified."
-            p_np = np.random.uniform(size=(directions, 3 * hidden_size)).astype("float32")
-            register(p_np, "P")
-
-        return input_names, input_tensors, input_values
-
-    input_names, input_tensors, input_values = get_inputs()
-
-    def get_outputs():
-        output_names = []
-        graph_outputs = []
-        output_shapes = []
-
-        def register(name, shape, proto_type):
-            output_names.append(name)
-            graph_outputs.append(helper.make_tensor_value_info(name, proto_type, list(shape)))
-            output_shapes.append(list(shape))
-
-        if layout == 1:
-            register("Y", [directions, seq_length, batch_size, hidden_size], TensorProto.FLOAT)
-            register("Y_h", [batch_size, directions, hidden_size], TensorProto.FLOAT)
-        else:
-            register("Y", [seq_length, directions, batch_size, hidden_size], TensorProto.FLOAT)
-            register("Y_h", [directions, batch_size, hidden_size], TensorProto.FLOAT)
-
-        if rnn_type == "LSTM":
-            if layout == 1:
-                register("Y_c", [batch_size, directions, hidden_size], TensorProto.FLOAT)
-            else:
-                register("Y_c", [directions, batch_size, hidden_size], TensorProto.FLOAT)
-
-        return output_names, graph_outputs, output_shapes
-
-    output_names, graph_outputs, output_shapes = get_outputs()
-
-    rnn_node = helper.make_node(
-        rnn_type, inputs=input_names, outputs=output_names, hidden_size=hidden_size
-    )
-    if activations is not None:
-        activations_attr = helper.make_attribute("activations", activations)
-        rnn_node.attribute.append(activations_attr)
-    if directions == 2:
-        direction_attr = helper.make_attribute("direction", "bidirectional")
-        rnn_node.attribute.append(direction_attr)
-    if alphas is not None:
-        alphas_attr = helper.make_attribute("activation_alpha", alphas)
-        rnn_node.attribute.append(alphas_attr)
-    if betas is not None:
-        betas_attr = helper.make_attribute("activation_beta", betas)
-        rnn_node.attribute.append(betas_attr)
-    if linear_before_reset and rnn_type == "GRU":
-        lbr_attr = helper.make_attribute("linear_before_reset", 1)
-        rnn_node.attribute.append(lbr_attr)
-    if layout == 1:
-        layout_attr = helper.make_attribute("layout", 1)
-        rnn_node.attribute.append(layout_attr)
-
-    graph = helper.make_graph([rnn_node], "rnn_test", inputs=input_tensors, outputs=graph_outputs)
-
-    model = helper.make_model(graph, producer_name="rnn_test")
-
-    verify_with_ort_with_inputs(
-        model, input_values, output_shapes, atol=atol, rtol=rtol, target=target, dev=dev
-    )
-
-
-def verify_rnn_helper(target, dev, rnn_type):
-    num_activations = 1
-    if rnn_type == "GRU":
-        num_activations = 2
-    elif rnn_type == "LSTM":
-        num_activations = 3
-
-    for directions in [1, 2]:
-        # No bias.
-        verify_rnn(
-            seq_length=2,
-            batch_size=1,
-            input_size=16,
-            hidden_size=32,
-            use_bias=False,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-        # large batch.
-        verify_rnn(
-            seq_length=4,
-            batch_size=8,
-            input_size=16,
-            hidden_size=32,
-            use_bias=True,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-        # Non power of two.
-        verify_rnn(
-            seq_length=3,
-            batch_size=3,
-            input_size=16,
-            hidden_size=40,
-            use_bias=True,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-        # Long sequence.
-        verify_rnn(
-            seq_length=8,
-            batch_size=1,
-            input_size=16,
-            hidden_size=32,
-            use_bias=True,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-        # Large hidden.
-        verify_rnn(
-            seq_length=2,
-            batch_size=1,
-            input_size=16,
-            hidden_size=128,
-            use_bias=True,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-        # Large input.
-        verify_rnn(
-            seq_length=2,
-            batch_size=1,
-            input_size=64,
-            hidden_size=32,
-            use_bias=True,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-
-        # Different activation testing.
-        # Default value hardsigmoid.
-        # TODO: onnxruntime <= v1.12.0 has wrong default value of all activation functions
-        if rnn_type != "RNN":
-            activations = ["HardSigmoid", "Tanh", "Tanh"][0:num_activations] * directions
-            verify_rnn(
-                seq_length=2,
-                batch_size=1,
-                input_size=16,
-                hidden_size=32,
-                use_bias=False,
-                activations=activations,
-                rnn_type=rnn_type,
-                directions=directions,
-                target=target,
-                dev=dev,
-            )
-        # Multiple parametrized activations.
-        activations = ["HardSigmoid", "LeakyRelu", "Tanh"][0:num_activations] * directions
-        alphas = [2.0, 0.5, 0.0][0:num_activations] * directions
-        betas = [0.3, 0.0, 0.0][0:num_activations] * directions
-        verify_rnn(
-            seq_length=2,
-            batch_size=1,
-            input_size=16,
-            hidden_size=32,
-            use_bias=False,
-            activations=activations,
-            alphas=alphas,
-            betas=betas,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-        # All parametrized with new Affine activation.
-        activations = ["Affine", "LeakyRelu", "HardSigmoid"][0:num_activations] * directions
-        alphas = [0.8, 2.0, 0.5][0:num_activations] * directions
-        betas = [0.0, 0.3, 0.0][0:num_activations] * directions
-        verify_rnn(
-            seq_length=2,
-            batch_size=1,
-            input_size=16,
-            hidden_size=32,
-            use_bias=False,
-            activations=activations,
-            alphas=alphas,
-            betas=betas,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-
-        # Testing with initial state
-        verify_rnn(
-            seq_length=2,
-            batch_size=1,
-            input_size=16,
-            hidden_size=32,
-            use_bias=True,
-            use_initial_state=True,
-            rnn_type=rnn_type,
-            directions=directions,
-            target=target,
-            dev=dev,
-        )
-
-        # Testing layout
-        # TODO: onnxruntime <= 1.12.0 doesn't support layout == 1
-        # verify_rnn(
-        #     seq_length=2,
-        #     batch_size=1,
-        #     input_size=16,
-        #     hidden_size=32,
-        #     use_bias=True,
-        #     rnn_type="RNN",
-        #     directions=directions,
-        #     layout=1,
-        #     target=target,
-        #     dev=dev,
-        # )
-
-        # Testing with initial state
-        if rnn_type == "GRU":
-            verify_rnn(
-                seq_length=2,
-                batch_size=1,
-                input_size=16,
-                hidden_size=32,
-                use_bias=True,
-                use_initial_state=True,
-                rnn_type=rnn_type,
-                directions=directions,
-                target=target,
-                dev=dev,
-                use_sequence_lens=True,
-            )
-            verify_rnn(
-                seq_length=8,
-                batch_size=8,
-                input_size=16,
-                hidden_size=32,
-                use_bias=True,
-                use_initial_state=True,
-                rnn_type=rnn_type,
-                directions=directions,
-                target=target,
-                dev=dev,
-                use_sequence_lens=True,
-            )
-
-        # Testing with peepholes
-        if rnn_type == "LSTM":
-            verify_rnn(
-                seq_length=2,
-                batch_size=1,
-                input_size=16,
-                hidden_size=32,
-                use_bias=True,
-                use_initial_state=True,
-                use_peep=True,
-                rnn_type="LSTM",
-                directions=directions,
-                target=target,
-                dev=dev,
-            )
-
-
-@tvm.testing.parametrize_targets
-def test_rnn(target, dev):
-    verify_rnn_helper(target, dev, "RNN")
-
-
-@tvm.testing.parametrize_targets
-def test_lstm(target, dev):
-    verify_rnn_helper(target, dev, "LSTM")
-
-
-@tvm.testing.parametrize_targets
-def test_gru(target, dev):
-    verify_rnn_helper(target, dev, "GRU")
-
-
-@tvm.testing.parametrize_targets
-def test_resize(target, dev):
-    """test_resize"""
-
-    def verify(ishape, oshape, scales, mode, coord_trans="asymmetric", alpha=0.5, exclude=False):
-        nodes = [
-            make_constant_node("roi", onnx.TensorProto.FLOAT, (0,), []),
-            make_constant_node("scales", onnx.TensorProto.FLOAT, (len(scales),), scales),
-        ]
-        input_names = ["X", "roi", "scales"]
-
-        if oshape != []:
-            nodes.append(
-                make_constant_node("sizes", onnx.TensorProto.INT64, (len(oshape),), oshape)
-            )
-            input_names.append("sizes")
-        nodes.append(
-            helper.make_node(
-                "Resize",
-                inputs=input_names,
-                outputs=["Y"],
-                mode=mode,
-                coordinate_transformation_mode=coord_trans,
-                cubic_coeff_a=alpha,
-                exclude_outside=exclude,
-            )
-        )
-
-        if oshape == []:
-            oshape = [round(dim * scale) for (dim, scale) in zip(ishape, scales)]
-        graph = helper.make_graph(
-            nodes,
-            "resize_test",
-            inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, ishape)],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, oshape)],
-        )
-
-        model = helper.make_model(graph, producer_name="resize_test")
-
-        verify_with_ort(
-            model,
-            [ishape],
-            [oshape],
-            use_vm=True,
-            opset=11,
-            freeze_params=True,
-            target=target,
-            dev=dev,
-        )
-
-    for ndim in [1, 2, 3]:
-        method = "nearest"
-        for coord_trans in ["asymmetric", "align_corners", "half_pixel"]:
-            # upsampling
-            verify([1, 16] + [32] * ndim, [1, 16] + [64] * ndim, [], method, coord_trans)
-            # downsampling
-            verify([1, 16] + [32] * ndim, [1, 16] + [16] * ndim, [], method, coord_trans)
-            # scales are specified instead of sizes
-            verify([1, 16] + [32] * ndim, [], [1, 1] + [0.5] * ndim, method, coord_trans)
-            verify([1, 16] + [32] * ndim, [], [1, 1] + [2] * ndim, method, coord_trans)
-            verify([1, 16] + [32] * ndim, [], [1, 1] + [2] * ndim, None, coord_trans)
-
-        method = "linear"
-        # upsampling
-        verify([1, 16] + [32] * ndim, [1, 16] + [64] * ndim, [], method)
-        # downsampling
-        verify([1, 16] + [32] * ndim, [1, 16] + [16] * ndim, [], method)
-        # scales are specified instead of sizes
-        verify([1, 16] + [32] * ndim, [], [1, 1] + [0.5] * ndim, method)
-        verify([1, 16] + [32] * ndim, [], [1, 1] + [2] * ndim, method)
-
-        if ndim == 2:
-            # ONNX Runtime only supports cubic interpolation for 2D images
-            method = "cubic"
-            for alpha in [0.5, 0.75]:
-                for exclude in [True, False]:
-                    # upsampling
-                    verify(
-                        [1, 16] + [32] * ndim,
-                        [1, 16] + [64] * ndim,
-                        [],
-                        method,
-                        alpha=alpha,
-                        exclude=exclude,
-                    )
-                    # downsampling
-                    verify(
-                        [1, 16] + [32] * ndim,
-                        [1, 16] + [16] * ndim,
-                        [],
-                        method,
-                        alpha=alpha,
-                        exclude=exclude,
-                    )
-                    # scales are specified instead of sizes
-                    verify(
-                        [1, 16] + [32] * ndim,
-                        [],
-                        [1, 1] + [0.5] * ndim,
-                        method,
-                        alpha=alpha,
-                        exclude=exclude,
-                    )
-                    verify(
-                        [1, 16] + [32] * ndim,
-                        [],
-                        [1, 1] + [2] * ndim,
-                        method,
-                        alpha=alpha,
-                        exclude=exclude,
-                    )
-
-    def verify_opset_10(ishape, scales, mode):
-        nodes = [
-            make_constant_node("scales", onnx.TensorProto.FLOAT, (len(scales),), scales),
-        ]
-        input_names = ["X", "scales"]
-        nodes.append(
-            helper.make_node(
-                "Resize",
-                inputs=input_names,
-                outputs=["Y"],
-                mode=mode,
-            )
-        )
-
-        oshape = [round(dim * scale) for (dim, scale) in zip(ishape, scales)]
-        graph = helper.make_graph(
-            nodes,
-            "resize_test",
-            inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, ishape)],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, oshape)],
-        )
-
-        model = helper.make_model(graph, producer_name="resize_test")
-        verify_with_ort(
-            model,
-            [ishape],
-            [oshape],
-            use_vm=True,
-            freeze_params=True,
-            opset=10,
-            target=target,
-            dev=dev,
-        )
-
-    verify_opset_10([1, 16, 32, 32], [1, 1, 2, 2], "nearest")
-    verify_opset_10([1, 16, 32, 32], [1, 1, 0.5, 0.5], "linear")
-
-
-@tvm.testing.parametrize_targets
-def test_nonzero(target, dev):
-    """test_nonzero"""
-
-    def verify_nonzero(indata, outdata, dtype):
-        node = helper.make_node(
-            "NonZero",
-            inputs=["X"],
-            outputs=["Y"],
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "nonzero_test",
-            inputs=[helper.make_tensor_value_info("X", TensorProto.INT64, list(indata.shape))],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.INT64, list(outdata.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="nonzero_test")
-
-        verify_with_ort_with_inputs(
-            model, [indata], dtype="int64", use_vm=True, opset=9, target=target, dev=dev
-        )
-
-    input_data = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    result = np.array((np.nonzero(input_data)))  # expected output [[0, 1, 1], [0, 0, 1]]
-    verify_nonzero(input_data, result, dtype=np.int64)
-
-    input_data = np.array([[3, 0, 0], [0, 4, 0], [5, 6, 0]], dtype=np.int64)
-    result = np.array((np.nonzero(input_data)))  # expected output [[0, 1, 2, 2], [0, 1, 0, 1]]
-    verify_nonzero(input_data, result, dtype=np.int64)
-
-
-@tvm.testing.parametrize_targets
-def test_topk(target, dev):
-    """test_topk"""
-
-    def verify_topk(input_dims, k, axis=-1):
-        output_dims = list(input_dims)
-        output_dims[axis] = k
-
-        node = helper.make_node("TopK", inputs=["X", "K"], outputs=["Values", "Indices"], axis=axis)
-
-        graph = helper.make_graph(
-            [node],
-            "topk_test",
-            inputs=[
-                helper.make_tensor_value_info("X", TensorProto.FLOAT, list(input_dims)),
-                helper.make_tensor_value_info(
-                    "K",
-                    TensorProto.INT64,
-                    [
-                        1,
-                    ],
-                ),
-            ],
-            outputs=[
-                helper.make_tensor_value_info("Values", TensorProto.FLOAT, output_dims),
-                helper.make_tensor_value_info("Indices", TensorProto.INT64, output_dims),
-            ],
-        )
-
-        model = helper.make_model(graph, producer_name="topk_test")
-
-        indata = np.random.uniform(-10, 10, input_dims).astype(np.float32)
-        verify_with_ort_with_inputs(
-            model, [indata, np.array([k])], use_vm=True, target=target, dev=dev
-        )
-
-    for n in [12, 32]:
-        for shape in [[n], [n, n], [n, n, n]]:
-            for k in [1, 5, 10]:
-                verify_topk(shape, k)
-
-        verify_topk([n, n, n], 5, 0)
-        verify_topk([n, n, n], 5, 1)
-        verify_topk([n, n, n], 5, 2)
-
-
-@tvm.testing.parametrize_targets
-def test_roi_align(target, dev):
-    """test_roi_align"""
-
-    def verify_roi_align(
-        input_dims,
-        num_roi,
-        output_height,
-        output_width,
-        sampling_ratio=0,
-        spatial_scale=1.0,
-        mode="avg",
-    ):
-        output_dims = [num_roi, input_dims[1], output_height, output_width]
-
-        node = helper.make_node(
-            "RoiAlign",
-            coordinate_transformation_mode="output_half_pixel",
-            inputs=["X", "rois", "batch_indices"],
-            outputs=["Y"],
-            mode=mode,
-            output_height=output_height,
-            output_width=output_width,
-            sampling_ratio=sampling_ratio,
-            spatial_scale=spatial_scale,
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "roialign_test",
-            inputs=[
-                helper.make_tensor_value_info("X", TensorProto.FLOAT, list(input_dims)),
-                helper.make_tensor_value_info("rois", TensorProto.FLOAT, [num_roi, 4]),
-                helper.make_tensor_value_info(
-                    "batch_indices",
-                    TensorProto.INT64,
-                    [
-                        num_roi,
-                    ],
-                ),
-            ],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, output_dims)],
-        )
-
-        model = helper.make_model(graph, producer_name="roialign_test")
-
-        np_data = np.random.uniform(size=input_dims).astype("float32")
-        np_rois = np.random.uniform(size=[num_roi, 4]).astype("float32") * input_dims[2]
-        np_batch_indices = np.random.randint(low=0, high=input_dims[0], size=num_roi)
-
-        verify_with_ort_with_inputs(
-            model,
-            [np_data, np_rois, np_batch_indices],
-            out_shape=[output_dims],
-            target=target,
-            dev=dev,
-        )
-
-    verify_roi_align((1, 4, 16, 16), 32, 7, 7, sampling_ratio=0, spatial_scale=1.0)
-    verify_roi_align((4, 4, 16, 32), 32, 7, 7, sampling_ratio=0, spatial_scale=1.0)
-    verify_roi_align((1, 8, 16, 16), 32, 7, 7, sampling_ratio=0, spatial_scale=1.0)
-    verify_roi_align((1, 4, 8, 8), 32, 7, 7, sampling_ratio=0, spatial_scale=1.0)
-    verify_roi_align((1, 4, 16, 16), 16, 5, 7, sampling_ratio=0, spatial_scale=1.0)
-    verify_roi_align((1, 4, 16, 12), 8, 7, 3, sampling_ratio=0, spatial_scale=1.0)
-    verify_roi_align((1, 4, 16, 16), 32, 7, 7, sampling_ratio=0, spatial_scale=0.5)
-    verify_roi_align((3, 4, 12, 16), 32, 7, 7, sampling_ratio=0, spatial_scale=1.5)
-    verify_roi_align((5, 4, 16, 14), 32, 7, 7, sampling_ratio=1, spatial_scale=1.0)
-    verify_roi_align((1, 4, 16, 16), 32, 7, 7, sampling_ratio=2, spatial_scale=1.0)
-
-    # ONNX implementation of roi_align with max mode is incorrect, so we don't compare outputs here.
-
-
-@tvm.testing.parametrize_targets
-def test_non_max_suppression(target, dev):
-    """test_non_max_suppression"""
-
-    def verify_nms(
-        boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, output_dims
-    ):
-        input_names = ["boxes", "scores", "max_output_boxes_per_class", "iou_threshold"]
-        input_nodes = [
-            helper.make_tensor_value_info("boxes", TensorProto.FLOAT, boxes.shape),
-            helper.make_tensor_value_info("scores", TensorProto.FLOAT, scores.shape),
-            helper.make_tensor_value_info(
-                "max_output_boxes_per_class", TensorProto.INT64, max_output_boxes_per_class.shape
-            ),
-            helper.make_tensor_value_info("iou_threshold", TensorProto.FLOAT, iou_threshold.shape),
-        ]
-        inputs = [boxes, scores, max_output_boxes_per_class, iou_threshold]
-        if score_threshold is not None:
-            input_names.append("score_threshold")
-            input_nodes.append(
-                helper.make_tensor_value_info(
-                    "score_threshold", TensorProto.FLOAT, score_threshold.shape
-                )
-            )
-            inputs.append(score_threshold)
-        node = helper.make_node(
-            "NonMaxSuppression",
-            inputs=input_names,
-            outputs=["Y"],
-            center_point_box=0,
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "nms_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.INT64, output_dims)],
-        )
-
-        model = helper.make_model(graph, producer_name="nms_test")
-
-        verify_with_ort_with_inputs(model, inputs, use_vm=True, target=target, dev=dev)
-
-    boxes = np.array(
-        [
-            [
-                [0.0, 0.0, 0.3, 0.3],
-                [0.0, 0.0, 0.4, 0.4],
-                [0.0, 0.0, 0.5, 0.5],
-                [0.5, 0.5, 0.9, 0.9],
-                [0.5, 0.5, 1.0, 1.0],
-            ],
-            [
-                [0.0, 0.0, 0.3, 0.3],
-                [0.0, 0.0, 0.4, 0.4],
-                [0.5, 0.5, 0.95, 0.95],
-                [0.5, 0.5, 0.96, 0.96],
-                [0.5, 0.5, 1.0, 1.0],
-            ],
-        ]
-    ).astype("float32")
-
-    scores = np.array(
-        [
-            [[0.1, 0.2, 0.6, 0.3, 0.9], [0.1, 0.2, 0.6, 0.3, 0.9]],
-            [[0.1, 0.2, 0.6, 0.3, 0.9], [0.1, 0.2, 0.6, 0.3, 0.9]],
-        ]
-    ).astype("float32")
-    max_output_boxes_per_class = np.array(2).astype("int64")
-    iou_threshold = np.array(0.8).astype("float32")
-    output_dims = [8, 3]
-    verify_nms(boxes, scores, max_output_boxes_per_class, iou_threshold, None, output_dims)
-
-    boxes = np.array(
-        [
-            [
-                [0.0, 0.0, 1.0, 1.0],
-                [0.0, 0.1, 1.0, 1.1],
-                [0.0, -0.1, 1.0, 0.9],
-                [0.0, 10.0, 1.0, 11.0],
-                [0.0, 10.1, 1.0, 11.1],
-                [0.0, 100.0, 1.0, 101.0],
-            ]
-        ]
-    ).astype(np.float32)
-    scores = np.array([[[0.9, 0.75, 0.6, 0.95, 0.5, 0.3]]]).astype(np.float32)
-    max_output_boxes_per_class = np.array([3]).astype(np.int64)
-    iou_threshold = np.array([0.5]).astype(np.float32)
-    score_threshold = np.array([0.4]).astype(np.float32)
-    output_dims = [2, 3]
-    verify_nms(
-        boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, output_dims
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_loop(target, dev):
-    """test_loop"""
-
-    def verify_cond_loop():
-        y_in = helper.make_tensor_value_info("y_in", TensorProto.FLOAT, [1])
-        y_out = helper.make_tensor_value_info("y_out", TensorProto.FLOAT, [1])
-        scan_out = helper.make_tensor_value_info("scan_out", TensorProto.FLOAT, [1])
-        cond_in = helper.make_tensor_value_info("cond_in", TensorProto.BOOL, [])
-        cond_out = helper.make_tensor_value_info("cond_out", TensorProto.BOOL, [])
-        iter_count = helper.make_tensor_value_info("iter_count", TensorProto.INT64, [])
-
-        y = np.array([-2]).astype(np.float32)
-
-        five_const_node = helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=["five"],
-            value=helper.make_tensor(
-                name="const_tensor_five", data_type=TensorProto.FLOAT, dims=(), vals=[5]
-            ),
-        )
-
-        iter_cast_node = helper.make_node(
-            "Cast", inputs=["iter_count"], outputs=["iter_cast"], to=onnx.TensorProto.FLOAT
-        )
-
-        y_add_node = helper.make_node("Add", inputs=["y_in", "iter_cast"], outputs=["y_out"])
-
-        less_node = helper.make_node("Less", inputs=["y_out", "five"], outputs=["cond_less"])
-
-        squeeze_node = helper.make_node("Squeeze", inputs=["cond_less"], outputs=["cond_squeeze"])
-
-        cond_cast_node = helper.make_node(
-            "Cast", inputs=["cond_squeeze"], outputs=["cond_out"], to=onnx.TensorProto.BOOL
-        )
-
-        scan_identity_node = helper.make_node("Identity", inputs=["y_out"], outputs=["scan_out"])
-
-        loop_body = helper.make_graph(
-            [
-                five_const_node,
-                iter_cast_node,
-                y_add_node,
-                less_node,
-                squeeze_node,
-                cond_cast_node,
-                scan_identity_node,
-            ],
-            "loop_body",
-            [iter_count, cond_in, y_in],
-            [cond_out, y_out, scan_out],
-        )
-
-        loop_node = helper.make_node(
-            "Loop",
-            inputs=["trip_count", "cond", "y"],
-            outputs=["res_y", "res_scan"],
-            body=loop_body,
-        )
-
-        trip_count = np.array(5).astype(np.int64)
-        _ = np.array([13]).astype(np.float32)
-        cond = np.array(1).astype(bool)
-        loop_graph = onnx.helper.make_graph(
-            [loop_node],
-            "loop_outer",
-            inputs=[
-                onnx.helper.make_tensor_value_info("trip_count", onnx.TensorProto.INT64, []),
-                onnx.helper.make_tensor_value_info("cond", onnx.TensorProto.BOOL, []),
-                onnx.helper.make_tensor_value_info("y", onnx.TensorProto.FLOAT, [1]),
-            ],
-            outputs=[
-                onnx.helper.make_tensor_value_info("res_y", onnx.TensorProto.FLOAT, [1]),
-                onnx.helper.make_tensor_value_info("res_scan", onnx.TensorProto.FLOAT, [5, 1]),
-            ],
-        )
-        loop_model = onnx.helper.make_model(loop_graph)
-
-        # Set a high trip count so that condition trips first.
-        trip_count = np.array(40).astype(np.int64)
-        cond = np.array(1).astype(bool)
-        input_vals = [trip_count, cond, y]
-        verify_with_ort_with_inputs(
-            loop_model,
-            input_vals,
-            use_vm=True,
-            freeze_params=True,
-            opset=11,
-            target=target,
-            dev=dev,
-        )
-
-    def verify_count_loop():
-        y_in = helper.make_tensor_value_info("y_in", TensorProto.FLOAT, [])
-        y_out = helper.make_tensor_value_info("y_out", TensorProto.FLOAT, [])
-        scan_out = helper.make_tensor_value_info("scan_out", TensorProto.FLOAT, [])
-        cond_in = helper.make_tensor_value_info("cond_in", TensorProto.BOOL, [])
-        cond_out = helper.make_tensor_value_info("cond_out", TensorProto.BOOL, [])
-        iter_count = helper.make_tensor_value_info("iter_count", TensorProto.INT64, [])
-
-        y = np.array(-2).astype(np.float32)
-
-        iter_cast_node = helper.make_node(
-            "Cast", inputs=["iter_count"], outputs=["iter_cast"], to=onnx.TensorProto.FLOAT
-        )
-
-        y_add_node = helper.make_node("Add", inputs=["y_in", "iter_cast"], outputs=["y_out"])
-
-        identity_node = helper.make_node("Identity", inputs=["cond_in"], outputs=["cond_out"])
-
-        scan_identity_node = helper.make_node("Identity", inputs=["y_out"], outputs=["scan_out"])
-
-        loop_body = helper.make_graph(
-            [identity_node, iter_cast_node, y_add_node, scan_identity_node],
-            "loop_body",
-            [iter_count, cond_in, y_in],
-            [cond_out, y_out, scan_out],
-        )
-
-        loop_node = helper.make_node(
-            "Loop",
-            inputs=["trip_count", "cond", "y"],
-            outputs=["res_y", "res_scan"],
-            body=loop_body,
-        )
-
-        trip_count = np.array(5).astype(np.int64)
-        _ = np.array([13]).astype(np.float32)
-        cond = np.array(1).astype(bool)
-        loop_graph = onnx.helper.make_graph(
-            [loop_node],
-            "loop_outer",
-            inputs=[
-                onnx.helper.make_tensor_value_info("trip_count", onnx.TensorProto.INT64, []),
-                onnx.helper.make_tensor_value_info("cond", onnx.TensorProto.BOOL, []),
-                onnx.helper.make_tensor_value_info("y", onnx.TensorProto.FLOAT, []),
-            ],
-            outputs=[
-                onnx.helper.make_tensor_value_info("res_y", onnx.TensorProto.FLOAT, []),
-                onnx.helper.make_tensor_value_info("res_scan", onnx.TensorProto.FLOAT, [5]),
-            ],
-        )
-        loop_model = onnx.helper.make_model(loop_graph)
-
-        trip_count = np.array(5).astype(np.int64)
-        cond = np.array(1).astype(bool)
-        input_vals = [trip_count, cond, y]
-        verify_with_ort_with_inputs(
-            loop_model,
-            input_vals,
-            use_vm=True,
-            freeze_params=True,
-            opset=11,
-            target=target,
-            dev=dev,
-        )
-
-    def verify_tensor_loop(shapeless_output=False):
-        y_in = helper.make_tensor_value_info("y_in", TensorProto.FLOAT, [3, 3, 3, 3])
-        y_out = helper.make_tensor_value_info("y_out", TensorProto.FLOAT, [3, 3, 3, 3])
-        scan_out = helper.make_tensor_value_info("scan_out", TensorProto.FLOAT, [3, 3, 3, 3])
-        cond_in = helper.make_tensor_value_info("cond_in", TensorProto.BOOL, [])
-        cond_out = helper.make_tensor_value_info("cond_out", TensorProto.BOOL, [])
-        iter_count = helper.make_tensor_value_info("iter_count", TensorProto.INT64, [])
-
-        y = np.random.normal(size=[3, 3, 3, 3]).astype(np.float32)
-
-        iter_cast_node = helper.make_node(
-            "Cast", inputs=["iter_count"], outputs=["iter_cast"], to=onnx.TensorProto.FLOAT
-        )
-
-        y_add_node = helper.make_node("Add", inputs=["y_in", "iter_cast"], outputs=["y_out"])
-
-        identity_node = helper.make_node("Identity", inputs=["cond_in"], outputs=["cond_out"])
-
-        scan_identity_node = helper.make_node("Identity", inputs=["y_out"], outputs=["scan_out"])
-
-        loop_body = helper.make_graph(
-            [identity_node, iter_cast_node, y_add_node, scan_identity_node],
-            "loop_body",
-            [iter_count, cond_in, y_in],
-            [cond_out, y_out, scan_out],
-        )
-
-        loop_node = helper.make_node(
-            "Loop",
-            inputs=["trip_count", "cond", "y"],
-            outputs=["res_y", "res_scan"],
-            body=loop_body,
-        )
-
-        trip_count = np.array(5).astype(np.int64)
-        cond = np.array(1).astype(bool)
-
-        # Allow testing of malformed nodes since pytorch likes to create these.
-        if shapeless_output:
-            scan_shape = None
-        else:
-            scan_shape = [5, 3, 3, 3, 3]
-
-        loop_graph = onnx.helper.make_graph(
-            [loop_node],
-            "loop_outer",
-            inputs=[
-                onnx.helper.make_tensor_value_info("trip_count", onnx.TensorProto.INT64, []),
-                onnx.helper.make_tensor_value_info("cond", onnx.TensorProto.BOOL, []),
-                onnx.helper.make_tensor_value_info("y", onnx.TensorProto.FLOAT, [3, 3, 3, 3]),
-            ],
-            outputs=[
-                onnx.helper.make_tensor_value_info("res_y", onnx.TensorProto.FLOAT, [3, 3, 3, 3]),
-                onnx.helper.make_tensor_value_info("res_scan", onnx.TensorProto.FLOAT, scan_shape),
-            ],
-        )
-        loop_model = onnx.helper.make_model(loop_graph)
-
-        trip_count = np.array(5).astype(np.int64)
-        cond = np.array(1).astype(bool)
-        input_vals = [trip_count, cond, y]
-        verify_with_ort_with_inputs(
-            loop_model,
-            input_vals,
-            use_vm=True,
-            freeze_params=True,
-            opset=11,
-            target=target,
-            dev=dev,
-        )
-
-    # Test a loop that exits once a condition is met.
-    verify_cond_loop()
-    # Test a loop that exits after a fixed number of iterations with scalar outputs.
-    verify_count_loop()
-    # Test a loop that uses an array output.
-    verify_tensor_loop()
-    # Test a loop that is malformed and has no output shape defined.
-    verify_tensor_loop(shapeless_output=True)
-
-
-@tvm.testing.parametrize_targets
-def test_if(target, dev):
-    """test_if"""
-
-    def verify_if(cond_array, num_outputs):
-        # Given a bool scalar input cond.
-        # return constant tensor x if cond is True, otherwise return constant tensor y.
-
-        def append_constant_nodes(nodes, outputs, expected, name):
-            outputs.append(onnx.helper.make_tensor_value_info(name, onnx.TensorProto.FLOAT, [5]))
-
-            expected.append(np.random.randn(5).astype("float32"))
-
-            nodes.append(
-                onnx.helper.make_node(
-                    "Constant",
-                    inputs=[],
-                    outputs=[name],
-                    value=numpy_helper.from_array(expected[-1]),
-                )
-            )
-
-        if_outputs = []
-        graph_outputs = []
-
-        then_nodes, then_outs, then_expected = [], [], []
-        else_nodes, else_outs, else_expected = [], [], []
-
-        for i in range(num_outputs):
-            append_constant_nodes(then_nodes, then_outs, then_expected, f"then_out{i}")
-            append_constant_nodes(else_nodes, else_outs, else_expected, f"else_out{i}")
-
-            if_outputs.append(f"res{i}")
-            graph_outputs.append(
-                onnx.helper.make_tensor_value_info(f"res{i}", onnx.TensorProto.FLOAT, [5]),
-            )
-
-        then_body = onnx.helper.make_graph(then_nodes, "then_body", [], then_outs)
-        else_body = onnx.helper.make_graph(else_nodes, "else_body", [], else_outs)
-
-        if_node = onnx.helper.make_node(
-            "If", inputs=["cond"], outputs=if_outputs, then_branch=then_body, else_branch=else_body
-        )
-
-        if_graph = onnx.helper.make_graph(
-            [if_node],
-            "if_outer",
-            inputs=[
-                onnx.helper.make_tensor_value_info("cond", onnx.TensorProto.BOOL, []),
-            ],
-            outputs=graph_outputs,
-        )
-
-        if_model = onnx.helper.make_model(if_graph)
-        if cond_array:
-            cond = np.array([1]).astype("bool")
-        else:
-            cond = np.array(1).astype("bool")
-        correct_out = then_expected if cond else else_expected
-
-        # TODO(jwfromm): Onnxruntime 1.0.0 is buggy with If statements. Replace this with
-        # verify_with_ort once we update versions.
-        tvm_out = get_tvm_output_with_vm(if_model, [cond], target, dev, freeze_params=True)
-        if not isinstance(tvm_out, list):
-            tvm_out = [tvm_out]
-        for i, _ in enumerate(tvm_out):
-            tvm.testing.assert_allclose(
-                correct_out[i],
-                tvm_out[i],  # pylint: disable=unnecessary-list-index-lookup
-                rtol=1e-05,
-                atol=1e-05,
-            )
-
-    # Confirm that if works with cond as an array or scalar.
-    verify_if(cond_array=False, num_outputs=1)
-    verify_if(cond_array=False, num_outputs=2)
-    verify_if(cond_array=True, num_outputs=1)
-    verify_if(cond_array=True, num_outputs=2)
-
-
-@tvm.testing.parametrize_targets
-def test_graph_input_use_in_if(target, dev):
-    """test_graph_input_use_in_if"""
-
-    def verify_if(num_nested, cond):
-        # return "graph input" if cond is True, else return constant(-1).
-
-        input_tensor = helper.make_tensor_value_info("graph_input", TensorProto.FLOAT, [1])
-        output_tensor = helper.make_tensor_value_info("graph_output", TensorProto.FLOAT, [1])
-        constant_node = make_constant_node("const_val", TensorProto.FLOAT, [1], [-1])
-        cond_tensor = helper.make_tensor_value_info("cond", TensorProto.BOOL, [1])
-        inner_if_node = None
-        for i in range(num_nested):
-            identity_node = helper.make_node(
-                "Identity",
-                inputs=["const_val"],
-                outputs=[f"const{i}"],
-                name=f"depth{i}'th else identity",
-            )
-            else_branch = helper.make_graph(
-                [identity_node],
-                f"else{i}_body",
-                inputs=[],
-                outputs=[helper.make_tensor_value_info(f"const{i}", TensorProto.FLOAT, [1])],
-            )
-            out_name = f"if_output{i}" if i != (num_nested - 1) else "graph_output"
-
-            if i == 0:
-                identity_node = helper.make_node(
-                    "Identity",
-                    inputs=["graph_input"],
-                    outputs=[f"input_identity{i}"],
-                    name=f"depth{i}'th then identity",
-                )
-                then_branch = helper.make_graph(
-                    [identity_node],
-                    f"then{i}_body",
-                    inputs=[],
-                    outputs=[
-                        helper.make_tensor_value_info(f"input_identity{i}", TensorProto.FLOAT, [1])
-                    ],
-                )
-                if_node = helper.make_node(
-                    "If",
-                    inputs=["cond"],
-                    outputs=[out_name],
-                    then_branch=then_branch,
-                    else_branch=else_branch,
-                    name=f"depth{i}'s If node",
-                )
-                inner_if_node = if_node
-            else:
-                then_branch = helper.make_graph(
-                    [inner_if_node],
-                    f"then{i}_body",
-                    inputs=[],
-                    outputs=[
-                        helper.make_tensor_value_info(f"if_output{i-1}", TensorProto.FLOAT, [1])
-                    ],
-                )
-                if_node = helper.make_node(
-                    "If",
-                    inputs=["cond"],
-                    outputs=[out_name],
-                    then_branch=then_branch,
-                    else_branch=else_branch,
-                    name=f"depth{i}'s If node",
-                )
-                inner_if_node = if_node
-        graph_nodes = [constant_node, inner_if_node]
-        graph = helper.make_graph(
-            graph_nodes,
-            "input_use_in_if_test",
-            inputs=[input_tensor, cond_tensor],
-            outputs=[output_tensor],
-        )
-        model = helper.make_model(graph, producer_name="input_use_in_if_test")
-
-        verify_with_ort_with_inputs(
-            model,
-            [np.array([3.0], dtype="float32"), np.array([cond])],
-            dtype="float32",
-            use_vm=True,
-            opset=14,
-            target=target,
-            dev=dev,
-        )
-
-    # Confirm that if works with cond as an array or scalar.
-    verify_if(num_nested=1, cond=True)
-    verify_if(num_nested=1, cond=False)
-    verify_if(num_nested=2, cond=True)
-    verify_if(num_nested=2, cond=False)
-
-
-@tvm.testing.parametrize_targets
-def test_size(target, dev):
-    """test_size"""
-
-    def verify_size(indata):
-        node = helper.make_node(
-            "Size",
-            inputs=["X"],
-            outputs=["Y"],
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "size_test",
-            inputs=[helper.make_tensor_value_info("X", TensorProto.INT64, list(indata.shape))],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.INT64, [])],
-        )
-
-        model = helper.make_model(graph, producer_name="size_test")
-
-        verify_with_ort_with_inputs(
-            model, [indata], dtype="int64", use_vm=True, opset=11, target=target, dev=dev
-        )
-
-    input_data = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    verify_size(input_data)
-
-    input_data = np.array([[3, 0, 0], [0, 4, 0], [5, 6, 0]], dtype=np.int64)
-    verify_size(input_data)
-
-
-@tvm.testing.parametrize_targets
-def test_maxunpool(target, dev):
-    """test_maxunpool"""
-
-    def verify_maxunpool(data, indices, kernel_shape, strides, output_shape=None, pads=None):
-        input_names = ["xT", "xI"]
-        input_info = [
-            helper.make_tensor_value_info("xT", TensorProto.FLOAT, list(data.shape)),
-            helper.make_tensor_value_info("xI", TensorProto.INT64, list(indices.shape)),
-        ]
-        input_values = [data, indices]
-        if output_shape is not None:
-            input_names.append("output_shape")
-            input_info.append(
-                helper.make_tensor_value_info(
-                    "output_shape", TensorProto.INT64, list(output_shape.shape)
-                )
-            )
-            input_values.append(output_shape)
-        else:
-            # Compute expected output shape
-            output_shape = np.asarray(([1, 1] + list(strides))) * np.asarray(list(data.shape))
-            output_shape += np.asarray(([0, 0] + list(kernel_shape))) - np.asarray(
-                ([0, 0] + list(strides))
-            )
-            if pads is not None:
-                output_shape -= np.asarray(
-                    [0, 0] + list(np.sum(np.reshape(list(pads), [-1, 2]), axis=-1))
-                )
-        output_shape = [int(i) for i in output_shape]
-
-        node = helper.make_node(
-            "MaxUnpool", inputs=input_names, outputs=["y"], kernel_shape=kernel_shape
-        )
-
-        if pads is not None:
-            pad_attr = helper.make_attribute("pads", pads)
-            node.attribute.append(pad_attr)
-
-        if strides is not None:
-            strides_attr = helper.make_attribute("strides", strides)
-            node.attribute.append(strides_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "maxunpool_test",
-            inputs=input_info,
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, output_shape)],
-        )
-
-        model = helper.make_model(graph, producer_name="size_test")
-
-        verify_with_ort_with_inputs(
-            model, input_values, use_vm=True, opset=11, target=target, dev=dev
-        )
-
-    # Basic test
-    x_t = np.array([[[[5, 6], [7, 8]]]], dtype=np.float32)
-    x_i = np.array([[[[0, 7], [13, 15]]]], dtype=np.int64)
-    verify_maxunpool(x_t, x_i, [2, 2], strides=[2, 2])
-    # Small stride
-    verify_maxunpool(x_t, x_i, [2, 2], strides=[1, 1])
-    # Big kernel
-    verify_maxunpool(x_t, x_i, [3, 3], strides=[2, 2])
-    # With output shape
-    output_shape = np.array((1, 1, 5, 5), dtype=np.int64)
-    verify_maxunpool(x_t, x_i, [2, 2], strides=[2, 2], output_shape=output_shape)
-    # With explicit reverse padding
-    pads = np.asarray([1, 1, 1, 1]).astype(np.int64)
-    verify_maxunpool(x_t, x_i, [2, 2], strides=[2, 2], pads=pads)
-
-
-@tvm.testing.parametrize_targets
-def test_softplus(target, dev):
-    """test_softplus"""
-
-    def verify_softplus(indata):
-        node = helper.make_node(
-            "Softplus",
-            inputs=["X"],
-            outputs=["Y"],
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "softplus_test",
-            inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, list(indata.shape))],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(indata.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="softplus_test")
-
-        verify_with_ort_with_inputs(
-            model, [indata], dtype="float32", use_vm=True, opset=11, target=target, dev=dev
-        )
-
-    # Simple case with all signs.
-    input_data = np.array([[-1, 0, 1]], dtype=np.float32)
-    verify_softplus(input_data)
-    # More fancy case.
-    input_data = np.random.randn(1, 32, 32, 3).astype("float32")
-    verify_softplus(input_data)
-
-
-@tvm.testing.parametrize_targets
-def test_cumsum(target, dev):
-    """test_cumsum"""
-
-    def verify_cumsum(indata, axis, exclusive=0, reverse=0, dtype="float32"):
-        cumsum_node = onnx.helper.make_node(
-            "CumSum",
-            inputs=["X", "axis"],
-            outputs=["Y"],
-        )
-        if exclusive != 0:
-            exclusive_attr = helper.make_attribute("exclusive", exclusive)
-            cumsum_node.attribute.append(exclusive_attr)
-        if reverse != 0:
-            reverse_attr = helper.make_attribute("reverse", reverse)
-            cumsum_node.attribute.append(reverse_attr)
-        nodes = [
-            make_constant_node("axis", onnx.TensorProto.INT32, [1], [axis]),
-            cumsum_node,
-        ]
-        if dtype == "float32":
-            tensor_type = TensorProto.FLOAT
-        else:
-            tensor_type = TensorProto.INT32
-            dtype = "int32"
-
-        graph = helper.make_graph(
-            nodes,
-            "cumsum_test",
-            inputs=[
-                helper.make_tensor_value_info("X", tensor_type, list(indata.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("Y", tensor_type, list(indata.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="cumsum_test")
-
-        verify_with_ort_with_inputs(
-            model, [indata], dtype=dtype, use_vm=True, opset=11, target=target, dev=dev
-        )
-
-    data = (
-        np.array(
-            [
-                1.0,
-                2.0,
-                3.0,
-                4.0,
-                5.0,
-                6.0,
-                7.0,
-                8.0,
-                9.0,
-                10.0,
-                11.0,
-                12.0,
-            ]
-        )
-        .astype(np.float32)
-        .reshape((3, 4))
-    )
-
-    verify_cumsum(data, 0)
-    verify_cumsum(data, 1)
-    verify_cumsum(data, 0, 1, 0)
-    verify_cumsum(data, 1, 1, 0)
-    verify_cumsum(data, 0, 0, 1)
-    verify_cumsum(data, 1, 0, 1)
-    verify_cumsum(data, 1, 1, 1)
-    data = np.random.randn(1, 32, 32, 3).astype("float32")
-    verify_cumsum(data, 1)
-    data = np.random.randn(1, 32, 32, 3).astype("int32")
-    verify_cumsum(data, 0, dtype="int32")
-    verify_cumsum(data, 1, dtype="int32")
-    verify_cumsum(data, 0, 1, 0, dtype="int32")
-    verify_cumsum(data, 1, 1, 0, dtype="int32")
-    verify_cumsum(data, 0, 0, 1, dtype="int32")
-    verify_cumsum(data, 1, 0, 1, dtype="int32")
-    verify_cumsum(data, 1, 1, 1, dtype="int32")
-
-
-@tvm.testing.parametrize_targets
-def test_eyelike(target, dev):
-    """test_eyelike"""
-
-    def verify_eyelike(indata, dynamic=False):
-        node_list = []
-        eyelike_inputs = ["X"]
-        input_node_list = [
-            helper.make_tensor_value_info("X", TensorProto.FLOAT, list(indata.shape))
-        ]
-        input_list = [indata]
-
-        if dynamic:
-            input_node_list.append(
-                helper.make_tensor_value_info("shape", TensorProto.INT64, [len(indata.shape)])
-            )
-            input_list.append(np.asarray(indata.shape))
-            reshape_node = helper.make_node("Reshape", ["X", "shape"], ["X_dyn"])
-            eyelike_inputs[0] = "X_dyn"
-            node_list += [reshape_node]
-
-        node = helper.make_node(
-            "EyeLike",
-            inputs=eyelike_inputs,
-            outputs=["Y"],
-        )
-        node_list.append(node)
-
-        graph = helper.make_graph(
-            node_list,
-            "eyelike_test",
-            inputs=input_node_list,
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(indata.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="eyelike_test")
-        verify_with_ort_with_inputs(
-            model, input_list, dtype="float32", opset=9, target=target, dev=dev, use_vm=True
-        )
-
-    input_data = np.zeros((5, 5), dtype=np.float32)
-    verify_eyelike(input_data)
-    verify_eyelike(input_data, True)
-
-
-# The following parametrized tests loads the tests that ONNX ships as
-# serialized ONNX files, inputs, and outputs. The goal of this test
-# is to ensure the ONNX importer is in line with the ONNX specification.
-# To allow these tests to run in CI before all pass, a number of tests
-# that are not yet supported are skipped.
-
-onnx_test_node_dir = os.path.join(os.path.dirname(onnx.__file__), "backend", "test", "data", "node")
-
-onnx_test_folders = sorted(
-    dirname
-    for dirname in os.listdir(onnx_test_node_dir)
-    if dirname.startswith("test") and os.path.isdir(os.path.join(onnx_test_node_dir, dirname))
-)
-
-unsupported_onnx_tests = [
-    "test_batchnorm_epsilon_training_mode",
-    "test_batchnorm_example_training_mode",
-    "test_bernoulli",
-    "test_bernoulli_expanded",
-    "test_bernoulli_double",
-    "test_bernoulli_double_expanded",
-    "test_bernoulli_seed",
-    "test_bernoulli_seed_expanded",
-    "test_blackmanwindow",
-    "test_blackmanwindow_expanded",
-    "test_blackmanwindow_symmetric",
-    "test_blackmanwindow_symmetric_expanded",
-    # the follow cast and castlike cases have lowering issues
-    "test_cast_FLOAT_to_STRING",
-    "test_cast_STRING_to_FLOAT",
-    "test_castlike_FLOAT_to_STRING",
-    "test_castlike_FLOAT_to_STRING_expanded",
-    "test_castlike_STRING_to_FLOAT",
-    "test_castlike_STRING_to_FLOAT_expanded",
-    # the following cast and castlike cases segfault
-    "test_cast_DOUBLE_to_FLOAT16",
-    "test_castlike_DOUBLE_to_FLOAT16",
-    "test_castlike_DOUBLE_to_FLOAT16_expanded",
-    "test_convtranspose_dilations",
-    "test_cumsum_1d",
-    "test_cumsum_1d_exclusive",
-    "test_cumsum_1d_reverse",
-    "test_cumsum_1d_reverse_exclusive",
-    "test_cumsum_2d_axis_0",
-    "test_cumsum_2d_axis_1",
-    "test_cumsum_2d_negative_axis",
-    "test_det_2d",
-    "test_det_nd",
-    "test_dropout_default",
-    "test_dropout_default_mask",
-    "test_dropout_default_mask_ratio",
-    "test_dropout_default_ratio",
-    "test_gru_batchwise",
-    "test_hammingwindow",
-    "test_hammingwindow_expanded",
-    "test_hammingwindow_symmetric",
-    "test_hammingwindow_symmetric_expanded",
-    "test_hannwindow",
-    "test_hannwindow_expanded",
-    "test_hannwindow_symmetric",
-    "test_hannwindow_symmetric_expanded",
-    "test_identity_opt",
-    "test_identity_sequence",
-    "test_if_opt",
-    "test_if_seq",
-    "test_loop13_seq",
-    "test_loop16_seq_none",
-    "test_lstm_batchwise",
-    "test_maxpool_with_argmax_2d_precomputed_pads",
-    "test_maxpool_with_argmax_2d_precomputed_strides",
-    "test_maxunpool_export_with_output_shape",
-    "test_melweightmatrix",
-    # This test fails llvm with a lowering error:
-    "test_nllloss_NCd1d2d3_none_no_weight_negative_ii_expanded",
-    "test_qlinearmatmul_3D",
-    "test_range_float_type_positive_delta_expanded",
-    "test_range_int32_type_negative_delta_expanded",
-    "test_reduce_sum_do_not_keepdims_example",
-    "test_reduce_sum_do_not_keepdims_random",
-    "test_reduce_sum_keepdims_example",
-    "test_reduce_sum_keepdims_random",
-    "test_reduce_sum_negative_axes_keepdims_example",
-    "test_reduce_sum_negative_axes_keepdims_random",
-    "test_roialign_aligned_true",
-    "test_sequence_insert_at_back",
-    "test_sequence_insert_at_front",
-    "test_sequence_map_add_1_sequence_1_tensor",
-    "test_sequence_map_add_1_sequence_1_tensor_expanded",
-    "test_sequence_map_add_2_sequences",
-    "test_sequence_map_add_2_sequences_expanded",
-    "test_sequence_map_extract_shapes",
-    "test_sequence_map_extract_shapes_expanded",
-    "test_sequence_map_identity_1_sequence",
-    "test_sequence_map_identity_1_sequence_1_tensor",
-    "test_sequence_map_identity_1_sequence_1_tensor_expanded",
-    "test_sequence_map_identity_1_sequence_expanded",
-    "test_sequence_map_identity_2_sequences",
-    "test_sequence_map_identity_2_sequences_expanded",
-    "test_simple_rnn_batchwise",
-    "test_simple_rnn_defaults",
-    "test_simple_rnn_with_initial_bias",
-    "test_split_variable_parts_1d",
-    "test_split_variable_parts_2d",
-    "test_split_variable_parts_default_axis",
-    "test_split_zero_size_splits",
-    "test_stft",
-    "test_stft_with_window",
-    "test_strnormalizer_export_monday_casesensintive_lower",
-    "test_strnormalizer_export_monday_casesensintive_nochangecase",
-    "test_strnormalizer_export_monday_casesensintive_upper",
-    "test_strnormalizer_export_monday_empty_output",
-    "test_strnormalizer_export_monday_insensintive_upper_twodim",
-    "test_strnormalizer_nostopwords_nochangecase",
-    "test_tfidfvectorizer_tf_batch_onlybigrams_skip0",
-    "test_tfidfvectorizer_tf_batch_onlybigrams_skip5",
-    "test_tfidfvectorizer_tf_batch_uniandbigrams_skip5",
-    "test_tfidfvectorizer_tf_only_bigrams_skip0",
-    "test_tfidfvectorizer_tf_onlybigrams_levelempty",
-    "test_tfidfvectorizer_tf_onlybigrams_skip5",
-    "test_tfidfvectorizer_tf_uniandbigrams_skip5",
-    "test_training_dropout",
-    "test_training_dropout_default",
-    "test_training_dropout_default_mask",
-    "test_training_dropout_mask",
-    "test_training_dropout_zero_ratio",
-    "test_training_dropout_zero_ratio_mask",
-    "test_tril_zero",
-    "test_triu_zero",
-    "test_unique_sorted_with_axis",
-    "test_unique_sorted_with_axis_3d",
-    "test_unique_sorted_with_negative_axis",
-    "test_upsample_nearest",
-    "test_upsample_nearest_default",
-]
-
-
-target_skips = {
-    "cuda": [
-        "test_range_float_type_positive_delta_expanded",
-        "test_range_int32_type_positive_delta_expanded",
-        "test_mod_mixed_sign_float16",
-        "test_qlinearconv",
-        "test_qlinearmatmul",
-        "test_resize_upsample_sizes_nearest",
-    ]
-}
-
-
-def _load_proto(proto_filename, target_list, model_type_proto):
-    with open(proto_filename, "rb") as fin:
-        protobuf_content = fin.read()
-        if model_type_proto.HasField("sequence_type"):
-            sequence = onnx.SequenceProto()
-            sequence.ParseFromString(protobuf_content)
-            target_list.append(numpy_helper.to_list(sequence))
-        elif model_type_proto.HasField("tensor_type"):
-            tensor = onnx.TensorProto()
-            tensor.ParseFromString(protobuf_content)
-            target_list.append(numpy_helper.to_array(tensor))
-        elif model_type_proto.HasField("optional_type"):
-            optional = onnx.OptionalProto()
-            optional.ParseFromString(protobuf_content)
-            target_list.append(numpy_helper.to_optional(optional))
-        else:
-            raise ValueError(
-                "Loading proto of that specific type (Map/Sparse Tensor) is currently not supported"
-            )
-
-
-def is_ort_version_lower_than(ver):
-    import onnxruntime as ort
-
-    v11, v12, v13 = tuple(int(v) for v in ort.__version__.split("."))
-    v21, v22, v23 = tuple(int(v) for v in ver.split("."))
-
-    return (v11 < v21) or (v11 == v21 and v12 < v22) or ((v11, v12) == (v21, v22) and v13 < v23)
-
-
-@pytest.mark.parametrize("onnx_test", onnx_test_folders)
-@tvm.testing.parametrize_targets
-def test_onnx_nodes(target, dev, onnx_test):
-    """test_onnx_nodes"""
-    if platform.machine() == "aarch64" and onnx_test == "test_resize_upsample_sizes_nearest":
-        pytest.skip("Currently failing on AArch64")
-
-    target_kind = tvm.target.Target(target).kind.name
-
-    if onnx_test in unsupported_onnx_tests:
-        pytest.skip(f"Onnx test '{onnx_test}' not yet supported by TVM")
-
-    target_specific_skips = target_skips.get(target_kind, [])
-    if onnx_test in target_specific_skips:
-        pytest.skip(f"Onnx test '{onnx_test}' not yet supported by TVM on {target_kind} targets")
-
-    if is_ort_version_lower_than("1.13.1") and onnx_test == "test_convtranspose_autopad_same":
-        pytest.skip(
-            f"Onnx test '{onnx_test}' expected to fail for onnxruntime version lower than 1.13.1 "
-            "due to different interpretation of auto_pad parameters SAME_UPPER and SAME_LOWER."
-        )
-
-    test_dir = os.path.join(onnx_test_node_dir, onnx_test)
-
-    atol = 1e-5
-    rtol = 1e-5
-    if "roialign" in test_dir:
-        # for some reason the ONNX test crops the
-        # roialign results to 4 decimal places
-        atol = 1e-4
-
-    if "to_BFLOAT16" in test_dir:
-        # the tolerance here is for the comparison in uint16 space, but is not as significant
-        # of a delta in bfloat16 space because it's representing the mantissa being off by 1
-        atol = 1
-
-    if "_sce_" in test_dir:
-        # complicated loss functions like SoftmaxCrossEntropy can have minor variations
-        # in accuracy depending on implementation
-        atol = 1e-4
-
-    if "bicubic" in test_dir:
-        # satisfies onnx precision for bicubic interpolation
-        atol = 1e-4
-
-    if "dft" in test_dir:
-        atol = 1e-3
-
-    model = onnx.load(os.path.join(test_dir, "model.onnx"))
-    for test_data_dir in glob.glob(os.path.join(test_dir, "test_data_set*")):
-        inputs = []
-        n_inputs = len(glob.glob(os.path.join(test_data_dir, "input_*.pb")))
-        for i in range(n_inputs):
-            input_file = os.path.join(test_data_dir, f"input_{i}.pb")
-            _load_proto(input_file, inputs, model.graph.input[i].type)
-
-        outputs = []
-        n_outputs = len(glob.glob(os.path.join(test_data_dir, "output_*.pb")))
-        for i in range(n_outputs):
-            output_file = os.path.join(test_data_dir, f"output_{i}.pb")
-            _load_proto(output_file, outputs, model.graph.output[i].type)
-
-    tvm_val = get_tvm_output_with_vm(model, inputs, target, dev)
-    if len(outputs) == 1:
-        tvm.testing.assert_allclose(outputs[0], tvm_val, rtol=rtol, atol=atol)
-    else:
-        for output, val in zip(outputs, tvm_val):
-            tvm.testing.assert_allclose(output, val, rtol=rtol, atol=atol)
-
-
-def test_wrong_input():
-    """test_wrong_input"""
-    node = helper.make_node(
-        "Softplus",
-        inputs=["X"],
-        outputs=["Y"],
-    )
-
-    graph = helper.make_graph(
-        [node],
-        "softplus_test",
-        inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, list([5]))],
-        outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list([5]))],
-    )
-    model = helper.make_model(graph, producer_name="softplus_test")
-
-    # Check that the graph can import correctly with proper shape definitions.
-    correct_shape_dict = {"X": [5]}
-    relay.frontend.from_onnx(model, shape=correct_shape_dict)
-
-    # Check that an assertion is triggered when an input not in the graph is provided.
-    wrong_shape_dict = {"Z": [5]}
-    with pytest.raises(AssertionError):
-        relay.frontend.from_onnx(model, shape=wrong_shape_dict)
-
-
-@pytest.mark.skip(reason="unsupported op numel")
-@tvm.testing.parametrize_targets
-def test_aten(target, dev):
-    """test_aten"""
-    torch.set_grad_enabled(False)
-
-    def _convert_to_onnx(model, inputs):
-        file_name = "aten_model.onnx"
-        torch.onnx.export(
-            model,
-            inputs,
-            file_name,
-            export_params=True,
-            verbose=False,
-            opset_version=10,
-            operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN,
-        )
-        onnx_model = onnx.load(file_name)
-        return onnx_model
-
-    def verify_embedding_bag(num_embedding, embedding_dim, data_shape, num_bags=None):
-        dummy_data = torch.randint(0, num_embedding - 1, data_shape)
-        tvm_inputs = [dummy_data.numpy()]
-        model = torch.nn.EmbeddingBag(num_embedding, embedding_dim)
-        onnx_model = _convert_to_onnx(model, dummy_data)
-        torch_out = model(dummy_data)
-        tvm_out = get_tvm_output_with_vm(
-            onnx_model,
-            tvm_inputs,
-            freeze_params=True,
-            target=target,
-            dev=dev,
-        )
-        tvm.testing.assert_allclose(torch_out.numpy(), tvm_out, atol=5e-7)
-
-    verify_embedding_bag(10, 3, [2, 10])
-    verify_embedding_bag(32, 2, [3, 3])
-
-
-@tvm.testing.parametrize_targets
-def test_index_put(target, dev):
-    """test_index_put"""
-
-    class IndexPutModel(torch.nn.Module):
-        def __init__(self, indices, values, accumulate):
-            super().__init__()
-            self.indices = indices
-            self.values = values
-            self.accumulate = accumulate
-
-        def forward(self, x):
-            return x.index_put(self.indices, self.values, self.accumulate)
-
-    def _convert_to_onnx(model, dummy_data):
-        file_name = "aten_model.onnx"
-        torch.onnx.export(
-            model,
-            dummy_data,
-            file_name,
-            export_params=True,
-            verbose=False,
-            opset_version=11,
-            operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK,
-        )
-        onnx_model = onnx.load(file_name)
-        return onnx_model
-
-    def verify_index_put(data_shape, indices, accumulate):
-        dummy_data = torch.ones(data_shape)
-        tvm_inputs = [dummy_data.numpy()]
-        values = torch.rand(indices[0].size())
-        model = IndexPutModel(indices, values, accumulate)
-        onnx_model = _convert_to_onnx(model, dummy_data)
-        torch_out = model(dummy_data)
-
-        tvm_out = get_tvm_output_with_vm(onnx_model, tvm_inputs, target, dev, freeze_params=True)
-        tvm.testing.assert_allclose(torch_out.numpy(), tvm_out)
-
-    shape = (3, 5)
-    xidx = torch.tensor([0, 1, 2, 2])
-    yidx = torch.tensor([0, 1, 3, 4])
-    verify_index_put(shape, [xidx, yidx], True)
-
-    shape = (3, 5, 3)
-    xidx = torch.tensor([0, 1, 2, 2, 0])
-    yidx = torch.tensor([0, 1, 3, 4, 0])
-    zidx = torch.tensor([0, 1, 1, 2, 0])
-    verify_index_put(shape, [xidx, yidx, zidx], False)
-
-    def verify_index_put_slice(data_shape, value_shape, accumulate):
-        dummy_data = torch.ones(data_shape)
-        tvm_inputs = [dummy_data.numpy()]
-        indices = []
-        index_shape = [1] * len(value_shape)
-        index_shape[0] = -1
-        for _, v_shape in enumerate(value_shape):
-            indices.append(torch.arange(0, v_shape).reshape(tuple(index_shape)))
-            index_shape.pop()
-        values = torch.rand(value_shape)
-
-        model = IndexPutModel(indices, values, accumulate)
-        onnx_model = _convert_to_onnx(model, dummy_data)
-        torch_out = model(dummy_data)
-
-        tvm_out = get_tvm_output_with_vm(onnx_model, tvm_inputs, target, dev, freeze_params=True)
-        tvm.testing.assert_allclose(torch_out.numpy(), tvm_out)
-
-    verify_index_put_slice((3, 3), (2, 2), False)
-    verify_index_put_slice((2, 3, 4), (1, 2, 3), True)
-    verify_index_put_slice((2, 3, 4, 5), (1, 2, 3, 1), False)
-
-
-@tvm.testing.parametrize_targets
-def test_reverse_sequence(target, dev):
-    """test_reverse_sequence"""
-
-    def verify_reverse_sequence(x, sequence_lens, batch_axis, time_axis):
-        node = onnx.helper.make_node(
-            "ReverseSequence",
-            inputs=["x", "sequence_lens"],
-            outputs=["y"],
-            time_axis=time_axis,
-            batch_axis=batch_axis,
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "reverse_sequence_test",
-            inputs=[
-                helper.make_tensor_value_info("x", TensorProto.FLOAT, list(x.shape)),
-                helper.make_tensor_value_info(
-                    "sequence_lens", TensorProto.INT64, list(sequence_lens.shape)
-                ),
-            ],
-            outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(x.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name="reverse_sequence_test")
-        verify_with_ort_with_inputs(model, [x, sequence_lens], [x.shape], target=target, dev=dev)
-
-    x = np.array(
-        [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]],
-        dtype=np.float32,
-    )
-    sequence_lens = np.array([1, 2, 3, 4], dtype=np.int64)
-    verify_reverse_sequence(x, sequence_lens, 0, 1)
-
-    sequence_lens = np.array([4, 3, 2, 1], dtype=np.int64)
-    verify_reverse_sequence(x, sequence_lens, 1, 0)
-
-
-@pytest.mark.parametrize("op_name", ["Gelu", "FastGelu"], scope="session")
-@pytest.mark.parametrize("data_type", ["float16", "float32"], scope="session")
-@tvm.testing.parametrize_targets
-def test_gelu(target, dev, data_type, op_name):
-    """test_gelu"""
-    dtype = np.dtype(data_type)
-    tensor_type = mapping.NP_TYPE_TO_TENSOR_TYPE[dtype]
-    absolute_tolerance = 1e-3 if data_type == "float16" else 1e-5
-
-    def verify_gelu(x):
-        node = onnx.helper.make_node(
-            op_name,
-            inputs=["x"],
-            outputs=["y"],
-            domain="com.microsoft",
-        )
-
-        graph = helper.make_graph(
-            [node],
-            f"{op_name}_test",
-            inputs=[helper.make_tensor_value_info("x", tensor_type, list(x.shape))],
-            outputs=[helper.make_tensor_value_info("y", tensor_type, list(x.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name=f"{op_name}_test")
-        verify_with_ort_with_inputs(
-            model, [x], [x.shape], atol=absolute_tolerance, dtype=data_type, target=target, dev=dev
-        )
-
-    x = np.array([-1.0, 0, 1.0, 100.0, -100.0, 1000.0, -1000.0], dtype=dtype)
-    verify_gelu(x)
-    x = np.array([[1, 2], [3, 4]], dtype=dtype)
-    verify_gelu(x)
-
-
-@pytest.mark.parametrize("op_name", ["BiasGelu", "FastGelu"], scope="session")
-@pytest.mark.parametrize("data_type", ["float16", "float32"], scope="session")
-@tvm.testing.parametrize_targets
-def test_biasgelu(target, dev, data_type, op_name):
-    """test_biasgelu"""
-    dtype = np.dtype(data_type)
-    tensor_type = mapping.NP_TYPE_TO_TENSOR_TYPE[dtype]
-    absolute_tolerance = 1e-2 if data_type == "float16" else 1e-5
-
-    def verify_biasgelu(x, bias):
-        node = onnx.helper.make_node(
-            op_name,
-            inputs=["x", "bias"],
-            outputs=["y"],
-            domain="com.microsoft",
-        )
-
-        graph = helper.make_graph(
-            [node],
-            f"{op_name}_test",
-            inputs=[
-                helper.make_tensor_value_info("x", tensor_type, list(x.shape)),
-                helper.make_tensor_value_info("bias", tensor_type, list(bias.shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("y", tensor_type, list(x.shape))],
-        )
-
-        model = helper.make_model(graph, producer_name=f"{op_name}_test")
-        verify_with_ort_with_inputs(
-            model,
-            [x, bias],
-            [x.shape],
-            atol=absolute_tolerance,
-            dtype=data_type,
-            target=target,
-            dev=dev,
-        )
-
-    x = np.array([-1.0, 0, 1.0, 100.0, -100.0, 1000.0, -1000.0], dtype=dtype)
-    bias = np.repeat(2.0, 7).astype(dtype)
-    verify_biasgelu(x, bias)
-
-    x = np.array([[1, 2], [3, 4]], dtype=dtype)
-    bias = np.array([0.3, 4.0], dtype=dtype)
-    verify_biasgelu(x, bias)
-
-
-@tvm.testing.parametrize_targets
-def test_embedlayernormalization(target, dev):
-    """test_embedlayernormalization"""
-
-    def verify_embedlayernormalization(
-        input_ids,
-        segment_ids,
-        word_embedding,
-        position_embedding,
-        segment_embedding,
-        gamma,
-        beta,
-    ):
-        node = onnx.helper.make_node(
-            "EmbedLayerNormalization",
-            inputs=[
-                "input_ids",
-                "" if segment_ids is None else "segment_ids",
-                "word_embedding",
-                "position_embedding",
-                "" if segment_embedding is None else "segment_embedding",
-                "gamma",
-                "beta",
-            ],
-            outputs=["output", "mask_index"],
-            domain="com.microsoft",
-        )
-
-        node.attribute.append(onnx.helper.make_attribute("epsilon", 1e-4))
-
-        segment_ids_shape = [] if segment_ids is None else segment_ids.shape
-        segment_embedding_shape = [] if segment_embedding is None else segment_embedding.shape
-
-        graph = helper.make_graph(
-            [node],
-            "embedlayernormalization_test",
-            inputs=[
-                helper.make_tensor_value_info(
-                    "input_ids", TensorProto.INT32, list(input_ids.shape)
-                ),
-                helper.make_tensor_value_info("segment_ids", TensorProto.INT32, segment_ids_shape),
-                helper.make_tensor_value_info(
-                    "word_embedding", TensorProto.FLOAT, list(word_embedding.shape)
-                ),
-                helper.make_tensor_value_info(
-                    "position_embedding", TensorProto.FLOAT, list(position_embedding.shape)
-                ),
-                helper.make_tensor_value_info(
-                    "segment_embedding", TensorProto.FLOAT, segment_embedding_shape
-                ),
-                helper.make_tensor_value_info("gamma", TensorProto.FLOAT, list(gamma.shape)),
-                helper.make_tensor_value_info("beta", TensorProto.FLOAT, list(beta.shape)),
-            ],
-            outputs=[
-                helper.make_tensor_value_info(
-                    "output", TensorProto.FLOAT, list((batch_size, sequence_length, hidden_size))
-                ),
-                helper.make_tensor_value_info("mask_index", TensorProto.INT32, [batch_size]),
-            ],
-        )
-
-        model = helper.make_model(graph, producer_name="embedlayernormalization_test")
-
-        # TODO(@anwang2009): onnxruntime v1.9.0 requires empty list for optional argument,
-        # but v1.10.0+ requires None instead.
-        verify_with_ort_with_inputs(
-            model,
-            [
-                input_ids,
-                np.empty(0, dtype="int32") if segment_ids is None else segment_ids,
-                word_embedding,
-                position_embedding,
-                np.empty(0, dtype="float32") if segment_embedding is None else segment_embedding,
-                gamma,
-                beta,
-            ],
-            [
-                (batch_size, sequence_length, hidden_size),
-                batch_size,
-            ],
-            target=target,
-            dev=dev,
-            rtol=1e-4,
-            atol=1e-4,
-        )
-
-    hidden_size = 384
-    batch_size = 4
-    sequence_length = 3
-    vocab_size = 5
-
-    input_ids = np.full((batch_size, sequence_length), 3).astype("int32")
-    segment_ids = np.zeros((batch_size, sequence_length)).astype("int32")
-    word_embedding = np.full((vocab_size, hidden_size), 1).astype("float32")
-    position_embedding = np.full((sequence_length, hidden_size), 2).astype("float32")
-    segment_embedding = np.full((vocab_size, hidden_size), 3).astype("float32")
-
-    gamma = np.random.uniform(0.5, 0.7, hidden_size).astype("float32")
-    beta = np.random.randn(hidden_size).astype("float32") * 0.1
-
-    verify_embedlayernormalization(
-        input_ids, segment_ids, word_embedding, position_embedding, segment_embedding, gamma, beta
-    )
-
-    # Test with undefined segment embedding
-    verify_embedlayernormalization(
-        input_ids, None, word_embedding, position_embedding, None, gamma, beta
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_attention(target, dev):
-    """test_attention"""
-
-    def verify_attention(_unidirectional, _input, _weight, _bias, _mask_index=None, _past=None):
-        input_names = ["input", "weight", "bias"]
-        if _mask_index is not None:
-            input_names.append("mask_index")
-        if _past is not None:
-            input_names.append("past")
-
-        node = onnx.helper.make_node(
-            "Attention",
-            inputs=input_names,
-            outputs=["output", "present"],
-            domain="com.microsoft",
-            num_heads=num_heads,
-            unidirectional=_unidirectional,
-        )
-
-        past_shape = (2, batch_size, num_heads, past_sequence_length, head_size)
-        present_output_shape = (2, batch_size, num_heads, sequence_length, head_size)
-
-        inputs_info = [
-            helper.make_tensor_value_info("input", TensorProto.FLOAT, list(_input.shape)),
-            helper.make_tensor_value_info("weight", TensorProto.FLOAT, list(_weight.shape)),
-            helper.make_tensor_value_info("bias", TensorProto.FLOAT, list(_bias.shape)),
-        ]
-        if _mask_index is not None:
-            inputs_info.append(
-                helper.make_tensor_value_info(
-                    "mask_index", TensorProto.INT32, list(_mask_index.shape)
-                ),
-            )
-        if _past is not None:
-            inputs_info.append(
-                helper.make_tensor_value_info("past", TensorProto.FLOAT, list(past_shape))
-            )
-
-        graph = helper.make_graph(
-            [node],
-            "attention_test",
-            inputs=inputs_info,
-            outputs=[
-                helper.make_tensor_value_info("output", TensorProto.FLOAT, list(_input.shape)),
-                helper.make_tensor_value_info(
-                    "present", TensorProto.FLOAT, list(present_output_shape)
-                ),
-            ],
-        )
-
-        model = helper.make_model(graph, producer_name="attention_test")
-
-        inputs = [_input, _weight, _bias]
-        if _mask_index is not None:
-            inputs.append(_mask_index)
-        if _past is not None:
-            inputs.append(_past)
-
-        # "present" output should be nullptr when the "past" input isn't included,
-        # but ort requires an output shape to be specified?
-        verify_with_ort_with_inputs(
-            model,
-            inputs,
-            [_input.shape, present_output_shape],
-            target=target,
-            dev=dev,
-            rtol=1e-4,
-            atol=1e-4,
-        )
-
-    batch_size = 11
-    num_heads = 13
-    head_size = 37
-    sequence_length = 7
-    input_hidden_size = 147
-    weight_hidden_size = num_heads * head_size
-    past_sequence_length = 17
-
-    total_sequence_length = past_sequence_length + sequence_length
-
-    # Required inputs
-    input_array = np.random.normal(size=(batch_size, sequence_length, input_hidden_size)).astype(
-        "float32"
-    )
-    weight = (
-        np.random.normal(size=(input_hidden_size, 3 * weight_hidden_size)).astype("float32") * 0.1
-    )
-    bias = np.random.randn(3 * weight_hidden_size).astype("float32")
-
-    # Optional inputs
-    past = np.random.random((2, batch_size, num_heads, past_sequence_length, head_size)).astype(
-        "float32"
-    )
-
-    for unidirectional in [0, 1]:
-        for have_past in [False, True]:
-            if not have_past:
-                mask_index = np.random.randint(0, 2, (batch_size, sequence_length)).astype("int32")
-                verify_attention(unidirectional, input_array, weight, bias, mask_index)
-            else:
-                mask_index = np.random.randint(0, 2, (batch_size, total_sequence_length)).astype(
-                    "int32"
-                )
-                verify_attention(unidirectional, input_array, weight, bias, mask_index, past)
-
-
-@tvm.testing.parametrize_targets
-def test_qattention(target, dev):
-    """test_qattention"""
-
-    def verify_attention(
-        _unidirectional,
-        _input,
-        _weight,
-        _bias,
-        _input_scale,
-        _weight_scale,
-        _mask_index=None,
-        _input_zero_point=None,
-        _weight_zero_point=None,
-        _past=None,
-    ):
-        input_names = ["input", "weight", "bias", "input_scale", "weight_scale"]
-        if _mask_index is not None:
-            input_names.append("mask_index")
-        if _input_zero_point is not None:
-            input_names.append("input_zero_point")
-        if _weight_zero_point is not None:
-            input_names.append("weight_zero_point")
-        if _past is not None:
-            input_names.append("past")
-
-        node = onnx.helper.make_node(
-            "QAttention",
-            inputs=input_names,
-            outputs=["output", "present"],
-            domain="com.microsoft",
-            num_heads=num_heads,
-            unidirectional=_unidirectional,
-        )
-
-        past_shape = (2, batch_size, num_heads, past_sequence_length, head_size)
-        present_output_shape = (
-            2,
-            batch_size,
-            num_heads,
-            past_sequence_length + sequence_length,
-            head_size,
-        )
-
-        inputs_info = [
-            helper.make_tensor_value_info("input", TensorProto.UINT8, list(_input.shape)),
-            helper.make_tensor_value_info("weight", TensorProto.UINT8, list(_weight.shape)),
-            helper.make_tensor_value_info("bias", TensorProto.FLOAT, list(_bias.shape)),
-            helper.make_tensor_value_info("input_scale", TensorProto.FLOAT, ()),
-            helper.make_tensor_value_info("weight_scale", TensorProto.FLOAT, ()),
-        ]
-        if _mask_index is not None:
-            inputs_info.append(
-                helper.make_tensor_value_info(
-                    "mask_index", TensorProto.INT32, list(_mask_index.shape)
-                )
-            )
-        if _input_zero_point is not None:
-            inputs_info.append(
-                helper.make_tensor_value_info("input_zero_point", TensorProto.UINT8, ())
-            )
-        if _weight_zero_point is not None:
-            inputs_info.append(
-                helper.make_tensor_value_info("weight_zero_point", TensorProto.UINT8, ())
-            )
-        if _past is not None:
-            inputs_info.append(
-                helper.make_tensor_value_info("past", TensorProto.FLOAT, list(past_shape))
-            )
-
-        graph = helper.make_graph(
-            [node],
-            "qattention_test",
-            inputs=inputs_info,
-            outputs=[
-                helper.make_tensor_value_info("output", TensorProto.FLOAT, list(_input.shape)),
-                helper.make_tensor_value_info(
-                    "present", TensorProto.FLOAT, list(present_output_shape)
-                ),
-            ],
-        )
-
-        model = helper.make_model(graph, producer_name="qattention_test")
-
-        inputs = [_input, _weight, _bias, _input_scale, _weight_scale]
-        if _mask_index is not None:
-            inputs.append(_mask_index)
-        if _input_zero_point is not None:
-            inputs.append(_input_zero_point)
-        if _weight_zero_point is not None:
-            inputs.append(_weight_zero_point)
-        if _past is not None:
-            inputs.append(_past)
-
-        verify_with_ort_with_inputs(
-            model,
-            inputs,
-            [_input.shape, present_output_shape],
-            target=target,
-            dev=dev,
-            rtol=1e-3,
-            atol=1e-3,
-        )
-
-    batch_size = 11
-    num_heads = 13
-    head_size = 37
-    sequence_length = 7
-    input_hidden_size = 147
-    weight_hidden_size = num_heads * head_size
-    past_sequence_length = 17
-
-    total_sequence_length = past_sequence_length + sequence_length
-
-    # Required inputs
-    input_array = np.random.randint(
-        0, 255, (batch_size, sequence_length, input_hidden_size)
-    ).astype("uint8")
-    weight = np.random.randint(0, 255, (input_hidden_size, 3 * weight_hidden_size)).astype("uint8")
-    bias = np.random.randn(3 * weight_hidden_size).astype("float32")
-    input_scale = np.random.random(1).astype("float32")
-    weight_scale = np.random.random(1).astype("float32")
-
-    # Optional inputs
-    input_zero_point = np.random.randint(0, 255, 1).astype("uint8")
-    weight_zero_point = np.random.randint(0, 255, 1).astype("uint8")
-    past = np.random.random((2, batch_size, num_heads, past_sequence_length, head_size)).astype(
-        "float32"
-    )
-
-    for unidirectional in [0, 1]:
-        for have_past in [False, True]:
-            if not have_past:
-                mask_index = np.random.randint(0, 2, (batch_size, sequence_length)).astype("int32")
-
-                verify_attention(
-                    unidirectional,
-                    input_array,
-                    weight,
-                    bias,
-                    input_scale,
-                    weight_scale,
-                    mask_index,
-                )
-                verify_attention(
-                    unidirectional,
-                    input_array,
-                    weight,
-                    bias,
-                    input_scale,
-                    weight_scale,
-                    mask_index,
-                    input_zero_point,
-                )
-                verify_attention(
-                    unidirectional,
-                    input_array,
-                    weight,
-                    bias,
-                    input_scale,
-                    weight_scale,
-                    mask_index,
-                    input_zero_point,
-                    weight_zero_point,
-                )
-            else:
-                mask_index = np.random.randint(0, 2, (batch_size, total_sequence_length)).astype(
-                    "int32"
-                )
-
-                verify_attention(
-                    unidirectional,
-                    input_array,
-                    weight,
-                    bias,
-                    input_scale,
-                    weight_scale,
-                    mask_index,
-                    input_zero_point,
-                    weight_zero_point,
-                    past,
-                )
-
-
-@tvm.testing.parametrize_targets
-def test_skiplayernormalization(target, dev):
-    """test_skiplayernormalization"""
-
-    def verify_skiplayernormalization(input_, skip, gamma, beta, bias):
-        node = onnx.helper.make_node(
-            "SkipLayerNormalization",
-            inputs=["input", "skip", "gamma", "beta", "bias"],
-            outputs=["output"],
-            domain="com.microsoft",
-        )
-
-        node.attribute.append(onnx.helper.make_attribute("epsilon", 1e-4))
-
-        graph = helper.make_graph(
-            [node],
-            "skiplayernormalization_test",
-            inputs=[
-                helper.make_tensor_value_info("input", TensorProto.FLOAT, list(input_.shape)),
-                helper.make_tensor_value_info("skip", TensorProto.FLOAT, list(skip.shape)),
-                helper.make_tensor_value_info("gamma", TensorProto.FLOAT, list(gamma.shape)),
-                helper.make_tensor_value_info("beta", TensorProto.FLOAT, list(beta.shape)),
-                helper.make_tensor_value_info("bias", TensorProto.FLOAT, list(bias.shape)),
-            ],
-            outputs=[
-                helper.make_tensor_value_info("output", TensorProto.FLOAT, list(input_.shape)),
-            ],
-        )
-
-        model = helper.make_model(graph, producer_name="skiplayernormalization_test")
-        verify_with_ort_with_inputs(
-            model, [input_, skip, gamma, beta, bias], [input_.shape], target=target, dev=dev
-        )
-
-    hidden_size = 384
-    batch_size = 4
-    sequence_length = 4
-
-    dtype = "float32"
-    input_array = np.random.random((batch_size, sequence_length, hidden_size)).astype(dtype)
-    skip = np.random.random((batch_size, sequence_length, hidden_size)).astype(dtype)
-    gamma = np.random.uniform(0.5, 0.7, hidden_size).astype(dtype)
-    beta = np.random.randn(hidden_size).astype(dtype) * 0.1
-    bias = np.random.randn(hidden_size).astype(dtype)
-
-    verify_skiplayernormalization(input_array, skip, gamma, beta, bias)
-
-
-@tvm.testing.known_failing_targets("cuda")
-@tvm.testing.parametrize_targets
-def test_qgemm(target, dev):
-    """test_qgemm"""
-
-    def verify_qgemm(
-        a_shape,
-        b_shape,
-        y_shape,
-        C=False,
-        y_zp=False,
-        b_per_tensor_quantization=False,
-        alpha=1.0,
-        transA=0,
-        transB=1,
-    ):
-        a_array = np.random.randint(low=0, high=255, size=a_shape).astype("uint8")
-        b_array = np.random.uniform(low=0, high=255, size=b_shape).astype("uint8")
-
-        input_nodes = [
-            helper.make_tensor_value_info("a", TensorProto.UINT8, list(a_shape)),
-            helper.make_tensor_value_info("b", TensorProto.UINT8, list(b_shape)),
-        ]
-
-        initializer = [
-            helper.make_tensor("a_scale", TensorProto.FLOAT, (), [np.random.rand()]),
-            helper.make_tensor("a_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]),
-        ]
-
-        input_names = [
-            "a",
-            "a_scale",
-            "a_zero_point",
-            "b",
-            "b_scale",
-            "b_zero_point",
-        ]
-        input_values = [a_array, b_array]
-
-        if b_per_tensor_quantization:
-            initializer.append(
-                helper.make_tensor("b_scale", TensorProto.FLOAT, (), [np.random.rand()])
-            )
-            initializer.append(
-                helper.make_tensor(
-                    "b_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]
-                )
-            )
-        else:  # per_colume_quantization
-            shape_value = b_shape[0] if transB else b_shape[1]
-            b_scale_array = np.random.random(shape_value).astype("float32")
-            w_zero_point_array = np.random.randint(0, 255, size=shape_value).astype("uint8")
-            initializer.append(
-                helper.make_tensor(
-                    "b_scale", TensorProto.FLOAT, list(b_scale_array.shape), b_scale_array
-                )
-            )
-            initializer.append(
-                helper.make_tensor(
-                    "b_zero_point",
-                    TensorProto.UINT8,
-                    list(w_zero_point_array.shape),
-                    w_zero_point_array,
-                )
-            )
-
-        output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, list(y_shape))
-
-        if C is True:
-            C_shape = (b_shape[0] if transB else b_shape[1],)
-            C_array = np.random.randint(low=0, high=65536, size=C_shape).astype("int32")
-            input_nodes.append(helper.make_tensor_value_info("C", TensorProto.INT32, list(C_shape)))
-            input_names.append("C")
-            input_values.append(C_array)
-
-        if y_zp is True:
-            input_names.append("y_scale")
-            initializer.append(
-                helper.make_tensor("y_scale", TensorProto.FLOAT, (), [np.random.rand()])
-            )
-
-            input_names.append("y_zero_point")
-            initializer.append(
-                helper.make_tensor(
-                    "y_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]
-                )
-            )
-
-            output_tensor = helper.make_tensor_value_info(
-                "output", TensorProto.UINT8, list(y_shape)
-            )
-
-        kwargs = {}
-        kwargs["alpha"] = alpha
-        kwargs["transA"] = transA
-        kwargs["transB"] = transB
-
-        node = helper.make_node(
-            "QGemm",
-            inputs=input_names,
-            outputs=["output"],
-            domain="com.microsoft",
-            # Default values for other attributes:
-            **kwargs,
-        )
-
-        graph = helper.make_graph(
-            [node],
-            "QGemm",
-            inputs=input_nodes,
-            outputs=[output_tensor],
-            initializer=initializer,
-        )
-        model = helper.make_model(
-            graph,
-            producer_name="QGemm",
-            opset_imports=[
-                onnx.helper.make_opsetid("com.microsoft", 1),
-            ],
-        )
-
-        verify_with_ort_with_inputs(model, input_values, target=target, dev=dev)
-
-    # B per tensor quantization
-    verify_qgemm(
-        (20, 30),
-        (50, 30),
-        (20, 50),
-        True,
-        True,
-        True,
-    )
-
-    # B per column  quantization
-    verify_qgemm(
-        (20, 30),
-        (50, 30),
-        (20, 50),
-        True,
-        True,
-        False,
-    )
-
-    # test alpha
-    verify_qgemm(
-        (20, 30),
-        (50, 30),
-        (20, 50),
-        True,
-        True,
-        True,
-        0.5,
-    )
-
-    # test transpose A
-    verify_qgemm(
-        (20, 50),
-        (20, 80),
-        (50, 80),
-        True,
-        True,
-        True,
-        0.5,
-        1,
-        0,
-    )
-
-
-@tvm.testing.known_failing_targets("cuda")
-@tvm.testing.parametrize_targets
-def test_qlinearconv(target, dev):
-    """test_qlinearconv"""
-
-    def verify_qlinearconv(
-        x_shape,
-        w_shape,
-        y_shape,
-        padding,
-        kernel_shape,
-        strides,
-        dilations,
-        auto_pad="NOTSET",
-        bias=False,
-        per_channel_quantization=False,
-    ):
-
-        x_array = np.random.randint(low=0, high=255, size=x_shape).astype("uint8")
-        w_array = np.random.uniform(low=0, high=255, size=w_shape).astype("uint8")
-
-        initializer = [
-            helper.make_tensor("x_scale", TensorProto.FLOAT, (), [np.random.rand()]),
-            helper.make_tensor("x_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]),
-            helper.make_tensor("y_scale", TensorProto.FLOAT, (), [np.random.rand()]),
-            helper.make_tensor("y_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]),
-        ]
-
-        input_nodes = [
-            helper.make_tensor_value_info("x", TensorProto.UINT8, list(x_shape)),
-            helper.make_tensor_value_info("w", TensorProto.UINT8, list(w_shape)),
-        ]
-        input_names = [
-            "x",
-            "x_scale",
-            "x_zero_point",
-            "w",
-            "w_scale",
-            "w_zero_point",
-            "y_scale",
-            "y_zero_point",
-        ]
-        input_values = [x_array, w_array]
-
-        if per_channel_quantization:
-            w_scale_array = np.random.random(w_shape[0]).astype("float32")
-            w_zero_point_array = np.random.randint(0, 255, size=w_shape[0]).astype("uint8")
-
-            initializer.append(
-                helper.make_tensor("w_scale", TensorProto.FLOAT, [w_shape[0]], w_scale_array)
-            )
-            initializer.append(
-                helper.make_tensor(
-                    "w_zero_point", TensorProto.UINT8, [w_shape[0]], w_zero_point_array
-                )
-            )
-        else:
-            initializer.append(
-                helper.make_tensor("w_scale", TensorProto.FLOAT, (), [np.random.rand()])
-            )
-            initializer.append(
-                helper.make_tensor(
-                    "w_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]
-                )
-            )
-
-        if bias is True:
-            b_shape = w_shape[0:1]
-            b_array = np.random.randint(low=0, high=65536, size=b_shape).astype("int32")
-            input_nodes.append(helper.make_tensor_value_info("B", TensorProto.INT32, list(b_shape)))
-            input_names.append("B")
-            input_values.append(b_array)
-
-        if padding is None:
-            ## autopadding with unset default attributes
-            kwargs = {}
-            if not all(list(s == 1 for s in strides)):
-                kwargs["strides"] = strides
-            if not all(list(d == 1 for d in dilations)):
-                kwargs["dilations"] = dilations
-
-            node = helper.make_node(
-                "QLinearConv",
-                inputs=input_names,
-                outputs=["y"],
-                # Default values for other attributes:
-                auto_pad=auto_pad,
-                **kwargs,
-            )
-        else:
-            node = helper.make_node(
-                "QLinearConv",
-                inputs=input_names,
-                outputs=["y"],
-                kernel_shape=kernel_shape,
-                # Default values for other attributes:
-                strides=strides,
-                dilations=dilations,
-                # groups=1
-                pads=padding,
-            )
-
-        graph = helper.make_graph(
-            [node],
-            "conv_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("y", TensorProto.UINT8, list(y_shape))],
-            initializer=initializer,
-        )
-        model = helper.make_model(graph, producer_name="qlinearconv_test")
-        # opt_level=1 will cause error
-        verify_with_ort_with_inputs(model, input_values, opt_level=2, target=target, dev=dev)
-
-    def repeat(num, dims):
-        return tuple(num for _ in range(dims))
-
-    # only support QLinearConv2d because only support qnn.conv2d
-    dims = 2
-
-    # Convolution with padding
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(5, dims),
-        2 * repeat(1, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-    )
-
-    # Convolution with bias
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(5, dims),
-        2 * repeat(1, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-        bias=True,
-    )
-
-    # Convolution with asymmetric padding
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(4, dims),
-        repeat(0, dims) + repeat(1, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-    )
-    # Convolution without padding
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(3, dims),
-        2 * repeat(0, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-    )
-    # Convolution with autopadding
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(5, dims),
-        None,
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-        auto_pad="SAME_UPPER",
-    )
-    # Convolution with valid autopadding
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(3, dims),
-        None,
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-        auto_pad="VALID",
-    )
-    # Convolution with non uniform stride
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(3, dims),
-        None,
-        repeat(3, dims),
-        repeat(2, dims),
-        repeat(1, dims),
-        auto_pad="SAME_UPPER",
-    )
-    # Convolution with dilation
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(5, dims),
-        2 * repeat(2, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(2, dims),
-    )
-    # Convolution with per channel quantization
-    verify_qlinearconv(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(3, dims),
-        None,
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-        per_channel_quantization=True,
-    )
-
-
-# TODO(vvchernov): fix problem with quantization on cuda
-@tvm.testing.known_failing_targets("cuda")
-@tvm.testing.parametrize_targets
-def test_qlinearmatmul(target, dev):
-    """test_qlinearmatmul"""
-
-    def verify_qlinearmatmul(
-        x_shape,
-        w_shape,
-        y_shape,
-        x_dtype="uint8",
-        w_dtype="uint8",
-    ):
-        def get_randint_numpy_scalar(dtype="uint8"):
-            if dtype == "uint8":
-                return np.random.randint(0, 255)
-            else:  # "int8"
-                return np.random.randint(-128, 127)
-
-        if x_dtype == "uint8":
-            x_array = np.random.randint(low=0, high=255, size=x_shape).astype("uint8")
-        else:  # "int8"
-            x_array = np.random.randint(low=-128, high=127, size=x_shape).astype("int8")
-        if w_dtype == "uint8":
-            w_array = np.random.uniform(low=0, high=255, size=w_shape).astype("uint8")
-        else:  # "int8"
-            w_array = np.random.uniform(low=-128, high=127, size=w_shape).astype("int8")
-
-        x_proto_type = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(x_dtype)]
-        w_proto_type = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(w_dtype)]
-
-        y_dtype = "int8"
-        if x_dtype == "uint8" and w_dtype == "uint8":
-            y_dtype = "uint8"
-        y_proto_type = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(y_dtype)]
-
-        initializer = [
-            helper.make_tensor("x_scale", TensorProto.FLOAT, (), [np.random.rand()]),
-            # TODO: 0 value for int8?
-            helper.make_tensor(
-                "x_zero_point", x_proto_type, (), [get_randint_numpy_scalar(x_dtype)]
-            ),
-            helper.make_tensor("w_scale", TensorProto.FLOAT, (), [np.random.rand()]),
-            # TODO: 0 value for int8?
-            helper.make_tensor(
-                "w_zero_point", w_proto_type, (), [get_randint_numpy_scalar(w_dtype)]
-            ),
-            helper.make_tensor("y_scale", TensorProto.FLOAT, (), [np.random.rand()]),
-            helper.make_tensor(
-                "y_zero_point", y_proto_type, (), [get_randint_numpy_scalar(y_dtype)]
-            ),
-        ]
-
-        input_nodes = [
-            helper.make_tensor_value_info("x", x_proto_type, list(x_shape)),
-            helper.make_tensor_value_info("w", w_proto_type, list(w_shape)),
-        ]
-        input_names = [
-            "x",
-            "x_scale",
-            "x_zero_point",
-            "w",
-            "w_scale",
-            "w_zero_point",
-            "y_scale",
-            "y_zero_point",
-        ]
-        input_values = [x_array, w_array]
-
-        node = helper.make_node(
-            "QLinearMatMul",
-            inputs=input_names,
-            outputs=["y"],
-        )
-
-        y_proto_type = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype("int8")]
-        if x_dtype == "uint8" and w_dtype == "uint8":
-            y_proto_type = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype("uint8")]
-
-        graph = helper.make_graph(
-            [node],
-            "qmatmul_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("y", y_proto_type, list(y_shape))],
-            initializer=initializer,
-        )
-        model = helper.make_model(graph, producer_name="qlinearmatmul_test")
-        # opt_level=1 will cause error
-        verify_with_ort_with_inputs(model, input_values, opt_level=2, target=target, dev=dev)
-
-    # Default matmul both ranks = 2 (x_dtype = "uint8", w_dtype = "uint8")
-    verify_qlinearmatmul((2, 3), (3, 2), (2, 2))
-
-    # Default matmul both ranks = 2 (x_dtype = "int8", w_dtype = "int8")
-    verify_qlinearmatmul((2, 3), (3, 2), (2, 2), "int8", "int8")
-
-    # TODO(vvchernov): problems on ONNX Runtime side and type check (onnx.py:L4763) on TVM side
-    # Default matmul both ranks = 2 (x_dtype = "uint8", w_dtype = "int8")
-    # verify_qlinearmatmul((2, 3), (3, 2), (2, 2), "uint8", "int8")
-
-    # TODO(vvchernov): problems on ONNX Runtime side and type check (onnx.py:L4763) on TVM side
-    # Default matmul both ranks = 2 (x_dtype = "int8", w_dtype = "uint8")
-    # verify_qlinearmatmul((2, 3), (3, 2), (2, 2), "int8", "uint8")
-
-    # Reduced matmul: x_ranks = 1, w_rank = 2 (x_dtype = "uint8", w_dtype = "uint8")
-    verify_qlinearmatmul((3,), (3, 2), (2,))
-
-    # Special case matmul: x_ranks = 3, w_rank = 2 (x_dtype = "uint8", w_dtype = "uint8")
-    verify_qlinearmatmul((2, 3, 4), (4, 3), (2, 3, 3))
-
-    # GPT2-style matmul both ranks = 4 (x_dtype = "uint8", w_dtype = "uint8")
-    verify_qlinearmatmul((2, 4, 3, 3), (2, 4, 3, 3), (2, 4, 3, 3))
-
-    # Asymetric matmul: x_ranks = 4, w_rank = 3 (x_dtype = "uint8", w_dtype = "uint8")
-    verify_qlinearmatmul((2, 4, 3, 3), (4, 3, 3), (2, 4, 3, 3))
-
-    # Asymetric matmul: x_ranks = 2, w_rank = 3 (x_dtype = "uint8", w_dtype = "uint8")
-    # verify_qlinearmatmul((3, 3), (4, 3, 3), (4, 3, 3))
-
-
-@tvm.testing.parametrize_targets
-def test_qlinearconcat(target, dev):
-    """test_qlinearconcat"""
-
-    def verify_qlinearconcat(shapes, out_shape, axis=None):
-        input_names = []
-        input_values = []
-        input_nodes = []
-        for i, shape in enumerate(shapes):
-            tensor_name = chr(ord("a") + i)
-            node = helper.make_tensor_value_info(tensor_name, TensorProto.FLOAT, list(shape))
-
-            input_names.append(tensor_name)
-            input_values.append(np.random.random(shape).astype("float32"))
-            input_nodes.append(node)
-
-        node = helper.make_node("Concat", input_names, ["C"])
-        if axis is not None:
-            axis_attr = helper.make_attribute("axis", axis)
-            node.attribute.append(axis_attr)
-        graph = helper.make_graph(
-            [node],
-            "qlinearconcat_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("C", TensorProto.FLOAT, list(out_shape))],
-        )
-        model = helper.make_model(graph, producer_name="qlinearconcat_test")
-        quantize_and_verify_with_ort(model, input_names, shapes, target, dev)
-
-    verify_qlinearconcat([[2, 1], [2, 1]], [4, 1], 0)
-    verify_qlinearconcat([[2, 1], [2, 1]], [2, 2], 1)
-    verify_qlinearconcat([[1, 2], [2, 2], [3, 2]], [6, 2], 0)
-
-
-@tvm.testing.parametrize_targets
-def test_qlinearadd(target, dev):
-    """test_qlinearadd"""
-
-    def verify_qlinearadd(a_shape, b_shape, c_shape):
-
-        _ = np.random.random(a_shape).astype("float32")
-        _ = np.random.random(b_shape).astype("float32")
-
-        input_nodes = [
-            helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape)),
-            helper.make_tensor_value_info("b", TensorProto.FLOAT, list(b_shape)),
-        ]
-        input_names = [
-            "a",
-            "b",
-        ]
-
-        node = helper.make_node("Add", ["a", "b"], ["C"])
-        graph = helper.make_graph(
-            [node],
-            "qlinearadd_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("C", TensorProto.FLOAT, list(c_shape))],
-        )
-        model = helper.make_model(graph, producer_name="qlinearadd_test")
-        quantize_and_verify_with_ort(model, input_names, [a_shape, b_shape], target, dev)
-
-    verify_qlinearadd([4, 2], [4, 2], [4, 2])
-    verify_qlinearadd([4, 2], [2], [4, 2])
-    verify_qlinearadd([5, 1, 7], [2, 7], [5, 2, 7])
-
-
-@tvm.testing.parametrize_targets
-def test_qlinearmul(target, dev):
-    """test_qlinearmul"""
-
-    def verify_qlinearmul(a_shape, b_shape, c_shape):
-
-        _ = np.random.random(a_shape).astype("float32")
-        _ = np.random.random(b_shape).astype("float32")
-
-        input_nodes = [
-            helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape)),
-            helper.make_tensor_value_info("b", TensorProto.FLOAT, list(b_shape)),
-        ]
-        input_names = [
-            "a",
-            "b",
-        ]
-
-        node = helper.make_node("Mul", input_names, ["C"])
-        graph = helper.make_graph(
-            [node],
-            "qlinearmul_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("C", TensorProto.FLOAT, list(c_shape))],
-        )
-        model = helper.make_model(graph, producer_name="qlinearmul_test")
-        quantize_and_verify_with_ort(model, input_names, [a_shape, b_shape], target, dev)
-
-    verify_qlinearmul([7], [7], [7])
-    verify_qlinearmul([4, 2], [4, 2], [4, 2])
-    verify_qlinearmul([4, 2], [2], [4, 2])
-    verify_qlinearmul([5, 1, 7], [2, 7], [5, 2, 7])
-
-
-@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/11375")
-@tvm.testing.parametrize_targets
-def test_qlinearleakyrelu(target, dev):
-    """test_qlinearleakyrelu"""
-
-    def verify_qlinearleakyrelu(inshape, kwargs):
-
-        in_array = np.random.random(inshape).astype("float32")
-        node = helper.make_node("LeakyRelu", ["X"], ["Y"], **kwargs)
-
-        graph = helper.make_graph(
-            [node],
-            "qlinearRelu_test",
-            inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, list(in_array.shape))],
-            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(in_array.shape))],
-        )
-        model = helper.make_model(graph, producer_name="qlinearRelu_test")
-        args = (model, ["X"], [in_array.shape], target, dev)
-        if dev == "cuda":
-            quantize_and_verify_with_ort(*args, rtol=1e-2, atol=1e-2)
-        else:
-            quantize_and_verify_with_ort(*args)
-
-    verify_qlinearleakyrelu([2, 4, 5, 6], {"alpha": 0.25})
-    verify_qlinearleakyrelu([6, 5, 6, 7], {"alpha": 0.35})
-    verify_qlinearleakyrelu([5, 1, 4, 6], {"alpha": 0.65})
-
-
-@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/11375")
-@tvm.testing.parametrize_targets
-def test_qlinearsigmoid(target, dev):
-    """test_qlinearsigmoid"""
-
-    def verify_qlinearsigmoid(a_shape):
-
-        _ = np.random.random(a_shape).astype("float32")
-
-        input_nodes = [helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape))]
-
-        node = helper.make_node("Sigmoid", ["a"], ["B"])
-        graph = helper.make_graph(
-            [node],
-            "qlinearsigmoid_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("B", TensorProto.FLOAT, list(a_shape))],
-        )
-        model = helper.make_model(graph, producer_name="qlinearsigmoid_test")
-        quantize_and_verify_with_ort(model, ["a"], [a_shape], target, dev)
-
-    verify_qlinearsigmoid([4, 2])
-    verify_qlinearsigmoid([5])
-    verify_qlinearsigmoid([3, 4, 5])
-    verify_qlinearsigmoid([])
-
-
-@tvm.testing.parametrize_targets
-def test_qlinearsoftmax(target, dev):
-    """test_qlinearsoftmax"""
-
-    def verify_qlinearsoftmax(a_shape):
-
-        _ = np.random.random(a_shape).astype("float32")
-
-        input_nodes = [helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape))]
-
-        node = helper.make_node("Softmax", ["a"], ["B"])
-        graph = helper.make_graph(
-            [node],
-            "qlinearsoftmax_test",
-            inputs=input_nodes,
-            outputs=[helper.make_tensor_value_info("B", TensorProto.FLOAT, list(a_shape))],
-        )
-        model = helper.make_model(graph, producer_name="qlinearsoftmax_test")
-        quantize_and_verify_with_ort(model, ["a"], [a_shape], target, dev)
-
-    verify_qlinearsoftmax([4, 2])
-    verify_qlinearsoftmax([5])
-    verify_qlinearsoftmax([3, 4, 5])
-
-
-@tvm.testing.parametrize_targets("llvm")
-def test_random_bernoulli(target, dev):
-    """test_random_bernoulli"""
-
-    def _get_tvm_output(
-        inputs,
-        out_dtype="int32",
-        seed=None,
-        target=target,
-        dev=dev,
-        use_vm=False,
-        freeze_params=False,
-    ):
-        def get_bernoulli_model(shape, in_dtype="float32", out_dtype="int32", seed=None):
-            onnx_itype = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(in_dtype)]
-            onnx_otype = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(out_dtype)]
-            node = helper.make_node(
-                "Bernoulli",
-                ["input"],
-                ["output"],
-            )
-            dtype_attr = helper.make_attribute("dtype", onnx_otype)
-            node.attribute.append(dtype_attr)
-            if seed is not None:
-                seed_attr = helper.make_attribute("seed", float(seed))
-                node.attribute.append(seed_attr)
-
-            graph = helper.make_graph(
-                [node],
-                "random_bernoulli_test",
-                inputs=[helper.make_tensor_value_info("input", onnx_itype, list(shape))],
-                outputs=[helper.make_tensor_value_info("output", onnx_otype, list(shape))],
-            )
-            return helper.make_model(graph, producer_name="random_bernoulli_test")
-
-        shape = inputs.shape
-        in_dtype = inputs.dtype
-        model = get_bernoulli_model(shape, in_dtype, out_dtype, seed)
-
-        if use_vm:
-            return get_tvm_output_with_vm(
-                model,
-                inputs,
-                target,
-                dev,
-                freeze_params=freeze_params,
-            )
-        else:
-            return get_tvm_output(
-                model,
-                inputs,
-                target,
-                dev,
-            )
-
-    def binom_test(input, ideal_mean, threshold=0.05):
-        # This test is strictly appropriate when input probabilities are all identical.
-        # In that case, it should lead to flaky failures in only one run in a million (p>=1e-6).
-        # The test should be over-conservative when input probabilities are not identical.
-        # (i.e., It should have a rate of flaky failures lower than one run in a million.)
-        # If this test starts repeatedly throwing flaky failures, consult a statistician
-        # in addition to your regular debugging.
-        bnm_test_res = scipy.stats.binomtest(
-            k=np.sum(input, dtype="int32"), n=len(input), p=ideal_mean
-        )
-        return bnm_test_res.pvalue > threshold
-
-    def verify_bernoulli(
-        inputs=None,
-        shape=[],
-        in_dtype="float32",
-        out_dtype="int32",
-        seed=None,
-        target=target,
-        dev=dev,
-        use_vm=False,
-        freeze_params=False,
-        in_out_equal=False,
-    ):
-        if inputs is None:
-            assert len(shape) != 0
-            inputs = np.random.uniform(size=shape).astype(in_dtype)
-
-        tvm_out = _get_tvm_output(
-            inputs,
-            out_dtype,
-            seed,
-            target,
-            dev,
-            use_vm,
-            freeze_params,
-        )
-
-        if isinstance(tvm_out, list):
-            tvm_out = tvm_out[0]
-        # check that values are 0 or 1
-        tvm_flat = tvm_out.flatten()
-        assert np.array_equal(tvm_flat, tvm_flat.astype("bool"))
-        if in_out_equal:
-            tvm.testing.assert_allclose(inputs, tvm_out)
-        else:
-            # check that mean value is close to the theoretical one by binomial test
-            ideal_mean = np.mean(inputs)
-            repeats = 3
-            check = False
-            for i in range(repeats):
-                if binom_test(tvm_flat, ideal_mean):
-                    check = True
-                    break
-                else:
-                    # repeat with new seed
-                    seed = np.random.randint(1e6)
-                    tvm_flat = _get_tvm_output(
-                        inputs,
-                        out_dtype,
-                        seed,
-                        target,
-                        dev,
-                        use_vm,
-                        freeze_params,
-                    ).flatten()
-            assert check, "Binomial test failed"
-
-    # Test input sequence of 0 and 1
-    inputs = np.random.randint(2, size=[10000]).astype("float32")
-    verify_bernoulli(inputs, in_out_equal=True)
-
-    # Binomial test input with 0.5 values
-    val_num = 10000
-    inputs = np.ones([val_num], dtype="float32") * 0.5
-    verify_bernoulli(inputs)
-
-    # Binomial test input with 0.1 values
-    inputs = np.ones([val_num], dtype="float32") * 0.1
-    verify_bernoulli(inputs)
-
-    # Simple test
-    verify_bernoulli(shape=[val_num])
-
-    # Floating output type
-    verify_bernoulli(shape=[val_num], out_dtype="float32")
-
-    # Double input type
-    verify_bernoulli(shape=[val_num], in_dtype="float64")
-
-    # Test N-D tensor generation
-    verify_bernoulli(shape=[2, 4, 100, 100])
-
-    # Test with seed
-    verify_bernoulli(shape=[val_num], seed=np.random.randint(1e6))
-
-    # Test result determinism with the same seeds
-    inputs = np.random.uniform(size=[val_num])
-    fixed_seed = np.random.randint(1e6)
-    tvm_out_1 = _get_tvm_output(inputs, seed=fixed_seed)
-    tvm_out_2 = _get_tvm_output(inputs, seed=fixed_seed)
-    tvm.testing.assert_allclose(tvm_out_1, tvm_out_2)
-
-
-@tvm.testing.parametrize_targets("llvm")
-def test_random_uniform(target, dev):
-    """test_random_uniform"""
-
-    def get_random_uniform(shape, dtype="float32", high=1.0, low=0.0, seed=None):
-        ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-        node = helper.make_node(
-            "RandomUniform", [], ["out"], shape=shape, dtype=ONNX_DTYPE, high=high, low=low
-        )
-        if seed is not None:
-            seed_attr = helper.make_attribute("seed", seed)
-            node.attribute.append(seed_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "random_uniform_test",
-            inputs=[],
-            outputs=[helper.make_tensor_value_info("out", ONNX_DTYPE, shape)],
-        )
-        model = helper.make_model(graph, producer_name="random_uniform_test")
-        return get_tvm_output_with_vm(
-            model,
-            [],
-            target=target,
-            dev=dev,
-            validate_structural_equal=(seed is not None),
-        )
-
-    # Check that function runs and produces proper shape.
-    vals = get_random_uniform([10], dtype="float32")
-    assert list(vals.shape) == [10]
-    assert vals.dtype == "float32"
-
-    # Test N-D tensor generation.
-    vals = get_random_uniform([1, 3, 100, 100], dtype="float32")
-    assert list(vals.shape) == [1, 3, 100, 100]
-
-    # Check that bounds aren't exceeded.
-    vals = get_random_uniform(shape=[100], high=100.0, low=-100.0)
-    assert list(vals.shape) == [100]
-    assert all(vals >= -100) and all(vals <= 100)
-
-    # Check that a fixed seed produces the same values when run twice.
-    vals_1 = get_random_uniform(shape=[10], seed=1)
-    vals_2 = get_random_uniform(shape=[10], seed=1)
-    assert all(vals_1 == vals_2)
-
-    # Test against an expected output with a fixed seed.
-    real = get_random_uniform(shape=[10], seed=5.0)
-    expected = np.asarray(
-        [
-            0.043976,
-            0.96656,
-            0.292199,
-            0.904297,
-            0.25167,
-            0.521778,
-            0.778985,
-            0.085463,
-            0.939846,
-            0.194201,
-        ]
-    )
-    tvm.testing.assert_allclose(real, expected, rtol=1e-5)
-
-
-@tvm.testing.parametrize_targets("llvm")
-def test_random_uniform_like(target, dev):
-    """test_random_uniform_like"""
-
-    def get_random_uniform_like(input_, shape, dtype=None, high=1.0, low=0.0, seed=None):
-        node = helper.make_node("RandomUniformLike", ["in"], ["out"], high=high, low=low)
-        if seed is not None:
-            seed_attr = helper.make_attribute("seed", seed)
-            node.attribute.append(seed_attr)
-
-        ONNX_DTYPE = None
-        if dtype is not None:
-            ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-            dtype_attr = helper.make_attribute("dtype", ONNX_DTYPE)
-            node.attribute.append(dtype_attr)
-        else:
-            dtype = input_.dtype
-            ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-
-        graph = helper.make_graph(
-            [node],
-            "random_uniform_test",
-            inputs=[helper.make_tensor_value_info("in", ONNX_DTYPE, shape)],
-            outputs=[helper.make_tensor_value_info("out", ONNX_DTYPE, shape)],
-        )
-        model = helper.make_model(graph, producer_name="random_uniform_like_test")
-        return get_tvm_output_with_vm(
-            model,
-            [input_],
-            target=target,
-            dev=dev,
-            validate_structural_equal=(seed is not None),
-        )
-
-    # Check that function runs and produces proper shape and dtype.
-    shape = [10]
-    input_array = np.random.random(shape).astype("float32")
-    vals = get_random_uniform_like(input_array, shape, dtype="float32")
-    assert list(vals.shape) == [10]
-    assert vals.dtype == "float32"
-
-    # Test N-D tensor generation.
-    shape = [1, 3, 100, 100]
-    input_array = np.random.random(shape).astype("float32")
-    vals = get_random_uniform_like(input_array, shape, dtype="float64")
-    assert list(vals.shape) == shape
-    assert vals.dtype == "float64"
-
-    # Check that bounds aren't exceeded.
-    shape = [100]
-    input_array = np.random.random(shape).astype("float64")
-    vals = get_random_uniform_like(input_array, shape, high=100.0, low=-100.0)
-    assert list(vals.shape) == shape
-    assert all(vals >= -100) and all(vals <= 100)
-
-    # Test against an expected output with a fixed seed.
-    shape = [10]
-    input_array = np.random.random(shape).astype("float32")
-    real = get_random_uniform_like(input_array, shape=[10], seed=5.0)
-    expected = np.asarray(
-        [
-            0.043976,
-            0.96656,
-            0.292199,
-            0.904297,
-            0.25167,
-            0.521778,
-            0.778985,
-            0.085463,
-            0.939846,
-            0.194201,
-        ]
-    )
-    tvm.testing.assert_allclose(real, expected, rtol=1e-5)
-
-
-@tvm.testing.parametrize_targets("llvm")
-def test_random_normal(target, dev):
-    """test_random_normal"""
-
-    def get_random_normal(shape, dtype="float32", scale=1.0, mean=0.0, seed=None):
-        ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-        node = helper.make_node(
-            "RandomNormal", [], ["out"], shape=shape, dtype=ONNX_DTYPE, scale=scale, mean=mean
-        )
-        if seed is not None:
-            seed_attr = helper.make_attribute("seed", seed)
-            node.attribute.append(seed_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "random_normal_test",
-            inputs=[],
-            outputs=[helper.make_tensor_value_info("out", ONNX_DTYPE, shape)],
-        )
-        model = helper.make_model(graph, producer_name="random_normal_test")
-        return get_tvm_output_with_vm(
-            model,
-            [],
-            target=target,
-            dev=dev,
-            validate_structural_equal=(seed is not None),
-        )
-
-    # Test N-D tensor generation.
-    vals = get_random_normal([1, 3, 100, 100], dtype="float32")
-    assert list(vals.shape) == [1, 3, 100, 100]
-    tvm.testing.assert_allclose(vals.mean(), 0.0, rtol=0.1, atol=0.1)
-    tvm.testing.assert_allclose(np.std(vals), 1.0, rtol=0.1, atol=0.1)
-
-    # Test mean=2.0 scale=10.0
-    vals = get_random_normal([1, 3, 100, 100], mean=2.0, scale=10.0, dtype="float32")
-    assert list(vals.shape) == [1, 3, 100, 100]
-    tvm.testing.assert_allclose(vals.mean(), 2.0, rtol=0.1, atol=0.1)
-    tvm.testing.assert_allclose(np.std(vals), 10.0, rtol=0.1, atol=0.1)
-
-    # Check that a fixed seed produces the same values when run twice.
-    vals_1 = get_random_normal(shape=[10], seed=1.0)
-    vals_2 = get_random_normal(shape=[10], seed=1.0)
-    assert all(vals_1 == vals_2)
-
-
-@tvm.testing.parametrize_targets("llvm")
-def test_random_normal_like(target, dev):
-    """test_random_normal_like"""
-
-    def get_random_normal_like(input_, shape, dtype="float32", scale=1.0, mean=0.0, seed=None):
-        ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-        node = helper.make_node(
-            "RandomNormalLike", ["in"], ["out"], dtype=ONNX_DTYPE, scale=scale, mean=mean
-        )
-        if seed is not None:
-            seed_attr = helper.make_attribute("seed", seed)
-            node.attribute.append(seed_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "random_normal_like_test",
-            inputs=[helper.make_tensor_value_info("in", ONNX_DTYPE, shape)],
-            outputs=[helper.make_tensor_value_info("out", ONNX_DTYPE, shape)],
-        )
-        model = helper.make_model(graph, producer_name="random_normal_like_test")
-        return get_tvm_output_with_vm(
-            model,
-            [input_],
-            target=target,
-            dev=dev,
-            validate_structural_equal=(seed is not None),
-        )
-
-    # Test N-D tensor generation.
-    shape = [1, 3, 100, 100]
-    input_array = np.random.random(shape).astype("float32")
-    vals = get_random_normal_like(input_array, [1, 3, 100, 100], dtype="float32")
-    assert list(vals.shape) == [1, 3, 100, 100]
-    tvm.testing.assert_allclose(vals.mean(), 0.0, rtol=0.1, atol=0.1)
-    tvm.testing.assert_allclose(np.std(vals), 1.0, rtol=0.1, atol=0.1)
-
-    # Test mean=2.0 scale=10.0
-    shape = [1, 3, 100, 100]
-    input_array = np.random.random(shape).astype("float32")
-    vals = get_random_normal_like(
-        input_array, [1, 3, 100, 100], mean=2.0, scale=10.0, dtype="float32"
-    )
-    assert list(vals.shape) == [1, 3, 100, 100]
-    tvm.testing.assert_allclose(vals.mean(), 2.0, rtol=0.1, atol=0.1)
-    tvm.testing.assert_allclose(np.std(vals), 10.0, rtol=0.1, atol=0.1)
-
-
-@tvm.testing.parametrize_targets("llvm")
-def test_multinomial(target, dev):
-    def get_multinomial(input, shape, sample_size, seed=None):
-        IN_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype("float32")]
-        OUT_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype("int32")]
-        node = helper.make_node("Multinomial", ["in"], ["out"], sample_size=sample_size)
-        if seed is not None:
-            seed_attr = helper.make_attribute("seed", seed)
-            node.attribute.append(seed_attr)
-
-        graph = helper.make_graph(
-            [node],
-            "multinomial_test",
-            inputs=[helper.make_tensor_value_info("in", IN_DTYPE, shape)],
-            outputs=[helper.make_tensor_value_info("out", OUT_DTYPE, shape)],
-        )
-        model = helper.make_model(graph, producer_name="multinomial_test")
-        return get_tvm_output_with_vm(
-            model,
-            [input],
-            target=target,
-            dev=dev,
-            validate_structural_equal=(seed is not None),
-        )
-
-    # Test N-D tensor generation.
-    shape = [3]
-    sample_size = 2
-    probs = np.random.random(shape).astype("float32")
-    indices = get_multinomial(probs, shape, sample_size)
-    # Since specific values are random, we'll check that the output shape is
-    # correct and the values chosen are all valid indices.
-    assert list(indices.shape) == [sample_size]
-    assert np.max(indices) < shape[-1]
-
-    # Test 2d multinomial
-    shape = [10, 5]
-    sample_size = 4
-    probs = np.random.random(shape).astype("float32")
-    indices = get_multinomial(probs, shape, sample_size)
-    assert list(indices.shape) == [10, sample_size]
-    assert np.max(indices) < shape[-1]
-
-
-@tvm.testing.parametrize_targets
-def test_convinteger(target, dev):
-    """test_convinteger"""
-
-    def verify_convinteger(
-        x_shape,
-        w_shape,
-        y_shape,
-        padding,
-        kernel_shape,
-        strides,
-        dilations,
-        auto_pad="NOTSET",
-        dtype="uint8",
-    ):
-        x_array = np.random.randint(low=0, high=255, size=x_shape).astype(dtype)
-        w_array = np.random.uniform(low=0, high=255, size=w_shape).astype(dtype)
-        x_zero_point_array = np.random.randint(0, 255, size=[1]).astype(dtype)
-        w_zero_point_array = np.random.randint(0, 255, size=[1]).astype(dtype)
-
-        ONNX_DTYPE = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)]
-        input_nodes = [
-            helper.make_tensor_value_info("x", ONNX_DTYPE, list(x_shape)),
-            helper.make_tensor_value_info("w", ONNX_DTYPE, list(w_shape)),
-        ]
-        initializer = [
-            helper.make_tensor("x_zero_point", ONNX_DTYPE, [], x_zero_point_array),
-            helper.make_tensor("w_zero_point", ONNX_DTYPE, [], w_zero_point_array),
-        ]
-        input_names = ["x", "w", "x_zero_point", "w_zero_point"]
-        input_values = [x_array, w_array]
-
-        if padding is None:
-            ## autopadding with unset default attributes
-            kwargs = {}
-            if not all(list(s == 1 for s in strides)):
-                kwargs["strides"] = strides
-            if not all(list(d == 1 for d in dilations)):
-                kwargs["dilations"] = dilations
-
-            node = helper.make_node(
-                "ConvInteger",
-                inputs=input_names,
-                outputs=["y"],
-                # Default values for other attributes:
-                auto_pad=auto_pad,
-                **kwargs,
-            )
-        else:
-            node = helper.make_node(
-                "ConvInteger",
-                inputs=input_names,
-                outputs=["y"],
-                kernel_shape=kernel_shape,
-                # Default values for other attributes:
-                strides=strides,
-                dilations=dilations,
-                # groups=1
-                pads=padding,
-            )
-
-        graph = helper.make_graph(
-            [node],
-            "convinteger_test",
-            inputs=input_nodes,
-            initializer=initializer,
-            outputs=[helper.make_tensor_value_info("y", TensorProto.INT32, list(y_shape))],
-        )
-        model = helper.make_model(graph, producer_name="convinteger_test")
-        # opt_level=1 will cause error
-        verify_with_ort_with_inputs(model, input_values, target=target, dev=dev, opt_level=2)
-
-    def repeat(num, dims):
-        return tuple(num for _ in range(dims))
-
-    # only support 2D ConvInteger because we only support qnn.conv2d for now.
-    dims = 2
-
-    # Convolution with padding
-    verify_convinteger(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(5, dims),
-        2 * repeat(1, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-    )
-
-    # Convolution with asymmetric padding
-    verify_convinteger(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(4, dims),
-        repeat(0, dims) + repeat(1, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-    )
-    # Convolution without padding
-    verify_convinteger(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(3, dims),
-        2 * repeat(0, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-    )
-    # Convolution with autopadding
-    verify_convinteger(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(5, dims),
-        None,
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-        auto_pad="SAME_UPPER",
-    )
-    # Convolution with valid autopadding
-    verify_convinteger(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(3, dims),
-        None,
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(1, dims),
-        auto_pad="VALID",
-    )
-    # Convolution with non uniform stride
-    verify_convinteger(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(3, dims),
-        None,
-        repeat(3, dims),
-        repeat(2, dims),
-        repeat(1, dims),
-        auto_pad="SAME_UPPER",
-    )
-    # Convolution with dilation
-    verify_convinteger(
-        (1, 1) + repeat(5, dims),
-        (1, 1) + repeat(3, dims),
-        (1, 1) + repeat(5, dims),
-        2 * repeat(2, dims),
-        repeat(3, dims),
-        repeat(1, dims),
-        repeat(2, dims),
-    )
-
-
-@tvm.testing.parametrize_targets
-def test_bitshift(target, dev):
-    """test_bitshift"""
-
-    def verify_bitshift(in_shape, shift_shape, high=1000000000, in_dtype="uint64"):
-        in_shape = list(in_shape)
-        shift_shape = list(shift_shape)
-
-        # Create an input for each tensor.
-        tensor_values = [
-            np.random.randint(high, size=in_shape).astype(in_dtype),
-            np.random.randint(16, size=shift_shape).astype(in_dtype),
-            np.random.randint(16, size=shift_shape).astype(in_dtype),
-        ]
-
-        bitshift_left_node = helper.make_node(
-            "BitShift",
-            inputs=["input", "shift_left"],
-            outputs=["shifted"],
-            direction="LEFT",
-        )
-
-        bitshift_right_node = helper.make_node(
-            "BitShift",
-            inputs=["shifted", "shift_right"],
-            outputs=["output"],
-            direction="RIGHT",
-        )
-
-        # Create input and output tensors.
-        proto_type = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(in_dtype)]
-        graph_inputs = [
-            helper.make_tensor_value_info("input", proto_type, in_shape),
-            helper.make_tensor_value_info("shift_left", proto_type, shift_shape),
-            helper.make_tensor_value_info("shift_right", proto_type, shift_shape),
-        ]
-
-        graph_outputs = [helper.make_tensor_value_info("output", proto_type, in_shape)]
-
-        graph_nodes = [bitshift_left_node, bitshift_right_node]
-
-        graph = helper.make_graph(
-            graph_nodes,
-            "BitShift_test",
-            inputs=graph_inputs,
-            outputs=graph_outputs,
-        )
-        model = helper.make_model(
-            graph,
-            producer_name="BitShift_test",
-        )
-
-        verify_with_ort_with_inputs(model, tensor_values, target=target, dev=dev)
-
-    shape = (100, 4, 2)
-    broadcast_shape = (100, 1, 1)
-    # Common bitwise test
-    verify_bitshift(shape, shape)
-    # Bitwise test with broadcasting
-    verify_bitshift(shape, broadcast_shape)
-
-
-# TODO(vvchernov): return test back than ONNX Runtime in CI will support domain version of 18
-@pytest.mark.skip("Currently ONNX Runtime in CI does not support domain version of 18")
-@tvm.testing.parametrize_targets
-def test_bitwise(target, dev):
-    """test_bitwise"""
-
-    def verify_bitwise_ops(A_shape, B_shape, C_shape, D_shape, high=128, in_dtype="int32"):
-        A_shape = list(A_shape)
-        B_shape = list(B_shape)
-        C_shape = list(C_shape)
-        D_shape = list(D_shape)
-
-        # Create an input for each tensor.
-        tensor_values = [
-            np.random.randint(high, size=A_shape).astype(in_dtype),
-            np.random.randint(high, size=B_shape).astype(in_dtype),
-            np.random.randint(high, size=C_shape).astype(in_dtype),
-            np.random.randint(high, size=D_shape).astype(in_dtype),
-        ]
-
-        or_node = helper.make_node(
-            "BitwiseOr",
-            inputs=["A", "B"],
-            outputs=["OR"],
-        )
-
-        and_node = helper.make_node(
-            "BitwiseAnd",
-            inputs=["OR", "C"],
-            outputs=["AND"],
-        )
-
-        xor_node = helper.make_node(
-            "BitwiseXor",
-            inputs=["AND", "D"],
-            outputs=["XOR"],
-        )
-
-        not_node = helper.make_node(
-            "BitwiseNot",
-            inputs=["XOR"],
-            outputs=["output"],
-        )
-
-        # Create input and output tensors.
-        proto_type = mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(in_dtype)]
-        graph_inputs = [
-            helper.make_tensor_value_info("A", proto_type, A_shape),
-            helper.make_tensor_value_info("B", proto_type, B_shape),
-            helper.make_tensor_value_info("C", proto_type, C_shape),
-            helper.make_tensor_value_info("D", proto_type, D_shape),
-        ]
-
-        graph_outputs = [
-            helper.make_tensor_value_info("output", proto_type, A_shape),
-        ]
-
-        graph_nodes = [
-            or_node,
-            and_node,
-            xor_node,
-            not_node,
-        ]
-
-        graph = helper.make_graph(
-            graph_nodes,
-            "Bitwise_test",
-            inputs=graph_inputs,
-            outputs=graph_outputs,
-        )
-        model = helper.make_model(
-            graph,
-            producer_name="Bitwise_test",
-        )
-
-        verify_with_ort_with_inputs(model, tensor_values, target=target, dev=dev)
-
-    shape = (100, 4, 2)
-    broadcast_shape = (100, 1, 1)
-    dtypes = ["int8", "uint8", "int32", "uint32"]
-    high_vals = [128, 128, 2147483648, 2147483648]
-    for high, dtype in zip(high_vals, dtypes):
-        # Common bitwise test
-        verify_bitwise_ops(shape, shape, shape, shape, high, dtype)
-        # Bitwise test with broadcasting
-        verify_bitwise_ops(shape, broadcast_shape, broadcast_shape, broadcast_shape, high, dtype)
-
-
-@tvm.testing.parametrize_targets
-def test_scan(target, dev):
-    """test_scan"""
-
-    def verify_scan(
-        input_shapes,
-        output_shapes,
-        num_scan_inputs,
-        scan_input_axes,
-        scan_input_directions,
-        scan_output_axes,
-        scan_output_directions,
-        opset,
-    ):
-
-        body_input_shapes = copy.deepcopy(input_shapes)
-        num_state_inputs = len(input_shapes) - num_scan_inputs
-
-        if opset == 8:
-            for i in range(len(input_shapes)):
-                body_input_shapes[i].pop(0)
-            for i in range(num_state_inputs, len(input_shapes)):
-                body_input_shapes[i].pop(0)
-        else:
-            for i in range(num_state_inputs, len(input_shapes)):
-                body_input_shapes[i].pop(scan_input_axes[i - num_state_inputs])
-
-        initial0 = onnx.helper.make_tensor_value_info(
-            "initial0", onnx.TensorProto.FLOAT, body_input_shapes[0]
-        )
-        initial1 = onnx.helper.make_tensor_value_info(
-            "initial1", onnx.TensorProto.FLOAT, body_input_shapes[1]
-        )
-        input0 = onnx.helper.make_tensor_value_info(
-            "input0", onnx.TensorProto.FLOAT, body_input_shapes[2]
-        )
-        input1 = onnx.helper.make_tensor_value_info(
-            "input1", onnx.TensorProto.FLOAT, body_input_shapes[3]
-        )
-        input2 = onnx.helper.make_tensor_value_info(
-            "input2", onnx.TensorProto.FLOAT, body_input_shapes[4]
-        )
-        state0 = onnx.helper.make_tensor_value_info(
-            "state0", onnx.TensorProto.FLOAT, body_input_shapes[0]
-        )
-        scan_out0 = onnx.helper.make_tensor_value_info(
-            "scan_out0", onnx.TensorProto.FLOAT, body_input_shapes[0]
-        )
-        state1 = onnx.helper.make_tensor_value_info(
-            "state1", onnx.TensorProto.FLOAT, body_input_shapes[1]
-        )
-        scan_out1 = onnx.helper.make_tensor_value_info(
-            "scan_out1", onnx.TensorProto.FLOAT, body_input_shapes[1]
-        )
-        add_node = onnx.helper.make_node(
-            "Add",
-            inputs=["initial0", "input0"],
-            outputs=["state0"],
-        )
-        id_node_0 = onnx.helper.make_node(
-            "Identity",
-            inputs=["state0"],
-            outputs=["scan_out0"],
-        )
-        matmul_node = onnx.helper.make_node(
-            "MatMul",
-            inputs=["input1", "input2"],
-            outputs=["matmul_out"],
-        )
-        sub_node = onnx.helper.make_node(
-            "Sub",
-            inputs=["initial1", "matmul_out"],
-            outputs=["state1"],
-        )
-        id_node_1 = onnx.helper.make_node(
-            "Identity",
-            inputs=["state1"],
-            outputs=["scan_out1"],
-        )
-        scan_body = onnx.helper.make_graph(
-            [add_node, id_node_0, matmul_node, sub_node, id_node_1],
-            "scan_body",
-            [initial0, initial1, input0, input1, input2],
-            [state0, state1, scan_out0, scan_out1],
-        )
-        # create scan op node
-        scan_node = None
-        if opset == 8:
-            scan_node = onnx.helper.make_node(
-                "Scan",
-                inputs=["", "init0", "init1", "in0", "in1", "in2"],
-                outputs=["s0", "s1", "scan0", "scan1"],
-                num_scan_inputs=num_scan_inputs,
-                body=scan_body,
-            )
-        else:
-            scan_node = onnx.helper.make_node(
-                "Scan",
-                inputs=["init0", "init1", "in0", "in1", "in2"],
-                outputs=["s0", "s1", "scan0", "scan1"],
-                num_scan_inputs=num_scan_inputs,
-                body=scan_body,
-                scan_input_axes=scan_input_axes,
-                scan_input_directions=scan_input_directions,
-                scan_output_axes=scan_output_axes,
-                scan_output_directions=scan_output_directions,
-            )
-        input_info = [
-            helper.make_tensor_value_info("init0", TensorProto.FLOAT, input_shapes[0]),
-            helper.make_tensor_value_info("init1", TensorProto.FLOAT, input_shapes[1]),
-            helper.make_tensor_value_info("in0", TensorProto.FLOAT, input_shapes[2]),
-            helper.make_tensor_value_info("in1", TensorProto.FLOAT, input_shapes[3]),
-            helper.make_tensor_value_info("in2", TensorProto.FLOAT, input_shapes[4]),
-        ]
-        out_info = [
-            helper.make_tensor_value_info("s0", TensorProto.FLOAT, output_shapes[0]),
-            helper.make_tensor_value_info("s1", TensorProto.FLOAT, output_shapes[1]),
-            helper.make_tensor_value_info("scan0", TensorProto.FLOAT, output_shapes[2]),
-            helper.make_tensor_value_info("scan1", TensorProto.FLOAT, output_shapes[3]),
-        ]
-        graph = helper.make_graph(
-            nodes=[scan_node],
-            name="scan_test",
-            inputs=input_info,
-            outputs=out_info,
-        )
-        model = onnx.helper.make_model(graph, producer_name="scan-test")
-        init0 = np.random.uniform(low=0, high=255, size=input_shapes[0]).astype(np.float32)
-        init1 = np.random.uniform(low=0, high=255, size=input_shapes[1]).astype(np.float32)
-        in0 = np.random.uniform(low=0, high=255, size=input_shapes[2]).astype(np.float32)
-        in1 = np.random.uniform(low=0, high=255, size=input_shapes[3]).astype(np.float32)
-        in2 = np.random.uniform(low=0, high=255, size=input_shapes[4]).astype(np.float32)
-        input_values = [init0, init1, in0, in1, in2]
-
-        verify_with_ort_with_inputs(
-            model,
-            input_values,
-            target=target,
-            dev=dev,
-            opt_level=2,
-            use_vm=True,
-            opset=opset,
-        )
-
-    # opset 8
-    input_shapes = [[2, 6, 7, 8], [2, 3, 3], [2, 5, 6, 7, 8], [2, 5, 3, 4], [2, 5, 4, 3]]
-    output_shapes = [[2, 6, 7, 8], [2, 3, 3], [2, 5, 6, 7, 8], [2, 5, 3, 3]]
-    # input_shapes, output_shapes, num_scan_inputs, scan_input_axes, scan_input_directions,
-    # scan_output_axes, scan_output_directions, opset
-    verify_scan(input_shapes, output_shapes, 3, [0] * 3, [0] * 3, [0] * 2, [0] * 2, 8)
-    # opset 9
-    input_shapes = [[6, 7, 8], [3, 3], [5, 6, 7, 8], [5, 3, 4], [5, 4, 3]]
-    output_shapes = [[6, 7, 8], [3, 3], [5, 6, 7, 8], [5, 3, 3]]
-    verify_scan(input_shapes, output_shapes, 3, [0] * 3, [0] * 3, [0] * 2, [0] * 2, 9)
-
-    input_shapes = [[6, 7, 8], [3, 3], [5, 6, 7, 8], [3, 4, 5], [4, 5, 3]]
-    output_shapes = [[6, 7, 8], [3, 3], [6, 5, 7, 8], [3, 5, 3]]
-    verify_scan(input_shapes, output_shapes, 3, [0, 2, 1], [1] * 3, [1] * 2, [1] * 2, 9)
-    # Negative axes
-    input_shapes = [[6, 7, 8], [3, 3], [5, 6, 7, 8], [3, 4, 5], [4, 5, 3]]
-    output_shapes = [[6, 7, 8], [3, 3], [6, 5, 7, 8], [3, 5, 3]]
-    verify_scan(input_shapes, output_shapes, 3, [-4, -1, -2], [1] * 3, [-3, -2], [1] * 2, 9)
-
-
-@tvm.testing.parametrize_targets
-def test_linear_regressor(target, dev):
-    """test_linear_regressor"""
-
-    def verify_linear_regressor(a_shape, c_shape, i_shape, targets=1, batch=1):
-        a_array = np.random.uniform(size=a_shape).astype("float32")
-        out_shape = (batch, targets)
-
-        coefficients = np.random.uniform(size=c_shape).astype("float32")
-        intercepts = np.random.uniform(size=i_shape).astype("float32")
-
-        mul_node = helper.make_node(
-            "LinearRegressor",
-            ["a"],
-            ["out"],
-            coefficients=coefficients,
-            intercepts=intercepts,
-            targets=targets,
-            domain="ai.onnx.ml",
-        )
-
-        graph = helper.make_graph(
-            [mul_node],
-            "LinearRegressor_test",
-            inputs=[
-                helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape)),
-            ],
-            outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, out_shape)],
-        )
-        model = helper.make_model(
-            graph,
-            producer_name="LinearRegressor_test",
-            opset_imports=[
-                onnx.helper.make_opsetid("ai.onnx.ml", 1),
-            ],
-        )
-        verify_with_ort_with_inputs(model, [a_array], target=target, dev=dev)
-
-    verify_linear_regressor((1, 3), (3), (1))
-    verify_linear_regressor((2, 10), (10), (1), batch=2)
-    verify_linear_regressor((1, 3), (30), (10), targets=10)
-    verify_linear_regressor((10, 3), (30), (10), targets=10, batch=10)
-    verify_linear_regressor((1, 4), (3), (1))
-
-
-@tvm.testing.parametrize_targets
-def test_dft(target, dev):
-    """test_dft"""
-
-    def verify_dft(
-        _axis,
-        _inverse,
-        _onesided,
-        _dft_length,
-        _input_shape,
-        _output_shape,
-    ):
-        input_names = ["input"]
-        if _dft_length is not None:
-            input_names.append("dft_length")
-
-        node = onnx.helper.make_node(
-            "DFT",
-            inputs=input_names,
-            outputs=["output"],
-            axis=_axis,
-            inverse=_inverse,
-            onesided=_onesided,
-        )
-
-        nodes = []
-        if _dft_length is not None:
-            nodes.append(
-                make_constant_node("dft_length", TensorProto.INT32, [], [_dft_length]),
-            )
-        nodes.append(node)
-
-        graph = helper.make_graph(
-            nodes,
-            "dft_test",
-            inputs=[
-                helper.make_tensor_value_info("input", TensorProto.FLOAT, _input_shape),
-            ],
-            outputs=[
-                helper.make_tensor_value_info("output", TensorProto.FLOAT, _output_shape),
-            ],
-        )
-
-        model = helper.make_model(graph, producer_name="dft_test")
-
-        _input = np.random.normal(size=_input_shape).astype("float32")
-        verify_with_ort_with_inputs(
-            model,
-            [_input],
-            [_input_shape],
-            target=target,
-            dev=dev,
-            rtol=1e-4,
-            atol=1e-4,
-            use_vm=False,
-        )
-
-    batch_size = 5
-    n = 2
-    D = 7
-
-    for axis in list(range(1, n)) + [-2]:
-        for inverse, onesided in [(0, 0), (0, 1), (1, 0), (None, None)]:
-            for n_fft in [D, D - 1, D + 1]:
-                for c in [1, 2]:
-                    input_shape = [batch_size] + n * [D] + [c]
-                    output_shape = [batch_size] + n * [D] + [2]
-                    if onesided == 1:
-                        output_shape[axis] = output_shape[axis] // 2 + 1
-                    verify_dft(axis, inverse, onesided, n_fft, input_shape, output_shape)
-
-
-@tvm.testing.parametrize_targets
-def test_sequence(target, dev):
-    """test_sequence"""
-
-    def verify_sequence_ops(tensor_shape, num_tensors, axis=0, position=0, new_axis=None):
-        tensor_shape = list(tensor_shape)
-        tensor_values = []
-        for i in range(num_tensors):
-            tensor_values.append(np.random.uniform(size=tensor_shape).astype("float32"))
-
-        # Create an input for each tensor.
-        input_tensor_names = []
-        for i in range(num_tensors):
-            name = f"input_tensor_{i}"
-            input_tensor_names.append(name)
-
-        # Test creating a tensor sequence.
-        construct_node = helper.make_node(
-            "SequenceConstruct",
-            inputs=input_tensor_names,
-            outputs=["sequence"],
-        )
-
-        position_node = make_constant_node("position", TensorProto.INT32, (), [position])
-
-        # Test sequence insertion.
-        insert_node = helper.make_node(
-            "SequenceInsert",
-            inputs=["sequence", input_tensor_names[0], "position"],
-            outputs=["inserted_sequence"],
-        )
-
-        # Test sequence erase.
-        erase_node = helper.make_node(
-            "SequenceErase",
-            inputs=["inserted_sequence", "position"],
-            outputs=["erased_sequence"],
-        )
-
-        # Test sequence concatenation.
-        concat_node = helper.make_node(
-            "ConcatFromSequence",
-            inputs=["erased_sequence"],
-            outputs=["concat_sequence"],
-            axis=axis,
-        )
-
-        # Test splitting a tensor into a sequence.
-        split_node = helper.make_node(
-            "SplitToSequence", inputs=["concat_sequence"], outputs=["split_sequence"], axis=axis
-        )
-
-        # Test tensor extraction from sequence
-        at_node = helper.make_node(
-            "SequenceAt", inputs=["split_sequence", "position"], outputs=["output"]
-        )
-
-        # Test sequence length
-        length_node = helper.make_node(
-            "SequenceLength", inputs=["split_sequence"], outputs=["output_2"]
-        )
-
-        if new_axis is not None:
-            new_axis_attr = helper.make_attribute("new_axis", new_axis)
-            concat_node.attribute.append(new_axis_attr)
-
-        # Create input and output tensors.
-        graph_inputs = []
-        for name in input_tensor_names:
-            input_tensor = helper.make_tensor_value_info(name, TensorProto.FLOAT, tensor_shape)
-            graph_inputs.append(input_tensor)
-
-        # Construct output tensor.
-        output_shape = tensor_shape
-        if new_axis is not None:
-            output_shape.insert(axis, 1)
-            output_shape[axis] = num_tensors + 1
-        else:
-            output_shape[axis] = (num_tensors + 1) * output_shape[axis]
-        graph_outputs = [
-            helper.make_tensor_value_info("output", TensorProto.FLOAT, output_shape),
-            helper.make_tensor_value_info("output_2", TensorProto.INT64, []),
-        ]
-
-        graph_nodes = [
-            position_node,
-            construct_node,
-            insert_node,
-            erase_node,
-            concat_node,
-            split_node,
-            at_node,
-            length_node,
-        ]
-
-        graph = helper.make_graph(
-            graph_nodes,
-            "Sequence_test",
-            inputs=graph_inputs,
-            outputs=graph_outputs,
-        )
-        model = helper.make_model(
-            graph,
-            producer_name="Sequence_test",
-        )
-
-        verify_with_ort_with_inputs(model, tensor_values, target=target, dev=dev)
-
-    verify_sequence_ops((10, 3), 2)
-    verify_sequence_ops((3, 3, 3, 3), 4, position=3)
-    verify_sequence_ops((3, 3, 3, 3), 4, axis=2)
-    verify_sequence_ops((3, 3, 3, 3), 4, axis=2, new_axis=1)
-
-
-@tvm.testing.parametrize_targets
-def test_empty_sequence(target, dev):
-    """test_empty_sequence"""
-
-    # Test creating an empty tensor sequence.
-    empty_node = helper.make_node(
-        "SequenceEmpty",
-        inputs=[],
-        outputs=["empty_sequence"],
-    )
-
-    length_node = helper.make_node("SequenceLength", inputs=["empty_sequence"], outputs=["output"])
-
-    graph_outputs = [helper.make_tensor_value_info("output", TensorProto.INT64, [])]
-
-    graph_nodes = [empty_node, length_node]
-
-    graph = helper.make_graph(
-        graph_nodes,
-        "Sequence_empty_test",
-        inputs=[],
-        outputs=graph_outputs,
-    )
-
-    model = helper.make_model(
-        graph,
-        producer_name="Sequence_empty_test",
-    )
-
-    verify_with_ort_with_inputs(model, [], target=target, dev=dev)
-
-
-def test_exporting_node_renamed_model():
-    """test exproting model when export_node_renamed_model is set"""
-
-    a_name, a_shape = "a", (4, 3)
-    b_name, b_shape = "b", (3, 4)
-    out_name, out_shape = "out", [a_shape[0], b_shape[1]]
-    temp_dir = utils.tempdir().path
-
-    # model definition
-    mul_node = helper.make_node("MatMul", [a_name, b_name], [out_name])
-    graph = helper.make_graph(
-        [mul_node],
-        "matmul_test",
-        inputs=[
-            helper.make_tensor_value_info(a_name, TensorProto.FLOAT, a_shape),
-            helper.make_tensor_value_info(b_name, TensorProto.FLOAT, b_shape),
-        ],
-        outputs=[helper.make_tensor_value_info(out_name, TensorProto.FLOAT, out_shape)],
-    )
-    model = helper.make_model(graph, producer_name="matmul_test")
-
-    # get frontend model
-    shape_dict = {a_name: a_shape, b_name: b_shape}
-    _, _ = relay.frontend.from_onnx(model, shape_dict, export_node_renamed_model_path=temp_dir)
-
-    exported_model_name = os.listdir(temp_dir)[0]
-    assert "tvm_exported_model_" in exported_model_name
-
-    exported_model = onnx.load(os.path.join(temp_dir, exported_model_name))
-    assert exported_model.graph.node[0].name == "MatMul_0"
-
-
-class TestSetSpan:
-    """test structural equal between translated / hand-crafted relay IR with span tagged."""
-
-    def _verify(self, res_fptr, golden_fptr):
-        with tvm.testing.enable_span_filling():
-            with_span = res_fptr()
-        with tvm.testing.disable_span_filling():
-            without_span = res_fptr()
-        tvm.ir.assert_structural_equal(with_span, without_span)
-        _verify_structural_equal_with_span(with_span, golden_fptr())
-
-    def test_conv2d_bias_add_span(self):
-        padding = [0, 0, 0, 0]
-        k_shape = [7, 7]
-        y_shape, y_name = [1, 6, 10, 10], "y"
-        x_shape, x_name = [1, 3, 10, 10], "x"
-        b_shape, b_name = [6], "b"
-        b_val = np.random.random(b_shape).astype(np.float32)
-        w_shape, w_name = [6, 3, 7, 7], "w"
-        w_val = np.random.random(w_shape).astype(np.float32)
-        group, strides, dilations = 1, [1, 1], [1, 1]
-        conv_name = "conv2d"
-
-        def _res():
-            # model definition
-            node = helper.make_node(
-                "Conv",
-                inputs=[x_name, w_name, b_name],
-                outputs=[y_name],
-                kernel_shape=k_shape,
-                strides=strides,
-                dilations=dilations,
-                group=group,
-                pads=padding,
-                name=conv_name,
-            )
-            graph = helper.make_graph(
-                [node],
-                "conv_test",
-                inputs=[helper.make_tensor_value_info(x_name, TensorProto.FLOAT, x_shape)],
-                outputs=[helper.make_tensor_value_info(y_name, TensorProto.FLOAT, y_shape)],
-                initializer=[
-                    helper.make_tensor(
-                        w_name,
-                        TensorProto.FLOAT,
-                        dims=w_shape,
-                        vals=w_val.flatten(),
-                    ),
-                    helper.make_tensor(
-                        b_name,
-                        TensorProto.FLOAT,
-                        dims=b_shape,
-                        vals=b_val.flatten(),
-                    ),
-                ],
-            )
-            model = helper.make_model(graph, producer_name="conv_test")
-
-            # get frontend model
-            shape_dict = {x_name: x_shape}
-            mod, _ = relay.frontend.from_onnx(model, shape_dict)
-            return mod["main"]
-
-        def _golden():
-            conv_si = conv_name
-            x = relay.var(
-                x_name,
-                shape=tuple(x_shape),
-                span=_create_span(f"{conv_si}.{x_name}"),
-            )
-            conv_weight = relay.const(
-                w_val,
-                span=_create_span(f"{conv_si}.{w_name}"),
-            )
-            conv_bias = relay.const(
-                b_val,
-                span=_create_span(f"{conv_si}.{b_name}"),
-            )
-            conv_out = _set_span(
-                relay.nn.conv2d(
-                    x,
-                    conv_weight,
-                    padding=[0] * 4,
-                    channels=y_shape[1],
-                    kernel_size=k_shape,
-                ),
-                conv_si,
-            )
-            bias_out = _set_span(relay.nn.bias_add(conv_out, conv_bias), conv_si)
-            return infer_type(relay.Function([x], bias_out))
-
-        self._verify(_res, _golden)
-
-    def test_batchnorm_span(self):
-        input_name, in_shape = "x", [1, 16, 10, 10]
-        bn_name = "bn"
-        output_name = "y"
-        scale_name = "scale"
-        bias_name = "b"
-        mean_name = "mean"
-        var_name = "var"
-
-        def _res():
-            # model definition
-            batchnorm = onnx.helper.make_node(
-                "BatchNormalization",
-                inputs=[input_name, scale_name, bias_name, mean_name, var_name],
-                outputs=[output_name],
-                name=bn_name,
-            )
-            graph = helper.make_graph(
-                [batchnorm],
-                "batchnorm_test",
-                inputs=[
-                    helper.make_tensor_value_info(input_name, TensorProto.FLOAT, in_shape),
-                    helper.make_tensor_value_info(scale_name, TensorProto.FLOAT, [in_shape[1]]),
-                    helper.make_tensor_value_info(bias_name, TensorProto.FLOAT, [in_shape[1]]),
-                    helper.make_tensor_value_info(mean_name, TensorProto.FLOAT, [in_shape[1]]),
-                    helper.make_tensor_value_info(var_name, TensorProto.FLOAT, [in_shape[1]]),
-                ],
-                outputs=[helper.make_tensor_value_info(output_name, TensorProto.FLOAT, in_shape)],
-            )
-            model = helper.make_model(graph, producer_name="batchnorm_test")
-
-            # get frontend model
-            shape_dict = {input_name: in_shape}
-            mod, _ = relay.frontend.from_onnx(model, shape_dict)
-            return mod["main"]
-
-        def _golden():
-            bn_si = bn_name
-            x = relay.var(
-                input_name,
-                shape=tuple(in_shape),
-                span=_create_span(f"{bn_si}.{input_name}"),
-            )
-            bn_scale = relay.var(
-                scale_name,
-                shape=(in_shape[1],),
-                span=_create_span(f"{bn_si}.{scale_name}"),
-            )
-            bn_bias = relay.var(
-                bias_name,
-                shape=(in_shape[1],),
-                span=_create_span(f"{bn_si}.{bias_name}"),
-            )
-            bn_rm = relay.var(
-                mean_name,
-                shape=(in_shape[1],),
-                span=_create_span(f"{bn_si}.{mean_name}"),
-            )
-            bn_rv = relay.var(
-                var_name,
-                shape=(in_shape[1],),
-                span=_create_span(f"{bn_si}.{var_name}"),
-            )
-            bn_out = _set_span(
-                relay.nn.batch_norm(x, bn_scale, bn_bias, bn_rm, bn_rv),
-                bn_si,
-            )
-            bn_tuple_get_item = _set_span(relay.TupleGetItem(bn_out.tuple_value, 0), bn_si)
-            return infer_type(
-                relay.Function([x, bn_scale, bn_bias, bn_rm, bn_rv], bn_tuple_get_item)
-            )
-
-        self._verify(_res, _golden)
-
-    def test_reshape_span(self):
-        input_shape = [2, 1, 10, 1, 10]
-        new_shape = [2, 1, 10, 10]
-        input_name = "in"
-        output_name = "out"
-        ref_name = "ref_in"
-        const_name = "const"
-        reshape_name = "reshape"
-
-        def _res():
-            # model definition
-            ref_array = np.array(new_shape)
-            ref_node = helper.make_node(
-                "Constant",
-                inputs=[],
-                outputs=[ref_name],
-                value=helper.make_tensor(
-                    name="const_tensor",
-                    data_type=TensorProto.INT32,
-                    dims=ref_array.shape,
-                    vals=ref_array.flatten().astype(int),
-                ),
-                name=const_name,
-            )
-            reshape_node = helper.make_node(
-                "Reshape",
-                [input_name, ref_name],
-                [output_name],
-                name=reshape_name,
-            )
-            graph = helper.make_graph(
-                [ref_node, reshape_node],
-                "reshape_test",
-                inputs=[helper.make_tensor_value_info(input_name, TensorProto.FLOAT, input_shape)],
-                outputs=[helper.make_tensor_value_info(output_name, TensorProto.FLOAT, new_shape)],
-            )
-            model = helper.make_model(graph, producer_name="reshape_test")
-
-            # get frontend model
-            shape_dict = {input_name: input_shape}
-            mod, _ = relay.frontend.from_onnx(model, shape_dict)
-            return mod["main"]
-
-        def _golden():
-            reshape_si = reshape_name
-            x = relay.var(
-                input_name,
-                shape=tuple(input_shape),
-                span=_create_span(f"{reshape_si}.{input_name}"),
-            )
-            reshape_out = _set_span(
-                relay.reshape(x, newshape=new_shape),
-                reshape_si,
-            )
-            return infer_type(relay.Function([x], reshape_out))
-
-        self._verify(_res, _golden)
-
-    def test_matmul_span(self):
-        a_name, a_shape = "a", (4, 3)
-        b_name, b_shape = "b", (3, 4)
-        out_name, out_shape = "out", [a_shape[0], b_shape[1]]
-        matmul_name = "matmul"
-
-        def _res():
-            # model definition
-            mul_node = helper.make_node("MatMul", [a_name, b_name], [out_name], name=matmul_name)
-            graph = helper.make_graph(
-                [mul_node],
-                "matmul_test",
-                inputs=[
-                    helper.make_tensor_value_info(a_name, TensorProto.FLOAT, a_shape),
-                    helper.make_tensor_value_info(b_name, TensorProto.FLOAT, b_shape),
-                ],
-                outputs=[helper.make_tensor_value_info(out_name, TensorProto.FLOAT, out_shape)],
-            )
-            model = helper.make_model(graph, producer_name="matmul_test")
-
-            # get frontend model
-            shape_dict = {a_name: a_shape, b_name: b_shape}
-            mod, _ = relay.frontend.from_onnx(model, shape_dict)
-            return mod["main"]
-
-        def _golden():
-            matmul_si = matmul_name
-            a = relay.var(
-                a_name,
-                shape=tuple(a_shape),
-                span=_create_span(f"{matmul_si}.{a_name}"),
-            )
-            b = relay.var(
-                b_name,
-                shape=tuple(b_shape),
-                span=_create_span(f"{matmul_si}.{b_name}"),
-            )
-            b_t = _set_span(relay.transpose(b, axes=[1, 0]), matmul_si)
-            matmul_out = _set_span(
-                relay.nn.dense(a, b_t, out_dtype="float32"),
-                matmul_si,
-            )
-            return infer_type(relay.Function([a, b], matmul_out))
-
-        self._verify(_res, _golden)
-
-
-@tvm.testing.parametrize_targets
-def test_pad_constant_value(target, dev):
-    """test_pad_constant_value"""
-
-    def verify_pad_constant_value(constant_value):
-        tensor_shape = [1, 2, 257, 126]
-        tensor_values = [np.random.uniform(size=tensor_shape).astype("float32")]
-        graph_inputs = [helper.make_tensor_value_info("input", TensorProto.FLOAT, tensor_shape)]
-        graph_outputs = [helper.make_tensor_value_info("output", TensorProto.FLOAT, None)]
-        pads = helper.make_tensor("pads", TensorProto.INT64, [8], [0, 0, 0, 2, 0, 0, 0, 0])
-        pad_node = helper.make_node(
-            "Pad", ["input", "pads", constant_value], ["output"], mode="constant"
-        )
-        graph_nodes = [pad_node]
-        graph = helper.make_graph(
-            graph_nodes,
-            "test_pad_constant_value",
-            inputs=graph_inputs,
-            outputs=graph_outputs,
-            initializer=[pads],
-        )
-        model = helper.make_model(
-            graph,
-            producer_name="test_pad_constant_value",
-        )
-        verify_with_ort_with_inputs(model, tensor_values, target=target, dev=dev)
-
-    verify_pad_constant_value("")
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/paddlepaddle/test_forward.py b/tests/python/frontend/paddlepaddle/test_forward.py
deleted file mode 100755
index 6b8e90545c83..000000000000
--- a/tests/python/frontend/paddlepaddle/test_forward.py
+++ /dev/null
@@ -1,2566 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import os
-from pathlib import Path
-import shutil
-
-import numpy as np
-import tvm
-import tvm.testing
-import tvm.topi.testing
-from tvm import relay
-from tvm.contrib import graph_executor
-import pytest
-
-import paddle
-
-paddle.disable_signal_handler()
-import paddle.nn as nn
-
-PADDLE_TEST_DATA_ROOT_PATH = Path(Path("~").expanduser(), ".tvm_test_data", "paddle")
-PADDLE_TEST_DATA_ROOT_PATH.mkdir(parents=True, exist_ok=True)
-cached_program = list()
-
-
-def assert_shapes_match(tru, est):
-    if tru.shape != est.shape:
-        msg = "Output shapes {} and {} don't match"
-        raise AssertionError(msg.format(tru.shape, est.shape))
-
-
-def get_paddle_model(func, input_spec):
-    global PADDLE_TEST_DATA_ROOT_PATH
-    global cached_program
-    model_path = Path(PADDLE_TEST_DATA_ROOT_PATH, "model")
-
-    paddle.jit.save(func, str(model_path), input_spec=input_spec)
-    baseline_model = paddle.jit.load(str(model_path))
-    if len(cached_program) >= 4:
-        cached_program = list()
-    cached_program.append(baseline_model._get_program_holder())
-
-    shutil.rmtree(str(PADDLE_TEST_DATA_ROOT_PATH))
-    return baseline_model
-
-
-def verify_model(func, input_data, use_vm=False, rtol=1e-5, atol=1e-5):
-    if not (isinstance(input_data, (tuple, list))):
-        input_data = [input_data]
-
-    input_spec = []
-    input_names = []
-    input_shape_dict = {}
-    compiled_input = {}
-    for idx, data in enumerate(input_data):
-        input_name = "input{}".format(idx)
-        input_spec.append(
-            paddle.static.InputSpec(dtype=data.dtype, shape=data.shape, name=input_name)
-        )
-        input_names.append(input_name)
-        input_shape_dict[input_name] = data.shape
-        if isinstance(data, np.ndarray):
-            compiled_input[input_name] = data
-        else:
-            compiled_input[input_name] = data.numpy()
-
-    baseline_model = get_paddle_model(func, input_spec)
-    baseline_outputs = baseline_model(*[input[:] for input in input_data])
-
-    # get paddle outputs
-    if isinstance(baseline_outputs, (tuple, list)):
-        baseline_outputs = tuple(out.numpy() for out in baseline_outputs)
-    else:
-        baseline_outputs = (baseline_outputs.numpy(),)
-
-    mod, params = relay.frontend.from_paddle(baseline_model, input_shape_dict)
-    compiled_names = []
-    for arg in mod["main"].params:
-        assert arg.name_hint in input_names or arg.name_hint in params
-        if arg.name_hint in input_names:
-            compiled_names.append(arg.name_hint)
-
-    if use_vm:
-        tvm_vm_input = []
-        for idx, data in enumerate(input_data):
-            if isinstance(data, np.ndarray):
-                tvm_vm_input.append(data)
-            else:
-                tvm_vm_input.append(data.numpy())
-        for target, dev in tvm.testing.enabled_targets():
-            result = relay.create_executor("vm", mod=mod, device=dev, target=target).evaluate()(
-                *tvm_vm_input, **params
-            )
-            tvm_vm_output = []
-            if isinstance(result, tvm.runtime.NDArray):
-                tvm_vm_output = result.numpy()
-            else:
-                tvm_vm_output = [r.numpy() for r in result]
-            if not isinstance(tvm_vm_output, list):
-                tvm_vm_output = [tvm_vm_output]
-
-            for i, baseline_output in enumerate(baseline_outputs):
-                assert_shapes_match(baseline_output, tvm_vm_output[i])
-                tvm.testing.assert_allclose(baseline_output, tvm_vm_output[i], rtol=rtol, atol=atol)
-    else:
-        with tvm.transform.PassContext(opt_level=3):
-            for target, dev in tvm.testing.enabled_targets():
-                lib = relay.build(mod, target=target, params=params)
-                gmod = graph_executor.GraphModule(lib["default"](dev))
-                for name in compiled_names:
-                    gmod.set_input(name, compiled_input[name])
-                gmod.run()
-
-                for i, baseline_output in enumerate(baseline_outputs):
-                    compiled_output = gmod.get_output(i).numpy()
-
-                    assert_shapes_match(baseline_output, compiled_output)
-                    tvm.testing.assert_allclose(
-                        baseline_output, compiled_output, rtol=rtol, atol=atol
-                    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_add_subtract():
-    input_shape = [10]
-
-    @paddle.jit.to_static
-    def add_subtract(inputs):
-        return paddle.subtract(paddle.add(inputs, inputs), inputs)
-
-    @paddle.jit.to_static
-    def add_subtract2(inputs):
-        return inputs + 1 - 2
-
-    @paddle.jit.to_static
-    def add_subtract3(inputs1, inputs2):
-        ones = paddle.ones([10], dtype="float32")
-        return inputs1 + ones - inputs2
-
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(add_subtract, input_data)
-    verify_model(add_subtract2, input_data)
-    input_data2 = paddle.rand(input_shape, dtype="float32")
-    verify_model(add_subtract3, [input_data, input_data2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_addmm():
-    class Addmm(nn.Layer):
-        def __init__(self, alpha=1.0, beta=1.0):
-            super(Addmm, self).__init__()
-            self.alpha = alpha
-            self.beta = beta
-
-        @paddle.jit.to_static
-        def forward(self, inputs, x, y):
-            return paddle.addmm(inputs, x, y, self.alpha, self.beta)
-
-    input_shapes = [[10, 10], [1, 1], [7, 1]]
-    x_shapes = [[10, 3], [5, 6], [7, 7]]
-    y_shapes = [[3, 10], [6, 2], [7, 3]]
-    input_shapes = [[10, 10]]
-    x_shapes = [[10, 3]]
-    y_shapes = [[3, 10]]
-
-    for i in range(len(input_shapes)):
-        input_data = paddle.rand(input_shapes[i], dtype="float32")
-        x_data = paddle.rand(x_shapes[i], dtype="float32")
-        y_data = paddle.rand(y_shapes[i], dtype="float32")
-        verify_model(Addmm(), input_data=[input_data, x_data, y_data])
-        verify_model(Addmm(0.5, 0.3), input_data=[input_data, x_data, y_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_arg_max_min():
-    class ArgMax(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.argmax(inputs)
-
-    class ArgMax1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return inputs.argmax(axis=1)
-
-    class ArgMax2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return inputs.argmax(axis=1, keepdim=False)
-
-    class ArgMax3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return inputs.argmax(axis=2, keepdim=True)
-
-    class ArgMin(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.argmin(inputs)
-
-    class ArgMin1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return inputs.argmin(axis=1)
-
-    class ArgMin2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return inputs.argmax(axis=1, keepdim=False)
-
-    class ArgMin3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return inputs.argmin(axis=2, keepdim=True)
-
-    input_shapes = [[256], [5, 28], [10, 5, 4], [1, 3, 8, 8]]
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(ArgMax(), input_data=input_data)
-        verify_model(ArgMin(), input_data=input_data)
-    for input_shape in input_shapes[1:]:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(ArgMax1(), input_data=input_data)
-        verify_model(ArgMax2(), input_data=input_data)
-        verify_model(ArgMin1(), input_data=input_data)
-        verify_model(ArgMin2(), input_data=input_data)
-    for input_shape in input_shapes[2:]:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(ArgMax3(), input_data=input_data)
-        verify_model(ArgMin3(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_argsort():
-    class ArgSort1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.argsort(inputs)
-
-    class ArgSort2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.argsort(inputs, axis=0, descending=True)
-
-    class ArgSort3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.argsort(inputs, axis=-1, descending=True)
-
-    input_shapes = [[256], [10, 20], [10, 5, 3], [1, 3, 5, 5]]
-    for input_shape in input_shapes:
-        # Avoid duplicate elements in the array which will bring
-        # different results with different sort algorithms
-        np.random.seed(13)
-        np_data = np.random.choice(range(-5000, 5000), np.prod(input_shape), replace=False)
-        input_data = paddle.to_tensor(np_data.reshape(input_shape).astype("int64"))
-        verify_model(ArgSort1(), [input_data])
-        verify_model(ArgSort2(), [input_data])
-        verify_model(ArgSort3(), [input_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_assign():
-    @paddle.jit.to_static
-    def assign(inputs):
-        return paddle.assign(inputs)
-
-    @paddle.jit.to_static
-    def assign_value(inputs):
-        x = paddle.to_tensor(np.array([3]).astype("float32"))
-        return inputs + x
-
-    input_shape = [2, 3]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(
-        assign,
-        [
-            input_data,
-        ],
-    )
-    input_data2 = np.random.randint(100, size=input_shape)
-    verify_model(
-        assign,
-        [
-            input_data2,
-        ],
-    )
-    verify_model(assign_value, [input_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_batch_norm():
-    class BatchNorm1D(nn.Layer):
-        def __init__(self):
-            super(BatchNorm1D, self).__init__()
-            self.batch_norm = nn.BatchNorm1D(2)
-
-        @paddle.jit.to_static
-        def forward(self, input_data):
-            return self.batch_norm(input_data)
-
-    class BatchNorm2D(nn.Layer):
-        def __init__(self):
-            super(BatchNorm2D, self).__init__()
-            self.batch_norm = nn.BatchNorm2D(2)
-
-        @paddle.jit.to_static
-        def forward(self, input_data):
-            return self.batch_norm(input_data)
-
-    class BatchNorm3D(nn.Layer):
-        def __init__(self):
-            super(BatchNorm3D, self).__init__()
-            self.batch_norm = nn.BatchNorm3D(2)
-
-        @paddle.jit.to_static
-        def forward(self, input_data):
-            return self.batch_norm(input_data)
-
-    input_data = paddle.rand((2, 2, 3), dtype="float32")
-    verify_model(BatchNorm1D(), input_data=input_data)
-    input_data = paddle.rand((2, 2, 2, 3), dtype="float32")
-    verify_model(BatchNorm2D(), input_data=input_data)
-    input_data = paddle.rand((2, 2, 2, 2, 3), dtype="float32")
-    verify_model(BatchNorm3D(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_bmm():
-    class Bmm(nn.Layer):
-        def __init__(self):
-            super(Bmm, self).__init__()
-
-        @paddle.jit.to_static
-        def forward(self, x, y):
-            return paddle.bmm(x, y)
-
-    x_shapes = [[10, 3, 4], [5, 6, 2], [1, 7, 7]]
-    y_shapes = [[10, 4, 5], [5, 2, 7], [1, 7, 3]]
-    for i in range(len(x_shapes)):
-        x_data = paddle.rand(x_shapes[i], dtype="float32")
-        y_data = paddle.rand(y_shapes[i], dtype="float32")
-        verify_model(Bmm(), input_data=[x_data, y_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_cast():
-    @paddle.jit.to_static
-    def cast1(inputs, dtype="uint8"):
-        return paddle.cast(inputs, dtype)
-
-    @paddle.jit.to_static
-    def cast2(inputs, dtype="int64"):
-        return inputs.cast(dtype)
-
-    input_shape = [2, 3]
-    input_data = paddle.rand(input_shape, dtype="float32") * 100
-    verify_model(
-        cast1,
-        [
-            input_data,
-        ],
-    )
-    verify_model(
-        cast2,
-        [
-            input_data,
-        ],
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_check_tensor():
-    class IsFinite(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.cast(paddle.isfinite(inputs), "int32")
-
-    class IsNan(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.cast(paddle.isnan(inputs), "int32")
-
-    class IsInf(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.cast(paddle.isinf(inputs), "int32")
-
-    input_shapes = [[32], [8, 32], [2, 5, 20], [2, 3, 8, 8], [2, 2, 3, 6, 6]]
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(IsFinite(), input_data=input_data)
-        verify_model(IsNan(), input_data=input_data)
-        verify_model(IsInf(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_clip():
-    class Clip1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.clip(inputs, min=0.3, max=0.55)
-
-    class Clip2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs, max_value):
-            return paddle.clip(inputs, max=max_value)
-
-    class Clip3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs, min_value):
-            return paddle.clip(inputs, min=min_value)
-
-    class Clip4(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs, min_value, max_value):
-            return paddle.clip(inputs, min=min_value, max=max_value)
-
-    input_data = paddle.rand((2, 2, 2, 3), dtype="float32")
-    max_value = paddle.to_tensor([0.55])
-    min_value = paddle.to_tensor([0.3])
-    verify_model(Clip1(), input_data)
-    verify_model(Clip2(), [input_data, max_value])
-    verify_model(Clip3(), [input_data, min_value])
-    verify_model(Clip4(), [input_data, min_value, max_value])
-
-
-@tvm.testing.uses_gpu
-def test_forward_concat_unsqueeze():
-    @paddle.jit.to_static
-    def concat_unsqueeze1(inputs):
-        return paddle.concat([inputs[:, 0].unsqueeze(1), inputs[:, 1].unsqueeze(1)], axis=1)
-
-    @paddle.jit.to_static
-    def concat_unsqueeze2(inputs):
-        a = (inputs[:, :, 0] + 2) * 7
-        b = (inputs[:, :, 1] + 3) * 11
-        c = (inputs[:, :, 2] + 5) * 13
-        return paddle.concat([paddle.unsqueeze(t, axis=2) for t in [a, b, c]], axis=2)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(concat_unsqueeze1, input_data=input_data)
-    verify_model(concat_unsqueeze2, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_cumsum():
-    @paddle.jit.to_static
-    def cusum1(inputs):
-        return paddle.cumsum(inputs)
-
-    @paddle.jit.to_static
-    def cusum2(inputs):
-        return paddle.cumsum(inputs, axis=0)
-
-    @paddle.jit.to_static
-    def cusum3(inputs):
-        return paddle.cumsum(inputs, axis=1)
-
-    input_data = paddle.randint(0, 100, (10, 10), dtype=paddle.int32)
-    verify_model(cusum1, [input_data])
-    verify_model(cusum1, [input_data.astype(paddle.int64)])
-    verify_model(
-        cusum2,
-        [
-            input_data,
-        ],
-    )
-    verify_model(
-        cusum3,
-        [
-            input_data,
-        ],
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_conv():
-    class Conv2D1(nn.Layer):
-        def __init__(self, stride=1, padding=0, dilation=1, groups=1, padding_mode="zeros"):
-            super(Conv2D1, self).__init__()
-            self.conv = nn.Conv2D(
-                3,
-                6,
-                3,
-                stride=stride,
-                padding=padding,
-                dilation=dilation,
-                groups=groups,
-                padding_mode=padding_mode,
-            )
-            self.softmax = nn.Softmax()
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.softmax(self.conv(inputs))
-
-    class Conv2D2(nn.Layer):
-        def __init__(
-            self,
-            stride=1,
-            padding=0,
-            dilation=1,
-            groups=1,
-            padding_mode="zeros",
-            data_format="NCHW",
-        ):
-            super(Conv2D2, self).__init__()
-            self.conv = nn.Conv2D(
-                3,
-                6,
-                3,
-                stride=stride,
-                padding=padding,
-                dilation=dilation,
-                groups=groups,
-                padding_mode=padding_mode,
-                data_format=data_format,
-            )
-            self.softmax = nn.Softmax()
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.softmax(self.conv(inputs))
-
-    input_shapes = [[1, 3, 10, 10], [1, 3, 12, 12]]
-
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(Conv2D1(), input_data=input_data)
-        verify_model(Conv2D1(stride=2, padding="VALID", dilation=3), input_data=input_data)
-        verify_model(Conv2D1(stride=2, padding="SAME", dilation=3), input_data=input_data)
-        verify_model(
-            Conv2D1(stride=2, padding=3, dilation=3, padding_mode="replicate"),
-            input_data=input_data,
-        )
-        verify_model(Conv2D1(stride=2, padding="SAME", dilation=2, groups=3), input_data=input_data)
-        verify_model(
-            Conv2D2(stride=2, padding="SAME", dilation=2, groups=3, data_format="NCHW"),
-            input_data=input_data,
-        )
-
-
-@tvm.testing.uses_gpu
-def test_forward_conv_transpose():
-    class Conv2DTranspose(nn.Layer):
-        def __init__(self, stride=1, padding=0, dilation=1, groups=1, padding_mode="zeros"):
-            super(Conv2DTranspose, self).__init__()
-            self.conv = nn.Conv2DTranspose(
-                6,
-                3,
-                3,
-                stride=stride,
-                padding=padding,
-                dilation=dilation,
-                groups=groups,
-            )
-            self.softmax = nn.Softmax()
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.softmax(self.conv(inputs))
-
-    input_shapes = [[1, 6, 10, 10], [2, 6, 8, 8]]
-
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(Conv2DTranspose(), input_data=input_data)
-        verify_model(Conv2DTranspose(stride=2, padding="VALID"), input_data=input_data)
-        verify_model(Conv2DTranspose(stride=2, padding="SAME", dilation=1), input_data=input_data)
-        verify_model(Conv2DTranspose(stride=2, padding=3), input_data=input_data)
-        verify_model(Conv2DTranspose(stride=3, padding="SAME", groups=1), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_conv3d():
-    class Conv3D(nn.Layer):
-        def __init__(self, stride=1, padding=0, dilation=1, groups=1, padding_mode="zeros"):
-            super(Conv3D, self).__init__()
-            self.conv = nn.Conv3D(
-                3,
-                6,
-                3,
-                stride=stride,
-                padding=padding,
-                dilation=dilation,
-                groups=groups,
-                padding_mode=padding_mode,
-            )
-            self.softmax = nn.Softmax()
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.softmax(self.conv(inputs))
-
-    class Conv3D2(nn.Layer):
-        def __init__(
-            self,
-            stride=1,
-            padding=0,
-            dilation=1,
-            groups=1,
-            padding_mode="zeros",
-            data_format="NCDHW",
-        ):
-            super(Conv3D2, self).__init__()
-            self.conv = nn.Conv3D(
-                3,
-                6,
-                3,
-                stride=stride,
-                padding=padding,
-                dilation=dilation,
-                groups=groups,
-                padding_mode=padding_mode,
-                data_format=data_format,
-            )
-            self.softmax = nn.Softmax()
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.softmax(self.conv(inputs))
-
-    input_shapes = [[1, 3, 10, 10, 10], [1, 3, 12, 12, 12]]
-
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(Conv3D(), input_data=input_data)
-        verify_model(Conv3D(stride=2, padding="VALID", dilation=3), input_data=input_data)
-        verify_model(Conv3D(stride=2, padding="SAME", dilation=3), input_data=input_data)
-        verify_model(
-            Conv3D(stride=2, padding=(3, 3, 4, 4, 2, 2), dilation=3),
-            input_data=input_data,
-        )
-        verify_model(
-            Conv3D(stride=2, padding=3, dilation=3, padding_mode="reflect"),
-            input_data=input_data,
-        )
-        verify_model(
-            Conv3D(stride=2, padding=3, dilation=3, padding_mode="replicate"),
-            input_data=input_data,
-        )
-        verify_model(Conv3D(stride=2, padding="SAME", dilation=2, groups=3), input_data=input_data)
-        verify_model(
-            Conv3D2(stride=2, padding="SAME", dilation=2, groups=3, data_format="NCDHW"),
-            input_data=input_data,
-        )
-
-
-@tvm.testing.uses_gpu
-def test_forward_dot():
-    class Dot(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, x, y):
-            return paddle.dot(x, y)
-
-    input_shapes = [[128], [8, 24]]
-    for input_shape in input_shapes:
-        x_data = paddle.rand(input_shape, dtype="float32")
-        y_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(Dot(), input_data=[x_data, y_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_dropout():
-    @paddle.jit.to_static
-    def dropout(inputs):
-        return nn.functional.dropout(inputs)
-
-    @paddle.jit.to_static
-    def dropout1(inputs):
-        return nn.functional.dropout(inputs, 0.1)
-
-    @paddle.jit.to_static
-    def dropout2(inputs):
-        return nn.functional.dropout(inputs, 0.1, mode="downscale_in_infer")
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(dropout, input_data=input_data[0, 0])
-    verify_model(dropout, input_data=input_data)
-    verify_model(dropout1, input_data=input_data[0, 0])
-    verify_model(dropout1, input_data=input_data)
-    verify_model(dropout2, input_data=input_data[0, 0])
-    verify_model(dropout2, input_data=input_data)
-
-
-def test_forward_elemwise():
-    class ElemwiseAPI(nn.Layer):
-        def __init__(self, api_name):
-            super(ElemwiseAPI, self).__init__()
-            self.api_name_ = api_name
-            for candidate in (paddle, paddle.nn.functional):
-                self.func = getattr(candidate, api_name, None)
-                if self.func:
-                    break
-
-        @paddle.jit.to_static
-        def forward(self, input1, input2):
-            y = self.func(input1, input2)
-            if "equal" in self.api_name_ or "than" in self.api_name_:
-                # for compare operation, cast boolean result to int32
-                y = paddle.cast(y, "int32")
-            return y
-
-    api_list = [
-        "equal",
-        "floor_divide",
-        "greater_equal",
-        "greater_than",
-        "less_equal",
-        "less_than",
-        "maximum",
-        "minimum",
-        "pow",
-    ]
-    x_shapes = [[128], [8, 20], [4, 20, 3], [2, 3, 8, 8], [2, 3, 3, 9, 9]]
-    y_shapes = [[1], [8, 20], [4, 1, 1], [2, 3, 8, 8], [2, 3, 3, 9, 1]]
-    for x_shape, y_shape in zip(x_shapes, y_shapes):
-        x_data = paddle.randint(1, 10, x_shape, dtype="int32")
-        y_data = paddle.randint(1, 10, y_shape, dtype="int32")
-        for api_name in api_list:
-            if api_name == "pow":
-                # only support float for pow
-                x_data = x_data.astype("float32")
-                y_data = y_data.astype("float32")
-            verify_model(ElemwiseAPI(api_name), [x_data, y_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_expand():
-    @paddle.jit.to_static
-    def expand1(inputs):
-        return paddle.expand(inputs, shape=[2, 128])
-
-    @paddle.jit.to_static
-    def expand2(inputs):
-        return paddle.expand(inputs, shape=[2, 1, 4, 16])
-
-    @paddle.jit.to_static
-    def expand3(inputs):
-        return paddle.expand(inputs, shape=[2, 1, 3, 7, 7])
-
-    @paddle.jit.to_static
-    def expand4(inputs):
-        shape = paddle.to_tensor(np.array([2, 128]).astype("int32"))
-        return paddle.expand(inputs, shape=shape)
-
-    @paddle.jit.to_static
-    def expand5(inputs):
-        shape = paddle.to_tensor(np.array([2, 1, 4, 16]).astype("int32"))
-        return paddle.expand(inputs, shape=shape)
-
-    @paddle.jit.to_static
-    def expand6(inputs):
-        shape = paddle.to_tensor(np.array([2, 1, 3, 7, 7]).astype("int32"))
-        return paddle.expand(inputs, shape=shape)
-
-    data = paddle.rand([128], dtype="float32")
-    verify_model(expand1, input_data=[data])
-    verify_model(expand4, input_data=[data])
-    data = paddle.rand([4, 16], dtype="float32")
-    verify_model(expand2, input_data=[data])
-    verify_model(expand5, input_data=[data])
-    data = paddle.rand([1, 3, 7, 7], dtype="float32")
-    verify_model(expand3, input_data=[data])
-    verify_model(expand6, input_data=[data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_expand_as():
-    class ExpandAs(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, x, y):
-            z = paddle.expand_as(x, y)
-            z += y
-            return z
-
-    x_shapes = [[1], [8, 128], [8, 1, 1], [2, 3, 229, 229], [2, 3, 3, 224, 1]]
-    y_shapes = [[128], [8, 128], [8, 200, 300], [2, 3, 229, 229], [2, 3, 3, 224, 224]]
-    for x_shape, y_shape in zip(x_shapes, y_shapes):
-        x_data = paddle.rand(x_shape, dtype="float32")
-        y_data = paddle.rand(y_shape, dtype="float32")
-        verify_model(ExpandAs(), [x_data, y_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_fill_zeros_like():
-    class FilZeroLike(nn.Layer):
-        def __init__(self, dtype=None):
-            super(FilZeroLike, self).__init__()
-            self.dtype = dtype
-
-        @paddle.jit.to_static
-        def forward(self, x):
-            return paddle.zeros_like(x, dtype=self.dtype)
-
-    input_shape = [2, 3, 5]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(FilZeroLike("float32"), input_data=input_data)
-    verify_model(FilZeroLike("int32"), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_flatten():
-    class Flatten(nn.Layer):
-        def __init__(self, start_axis=0, stop_axis=-1):
-            super(Flatten, self).__init__()
-            self.start_axis = start_axis
-            self.stop_axis = stop_axis
-
-        @paddle.jit.to_static
-        def forward(self, x):
-            return paddle.flatten(x, start_axis=self.start_axis, stop_axis=self.stop_axis)
-
-    input_data = paddle.rand([2, 3, 4, 5, 2], dtype="float32")
-    verify_model(Flatten(), input_data=input_data)
-    verify_model(Flatten(2), input_data=input_data)
-    verify_model(Flatten(2, -2), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_flip():
-    class Flip(nn.Layer):
-        def __init__(self, axis):
-            super(Flip, self).__init__()
-            self.axis = axis
-
-        @paddle.jit.to_static
-        def forward(self, x):
-            return paddle.flip(x, axis=self.axis)
-
-    input_data = paddle.rand([2, 3, 4], dtype="float32")
-    verify_model(Flip(0), input_data)
-    verify_model(Flip(-1), input_data)
-    verify_model(Flip([0, 1]), input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_gather():
-    class Gather(nn.Layer):
-        def __init__(self, axis=None):
-            super(Gather, self).__init__()
-            self.axis = axis
-
-        @paddle.jit.to_static
-        def forward(self, x, index):
-            return paddle.gather(x, index, axis=self.axis)
-
-    x_shapes = [[20, 10], [10, 10, 8]]
-    index = paddle.to_tensor(np.array([1, 3, 5]).astype("int64"))
-    for x_shape in x_shapes:
-        x_data = paddle.rand(x_shape, dtype="float32")
-        verify_model(Gather(), [x_data, index])
-        verify_model(Gather(axis=0), [x_data, index])
-        verify_model(Gather(axis=1), [x_data, index])
-
-
-@tvm.testing.uses_gpu
-def test_forward_gather_nd():
-    class GatherNd(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, x, index):
-            return paddle.gather_nd(x, index)
-
-    x_shapes = [[20], [8, 8], [4, 5, 6], [3, 4, 3, 5]]
-    y_shapes = [[2, 1], [2], [1, 2, 3], [3]]
-    for x_shape, y_shape in zip(x_shapes, y_shapes):
-        x_data = paddle.rand(x_shape, dtype="float32")
-        y_data = paddle.randint(low=0, high=3, shape=y_shape, dtype="int64")
-        verify_model(GatherNd(), [x_data, y_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_group_norm():
-    class GroupNorm(nn.Layer):
-        def __init__(self, channels, groups):
-            super(GroupNorm, self).__init__()
-            self.group_norm = paddle.nn.GroupNorm(num_channels=channels, num_groups=groups)
-
-        def forward(self, inputs):
-            return self.group_norm(inputs)
-
-    input_shapes = [[1, 4, 6, 6], [2, 2, 4, 7], [2, 8, 1, 1]]
-    for input_shape in input_shapes:
-        num_channels = input_shape[1]
-        input_data = paddle.uniform(input_shape)
-        verify_model(GroupNorm(num_channels, 1), input_data, rtol=1e-4, atol=1e-4)
-        verify_model(GroupNorm(num_channels, 2), input_data, rtol=1e-4, atol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_forward_grid_sampler():
-    class GridSampler(nn.Layer):
-        def __init__(self, mode="bilinear", padding_mode="zeros", align_corners=True):
-            super(GridSampler, self).__init__()
-            self.mode = mode
-            self.padding_mode = padding_mode
-            self.align_corners = align_corners
-
-        def forward(self, x, grid):
-            return paddle.nn.functional.grid_sample(
-                x,
-                grid,
-                mode=self.mode,
-                padding_mode=self.padding_mode,
-                align_corners=self.align_corners,
-            )
-
-    x_2D = paddle.rand(shape=[4, 4, 8, 8], dtype="float32")
-    grid_2D = paddle.rand(shape=[4, 8, 8, 2], dtype="float32")
-    verify_model(GridSampler(mode="nearest"), input_data=[x_2D, grid_2D])
-    verify_model(GridSampler(padding_mode="reflection"), input_data=[x_2D, grid_2D])
-    verify_model(GridSampler(padding_mode="border"), input_data=[x_2D, grid_2D])
-    verify_model(GridSampler(align_corners=False), input_data=[x_2D, grid_2D])
-
-    x_3D = paddle.rand(shape=[4, 4, 4, 4, 4], dtype="float32")
-    grid_3D = paddle.rand(shape=[4, 8, 8, 8, 3], dtype="float32")
-    verify_model(GridSampler(mode="nearest"), input_data=[x_3D, grid_3D])
-    verify_model(GridSampler(padding_mode="reflection"), input_data=[x_3D, grid_3D])
-    verify_model(GridSampler(padding_mode="border"), input_data=[x_3D, grid_3D])
-    verify_model(GridSampler(align_corners=False), input_data=[x_3D, grid_3D])
-
-
-@tvm.testing.uses_gpu
-def test_forward_scatter():
-    class Scatter(nn.Layer):
-        def __init__(self, overwrite=True):
-            super(Scatter, self).__init__()
-            self.overwrite = overwrite
-
-        @paddle.jit.to_static
-        def forward(self, x, index, updates):
-            return paddle.scatter(x, index, updates, overwrite=self.overwrite)
-
-    x_shapes = [[10], [4, 5], [6, 4, 5], [4, 5, 6, 4]]
-    index_shapes = [[10], [4], [6], [4]]
-    for x_shape, index_shape in zip(x_shapes, index_shapes):
-        x_data = paddle.rand(x_shape, dtype="float32")
-        updates = paddle.rand(x_shape, dtype="float32") + 1.0
-        index = paddle.randint(low=0, high=3, shape=index_shape)
-        verify_model(Scatter(), [x_data, index, updates])
-        verify_model(Scatter(False), [x_data, index, updates])
-
-
-def test_forward_scatter_nd():
-    @paddle.jit.to_static
-    def scatter_nd(index, updates):
-        shape = [3, 5, 9, 10]
-        return paddle.scatter_nd(index, updates, shape)
-
-    @paddle.jit.to_static
-    def scatter_nd_add(x, index, updates):
-        return paddle.scatter_nd_add(x, index, updates)
-
-    index_data = np.array([[1, 1], [0, 1], [1, 3]]).astype(np.int64)
-    index = paddle.to_tensor(index_data)
-    updates = paddle.rand(shape=[3, 9, 10], dtype="float32")
-    verify_model(scatter_nd, [index, updates])
-    x = paddle.rand(shape=[3, 5, 4, 9, 10], dtype="float32")
-    updates = paddle.rand(shape=[3, 2, 9, 10], dtype="float32")
-    index = paddle.randint(0, 3, shape=[3, 2, 3])
-    verify_model(scatter_nd_add, [x, index, updates])
-
-
-@tvm.testing.uses_gpu
-def test_forward_shape_full():
-    @paddle.jit.to_static
-    def full1(inputs):
-        return paddle.full(paddle.shape(inputs), 3.14)
-
-    @paddle.jit.to_static
-    def full2(inputs):
-        return paddle.full(paddle.shape(inputs), 1.0, dtype=inputs.dtype)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(full1, input_data=[input_data])
-    verify_model(full2, input_data=[input_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_split():
-    class Split(nn.Layer):
-        def __init__(
-            self, axis=None, num_or_sections=None, axis_is_tensor=False, num_is_tensor=False
-        ):
-            super(Split, self).__init__()
-            self.axis = axis
-            self.num_or_sections = num_or_sections
-            self.axis_is_tensor = axis_is_tensor
-            self.num_is_tensor = num_is_tensor
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            axis = self.axis
-            if self.axis_is_tensor:
-                axis = paddle.to_tensor(axis, dtype="int32")
-            num_or_sections = self.num_or_sections
-            if self.num_is_tensor:
-                new_num_or_sections = []
-                for i in num_or_sections:
-                    if isinstance(i, list):
-                        i = paddle.to_tensor(i, dtype="int32")
-                    new_num_or_sections.append(i)
-                num_or_sections = new_num_or_sections
-            return paddle.split(inputs, num_or_sections=num_or_sections, axis=axis)
-
-    input_shape = [3, 6, 2]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(Split(axis=1, num_or_sections=3), input_data=input_data)
-    verify_model(
-        Split(axis=[1], num_or_sections=[2, 3, 1], axis_is_tensor=True), input_data=input_data
-    )
-    verify_model(
-        Split(axis=1, num_or_sections=[2, -1, [3]], num_is_tensor=True), input_data=input_data
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_squeeze():
-    class Squeeze(nn.Layer):
-        def __init__(self, axis=None):
-            super(Squeeze, self).__init__()
-            self.axis = axis
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.squeeze(inputs, axis=self.axis)
-
-    input_shapes = [[1, 1, 3, 1, 5], [5, 1, 6]]
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(Squeeze(axis=None), input_data=input_data)
-        verify_model(Squeeze(axis=1), input_data=input_data)
-    input_data = paddle.rand([1], dtype="float32")
-    verify_model(Squeeze(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_ones_like():
-    @paddle.jit.to_static
-    def ones_like1(inputs):
-        return paddle.ones_like(inputs)
-
-    @paddle.jit.to_static
-    def ones_like2(inputs):
-        return paddle.ones_like(inputs, dtype="int32")
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(ones_like1, input_data=input_data)
-    verify_model(ones_like2, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_gelu():
-    @paddle.jit.to_static
-    def gelu(inputs):
-        return nn.functional.gelu(inputs)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(gelu, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_hard_sigmoid():
-    @paddle.jit.to_static
-    def hard_sigmoid(inputs):
-        return nn.functional.hardsigmoid(inputs)
-
-    def hard_sigmoid1(inputs):
-        return nn.functional.hardsigmoid(inputs, offset=0.6)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(hard_sigmoid, input_data=input_data)
-    verify_model(hard_sigmoid1, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_hard_swish():
-    @paddle.jit.to_static
-    def hard_swish(inputs):
-        return nn.functional.hardswish(inputs)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(hard_swish, input_data=input_data)
-
-
-def test_forward_instance_norm():
-    class InstanceNorm(nn.Layer):
-        def __init__(self, num_features, epsilon=1e-05):
-            super(InstanceNorm, self).__init__()
-            self.instance_norm = paddle.nn.InstanceNorm2D(
-                num_features=num_features, epsilon=epsilon
-            )
-
-        def forward(self, inputs):
-            return self.instance_norm(inputs)
-
-    input_shapes = [[2, 2, 2, 3], [1, 3, 5, 5]]
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(InstanceNorm(input_shape[1]), input_data)
-        verify_model(InstanceNorm(input_shape[1], 1e-03), input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_interpolate():
-    class Interpolate(nn.Layer):
-        def __init__(
-            self,
-            mode="nearest",
-            align_corners=False,
-            align_mode=0,
-            data_format="NCHW",
-            use_scale=False,
-            use_list=False,
-            use_const=False,
-            use_scaler=False,
-        ):
-            super(Interpolate, self).__init__()
-            self.mode = mode
-            self.align_corners = align_corners
-            self.align_mode = align_mode
-            self.data_format = data_format
-            self.use_scale = use_scale
-            self.use_list = use_list
-            self.use_const = use_const
-            self.use_scaler = use_scaler
-
-        @paddle.jit.to_static
-        def forward(self, x):
-            size = np.array([15, 19]).astype("int32")
-            scale = np.array([2.0, 1.0]).astype("float32")
-            if not self.use_list and not self.use_const:
-                size = paddle.to_tensor(size)
-                scale = paddle.to_tensor(scale)
-            elif not self.use_const:
-                size0 = paddle.to_tensor(size[0:1])
-                size = [size0, int(size[1])]
-            elif not self.use_scaler:
-                size = size.tolist()
-                scale = scale.tolist()
-            else:
-                size = list(size)
-                h, w = paddle.rand(size).shape  # add decrease_axis
-                size = [h, w]
-            if not self.use_scale:
-                return paddle.nn.functional.interpolate(
-                    x,
-                    size=size,
-                    mode=self.mode,
-                    align_corners=self.align_corners,
-                    align_mode=self.align_mode,
-                    data_format=self.data_format,
-                )
-            else:
-                return paddle.nn.functional.interpolate(
-                    x,
-                    scale_factor=scale,
-                    mode=self.mode,
-                    align_corners=self.align_corners,
-                    align_mode=self.align_mode,
-                    data_format=self.data_format,
-                )
-
-    input_data = paddle.rand([1, 2, 8, 12]).astype("float32")
-    verify_model(Interpolate(), input_data)
-    verify_model(Interpolate(use_list=True), input_data)
-    verify_model(Interpolate(use_scale=True, use_const=True), input_data)
-    verify_model(Interpolate(use_const=True, use_scaler=True), input_data)
-    verify_model(Interpolate("bilinear", use_scale=True), input_data)
-    verify_model(Interpolate("bilinear", use_scale=True, align_corners=True), input_data)
-    verify_model(
-        Interpolate(
-            "bilinear",
-            use_scale=True,
-            align_corners=True,
-            align_mode=1,
-            data_format="NHWC",
-            use_const=True,
-        ),
-        input_data,
-    )
-    verify_model(
-        Interpolate("bicubic", use_scale=True, align_corners=True, align_mode=1), input_data
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_layer_norm():
-    @paddle.jit.to_static
-    def layer_norm(inputs, weight, bias):
-        return nn.functional.layer_norm(inputs, inputs.shape[-1], weight=weight, bias=bias)
-
-    class LayerNorm(nn.Layer):
-        def __init__(self):
-            super(LayerNorm, self).__init__()
-            data_shape = [10]
-            self.layer_norm = nn.LayerNorm(data_shape)
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.layer_norm(inputs)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    weight = paddle.rand([10], dtype="float32")
-    bias = paddle.rand([10], dtype="float32")
-    verify_model(layer_norm, input_data=[input_data, weight, bias])
-    verify_model(LayerNorm(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_leaky_relu():
-    @paddle.jit.to_static
-    def leaky_relu(inputs):
-        return nn.functional.leaky_relu(inputs)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(leaky_relu, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_logical_api():
-    class LogicalAPI(nn.Layer):
-        def __init__(self, api_name):
-            super(LogicalAPI, self).__init__()
-            for candidate in (paddle, paddle.nn.functional):
-                self.func = getattr(candidate, api_name, None)
-                if self.func:
-                    break
-
-        @paddle.jit.to_static
-        def forward(self, x, y):
-            z = self.func(x, y)
-            return paddle.cast(z, "int32")
-
-    x_shapes = [[128], [8, 20], [4, 20, 3], [2, 3, 8, 8], [2, 3, 3, 9, 9]]
-    y_shapes = [[1], [8, 20], [4, 1, 1], [2, 3, 8, 8], [2, 3, 3, 9, 1]]
-    for x_shape, y_shape in zip(x_shapes, y_shapes):
-        x_data = paddle.randint(0, 2, x_shape).astype("bool")
-        y_data = paddle.randint(0, 2, y_shape).astype("bool")
-        verify_model(LogicalAPI("logical_and"), [x_data, y_data])
-        verify_model(LogicalAPI("logical_or"), [x_data, y_data])
-        verify_model(LogicalAPI("logical_xor"), [x_data, y_data])
-
-
-@tvm.testing.uses_gpu
-def test_forward_logical_not():
-    class LogicalNot(nn.Layer):
-        def __init__(self):
-            super(LogicalNot, self).__init__()
-
-        @paddle.jit.to_static
-        def forward(self, x):
-            return paddle.logical_not(x).astype("int32")
-
-    input_shapes = [[128], [8, 20], [4, 20, 3], [2, 3, 8, 8], [2, 3, 3, 9, 9]]
-    for input_shape in input_shapes:
-        input_data = paddle.randint(-2, 2, input_shape).astype("bool")
-        verify_model(LogicalNot(), input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_look_up():
-    @paddle.jit.to_static
-    def look_up(inputs, weight):
-        return nn.functional.embedding(inputs, weight)
-
-    class LookUp(nn.Layer):
-        def __init__(self):
-            super(LookUp, self).__init__()
-            self.embedding = paddle.nn.Embedding(10, 4, sparse=True)
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.embedding(inputs)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.randint(0, 10, input_shape, dtype="int32")
-    weight = paddle.rand([10, 4], dtype="float32")
-    verify_model(look_up, input_data=[input_data, weight])
-    verify_model(LookUp(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_multiply():
-    @paddle.jit.to_static
-    def multiply1(inputs):
-        return inputs * inputs
-
-    @paddle.jit.to_static
-    def multiply2(inputs):
-        return inputs * 1.0 / 2.0
-
-    @paddle.jit.to_static
-    def multiply3(inputs, inputs2):
-        ones = paddle.ones([10], dtype="float32")
-        return inputs * ones / inputs2
-
-    input_shape = [10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(multiply1, input_data=input_data)
-    verify_model(multiply2, input_data=input_data)
-    input_data2 = paddle.rand(input_shape, dtype="float32")
-    verify_model(multiply3, input_data=[input_data, input_data2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_matmul():
-    class MatMul1(nn.Layer):
-        def forward(self, input1, input2):
-            return paddle.matmul(input1, input2)
-
-    # matrix x vector
-    input_data1 = paddle.randn((3, 4), dtype="float32")
-    input_data2 = paddle.randn((4,), dtype="float32")
-    verify_model(MatMul1(), input_data=[input_data1, input_data2])
-
-    # matrix x matrix
-    input_data1 = paddle.randn((5, 4), dtype="float32")
-    input_data2 = paddle.randn((4, 5), dtype="float32")
-    verify_model(MatMul1(), input_data=[input_data1, input_data2])
-
-    # batched matrix x batched matrix
-    input_data1 = paddle.randn((10, 3, 4), dtype="float32")
-    input_data2 = paddle.randn((10, 4, 5), dtype="float32")
-    verify_model(MatMul1(), input_data=[input_data1, input_data2])
-
-    # batched matrix x broadcasted matrix
-    input_data1 = paddle.randn((10, 3, 4), dtype="float32")
-    input_data2 = paddle.randn((4, 5), dtype="float32")
-    verify_model(MatMul1(), input_data=[input_data1, input_data2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_pool2d():
-    class Pool2D1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.avg_pool2d(inputs, kernel_size=2, stride=2, padding=0)
-
-    class Pool2D2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.adaptive_avg_pool2d(inputs, output_size=[3, 3])
-
-    class Pool2D3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.avg_pool2d(
-                inputs,
-                kernel_size=3,
-                stride=1,
-                padding=[1, 1],
-                exclusive=False,
-                divisor_override=2.5,
-            )
-
-    input_shapes = [[1, 2, 8, 8], [1, 3, 10, 10]]
-    for input_shape in input_shapes:
-        input_data = paddle.uniform(shape=input_shape, dtype="float32", min=-1, max=1)
-        verify_model(Pool2D1(), input_data=input_data)
-        verify_model(Pool2D2(), input_data=input_data)
-        verify_model(Pool2D3(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_pad1d():
-    class Pad1D(nn.Layer):
-        def __init__(self, padding=0, mode="constant", value=0.0, data_format="NCL"):
-            super(Pad1D, self).__init__()
-            self.pad1d = paddle.nn.Pad1D(padding, mode=mode, value=value, data_format=data_format)
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.pad1d(inputs)
-
-    input_shapes = [[1, 2, 5], [2, 5, 9]]
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(Pad1D(padding=2), input_data=input_data)
-        verify_model(Pad1D(padding=[1, 2], data_format="NLC"), input_data=input_data)
-        verify_model(Pad1D(padding=[0, 2], value=0.3), input_data=input_data)
-        verify_model(Pad1D(padding=[2, 2], mode="reflect"), input_data=input_data)
-        verify_model(Pad1D(padding=3, mode="replicate"), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_pad2d():
-    class Pad2D(nn.Layer):
-        def __init__(self, padding=0, mode="constant", value=0.0, data_format="NCHW"):
-            super(Pad2D, self).__init__()
-            self.pad2d = paddle.nn.Pad2D(padding, mode=mode, value=value, data_format=data_format)
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.pad2d(inputs)
-
-    input_shapes = [[1, 2, 5, 5], [2, 2, 5, 9]]
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(Pad2D(padding=2), input_data=input_data)
-        verify_model(Pad2D(padding=[1, 2, 0, 2], data_format="NHWC"), input_data=input_data)
-        verify_model(Pad2D(padding=[1, 2, 0, 2], value=0.3), input_data=input_data)
-        verify_model(Pad2D(padding=[1, 2, 0, 2], mode="reflect"), input_data=input_data)
-        verify_model(Pad2D(padding=3, mode="replicate"), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_pad3d():
-    class Pad3D(nn.Layer):
-        def __init__(self, padding=0, mode="constant", value=0.0, data_format="NCDHW"):
-            super(Pad3D, self).__init__()
-            self.pad3d = paddle.nn.Pad3D(padding, mode=mode, value=value, data_format=data_format)
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.pad3d(inputs)
-
-    input_shapes = [[1, 2, 2, 5, 5], [1, 2, 2, 5, 9]]
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        verify_model(Pad3D(padding=2), input_data=input_data)
-        verify_model(Pad3D(padding=[1, 2, 0, 2, 1, 1], data_format="NDHWC"), input_data=input_data)
-        verify_model(Pad3D(padding=[1, 2, 0, 2, 1, 1], value=0.3), input_data=input_data)
-        verify_model(Pad3D(padding=[1, 2, 0, 2, 1, 1], mode="reflect"), input_data=input_data)
-        verify_model(Pad3D(padding=3, mode="replicate"), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_transpose():
-    class Transpose(nn.Layer):
-        def __init__(self, perm):
-            super(Transpose, self).__init__()
-            self.perm = perm
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            inputs = inputs * 2
-            return paddle.transpose(inputs, perm=self.perm)
-
-    input_data = paddle.rand([1, 3, 5, 4, 3], dtype="float32")
-    verify_model(Transpose([0, 1, 2, 3, 4]), input_data=input_data)
-    verify_model(Transpose([4, 3, 2, 0, 1]), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_reduce():
-    class Reduce(nn.Layer):
-        def __init__(self, op_name, axis=None, keepdim=False):
-            super(Reduce, self).__init__()
-            self.op_name = op_name
-            self.axis = axis
-            self.keepdim = keepdim
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            result = getattr(paddle, self.op_name)(inputs, axis=self.axis, keepdim=self.keepdim)
-            result = result.astype("float32")
-            return result
-
-    input_shapes = [[1, 2, 2, 5, 5], [2, 3, 4], [4, 20], [2, 3, 30, 30]]
-    for input_shape in input_shapes:
-        input_data = paddle.uniform(min=-3, max=3, shape=input_shape, dtype="float32")
-        verify_model(Reduce("all"), input_data=input_data.astype("bool"))
-        verify_model(Reduce("any", 1), input_data=input_data.astype("bool"))
-        verify_model(Reduce("max", 0, True), input_data=input_data)
-        verify_model(Reduce("min", 1, True), input_data=input_data)
-        verify_model(Reduce("prod", 0), input_data=input_data)
-        verify_model(Reduce("sum", 0, True), input_data=input_data)
-        verify_model(Reduce("mean", -1, True), input_data=input_data)
-        # logsumexp only supports tensor with rank less than 5
-        if len(input_shape) < 5:
-            verify_model(Reduce("logsumexp", -1, True), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_reshape():
-    @paddle.jit.to_static
-    def reshape1(inputs, x):
-        new_shape = paddle.shape(x)
-        return paddle.reshape(inputs, new_shape)
-
-    @paddle.jit.to_static
-    def reshape2(inputs):
-        return inputs.reshape([-1])
-
-    @paddle.jit.to_static
-    def reshape3(inputs):
-        data_shape = inputs.shape
-        return inputs.reshape([data_shape[0] * data_shape[1], data_shape[2]])
-
-    @paddle.jit.to_static
-    def reshape4(inputs, x):
-        new_shape = paddle.shape(x)
-        return paddle.reshape(inputs, [new_shape[2], 2, -1])
-
-    input_shape = [2, 1, 10, 1, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    input_data2 = paddle.randn([2, 1, 10, 10])
-    verify_model(reshape1, input_data=[input_data, input_data2])
-    verify_model(reshape2, input_data=input_data)
-    verify_model(reshape3, input_data=paddle.randn((2, 3, 4)))
-    verify_model(reshape4, input_data=[input_data, input_data2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_scale():
-    @paddle.jit.to_static
-    def scale1(inputs):
-        return paddle.scale(inputs, scale=2.0, bias=1.0)
-
-    @paddle.jit.to_static
-    def scale2(inputs):
-        return paddle.scale(inputs, scale=3, bias=2.1, act="gelu")
-
-    input_data = paddle.randn(shape=[2, 3], dtype="float32")
-    verify_model(
-        scale1,
-        input_data=[
-            input_data,
-        ],
-    )
-    verify_model(scale2, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_slice():
-    @paddle.jit.to_static
-    def slice1(inputs):
-        return inputs[:, :, :, :3]
-
-    @paddle.jit.to_static
-    def slice2(inputs):
-        return inputs[0, :, :-3, :]
-
-    @paddle.jit.to_static
-    def slice3(inputs):
-        return inputs[0::2, 0::2] + inputs[1::2, 1::2]
-
-    @paddle.jit.to_static
-    def slice4(inputs):
-        x0 = paddle.to_tensor([2]) - paddle.to_tensor([1])
-        x1 = paddle.to_tensor([3]) + paddle.to_tensor([1])
-        return inputs[:, x0:, 1:x1, :]
-
-    @paddle.jit.to_static
-    def slice5(inputs):
-        b, c, h, w = inputs  # add decrease_axis
-        return h
-
-    input_shape = [1, 3, 10, 10]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(
-        slice1,
-        input_data=[
-            input_data,
-        ],
-    )
-    verify_model(slice2, input_data=input_data)
-    verify_model(slice3, input_data=paddle.randn((4, 4)))
-    verify_model(slice4, input_data=input_data)
-    # verify_model(slice5, input_data=paddle.randn((4,)))
-
-
-@tvm.testing.uses_gpu
-def test_forward_unique():
-    class Unique(nn.Layer):
-        def __init__(
-            self,
-            return_index=False,
-            return_inverse=False,
-            return_counts=False,
-            axis=None,
-            dtype="int64",
-        ):
-            super(Unique, self).__init__()
-            self.return_index = return_index
-            self.return_inverse = return_inverse
-            self.return_counts = return_counts
-            self.axis = None
-            self.dtype = dtype
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            result = paddle.unique(
-                inputs,
-                return_inverse=self.return_inverse,
-                return_counts=self.return_counts,
-                axis=self.axis,
-                dtype=self.dtype,
-            )
-            return result
-
-    input_shape = [2, 3, 5]
-    input_data = paddle.rand(input_shape)
-    verify_model(Unique(), input_data=input_data)
-    verify_model(Unique(return_index=True), input_data=input_data)
-    verify_model(Unique(return_index=True, return_inverse=True), input_data=input_data)
-    verify_model(
-        Unique(return_index=True, return_inverse=True, return_counts=True), input_data=input_data
-    )
-
-
-@tvm.testing.uses_gpu
-def run_math_api(func):
-    api_name = func.__name__.split("_")[-1]
-    print("func_name:", api_name)
-
-    class MathAPI(nn.Layer):
-        def __init__(self, api_name):
-            super(MathAPI, self).__init__()
-            for candidate in (paddle, paddle.nn.functional):
-                self.func = getattr(candidate, api_name, None)
-                if self.func:
-                    break
-
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return self.func(inputs)
-
-    input_shapes = [[128], [2, 100], [10, 2, 5], [7, 3, 4, 1]]
-    for input_shape in input_shapes:
-        input_data = paddle.rand(input_shape, dtype="float32")
-        if api_name in ["log", "log2", "log10", "reciprocal", "sqrt", "rsqrt"]:
-            # avoid illegal input, all elements should be positive
-            input_data = paddle.uniform(input_shape, min=0.01, max=0.99)
-        verify_model(MathAPI(api_name), input_data=input_data)
-
-
-@run_math_api
-def test_forward_abs():
-    pass
-
-
-@run_math_api
-def test_forward_acos():
-    pass
-
-
-@run_math_api
-def test_forward_abs():
-    pass
-
-
-@run_math_api
-def test_forward_atan():
-    pass
-
-
-@run_math_api
-def test_forward_ceil():
-    pass
-
-
-@run_math_api
-def test_forward_cos():
-    pass
-
-
-@run_math_api
-def test_forward_cosh():
-    pass
-
-
-@run_math_api
-def test_forward_elu():
-    pass
-
-
-@run_math_api
-def test_forward_erf():
-    pass
-
-
-@run_math_api
-def test_forward_exp():
-    pass
-
-
-@run_math_api
-def test_forward_floor():
-    pass
-
-
-@run_math_api
-def test_forward_hardshrink():
-    pass
-
-
-@run_math_api
-def test_forward_hardtanh():
-    pass
-
-
-@run_math_api
-def test_forward_log_sigmoid():
-    pass
-
-
-@run_math_api
-def test_forward_log_softmax():
-    pass
-
-
-@run_math_api
-def test_forward_log():
-    pass
-
-
-@run_math_api
-def test_forward_log2():
-    pass
-
-
-@run_math_api
-def test_forward_log10():
-    pass
-
-
-@run_math_api
-def test_forward_log1p():
-    pass
-
-
-@run_math_api
-def test_forward_reciprocal():
-    pass
-
-
-@run_math_api
-def test_forward_relu():
-    pass
-
-
-@run_math_api
-def test_forward_round():
-    pass
-
-
-@run_math_api
-def test_forward_rsqrt():
-    pass
-
-
-@run_math_api
-def test_forward_selu():
-    pass
-
-
-@run_math_api
-def test_forward_sigmoid():
-    pass
-
-
-@run_math_api
-def test_forward_sign():
-    pass
-
-
-@run_math_api
-def test_forward_sin():
-    pass
-
-
-@tvm.testing.uses_gpu
-def test_forward_softplus():
-    @paddle.jit.to_static
-    def Softplus1(input):
-        return paddle.nn.functional.softplus(input, beta=1.0, threshold=20.0)
-
-    @paddle.jit.to_static
-    def Softplus2(input):
-        return paddle.nn.functional.softplus(input, beta=6.0, threshold=20.0)
-
-    @paddle.jit.to_static
-    def Softplus3(input):
-        return paddle.nn.functional.softplus(input, beta=1.0, threshold=10.0)
-
-    x = paddle.to_tensor([-8.0, -12.0, 1.0, 18.0, 25.0])
-    verify_model(Softplus1, x)
-    verify_model(Softplus2, x)
-    verify_model(Softplus3, x)
-
-    input_shapes = [[10], [2, 3], [5, 10, 11], [3, 4, 5, 6]]
-    for input_shape in input_shapes:
-        input_data = paddle.randn(shape=input_shape, dtype="float32")
-        verify_model(Softplus1, input_data=input_data)
-        verify_model(Softplus2, input_data=input_data)
-        verify_model(Softplus3, input_data=input_data)
-
-
-@run_math_api
-def test_forward_sqrt():
-    pass
-
-
-@run_math_api
-def test_forward_square():
-    pass
-
-
-@run_math_api
-def test_forward_sin():
-    pass
-
-
-@run_math_api
-def test_forward_softsign():
-    pass
-
-
-@run_math_api
-def test_forward_sqrt():
-    pass
-
-
-@run_math_api
-def test_forward_square():
-    pass
-
-
-@run_math_api
-def test_forward_swish():
-    pass
-
-
-@run_math_api
-def test_forward_tan():
-    pass
-
-
-@run_math_api
-def test_forward_tanh():
-    pass
-
-
-@tvm.testing.uses_gpu
-def test_forward_meshgrid():
-    @paddle.jit.to_static
-    def t(x, y, z):
-        return paddle.meshgrid(x, y, z)
-
-    x = paddle.randint(low=0, high=100, shape=[2])
-    y = paddle.randint(low=0, high=100, shape=[3])
-    z = paddle.randint(low=0, high=100, shape=[5])
-    verify_model(t, [x, y, z])
-
-
-@tvm.testing.uses_gpu
-def test_forward_mv():
-    class Mv(nn.Layer):
-        def forward(self, input1, input2):
-            return paddle.mv(input1, input2)
-
-    # matrix x vector
-    input_data1 = paddle.randn((3, 4), dtype="float32")
-    input_data2 = paddle.randn((4,), dtype="float32")
-    verify_model(Mv(), input_data=[input_data1, input_data2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_pixel_shuffle():
-    class PixelShuffle(nn.Layer):
-        def __init__(self, upscale_factor, data_format="NCHW"):
-            super(PixelShuffle, self).__init__()
-            self.pixel_shuffle = paddle.nn.PixelShuffle(upscale_factor, data_format)
-
-        @paddle.jit.to_static
-        def forward(self, x):
-            return self.pixel_shuffle(x)
-
-    input_shapes = [[1, 4, 3, 3], [2, 8, 2, 5]]
-    for input_shape in input_shapes:
-        x = paddle.rand(input_shape, dtype="float32")
-        verify_model(PixelShuffle(2), x)
-
-    input_shapes = [[1, 3, 3, 4], [2, 2, 5, 8]]
-    for input_shape in input_shapes:
-        x = paddle.rand(input_shape, dtype="float32")
-        verify_model(PixelShuffle(2, data_format="NHWC"), x)
-
-
-@tvm.testing.uses_gpu
-def test_forward_prelu():
-    class PRelu(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, x, w):
-            return paddle.nn.functional.prelu(x, w)
-
-    x = paddle.normal(shape=[4, 3, 5, 5])
-    w = paddle.to_tensor(
-        np.array(
-            [
-                0.25,
-            ]
-        ).astype("float32")
-    )
-    verify_model(PRelu(), [x, w])
-    w2 = paddle.to_tensor(np.array([0.25, 0.5, 0.8]).astype("float32"))
-    verify_model(PRelu(), [x, w2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_arange():
-    @paddle.jit.to_static
-    def arange(inputs):
-        return paddle.arange(paddle.shape(inputs)[0], 9, 2.0)
-
-    @paddle.jit.to_static
-    def arange1(inputs):
-        return inputs + paddle.arange(0, 10.0, 8, dtype="float32")
-
-    input_shape = [2, 2]
-    input_data = paddle.rand(input_shape, dtype="float32")
-    verify_model(arange, input_data)
-    verify_model(arange1, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_rnn():
-    class RNN(nn.Layer):
-        def __init__(self, api_name, input_size, hidden_size, num_layers, direction="forward"):
-            super(RNN, self).__init__()
-            rnn_func = getattr(paddle.nn, api_name, None)
-            self.rnn = rnn_func(input_size, hidden_size, num_layers, direction=direction)
-
-        @paddle.jit.to_static
-        def forward(self, inputs, prev_h):
-            y, h = self.rnn(inputs, prev_h)
-            return y
-
-    input_size, hidden_size, num_layers = 8, 16, 2
-    input_shape = [4, 5, 8]
-    input_data = paddle.rand(input_shape, dtype="float32")
-
-    for api_name in ("SimpleRNN", "GRU"):
-        prev_h = paddle.rand([4, 4, 16], dtype="float32")
-        verify_model(
-            RNN(api_name, input_size, hidden_size, num_layers, direction="bidirectional"),
-            input_data=[input_data, prev_h],
-        )
-        prev_h = paddle.rand([2, 4, 16], dtype="float32")
-        verify_model(
-            RNN(api_name, input_size, hidden_size, num_layers), input_data=[input_data, prev_h]
-        )
-
-
-@tvm.testing.uses_gpu
-def test_forward_topk():
-    @paddle.jit.to_static
-    def topk1(inputs):
-        return paddle.topk(inputs, k=1)
-
-    @paddle.jit.to_static
-    def topk2(inputs):
-        k = paddle.to_tensor([1], dtype=paddle.int32)
-        return paddle.topk(inputs, k=k)
-
-    @paddle.jit.to_static
-    def topk3(inputs):
-        return paddle.topk(inputs, k=1, largest=False)
-
-    @paddle.jit.to_static
-    def topk4(inputs):
-        return paddle.topk(inputs, k=2, sorted=True)
-
-    @paddle.jit.to_static
-    def topk5(inputs):
-        return paddle.topk(inputs, k=2, sorted=False)
-
-    @paddle.jit.to_static
-    def topk6(inputs):
-        return paddle.topk(inputs, k=1, axis=0)
-
-    # paddle.fluid.layers.topk
-    @paddle.jit.to_static
-    def topk7(inputs):
-        return paddle.fluid.layers.topk(inputs, k=1)
-
-    @paddle.jit.to_static
-    def topk8(inputs):
-        return paddle.fluid.layers.topk(inputs, k=2)
-
-    input_data = paddle.to_tensor([[1, 4, 5, 7], [3, 6, 2, 5]], dtype=paddle.int32)
-    input_data_fp32 = paddle.to_tensor([[1, 4, 5, 7], [3, 6, 2, 5]], dtype=paddle.float32)
-    verify_model(topk1, input_data=input_data)
-    # verify_model(topk2, input_data=input_data)
-    verify_model(topk3, input_data=input_data)
-    verify_model(topk4, input_data=input_data)
-    verify_model(topk5, input_data=input_data)
-    verify_model(topk6, input_data=input_data)
-    verify_model(topk7, input_data=input_data_fp32)
-    verify_model(topk8, input_data=input_data_fp32)
-
-
-@tvm.testing.uses_gpu
-def test_forward_tanhshrink():
-    @paddle.jit.to_static
-    def tanhshrink(inputs):
-        return paddle.nn.functional.tanhshrink(inputs)
-
-    input_data = paddle.randn(shape=[2, 3], dtype="float32")
-    verify_model(tanhshrink, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_one_hot_v2():
-    @paddle.jit.to_static
-    def one_hot_v2_1(inputs):
-        return nn.functional.one_hot(inputs, num_classes=4)
-
-    input_data = paddle.to_tensor([1, 1, 3, 0], dtype=paddle.int32)
-    verify_model(one_hot_v2_1, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_norm():
-    @paddle.jit.to_static
-    def norm_1(inputs):
-        return paddle.fluid.layers.l2_normalize(inputs, -1, 1e-12)
-
-    def norm_2(inputs):
-        return paddle.fluid.layers.l2_normalize(inputs, 1, 1e-12)
-
-    input_data = paddle.to_tensor(
-        [[[1, 2], [3, 1], [4, 5]], [[3, 1], [3, 5], [2, 4]]], dtype=paddle.float32
-    )
-    verify_model(norm_1, input_data=input_data)
-    verify_model(norm_2, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_where_index():
-    @paddle.jit.to_static
-    def where_index_1(inputs):
-        return paddle.nonzero(inputs)
-
-    input_data = paddle.to_tensor([[1.0, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 3.0]])
-    verify_model(where_index_1, input_data=input_data, use_vm=True)
-
-
-@tvm.testing.uses_gpu
-def test_forward_take_along_axis():
-    @paddle.jit.to_static
-    def take_along_axis_1(inputs, index):
-        return paddle.take_along_axis(inputs, index, 0)
-
-    input_data = paddle.to_tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
-    index = paddle.to_tensor([[0]])
-    verify_model(take_along_axis_1, input_data=[input_data, index])
-
-
-@tvm.testing.uses_gpu
-def test_forward_stack():
-    class Stack1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, input0, input1, input2):
-            return paddle.stack([input0, input1, input2], axis=-1)
-
-    class Stack2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, input0, input1, input2):
-            return paddle.stack([input0, input1, input2], axis=1)
-
-    class Stack3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, input0, input1, input2):
-            return paddle.stack([input0, input1, input2], axis=2)
-
-    input_shapes = [[2, 3], [5, 10, 11], [3, 4, 5, 6]]
-    for input_shape in input_shapes:
-        input_data_0 = paddle.randn(shape=input_shape, dtype="float32")
-        input_data_1 = paddle.randn(shape=input_shape, dtype="float32")
-        input_data_2 = paddle.randn(shape=input_shape, dtype="float32")
-        verify_model(Stack1(), [input_data_0, input_data_1, input_data_2])
-        verify_model(Stack2(), [input_data_0, input_data_1, input_data_2])
-        verify_model(Stack3(), [input_data_0, input_data_1, input_data_2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_unstack():
-    class UnStack1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.unstack(inputs, axis=-1)
-
-    class UnStack2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.unstack(inputs, axis=1)
-
-    class UnStack3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.unstack(inputs, axis=0)
-
-    input_shapes = [[2, 3], [5, 10, 11], [3, 4, 5, 6], [1, 3, 4, 1, 1]]
-    for input_shape in input_shapes:
-        input_data = paddle.randn(shape=input_shape, dtype="float32")
-        verify_model(UnStack1(), input_data)
-        verify_model(UnStack2(), input_data)
-        verify_model(UnStack3(), input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_silu():
-    class Silu(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.silu(inputs)
-
-    input_shapes = [[10], [2, 3], [5, 10, 11], [3, 4, 5, 6]]
-    for input_shape in input_shapes:
-        input_data = paddle.randn(shape=input_shape, dtype="float32")
-        verify_model(Silu(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_softshrink():
-    @paddle.jit.to_static
-    def Softshrink1(input):
-        return nn.functional.softshrink(input, threshold=0.0)
-
-    @paddle.jit.to_static
-    def Softshrink2(input):
-        return nn.functional.softshrink(input, threshold=0.5)
-
-    @paddle.jit.to_static
-    def Softshrink3(input):
-        return nn.functional.softshrink(input, threshold=1.0)
-
-    x = paddle.to_tensor([-0.9, -0.2, 0.1, 0.8])
-    verify_model(Softshrink2, x)
-
-    input_shapes = [[10], [2, 3], [5, 10, 11], [3, 4, 5, 6]]
-    for input_shape in input_shapes:
-        input_data = paddle.randn(shape=input_shape, dtype="float32")
-        verify_model(Softshrink1, input_data=input_data)
-        verify_model(Softshrink2, input_data=input_data)
-        verify_model(Softshrink3, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_where():
-    @paddle.jit.to_static
-    def where1(x, y):
-        return paddle.where(x > 1, x, y)
-
-    @paddle.jit.to_static
-    def where2(x, y):
-        return paddle.where(x > y, x, y)
-
-    x = paddle.to_tensor([0.9383, 0.1983, 3.2, 1.2])
-    y = paddle.to_tensor([1.0, 1.0, 1.0, 1.0])
-    verify_model(where1, [x, y])
-
-    input_shapes = [[10], [2, 3], [5, 10, 11], [3, 4, 5, 6]]
-    for input_shape in input_shapes:
-        x = paddle.randn(shape=input_shape, dtype="float32")
-        y = paddle.randn(shape=input_shape, dtype="float32")
-        verify_model(where1, [x, y])
-        verify_model(where2, [x, y])
-
-
-@tvm.testing.uses_gpu
-def test_forward_tile():
-    class Tile1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.tile(inputs, repeat_times=[10])
-
-    class Tile2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.tile(inputs, repeat_times=[2, 3])
-
-    class Tile3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.tile(inputs, repeat_times=[1, 2, 3])
-
-    class Tile4(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.tile(inputs, repeat_times=[2, 3, 4, 1, 5])
-
-    class Tile5(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            reps = paddle.to_tensor([3, 2])
-            reps = paddle.cast(reps, "int32")
-            return paddle.tile(inputs, repeat_times=reps)
-
-    class Tile6(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            rep_0 = paddle.to_tensor([3])
-            rep_1 = paddle.to_tensor([2])
-            rep_0 = paddle.cast(rep_0, "int32")
-            rep_1 = paddle.cast(rep_1, "int32")
-            return paddle.tile(inputs, repeat_times=[rep_0, rep_1])
-
-    input_shapes = [
-        [10],
-        [2, 3],
-        [3, 4, 5],
-        [5, 3, 1, 4],
-        [1, 3, 1, 6, 7],
-    ]
-    for input_shape in input_shapes:
-        input_data = paddle.randn(shape=input_shape, dtype="float32")
-        verify_model(Tile1(), input_data=input_data)
-        verify_model(Tile2(), input_data=input_data)
-        verify_model(Tile3(), input_data=input_data)
-        verify_model(Tile4(), input_data=input_data)
-        verify_model(Tile5(), input_data=input_data)
-        verify_model(Tile6(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_mish():
-    class Mish(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.mish(inputs)
-
-    input_shapes = [[10], [2, 3], [5, 10, 11], [3, 4, 5, 6]]
-    if paddle.version.full_version >= "2.4.2":
-        for input_shape in input_shapes:
-            input_data = paddle.randn(shape=input_shape, dtype="float32")
-            verify_model(Mish(), input_data=input_data)
-            input_data += 20.0
-            verify_model(Mish(), input_data=input_data)
-
-        input_data = paddle.to_tensor([-5.0, 0.0, 5.0, 23.1, 20.0])
-        verify_model(Mish(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_thresholded_relu():
-    class ThresholdedRelu1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.thresholded_relu(inputs)
-
-    class ThresholdedRelu2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.thresholded_relu(inputs, threshold=0.5)
-
-    input_shapes = [[10], [2, 3], [5, 10, 11], [3, 4, 5, 6]]
-    for input_shape in input_shapes:
-        input_data = paddle.randn(shape=input_shape, dtype="float32")
-        verify_model(ThresholdedRelu1(), input_data=input_data)
-        verify_model(ThresholdedRelu2(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_index_select():
-    class IndexSelect1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, x, index):
-            return paddle.index_select(x, index, axis=0)
-
-    class IndexSelect2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, x, index):
-            return paddle.index_select(x, index, axis=-1)
-
-    input_shapes = [[10], [2, 3], [5, 10, 11], [3, 4, 5, 6]]
-    for input_shape in input_shapes:
-        input_data = paddle.randn(shape=input_shape, dtype="float32")
-        index = paddle.to_tensor([0, 1, 1], dtype="int32")
-        verify_model(IndexSelect1(), input_data=[input_data, index])
-        verify_model(IndexSelect2(), input_data=[input_data, index])
-
-
-@tvm.testing.uses_gpu
-def test_forward_eye():
-    class Eye1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.eye(3, 5, dtype="int32"), paddle.eye(3, 5, dtype="float32"), inputs
-
-    class Eye2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.eye(5, 3, dtype="int64"), paddle.eye(5, 3, dtype="float64"), inputs
-
-    class Eye3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.eye(0, 3, dtype="int64"), paddle.eye(0, 0, dtype="float64"), inputs
-
-    class Eye4(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return paddle.eye(4, None, dtype="int64"), paddle.eye(4, None, dtype="float64"), inputs
-
-    x = paddle.to_tensor([1], dtype="float32")
-    verify_model(Eye1(), input_data=[x])
-    verify_model(Eye2(), input_data=[x])
-    verify_model(Eye3(), input_data=[x])
-    verify_model(Eye4(), input_data=[x])
-
-
-@tvm.testing.uses_gpu
-def test_forward_linspace():
-    class Linspace1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            out1 = paddle.linspace(0.5, 7, 1, "int32")
-            out2 = paddle.linspace(1.3, 7.1, 5, "float32")
-            out3 = paddle.linspace(1, 1000000000, 10, "int64")
-            out4 = paddle.linspace(1, 7.1, 5, "float64")
-            return out1, out2, out3, out4, inputs
-
-    class Linspace2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            start = paddle.to_tensor([-2.5])
-            stop = paddle.to_tensor([31.6])
-            num = paddle.to_tensor([13])
-            start = paddle.cast(start, "float32")
-            stop = paddle.cast(stop, "float32")
-            num = paddle.cast(num, "int32")
-            out1 = paddle.linspace(start, stop, num, "int32")
-            out2 = paddle.linspace(start, stop, num, "float32")
-            out3 = paddle.linspace(start, stop, num, "int64")
-            out4 = paddle.linspace(start, stop, num, "float64")
-            return out1, out2, out3, out4, inputs
-
-    class Linspace3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, start, stop, num):
-            out1 = paddle.linspace(start, stop, num, "int32")
-            out2 = paddle.linspace(start, stop, num, "float32")
-            out3 = paddle.linspace(start, stop, num, "int64")
-            out4 = paddle.linspace(start, stop, num, "float32")
-            return out1
-
-    start = paddle.to_tensor([1.3])
-    stop = paddle.to_tensor([5.1])
-    num = paddle.to_tensor([3])
-    start = paddle.cast(start, "float32")
-    stop = paddle.cast(stop, "float32")
-    num = paddle.cast(num, "int32")
-    x = paddle.to_tensor([1], dtype="float32")
-    verify_model(Linspace1(), input_data=[x])
-    verify_model(Linspace2(), input_data=[x])
-    verify_model(Linspace3(), input_data=[start, stop, num], use_vm=True)
-    num = paddle.to_tensor([1])
-    num = paddle.cast(num, "int32")
-    verify_model(Linspace3(), input_data=[start, stop, num], use_vm=True)
-
-
-@tvm.testing.uses_gpu
-def test_forward_dist():
-    class Dist(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, x, y):
-            l0_norm = paddle.dist(x, y, 0)
-            l2_norm = paddle.dist(x, y, 2)
-            float_norm = paddle.dist(x, y, 1.3)
-            inf_norm = paddle.dist(x, y, float("inf"))
-            ninf_norm = paddle.dist(x, y, float("-inf"))
-            return l0_norm, l2_norm, float_norm, inf_norm, ninf_norm
-
-    x = paddle.to_tensor([[3, 3], [3, 3]], dtype="float32")
-    y = paddle.to_tensor([[1, 2], [3, 4]], dtype="float32")
-    w = paddle.to_tensor([[1, 2]], dtype="float32")
-    v = paddle.to_tensor([[2.1]], dtype="float32")
-    verify_model(Dist(), input_data=[x, y])
-    verify_model(Dist(), input_data=[x, w])
-    verify_model(Dist(), input_data=[w, v])
-    verify_model(Dist(), input_data=[y, v])
-
-
-@tvm.testing.uses_gpu
-def test_forward_p_norm():
-    class PNorm(nn.Layer):
-        def __init__(self, axis, keepdim, p=1):
-            super(PNorm, self).__init__()
-            self.p = p
-            self.axis = axis
-            self.keepdim = keepdim
-
-        @paddle.jit.to_static
-        def forward(self, input_data):
-            return paddle.norm(input_data, p=self.p, axis=self.axis, keepdim=self.keepdim)
-
-    input_data = paddle.rand((2, 2, 3), dtype="float32")
-    verify_model(PNorm(axis=0, keepdim=True), input_data=input_data)
-    verify_model(PNorm(axis=0, keepdim=False), input_data=input_data)
-    verify_model(PNorm(axis=1, keepdim=True, p=1.5), input_data=input_data)
-    verify_model(PNorm(axis=-1, keepdim=True, p=3.4), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_roi_align():
-    class RoiAlign(nn.Layer):
-        def __init__(self, spatial_scale=1.0, sampling_ratio=-1, aligned=False):
-            super(RoiAlign, self).__init__()
-            self.spatial_scale = spatial_scale
-            self.sampling_ratio = sampling_ratio
-            self.aligned = aligned
-
-        @paddle.jit.to_static
-        def forward(self, input_data, rois, rois_num):
-            return paddle.vision.ops.roi_align(
-                input_data, rois, rois_num, 3, self.spatial_scale, self.sampling_ratio, self.aligned
-            )
-
-    input_data = paddle.rand((1, 128, 32, 32), dtype="float32")
-    boxes = paddle.rand([3, 4])
-    boxes[:, 2] += boxes[:, 0] + 3
-    boxes[:, 3] += boxes[:, 1] + 4
-    boxes_num = paddle.to_tensor([3]).astype("int32")
-    verify_model(RoiAlign(), input_data=[input_data, boxes, boxes_num])
-    verify_model(RoiAlign(aligned=True), input_data=[input_data, boxes, boxes_num])
-    verify_model(
-        RoiAlign(spatial_scale=2.0, aligned=True), input_data=[input_data, boxes, boxes_num]
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_softmax_with_cross_entropy():
-    class SoftmaxWithCrossEntropy(nn.Layer):
-        def __init__(self, soft_label=False, ignore_index=-100, return_softmax=False, axis=-1):
-            super(SoftmaxWithCrossEntropy, self).__init__()
-            self.soft_label = soft_label
-            self.ignore_index = ignore_index
-            self.return_softmax = return_softmax
-            self.axis = axis
-
-        @paddle.jit.to_static
-        def forward(self, input_data, label):
-            return paddle.nn.functional.softmax_with_cross_entropy(
-                input_data,
-                label,
-                soft_label=self.soft_label,
-                ignore_index=self.ignore_index,
-                return_softmax=self.return_softmax,
-                axis=self.axis,
-            )
-
-    input_data = paddle.rand([5, 3], dtype="float32")
-    label = paddle.randint(0, 2, [5, 1])
-    verify_model(SoftmaxWithCrossEntropy(), input_data=[input_data, label])
-    verify_model(SoftmaxWithCrossEntropy(return_softmax=True), input_data=[input_data, label])
-    verify_model(
-        SoftmaxWithCrossEntropy(return_softmax=True, ignore_index=1), input_data=[input_data, label]
-    )
-    input_data = paddle.rand([5, 4, 3], dtype="float32")
-    label = paddle.randint(0, 2, [5, 1, 3])
-    verify_model(SoftmaxWithCrossEntropy(axis=1), input_data=[input_data, label])
-    label = paddle.randint(0, 2, [5, 4, 3]).astype("float32")
-    verify_model(SoftmaxWithCrossEntropy(soft_label=True), input_data=[input_data, label])
-    verify_model(SoftmaxWithCrossEntropy(soft_label=True, axis=0), input_data=[input_data, label])
-
-
-@tvm.testing.uses_gpu
-def test_forward_pool3d():
-    class Pool3D1(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.avg_pool3d(inputs, kernel_size=2, stride=2, padding=0)
-
-    class Pool3D2(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.adaptive_avg_pool3d(inputs, output_size=[3, 3, 3])
-
-    class Pool3D3(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.avg_pool3d(
-                inputs,
-                kernel_size=3,
-                stride=1,
-                padding=[1, 1, 1],
-                exclusive=False,
-                divisor_override=2.5,
-            )
-
-    class Pool3D4(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.avg_pool3d(
-                inputs,
-                kernel_size=2,
-                stride=1,
-                padding=[[0, 0], [0, 0], [1, 1], [1, 1], [1, 1]],
-                ceil_mode=True,
-                data_format="NCDHW",
-            )
-
-    class Pool3D5(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs):
-            return nn.functional.avg_pool3d(
-                inputs,
-                kernel_size=2,
-                stride=1,
-                padding=[[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]],
-                ceil_mode=True,
-                data_format="NDHWC",
-            )
-
-    input_shapes = [[1, 2, 2, 8, 8], [1, 2, 3, 10, 10]]  # [N, C, D, H, W]
-    for input_shape in input_shapes:
-        input_data = paddle.uniform(shape=input_shape, dtype="float32", min=-1, max=1)
-        verify_model(Pool3D1(), input_data=input_data)
-        verify_model(Pool3D2(), input_data=input_data)
-        verify_model(Pool3D3(), input_data=input_data)
-        verify_model(Pool3D4(), input_data=input_data)
-        verify_model(Pool3D5(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_set_value():
-    class SetValue(nn.Layer):
-        @paddle.jit.to_static
-        def forward(self, inputs, update_input):
-            x = inputs + 1
-            x[3:] = 3
-            x[1:] = 3.0
-            x[2:] = update_input
-            x[0] = 1
-            x[-3:-2] = 1
-            x[0][0] = 5
-            return x
-
-    input_shapes = [[5, 2], [10, 3], [10, 3, 3]]
-    for input_shape in input_shapes:
-        input_data = paddle.uniform(shape=input_shape, dtype="float32", min=-1, max=1)
-        update_shape = input_shape.copy()
-        update_shape[0] = input_shape[0] - 2
-        update_input = paddle.uniform(shape=update_shape, dtype="float32", min=-1, max=1)
-        verify_model(SetValue(), input_data=[input_data, update_input])
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/pytorch/qnn_test.py b/tests/python/frontend/pytorch/qnn_test.py
deleted file mode 100644
index 1cc1a46cea6b..000000000000
--- a/tests/python/frontend/pytorch/qnn_test.py
+++ /dev/null
@@ -1,803 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-""" Tests on quantized torch model conversion """
-import os
-
-import numpy as np
-import torch
-import tvm
-import tvm.testing
-from PIL import Image
-from torch import nn
-from torch.quantization import (
-    DeQuantStub,
-    QuantStub,
-    QuantWrapper,
-    fuse_modules,
-    get_default_qat_qconfig,
-    prepare_qat,
-)
-from tvm import relay
-from tvm.contrib.download import download_testdata
-from tvm.relay.frontend.pytorch_utils import is_version_greater_than
-from tvm.relay.op.contrib.register import get_pattern_table, register_pattern_table
-
-
-def torch_version_check():
-    from packaging import version
-
-    return version.parse(torch.__version__) > version.parse("1.4.0")
-
-
-def get_tvm_runtime(script_module, input_name, ishape, keep_quantized_weight=False, target="llvm"):
-    input_shapes = [(input_name, ishape)]
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_pytorch(
-            script_module, input_shapes, keep_quantized_weight=keep_quantized_weight
-        )
-    with tvm.testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_pytorch(
-            script_module, input_shapes, keep_quantized_weight=keep_quantized_weight
-        )
-    tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-
-    if keep_quantized_weight:
-        for p in params.values():
-            assert p.dtype in ["int8", "int32"]
-
-    with tvm.transform.PassContext(opt_level=3):
-        # test on only cpu for now, torch cannot run quant models on cuda
-        # also not to make CI too slow
-        lib = relay.build(mod, target=target, params=params)
-
-    runtime = tvm.contrib.graph_executor.GraphModule(lib["default"](tvm.device(target, 0)))
-    return runtime
-
-
-def get_qconfig(per_channel):
-    from torch.quantization.observer import (
-        MovingAverageMinMaxObserver,
-        default_weight_observer,
-    )
-
-    if per_channel:
-        return torch.quantization.get_default_qconfig("fbgemm")
-    else:
-        act = MovingAverageMinMaxObserver.with_args(reduce_range=False)
-        return torch.quantization.QConfig(activation=act, weight=default_weight_observer)
-
-
-def quantize_model(model, inp, per_channel=False):
-    model.fuse_model()
-    model.qconfig = get_qconfig(per_channel)
-    torch.quantization.prepare(model, inplace=True)
-    model(inp)
-    torch.quantization.convert(model, inplace=True)
-
-
-class ConvBn(nn.Module):
-    def __init__(self, with_relu=False):
-        super().__init__()
-        layers = [nn.Conv2d(3, 32, 3, bias=True), nn.BatchNorm2d(32)]
-        if with_relu:
-            layers.append(nn.ReLU())
-        self.conv = nn.Sequential(*layers)
-        self.quant_wrap = QuantWrapper(self.conv)
-        self.with_relu = with_relu
-
-    def forward(self, x):
-        return self.quant_wrap(x)
-
-    def fuse_model(self):
-        indices = ["0", "1"]
-        if self.with_relu:
-            indices.append("2")
-        fuse_modules(self.conv, indices, inplace=True)
-
-
-class ConvTranspose(nn.Module):
-    def __init__(self):
-        super().__init__()
-        layers = [nn.ConvTranspose2d(3, 32, 3, bias=True)]
-        self.conv = nn.Sequential(*layers)
-        self.quant_wrap = QuantWrapper(self.conv)
-
-    def forward(self, x):
-        return self.quant_wrap(x)
-
-    def fuse_model(self):
-        pass
-
-
-class Linear(nn.Module):
-    def __init__(self, with_relu=False):
-        super().__init__()
-        layers = [nn.Linear(16, 32)]
-        if with_relu:
-            layers.append(nn.ReLU())
-        self.fc = nn.Sequential(*layers)
-        self.quant_wrap = QuantWrapper(self.fc)
-        self.with_relu = with_relu
-
-    def forward(self, x):
-        return self.quant_wrap(x)
-
-    def fuse_model(self):
-        if self.with_relu:
-            fuse_modules(self.fc, ["0", "1"], inplace=True)
-
-
-class ReLU(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.relu = QuantWrapper(nn.ReLU())
-
-    def forward(self, x):
-        return self.relu(x)
-
-    def fuse_model(self):
-        pass
-
-
-class LeakyReLU(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.leaky_relu = QuantWrapper(nn.LeakyReLU())
-
-    def forward(self, x):
-        return self.leaky_relu(x)
-
-    def fuse_model(self):
-        pass
-
-
-# Mobilenet V3 related modules
-class Hsigmoid(nn.Module):
-    def __init__(self, add_stub=False):
-        super().__init__()
-        self.quant = QuantStub()
-        self.dequant = DeQuantStub()
-        self.add_stub = add_stub
-        self.hsigmoid = nn.Hardsigmoid()
-
-    def forward(self, x):
-        if self.add_stub:
-            x = self.quant(x)
-        x = self.hsigmoid(x)
-        if self.add_stub:
-            x = self.dequant(x)
-        return x
-
-    def fuse_model(self):
-        pass
-
-
-class Hswish(nn.Module):
-    def __init__(self, add_stub=False):
-        super().__init__()
-        self.hswish = QuantWrapper(nn.Hardswish())
-
-    def forward(self, x):
-        return self.hswish(x)
-
-    def fuse_model(self):
-        pass
-
-
-class SqueezeExcite(nn.Module):
-    def __init__(self, channel, reduction=4, add_stub=False):
-        super(SqueezeExcite, self).__init__()
-        self.avg_pool = nn.AdaptiveAvgPool2d(1)
-        self.fc = nn.Sequential(
-            nn.Linear(channel, channel // reduction, bias=False),
-            nn.ReLU(inplace=True),
-            nn.Linear(channel // reduction, channel, bias=False),
-            Hsigmoid(add_stub=False),
-        )
-        self.fmul = nn.quantized.FloatFunctional()
-        self.quant = QuantStub()
-        self.dequant = DeQuantStub()
-        self.add_stub = add_stub
-
-    def forward(self, x):
-        b, c, _, _ = x.size()
-        if self.add_stub:
-            x = self.quant(x)
-        y = self.avg_pool(x).view(b, c)
-        y = self.fc(y).view(b, c, 1, 1)
-        out = self.fmul.mul(x, y.expand_as(x))
-        if self.add_stub:
-            return self.dequant(out)
-        else:
-            return out
-
-    def fuse_model(self):
-        fuse_modules(self.fc, ["0", "1"], inplace=True)
-
-
-# test on quantized::mul_scalar with negative scale
-class MulScalarNegative(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.float_op = nn.quantized.FloatFunctional()
-        self.quant = QuantStub()
-        self.dequant = DeQuantStub()
-
-    def forward(self, x):
-        x = self.quant(x)
-        mul = self.float_op.mul_scalar(x, -0.3)
-        return self.dequant(mul)
-
-    def fuse_model(self):
-        pass
-
-
-class UpsamplingBilinear(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.quant = QuantStub()
-        self.dequant = DeQuantStub()
-
-    def forward(self, x):
-        x = self.quant(x)
-        upsample = nn.functional.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True)
-        return self.dequant(upsample)
-
-    def fuse_model(self):
-        pass
-
-
-class AvgPool2d(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.pool = QuantWrapper(nn.AvgPool2d(kernel_size=2))
-
-    def forward(self, x):
-        return self.pool(x)
-
-    def fuse_model(self):
-        pass
-
-
-class AdaptiveAvgPool2d(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.pool = QuantWrapper(nn.AdaptiveAvgPool2d((1, 1)))
-
-    def forward(self, x):
-        return self.pool(x)
-
-    def fuse_model(self):
-        pass
-
-
-def test_quantized_modules():
-    imagenet_ishape = (1, 3, 224, 224)
-
-    qmodules = [
-        ("relu", imagenet_ishape, ReLU(), False),
-        ("upsample bilinear", (1, 3, 64, 64), UpsamplingBilinear(), False),
-        ("avgpool", imagenet_ishape, AvgPool2d(), False),
-    ]
-
-    for per_channel in [False, True]:
-        if per_channel:
-            postfix = ", per_channel"
-        else:
-            postfix = ""
-
-        qmodules += [
-            ("conv_bn" + postfix, imagenet_ishape, ConvBn(), per_channel),
-            ("conv_bn_relu" + postfix, imagenet_ishape, ConvBn(with_relu=True), per_channel),
-            ("linear" + postfix, (16, 16), Linear(), per_channel),
-            ("linear_relu" + postfix, (16, 16), Linear(with_relu=True), per_channel),
-            ("conv_transpose", imagenet_ishape, ConvTranspose(), False),
-            ("hsigmoid", imagenet_ishape, Hsigmoid(add_stub=True), False),
-            ("hswish", imagenet_ishape, Hswish(), False),
-            ("semodule", (1, 16, 64, 64), SqueezeExcite(16, add_stub=True), False),
-            ("semodule, per_channel", (1, 16, 64, 64), SqueezeExcite(16, add_stub=True), True),
-            ("mul_scalar negative", imagenet_ishape, MulScalarNegative(), False),
-            ("leaky_relu", imagenet_ishape, LeakyReLU(), False),
-        ]
-
-    for (module_name, ishape, raw_module, per_channel) in qmodules:
-        raw_module.eval()
-        inp = torch.rand(ishape)
-
-        # quantized conv_transpose2d is supported only with qnnpack engine before torch v1.8.0.
-        if module_name == "conv_transpose" and not is_version_greater_than("1.7.1"):
-            prev_engine = torch.backends.quantized.engine
-            torch.backends.quantized.engine = "qnnpack"
-            quantize_model(raw_module, inp, per_channel=per_channel)
-            torch.backends.quantized.engine = prev_engine
-        else:
-            quantize_model(raw_module, inp, per_channel=per_channel)
-
-        script_module = torch.jit.trace(raw_module, inp).eval()
-
-        with torch.no_grad():
-            pt_result = script_module(inp.clone()).numpy()
-
-        input_name = "input"
-        runtime = get_tvm_runtime(script_module, input_name, ishape)
-        runtime.set_input(input_name, inp.numpy().copy())
-        runtime.run()
-        tvm_result = runtime.get_output(0).numpy()
-
-        max_abs_diff = np.max(np.abs(tvm_result - pt_result))
-        mean_abs_diff = np.mean(np.abs(tvm_result - pt_result))
-        num_identical = np.sum(tvm_result == pt_result)
-        match_ratio = num_identical / float(np.prod(tvm_result.shape))
-
-        print(module_name, max_abs_diff, mean_abs_diff, match_ratio)
-
-        if "linear" in module_name and tvm.get_global_func("tvm.contrib.cublas.matmul", True):
-            runtime = get_tvm_runtime(script_module, input_name, ishape, target="cuda -libs=cublas")
-            runtime.set_input(input_name, inp.numpy().copy())
-            runtime.run()
-            cublas_result = runtime.get_output(0).numpy()
-            # It is generally safe to enable this assertion, but disabled for CI
-            # tvm.testing.assert_allclose(cublas_result, pt_result, atol=1e-5, rtol=1e-5)
-            print(np.max(np.abs(cublas_result - pt_result)))
-
-        # sample outputs
-        """
-        relu 0.0039215684 2.6052087e-08 0.9999933567176871
-        leaky_relu 0.0 0.0 1.0
-        upsample bilinear 0.0 0.0 1.0
-        conv_bn 0.22062653 0.011478779 0.6909348115006899
-        conv_bn_relu 0.3700896 0.010921672 0.7489366477964451
-        linear 0.15987062 0.009231662 0.794921875
-        linear_relu 0.14180502 0.0053220326 0.8828125
-        conv_transpose 0.0033792555 4.4658788e-07 0.9998678439971806
-        conv_bn, per_channel 0.01654929 2.9486866e-06 0.9998218235127019
-        conv_bn_relu, per_channel 0.009089053 1.4926576e-06 0.9998357732732732
-        linear, per_channel 0.0 0.0 1.0
-        linear_relu, per_channel 0.0 0.0 1.0
-        hsigmoid 0.002614379 0.00020525524 0.9214896896258503
-        hswish 0.0026143193 1.7367661e-08 0.9999933567176871
-        hswish, per_channel 0.0 0.0 1.0
-        semodule, per_channel 0.0039885044 0.0008620687 0.7838592529296875
-        mul_scalar negative 0.0011764616 7.815566e-09 0.9999933567176871
-        """
-
-        # we cannot make any guarantee on how close the raw output is to torch
-        # tvm.testing.assert_allclose(tvm_result, pt_result, rtol=1e-1, atol=1e-1)
-
-
-def test_quantized_imagenet():
-    def get_transform():
-        import torchvision.transforms as transforms
-
-        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-        return transforms.Compose(
-            [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize]
-        )
-
-    def get_real_image(im_height, im_width):
-        repo_base = "https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/"
-        img_name = "elephant-299.jpg"
-        image_url = os.path.join(repo_base, img_name)
-        img_path = download_testdata(image_url, img_name, module="data")
-        return Image.open(img_path).resize((im_height, im_width))
-
-    def get_imagenet_input():
-        im = get_real_image(224, 224)
-        preprocess = get_transform()
-        pt_tensor = preprocess(im)
-        return np.expand_dims(pt_tensor.numpy(), 0)
-
-    from torchvision.models.quantization import googlenet as qgooglenet
-    from torchvision.models.quantization import inception as qinception
-    from torchvision.models.quantization import mobilenet as qmobilenet
-    from torchvision.models.quantization import (
-        mobilenet_v3_large as qmobilenet_v3_large,
-    )
-    from torchvision.models.quantization import resnet as qresnet
-
-    per_channel = True
-    qmodels = [
-        ("resnet18", qresnet.resnet18(pretrained=True), per_channel),
-        ("mobilenet_v2", qmobilenet.mobilenet_v2(pretrained=True), per_channel),
-        ("inception_v3", qinception.inception_v3(pretrained=True), per_channel),
-        # tracing quantized googlenet broken as of v1.6
-        # ("googlenet", qgooglenet(pretrained=True), per_channel),
-        # As of v1.10, quantized mobilenet v3 has a weird segfault issue
-        # during make_conv_packed_param
-        # See https://ci.tlcpack.ai/blue/organizations/jenkins/tvm/detail/ci-docker-staging/192
-        # ("mobilenet_v3_large", qmobilenet_v3_large(pretrained=True, quantize=True).eval(), True)
-    ]
-
-    results = []
-
-    for (model_name, raw_model, per_channel) in qmodels:
-        raw_model.eval()
-
-        if per_channel:
-            model_name += ", per channel quantization"
-        else:
-            model_name += ", per tensor quantization"
-
-        inp = get_imagenet_input()
-        pt_inp = torch.from_numpy(inp)
-
-        if "mobilenet_v3_large" not in model_name:
-            # mv3 was qat-ed, quantize=True option above makes it already quantized
-            quantize_model(raw_model, pt_inp, per_channel=per_channel)
-
-        script_module = torch.jit.trace(raw_model, pt_inp).eval()
-
-        with torch.no_grad():
-            pt_result = script_module(pt_inp).numpy()
-
-        input_name = "image"
-        runtime = get_tvm_runtime(script_module, input_name, (1, 3, 224, 224))
-        runtime.set_input(input_name, inp)
-        runtime.run()
-
-        tvm_result = runtime.get_output(0).numpy()
-
-        results.append((model_name, pt_result[0], tvm_result[0]))
-
-    for (model_name, pt_result, tvm_result) in results:
-        max_abs_diff = np.max(np.abs(tvm_result - pt_result))
-        mean_abs_diff = np.mean(np.abs(tvm_result - pt_result))
-        num_identical = np.sum(tvm_result == pt_result)
-        pt_top3_labels = np.argsort(pt_result)[::-1][:3]
-        tvm_top3_labels = np.argsort(tvm_result)[::-1][:3]
-
-        print("\nModel name: %s" % model_name)
-        print("PyTorch top3 label:", pt_top3_labels)
-        print("TVM top3 label:", tvm_top3_labels)
-        print("max abs diff:", max_abs_diff)
-        print("mean abs_diff:", mean_abs_diff)
-        print("%d in 1000 raw outputs identical." % num_identical)
-
-        assert set(pt_top3_labels) == set(tvm_top3_labels)
-
-        # sample outputs
-        """
-        Model name: resnet18, per tensor quantization
-        PyTorch top3 label: [386 101 385]
-        TVM top3 label: [386 101 385]
-        max abs diff: 0.65681696
-        mean abs_diff: 0.14055882
-        236 in 1000 raw outputs identical.
-
-        Model name: mobilenet_v2, per tensor quantization
-        PyTorch top3 label: [101 386 385]
-        TVM top3 label: [101 386 385]
-        max abs diff: 2.1262953
-        mean abs_diff: 0.41025686
-        101 in 1000 raw outputs identical.
-
-        Model name: inception_v3, per tensor quantization
-        PyTorch top3 label: [101 386 385]
-        TVM top3 label: [101 386 385]
-        max abs diff: 0.9994669
-        mean abs_diff: 0.098697364
-        272 in 1000 raw outputs identical.
-
-        Model name: googlenet, per tensor quantization
-        PyTorch top3 label: [101 386 385]
-        TVM top3 label: [101 386 385]
-        max abs diff: 0.28248847
-        mean abs_diff: 0.0634469
-        274 in 1000 raw outputs identical.
-
-        Model name: resnet18, per channel quantization
-        PyTorch top3 label: [101 386 385]
-        TVM top3 label: [101 386 385]
-        max abs diff: 0.65908074
-        mean abs_diff: 0.1274223
-        469 in 1000 raw outputs identical.
-
-        Model name: mobilenet_v2, per channel quantization
-        PyTorch top3 label: [101 386 385]
-        TVM top3 label: [101 386 385]
-        max abs diff: 0.71120834
-        mean abs_diff: 0.15883648
-        423 in 1000 raw outputs identical.
-
-        Model name: inception_v3, per channel quantization
-        PyTorch top3 label: [386 101 385]
-        TVM top3 label: [386 101 385]
-        max abs diff: 1.3372154
-        mean abs_diff: 0.1225224
-        401 in 1000 raw outputs identical.
-
-        Model name: googlenet, per channel quantization
-        PyTorch top3 label: [101 386 385]
-        TVM top3 label: [101 386 385]
-        max abs diff: 0.34015465
-        mean abs_diff: 0.054197952
-        558 in 1000 raw outputs identical.
-        """
-
-
-def test_serialized_modules():
-    ishape = (1, 16, 64, 64)
-    raw_module = AdaptiveAvgPool2d().eval()
-    inp = torch.rand(ishape)
-
-    quantize_model(raw_module, inp)
-    script_module = torch.jit.trace(raw_module, inp).eval()
-
-    fname = "tmp.pt"
-    torch.jit.save(script_module, fname)
-    loaded = torch.jit.load(fname)
-    os.remove(fname)
-
-    with torch.no_grad():
-        pt_result = loaded(inp.clone()).numpy()
-
-    input_name = "input"
-    runtime = get_tvm_runtime(loaded, input_name, ishape)
-    runtime.set_input(input_name, inp.numpy().copy())
-    runtime.run()
-    tvm_result = runtime.get_output(0).numpy()
-
-    # with 0.5ish results, 1e-2 is relative accuracy close to 2**-6.
-    # for simple layers like here this should be achievable
-    # with 8 bit quantization
-    # we only require 90% match just to be sure
-    num_identical = np.sum(np.abs(tvm_result - pt_result) < 1e-2)
-    match_ratio = num_identical / float(np.prod(tvm_result.shape))
-    assert match_ratio > 0.90
-
-
-def test_quantize_dynamic():
-    # A wrapper is required for quantize_dynamic to work correctly
-    class LinearWrapper(nn.Module):
-        def __init__(self, in_dim, hidden_dim):
-            super().__init__()
-            self.linear = nn.Linear(in_dim, hidden_dim)
-
-        def forward(self, inp):
-            return self.linear(inp)
-
-    torch.manual_seed(0)
-    mod = LinearWrapper(16, 32)
-
-    for qconfig in [
-        torch.quantization.per_channel_dynamic_qconfig,
-        torch.quantization.default_dynamic_qconfig,
-    ]:
-        for ishape in [(16, 16), (10, 16, 16)]:
-            qspec = {nn.Linear: qconfig}
-            qmod = torch.quantization.quantize_dynamic(mod, qconfig_spec=qspec, dtype=torch.qint8)
-
-            inp = torch.randn(*ishape)
-            script_module = torch.jit.trace(qmod, inp).eval()
-
-            with torch.no_grad():
-                pt_result = script_module(inp.clone()).numpy()
-
-            input_name = "input"
-            runtime = get_tvm_runtime(script_module, "input", inp.shape)
-            runtime.set_input(input_name, inp.numpy().copy())
-            runtime.run()
-            tvm_result = runtime.get_output(0).numpy()
-
-            # Only compare with the PyTorch result for version v1.6 or newer
-            # Have seen a strange accuracy problem from PyTorch 1.4 and 1.5
-            # Even with the manual random seed set, the same PyTorch
-            # version can outputs slightly different results depending on an environment.
-            # Outputs from v1.6 seem reliable. TVM's outputs are always the same
-            if is_version_greater_than("1.5.1"):
-                tvm.testing.assert_allclose(tvm_result, pt_result, rtol=1e-4, atol=1e-4)
-
-
-def make_qnn_add_pattern():
-    from tvm.relay.dataflow_pattern import is_op, wildcard
-
-    lhs = wildcard()
-    rhs = wildcard()
-    lhs_scale = wildcard()
-    lhs_zero_point = wildcard()
-    rhs_scale = wildcard()
-    rhs_zero_point = wildcard()
-    output_scale = wildcard()
-    output_zero_point = wildcard()
-    qadd = is_op("qnn.add")(
-        lhs,
-        rhs,
-        lhs_scale,
-        lhs_zero_point,
-        rhs_scale,
-        rhs_zero_point,
-        output_scale,
-        output_zero_point,
-    )
-    return qadd.optional(is_op("clip"))
-
-
-@register_pattern_table("test_table")
-def pattern_table():
-    return [
-        ("qnn_add", make_qnn_add_pattern()),
-    ]
-
-
-def run_qnn_mergecomposite(script_module, input_name, ishape):
-    input_shapes = [(input_name, ishape)]
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_pytorch(script_module, input_shapes)
-    with tvm.testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_pytorch(script_module, input_shapes)
-    tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-    pattern_table = get_pattern_table("test_table")
-    with tvm.transform.PassContext(opt_level=3):
-        pass_list = [
-            tvm.relay.transform.SimplifyInference(),
-            tvm.relay.transform.MergeComposite(pattern_table),
-        ]
-        composite_partition = tvm.transform.Sequential(pass_list)
-        partitioned = composite_partition(mod)
-
-
-def test_qnn_mergecomposite():
-    from torchvision.models.quantization import resnet as qresnet
-
-    model = qresnet.resnet18(pretrained=True)
-    model.eval()
-
-    inp = torch.zeros((1, 3, 224, 224))
-    model.fuse_model()
-    model.qconfig = torch.quantization.get_default_qconfig("fbgemm")
-    torch.quantization.prepare(model, inplace=True)
-    model(inp)
-    torch.quantization.convert(model, inplace=True)
-    script_module = torch.jit.trace(model, inp).eval()
-
-    input_name = "image"
-    run_qnn_mergecomposite(script_module, input_name, inp.shape)
-
-
-def test_keep_quantized_weight():
-    qmodules = []
-
-    for per_channel in [False, True]:
-        qmodules += [
-            ((1, 3, 224, 224), ConvBn(), per_channel),
-            ((16, 16), Linear(), per_channel),
-        ]
-
-    for (ishape, raw_module, per_channel) in qmodules:
-        raw_module.eval()
-        inp = torch.rand(ishape)
-
-        quantize_model(raw_module, inp, per_channel=per_channel)
-        script_module = torch.jit.trace(raw_module, inp).eval()
-
-        input_name = "input"
-
-        runtime = get_tvm_runtime(script_module, input_name, ishape, keep_quantized_weight=False)
-        runtime.set_input(input_name, inp.numpy().copy())
-        runtime.run()
-        tvm_result = runtime.get_output(0).numpy()
-
-        runtime_int8_weight = get_tvm_runtime(
-            script_module, input_name, ishape, keep_quantized_weight=True
-        )
-        runtime_int8_weight.set_input(input_name, inp.numpy().copy())
-        runtime_int8_weight.run()
-        tvm_result_int8_weight = runtime_int8_weight.get_output(0).numpy()
-
-        tvm.testing.assert_allclose(tvm_result, tvm_result_int8_weight)
-
-
-def test_tuple_lowered():
-    # See the following discuss thread for details
-    # https://discuss.tvm.apache.org/t/bug-frontend-pytorch-relay-ir-is-inconsistent-with-that-of-the-original-model/12010
-
-    class ConvBnRelu(nn.Module):
-        def __init__(self, inp, oup, kernel_size=3, stride=1, padding=1, bias=True, groups=1):
-            super(ConvBnRelu, self).__init__()
-            if groups > 1:
-                self.conv = nn.Conv2d(
-                    inp, inp, kernel_size, stride, padding, bias=bias, groups=groups
-                )
-                self.bn = nn.BatchNorm2d(inp)
-            else:
-                self.conv = nn.Conv2d(
-                    inp, oup, kernel_size, stride, padding, bias=bias, groups=groups
-                )
-                self.bn = nn.BatchNorm2d(oup)
-            self.relu = nn.ReLU(inplace=True)
-
-        def forward(self, inputs):
-            x = self.conv(inputs)
-            x = self.bn(x)
-            x = self.relu(x)
-            return x
-
-    def conv_bn(inp, oup, stride=1, width_multiplier=1):
-        return ConvBnRelu(inp, oup, kernel_size=3, stride=stride, padding=1, bias=False)
-
-    def conv_dw(inp, oup, stride, width_multiplier=1, padding=1):
-        dw_block = nn.Sequential()
-        depth_wise = ConvBnRelu(
-            inp, oup, kernel_size=3, stride=stride, padding=padding, bias=False, groups=inp
-        )
-        point_wise = ConvBnRelu(inp, oup, kernel_size=1, stride=1, padding=0, bias=False)
-
-        dw_block.add_module("depth_wise", depth_wise)
-        dw_block.add_module("point_wise", point_wise)
-
-        return dw_block
-
-    class Backbone(nn.Module):
-        def __init__(self, width_multiplier=1):
-            super(Backbone, self).__init__()
-            self.width_multiplier = width_multiplier
-            self.conv1 = conv_bn(3, 16, 2, self.width_multiplier)
-            self.conv2 = conv_dw(16, 32, 1, self.width_multiplier)
-
-        def forward(self, inputs):
-            x1 = self.conv1(inputs)
-            x2 = self.conv2(x1)
-            return [x1, x2]
-
-    class QuantizableBackbone(nn.Module):
-        def __init__(self, inputsize=(128, 128)):
-            super(QuantizableBackbone, self).__init__()
-            self.quant = QuantStub()
-            self.dequant = DeQuantStub()
-            self.backbone = Backbone()
-
-        def fuse_model(self):
-            fuse_modules_qat = getattr(torch.ao.quantization, "fuse_modules_qat", fuse_modules)
-            for idx, m in enumerate(self.modules()):
-                if type(m) == ConvBnRelu:
-                    fuse_modules_qat(m, ["conv", "bn", "relu"], inplace=True)
-
-        def forward(self, input):
-            input = self.quant(input)
-            y0, y1 = self.backbone(input)
-            y0 = self.dequant(y0)
-            y1 = self.dequant(y1)
-            return y0, y1
-
-    fp32_input = torch.randn(1, 3, 128, 128)
-    model = QuantizableBackbone()
-    model.train()
-    model.fuse_model()
-    model.qconfig = get_default_qat_qconfig("qnnpack")
-
-    prepare_qat(model, inplace=True)
-
-    model.eval()
-    model(fp32_input)
-
-    model_int8 = torch.quantization.convert(model, inplace=True)
-    script_module = torch.jit.trace(model_int8, fp32_input).eval()
-
-    input_infos = [("input", (fp32_input.shape, "float32"))]
-    with tvm.testing.disable_span_filling():
-        mod, _ = relay.frontend.from_pytorch(script_module, input_infos)
-    with tvm.testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_pytorch(script_module, input_infos)
-    tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-    output = mod["main"].body
-
-    assert isinstance(output, relay.Tuple) and len(output) == 2
-    dq1, dq2 = output
-    assert dq1.op.name == "qnn.dequantize" and dq2.op.name == "qnn.dequantize"
-    scale1 = dq1.args[1].data.numpy().item()
-    scale2 = dq2.args[1].data.numpy().item()
-    assert scale1 != scale2
diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py
deleted file mode 100644
index 9f8fac93061c..000000000000
--- a/tests/python/frontend/pytorch/test_forward.py
+++ /dev/null
@@ -1,5884 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument, missing-function-docstring
-"""Unit tests for various models and operators"""
-import os
-import platform
-import sys
-
-from packaging import version as package_version
-
-import pytest
-import numpy as np
-
-import torch
-from torch.nn import Module
-from torch.nn import functional as F
-import torchvision
-
-import tvm
-import tvm.testing
-from tvm import relay
-from tvm.contrib import graph_executor
-from tvm.contrib.nvcc import have_fp16
-from tvm.contrib import cudnn, utils
-from relay.utils.tag_span import _create_span, _set_span, _verify_structural_equal_with_span
-
-sys.setrecursionlimit(10000)
-if torch.cuda.is_available():
-    torch.backends.cuda.matmul.allow_tf32 = False
-    torch.backends.cudnn.allow_tf32 = False
-
-
-def list_ops(expr):
-    """list_ops"""
-
-    class OpLister(tvm.relay.ExprVisitor):
-        """OpLister inherits from ExprVisitor"""
-
-        def visit_op(self, op):
-            if op not in self.node_set:
-                self.node_list.append(op)
-            return super().visit_op(op)
-
-        def list_nodes(self, expr):
-            self.node_set = {}
-            self.node_list = []
-            self.visit(expr)
-            return self.node_list
-
-    return OpLister().list_nodes(expr)
-
-
-def assert_shapes_match(tru, est):
-    """Verfiy whether the shapes are equal"""
-    if tru.shape != est.shape:
-        msg = "Output shapes {} and {} don't match"
-        raise AssertionError(msg.format(tru.shape, est.shape))
-
-
-def load_torchvision(model_name):
-    """Given a model name, returns a Torchvision model in eval mode as well
-    as an example input."""
-    with torch.no_grad():
-        if model_name.startswith("inception"):
-            height = width = 299
-            mean = [0.5, 0.5, 0.5]
-            std = [0.5, 0.5, 0.5]
-        else:
-            height = width = 224
-            mean = [0.485, 0.456, 0.406]
-            std = [0.229, 0.224, 0.225]
-        input_shape = [1, 3, height, width]
-        input_data = torch.randn(input_shape).float()
-        for channel in range(3):
-            input_data[:, channel] -= mean[channel]
-            input_data[:, channel] /= std[channel]
-
-        if model_name.startswith("googlenet"):
-            model = getattr(torchvision.models, model_name)(pretrained=True, aux_logits=True)
-        else:
-            model = getattr(torchvision.models, model_name)(pretrained=True)
-        model = model.float().eval()
-        return model, [input_data]
-
-
-def load_pretrainedmodels(model_name):
-    """Given a model name, returns a pretrainedmodels.pytorch model in eval
-    mode as well as an example input."""
-    # pylint: disable=import-outside-toplevel
-    import pretrainedmodels  # https://github.com/Cadene/pretrained-models.pytorch
-
-    model = getattr(pretrainedmodels, model_name)().float().eval()
-    input_shape = [1, *model.input_size]
-    input_data = torch.rand(input_shape).float() * 256
-    for channel in range(3):
-        input_data[:, channel] -= model.mean[channel]
-        input_data[:, channel] /= model.std[channel]
-    return model, [input_data]
-
-
-def load_model(model_name):
-    """Given a model name, returns a model as well as an example input."""
-    if hasattr(torchvision.models, model_name):
-        return load_torchvision(model_name)
-    # pylint: disable=import-outside-toplevel
-    try:
-        import pretrainedmodels
-
-        if hasattr(pretrainedmodels, model_name):
-            return load_pretrainedmodels(model_name)
-    except ModuleNotFoundError as e:
-        raise ModuleNotFoundError("Please install pretrainedmodels.pytorch") from e
-    raise RuntimeError("Model not supported")
-
-
-def verify_model(
-    model_name,
-    input_data=None,
-    custom_convert_map=None,
-    rtol=1e-5,
-    atol=1e-5,
-    expected_ops=None,
-    kind="graph",
-    check_correctness=True,
-    cpu_only=False,
-    validate_structural_equal=True,
-):
-    """Assert that the output of a compiled model matches with that of its
-    baseline."""
-    input_data = [] if input_data is None else input_data
-    custom_convert_map = custom_convert_map or {}
-    expected_ops = expected_ops or []
-    if isinstance(model_name, str):
-        baseline_model, baseline_input = load_model(model_name)
-    elif isinstance(input_data, list):
-        baseline_model = model_name
-        baseline_input = input_data
-    elif isinstance(input_data, torch.Tensor) or not input_data.shape:
-        baseline_model = model_name
-        baseline_input = [input_data]
-    else:
-        assert False, "Unexpected input format"
-    if torch.cuda.is_available():
-        if isinstance(baseline_model, torch.nn.Module):
-            baseline_model = baseline_model.cuda()
-        baseline_input = [inp.cuda() for inp in baseline_input]
-
-    with torch.no_grad():
-        baseline_outputs = baseline_model(*[input.clone() for input in baseline_input])
-
-    if isinstance(baseline_outputs, tuple):
-        baseline_outputs = tuple(out.cpu().numpy() for out in baseline_outputs)
-    else:
-        baseline_outputs = (baseline_outputs.cpu().numpy(),)
-
-    trace = torch.jit.trace(baseline_model, [input.clone() for input in baseline_input])
-    if isinstance(baseline_model, torch.nn.Module):
-        trace = trace.float().eval()
-
-        if torch.cuda.is_available():
-            trace = trace.cuda()
-        else:
-            trace = trace.cpu()
-
-    input_names = [f"input{idx}" for idx, _ in enumerate(baseline_input)]
-    input_shapes = list(zip(input_names, [inp.shape for inp in baseline_input]))
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_pytorch(trace, input_shapes, custom_convert_map)
-    if validate_structural_equal:
-        with tvm.testing.enable_span_filling():
-            mod_with_span, _ = relay.frontend.from_pytorch(trace, input_shapes, custom_convert_map)
-        tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-
-    for arg in mod["main"].params[: len(input_names)]:
-        assert arg.name_hint in input_names
-    compiled_input = dict(zip(input_names, [inp.clone().cpu().numpy() for inp in baseline_input]))
-
-    targets = ["llvm"]
-    if not cpu_only:
-        targets.append("cuda")
-
-    with tvm.transform.PassContext(opt_level=3):
-        for target in targets:
-            if not tvm.runtime.enabled(target):
-                continue
-            dev = tvm.device(target, 0)
-            exe = relay.create_executor(
-                kind, mod=mod, params=params, device=dev, target=target
-            ).evaluate()
-            result = exe(**compiled_input)
-            if not isinstance(result, list):
-                result = [result]
-
-            for i, baseline_output in enumerate(baseline_outputs):
-                output = result[i].numpy()
-
-                assert_shapes_match(baseline_output, output)
-                if check_correctness:
-                    tvm.testing.assert_allclose(baseline_output, output, rtol=rtol, atol=atol)
-
-    if expected_ops:
-
-        def visit(op):
-            if isinstance(op, tvm.ir.op.Op):
-                if op.name in expected_ops:
-                    expected_ops.remove(op.name)
-
-        tvm.relay.analysis.post_order_visit(mod["main"].body, visit)
-
-        if expected_ops:
-            msg = "TVM Relay do not contain expected ops {}"
-            raise AssertionError(msg.format(expected_ops))
-
-    del model_name
-    del baseline_model
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-
-
-def verify_model_with_input(
-    test_func,
-    input_data,
-    *,
-    input_dict=None,
-    custom_convert_map=None,
-    rtol=1e-5,
-    atol=1e-5,
-    assert_shape_only=False,
-    validate_structural_equal=True,
-):
-    """Generic function to generate and compare Pytorch and TVM output"""
-    input_dict = input_dict or {}
-    custom_convert_map = custom_convert_map or {}
-    baseline_outputs = test_func(*input_data)
-    trace = torch.jit.trace(test_func, [input.clone() for input in input_data])
-    input_names = [f"input{idx}" for idx, _ in enumerate(input_data)]
-    input_shapes = list(zip(input_names, [inp.shape for inp in input_data]))
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_pytorch(trace, input_shapes, custom_convert_map)
-    if validate_structural_equal:
-        with tvm.testing.enable_span_filling():
-            mod_with_span, _ = relay.frontend.from_pytorch(trace, input_shapes, custom_convert_map)
-        tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-
-    with tvm.transform.PassContext(opt_level=3):
-        for target in ["llvm", "cuda"]:
-            if not tvm.runtime.enabled(target):
-                continue
-            dev = tvm.device(target, 0)
-            lib = relay.build(mod, target=target, params=params)
-            relay_model = graph_executor.GraphModule(lib["default"](dev))
-            for name, value in input_dict.items():
-                relay_model.set_input(name, value)
-            relay_model.run()
-
-            compiled_output = relay_model.get_output(0).numpy()
-            assert_shapes_match(baseline_outputs, compiled_output)
-            if assert_shape_only is False:
-                tvm.testing.assert_allclose(baseline_outputs, compiled_output, rtol=rtol, atol=atol)
-
-
-def gen_ir_module(model, inputs, use_parser_friendly_name=False):
-    """Helper function to generate IRModule with meaningful source information"""
-
-    trace = torch.jit.trace(model, inputs)
-    input_names = ["input{}".format(idx) for idx, _ in enumerate(inputs)]
-    input_shapes = list(zip(input_names, [inp.shape for inp in inputs]))
-    mod, _ = relay.frontend.from_pytorch(
-        trace,
-        input_shapes,
-        use_parser_friendly_name=use_parser_friendly_name,
-    )
-    return mod
-
-
-# Single operator tests
-@tvm.testing.uses_gpu
-def test_forward_pixel_shuffle():
-    """test_forward_pixel_shuffle"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 144, 16, 16]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.PixelShuffle(2).float().eval(), input_data=input_data)
-    verify_model(torch.nn.PixelShuffle(3).float().eval(), input_data=input_data)
-    verify_model(torch.nn.PixelShuffle(4).float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_add():
-    """test_forward_add"""
-    torch.set_grad_enabled(False)
-    input_shape = [10]
-
-    class Add1(Module):
-        def forward(self, *args):
-            return args[0] + args[0]
-
-    class Add2(Module):
-        def forward(self, *args):
-            return args[0] + 1
-
-    class Add3(Module):
-        def forward(self, *args):
-            ones = torch.ones(input_shape, dtype=torch.float)
-            if torch.cuda.is_available():
-                ones = ones.cuda()
-            return args[0] + ones
-
-    class Add4(Module):
-        def forward(self, *args):
-            ones = torch.ones([], dtype=torch.float)
-            if torch.cuda.is_available():
-                ones = ones.cuda()
-            return args[0] + ones
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Add1().float().eval(), input_data=input_data)
-    verify_model(Add2().float().eval(), input_data=input_data)
-    verify_model(Add3().float().eval(), input_data=input_data)
-    verify_model(Add4().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_subtract():
-    """test_forward_subtract"""
-    torch.set_grad_enabled(False)
-    input_shape = [10]
-
-    class Subtract1(Module):
-        def forward(self, *args):
-            return args[0] - args[0]
-
-    class Subtract2(Module):
-        def forward(self, *args):
-            return args[0] - 1
-
-    class Subtract3(Module):
-        def forward(self, *args):
-            ones = torch.ones(input_shape)
-            if torch.cuda.is_available():
-                ones = ones.cuda()
-            return args[0] - ones
-
-    class Subtract4(Module):
-        def forward(self, *args):
-            ones = torch.ones([])
-            if torch.cuda.is_available():
-                ones = ones.cuda()
-            return args[0] - ones
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Subtract1().float().eval(), input_data=input_data)
-    verify_model(Subtract2().float().eval(), input_data=input_data)
-    verify_model(Subtract3().float().eval(), input_data=input_data)
-    verify_model(Subtract4().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_multiply():
-    """test_forward_multiply"""
-    torch.set_grad_enabled(False)
-    input_shape = [10]
-
-    class Multiply1(Module):
-        def forward(self, *args):
-            return args[0] * args[0]
-
-    class Multiply2(Module):
-        def forward(self, *args):
-            return args[0] * 1.0
-
-    class Multiply3(Module):
-        def forward(self, *args):
-            ones = torch.ones(input_shape)
-            if torch.cuda.is_available():
-                ones = ones.cuda()
-            return args[0] * ones
-
-    class Multiply4(Module):
-        def forward(self, *args):
-            ones = torch.ones([])
-            if torch.cuda.is_available():
-                ones = ones.cuda()
-            return args[0] * ones
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Multiply1().float().eval(), input_data=input_data)
-    verify_model(Multiply2().float().eval(), input_data=input_data)
-    verify_model(Multiply3().float().eval(), input_data=input_data)
-    verify_model(Multiply4().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_min_max():
-    """test_min_max"""
-
-    class Max(Module):
-        def forward(self, inp):
-            return torch.max(inp)
-
-    class Min(Module):
-        def forward(self, inp):
-            return torch.min(inp)
-
-    class Max2(Module):
-        def forward(self, inp):
-            out, _ = torch.max(inp, 1, keepdim=True)
-            return out
-
-    class Min2(Module):
-        def forward(self, inp):
-            out, _ = torch.min(inp, 0, keepdim=False)
-            return out
-
-    class Max3(Module):
-        def forward(self, lhs, rhs):
-            return torch.max(lhs, rhs)
-
-    class Min3(Module):
-        def forward(self, lhs, rhs):
-            return torch.min(lhs, rhs)
-
-    class Max4(Module):
-        def forward(self, inp):
-            out = torch.amax(inp, (1, 2), keepdim=True)
-            return out
-
-    class Min4(Module):
-        def forward(self, inp):
-            out = torch.amin(inp, (0, 3), keepdim=False)
-            return out
-
-    input_data = [torch.rand((10, 10, 10, 10)), torch.rand((10, 10, 10, 10))]
-
-    verify_model(Max(), input_data=input_data[0])
-    verify_model(Min(), input_data=input_data[0])
-    verify_model(Max2(), input_data=input_data[0])
-    verify_model(Min2(), input_data=input_data[0])
-    verify_model(Max3(), input_data=input_data)
-    verify_model(Min3(), input_data=input_data)
-    verify_model(Max4(), input_data=input_data[0])
-    verify_model(Min4(), input_data=input_data[0])
-
-
-@tvm.testing.uses_gpu
-def test_minimum_maximum():
-    """test_minimum_maximum"""
-
-    class Maximum(Module):
-        def forward(self, lhs, rhs):
-            return torch.maximum(lhs, rhs)
-
-    class Minimum(Module):
-        def forward(self, lhs, rhs):
-            return torch.minimum(lhs, rhs)
-
-    input_data = [torch.rand((10, 10, 10, 10)), torch.rand((10, 10, 10, 10))]
-
-    verify_model(Maximum(), input_data=input_data)
-    verify_model(Minimum(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_reciprocal():
-    """test_forward_reciprocal"""
-    torch.set_grad_enabled(False)
-    input_shape = [2, 1, 10, 1, 10]
-
-    class Reciprocal1(Module):
-        def forward(self, *args):
-            return args[0].reciprocal()
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Reciprocal1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_repeat():
-    """test_forward_repeat"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3]
-
-    class Repeat1(Module):
-        def forward(self, *args):
-            return args[0].repeat(1, 1)
-
-    class Repeat2(Module):
-        def forward(self, *args):
-            return args[0].repeat(4, 2)
-
-    class Repeat3(Module):
-        def forward(self, *args):
-            return args[0].repeat(4, 2, 1)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Repeat1().float().eval(), input_data=input_data)
-    verify_model(Repeat2().float().eval(), input_data=input_data)
-    verify_model(Repeat3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_repeat_interleave():
-    """test_forward_repeat_interleave"""
-    torch.set_grad_enabled(False)
-    input_shape = [2, 2, 3]
-
-    class RepeatInterleave1(Module):
-        def forward(self, *args):
-            return args[0].repeat_interleave(2)
-
-    class RepeatInterleave2(Module):
-        def forward(self, *args):
-            return args[0].repeat_interleave(3, dim=0)
-
-    class RepeatInterleave3(Module):
-        def forward(self, *args):
-            return args[0].repeat_interleave(2, dim=1)
-
-    class RepeatInterleave4(Module):
-        def forward(self, *args):
-            return args[0].repeat_interleave(4, dim=2)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(RepeatInterleave1().float().eval(), input_data=input_data)
-    verify_model(RepeatInterleave2().float().eval(), input_data=input_data)
-    verify_model(RepeatInterleave3().float().eval(), input_data=input_data)
-    verify_model(RepeatInterleave4().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_unsqueeze():
-    """test_forward_unsqueeze"""
-    torch.set_grad_enabled(False)
-    input_shape = [10, 10]
-
-    class Unsqueeze1(Module):
-        def forward(self, *args):
-            return args[0].unsqueeze(2)
-
-    class Unsqueeze2(Module):
-        def forward(self, *args):
-            _ = args[0].unsqueeze_(2)
-            # Check whether operations after inplace unsqueeze works as expected
-            y = args[0].squeeze(2)
-            return torch.add(y, y)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Unsqueeze1().float().eval(), input_data=input_data)
-    verify_model(Unsqueeze2().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_squeeze():
-    """test_forward_squeeze"""
-    torch.set_grad_enabled(False)
-    input_shape = [2, 1, 10, 1, 10]
-
-    class Squeeze1(Module):
-        def forward(self, *args):
-            return args[0].squeeze()
-
-    class Squeeze2(Module):
-        def forward(self, *args):
-            return args[0].squeeze(1)
-
-    class Squeeze3(Module):
-        def forward(self, *args):
-            return args[0].squeeze((1, 3))
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Squeeze1().float().eval(), input_data=input_data)
-    verify_model(Squeeze2().float().eval(), input_data=input_data)
-    if package_version.parse(torch.__version__) >= package_version.parse("2.0.0"):
-        verify_model(Squeeze3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_arange():
-    """test_forward_arange"""
-    torch.set_grad_enabled(False)
-
-    class Arange1(Module):
-        def forward(self, *args):
-            return torch.arange(5)
-
-    class Arange2(Module):
-        def forward(self, *args):
-            return torch.arange(2.5)
-
-    class Arange3(Module):
-        def forward(self, *args):
-            return torch.arange(1, 4)
-
-    class Arange4(Module):
-        def forward(self, *args):
-            return torch.arange(1, 2.5, 0.5)
-
-    class Arange5(Module):
-        def forward(self, *args):
-            return torch.arange(1, 2, 1, dtype=torch.int32)
-
-    class Arange6(Module):
-        def forward(self, *args):
-            return torch.arange(start=1, end=6, step=2)
-
-    class Arange7(Module):
-        def forward(self, *args):
-            return torch.arange(1, 4, dtype=torch.float32)
-
-    class Arange8(Module):
-        def forward(self, *args):
-            return torch.arange(1, 2, 1, dtype=torch.int16)
-
-    class Arange9(Module):
-        def forward(self, *args):
-            end = torch.add(torch.tensor(4), 1)
-            return torch.arange(end) + torch.ones((5,), dtype=torch.int64)
-
-    class Arange10(Module):
-        def forward(self, *args):
-            end = torch.add(torch.tensor(4.0), torch.tensor(1.0))
-            return torch.arange(end) + torch.ones((5,), dtype=torch.float)
-
-    class Arange11(Module):
-        def forward(self, *args):
-            start = torch.add(torch.tensor(1), 1)
-            end = torch.add(torch.tensor(4), 1)
-            step = torch.add(torch.tensor(2), 1)
-            out = torch.arange(start, end, step)
-            return out + torch.ones((3,), dtype=torch.int64)
-
-    class Arange12(Module):
-        def forward(self, *args):
-            start = torch.add(torch.tensor(1), 1)
-            end = torch.add(torch.tensor(4), 1)
-            step = torch.add(torch.tensor(2.5), torch.tensor(4.1))
-            out = torch.arange(start, end, step)
-            return out + torch.ones((3,), dtype=torch.float)
-
-    verify_model(Arange1().float().eval())
-    verify_model(Arange2().float().eval())
-    verify_model(Arange3().float().eval())
-    verify_model(Arange4().float().eval())
-    verify_model(Arange5().float().eval())
-    verify_model(Arange6().float().eval())
-    verify_model(Arange7().float().eval())
-    verify_model(Arange8().float().eval())
-    verify_model(Arange9().float().eval())
-    verify_model(Arange10().float().eval())
-    verify_model(Arange11().float().eval())
-    verify_model(Arange12().float().eval())
-
-
-@tvm.testing.uses_gpu
-def test_forward_mesh_grid():
-    """test_forward_mesh_grid"""
-    torch.set_grad_enabled(False)
-
-    class MeshGrid1(Module):
-        def forward(self, *args):
-            x = torch.tensor([1, 2, 3])
-            y = torch.tensor([4, 5, 6])
-            grid_x, grid_y = torch.meshgrid([x, y])
-            return grid_x, grid_y
-
-    class MeshGrid2(Module):
-        def forward(self, *args):
-            x = torch.tensor([1, 2, 3], dtype=torch.float32)
-            y = torch.add(torch.tensor(5, dtype=torch.float32), 1)
-            grid_x, grid_y = torch.meshgrid([x, y])
-            return grid_x, grid_y
-
-    verify_model(MeshGrid1().float().eval())
-    verify_model(MeshGrid2().float().eval())
-
-
-@tvm.testing.uses_gpu
-def test_forward_abs():
-    """test_forward_abs"""
-    torch.set_grad_enabled(False)
-    input_shape = [2, 1, 10, 1, 10]
-
-    class Abs1(Module):
-        def forward(self, *args):
-            return args[0].abs()
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Abs1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_concatenate():
-    """test_forward_concatenate"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Concatenate1(Module):
-        def forward(self, *args):
-            return torch.cat([args[0][:, 0].unsqueeze(1), args[0][:, 1].unsqueeze(1)], 1)
-
-    class Concatenate2(Module):
-        def forward(self, *args):
-            a = (args[0][:, :, 0] + 2) * 7
-            b = (args[0][:, :, 1] + 3) * 11
-            c = (args[0][:, :, 2] + 5) * 13
-            return torch.cat([t.unsqueeze(2) for t in [a, b, c]], 2)
-
-    class Concatenate3(Module):
-        """
-        torch.concat is preserved as aten::concat only when in a nested module.
-        (In the most cases, It is converted to aten::cat instead of aten::concat.)
-        """
-
-        def __init__(self):
-            super().__init__()
-
-            class _Concatenate(Module):
-                def forward(self, *args):
-                    a = (args[0][:, :, 0] + 2) * 7
-                    b = (args[0][:, :, 1] + 3) * 11
-                    c = (args[0][:, :, 2] + 5) * 13
-                    return torch.concat([t.unsqueeze(2) for t in [a, b, c]], 2)
-
-            self.mod = _Concatenate()
-
-        def forward(self, *args):
-            return self.mod(*args)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Concatenate1().float().eval(), input_data=input_data)
-    verify_model(Concatenate2().float().eval(), input_data=input_data)
-    verify_model(Concatenate3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_relu():
-    """test_forward_relu"""
-    torch.set_grad_enabled(False)
-    input_shape = [10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.ReLU().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_relu6():
-    """test_forward_relu6"""
-    torch.set_grad_enabled(False)
-    input_shape = [10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.ReLU6().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_prelu():
-    """test_forward_prelu"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.PReLU(num_parameters=3).eval(), input_data=input_data)
-    # Test when input channel > 1 and num parameters = 1
-    verify_model(torch.nn.PReLU(num_parameters=1).eval(), input_data=input_data)
-    # Test when input dims < 2
-    verify_model(torch.nn.PReLU(num_parameters=1).eval(), input_data=torch.randn(2))
-
-
-@tvm.testing.uses_gpu
-def test_forward_leakyrelu():
-    """test_forward_leakyrelu"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.LeakyReLU().eval(), input_data=input_data)
-    verify_model(torch.nn.LeakyReLU(negative_slope=0.05).eval(), input_data=input_data)
-    verify_model(torch.nn.LeakyReLU(negative_slope=1.0, inplace=True).eval(), input_data=input_data)
-    verify_model(
-        torch.nn.LeakyReLU(negative_slope=1.25, inplace=True).eval(), input_data=input_data
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_elu():
-    """test_forward_elu"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.randn(input_shape).float()
-    verify_model(torch.nn.ELU().eval(), input_data=input_data)
-    verify_model(torch.nn.ELU(alpha=0.3).eval(), input_data=input_data)
-    verify_model(torch.nn.ELU(alpha=1.0).eval(), input_data=input_data)
-    verify_model(torch.nn.ELU(alpha=1.3).eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_celu():
-    """test_forward_celu"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.CELU().eval(), input_data=input_data)
-    verify_model(torch.nn.CELU(alpha=0.3).eval(), input_data=input_data)
-    verify_model(torch.nn.CELU(alpha=1.0).eval(), input_data=input_data)
-    verify_model(torch.nn.CELU(alpha=1.3).eval(), input_data=input_data)
-    input_data = torch.tensor([-1.0, 2.0], dtype=torch.float32)
-    verify_model(torch.nn.CELU().eval(), input_data=input_data)
-
-    input_shape = [2, 0, 1]
-    input_data = torch.rand(input_shape).float()
-    with pytest.raises(RuntimeError):
-        verify_model(torch.nn.CELU().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_gelu():
-    """test_forward_gelu"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.GELU().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_selu():
-    """test_forward_selu"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.SELU().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_silu():
-    """test_forward_silu"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.SiLU().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_glu():
-    """test_forward_glu"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.GLU().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_softplus():
-    """test_forward_softplus"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.Softplus().eval(), input_data=input_data)
-    verify_model(torch.nn.Softplus(beta=1.5, threshold=20).eval(), input_data=input_data)
-    verify_model(torch.nn.Softplus(beta=5, threshold=10).eval(), input_data=input_data)
-    verify_model(torch.nn.Softplus(beta=5, threshold=1).eval(), input_data=input_data)
-    verify_model(torch.nn.Softplus(beta=1, threshold=2).eval(), input_data=input_data)
-    verify_model(torch.nn.Softplus(beta=1, threshold=-1).eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_softsign():
-    """test_forward_softsign"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.Softsign().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_log_sigmoid():
-    """test_forward_log_sigmoid"""
-    torch.set_grad_enabled(False)
-    input_shape = [10, 10]
-    input_data = torch.rand(input_shape).float()
-    input_data_overflow = torch.tensor([-300.0, -100.0]).float()
-    verify_model(torch.nn.LogSigmoid().eval(), input_data=input_data)
-    verify_model(torch.nn.LogSigmoid().eval(), input_data=input_data_overflow)
-
-
-@tvm.testing.uses_gpu
-def test_forward_adaptive_avgpool():
-    """test_forward_adaptive_avgpool"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.AdaptiveAvgPool2d([1, 1]).eval(), input_data=input_data)
-    verify_model(torch.nn.AdaptiveAvgPool2d([10, 10]).eval(), input_data=input_data)
-
-    input_data = torch.rand([1, 3, 10]).float()
-    verify_model(torch.nn.AdaptiveAvgPool1d([1]).eval(), input_data=input_data)
-    verify_model(torch.nn.AdaptiveAvgPool1d([5]).eval(), input_data=input_data)
-
-    input_data = torch.rand([1, 3, 5, 6]).float()
-    verify_model(torch.nn.AdaptiveAvgPool2d([3, None]).eval(), input_data=input_data)
-    input_data = torch.rand([1, 1, 3, 5, 6]).float()
-    verify_model(torch.nn.AdaptiveAvgPool3d([3, None, None]).eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_adaptive_maxpool():
-    """test_forward_adaptive_maxpool"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.AdaptiveMaxPool2d([1, 1]).eval(), input_data=input_data)
-    verify_model(torch.nn.AdaptiveMaxPool2d([10, 10]).eval(), input_data=input_data)
-
-    input_data = torch.rand([1, 3, 10]).float()
-    verify_model(torch.nn.AdaptiveMaxPool1d([1]).eval(), input_data=input_data)
-    verify_model(torch.nn.AdaptiveMaxPool1d([5]).eval(), input_data=input_data)
-
-    input_data = torch.rand([1, 3, 5, 6]).float()
-    verify_model(torch.nn.AdaptiveMaxPool2d([3, None]).eval(), input_data=input_data)
-    input_data = torch.rand([1, 1, 3, 5, 6]).float()
-    verify_model(torch.nn.AdaptiveMaxPool3d([3, None, None]).eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_maxpool2d():
-    """test_forward_maxpool2d"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-
-    verify_model(torch.nn.MaxPool2d(kernel_size=[1, 1]).eval(), input_data)
-    verify_model(torch.nn.MaxPool2d(kernel_size=[2, 2], dilation=[2, 3]).eval(), input_data)
-    verify_model(torch.nn.MaxPool2d(kernel_size=[10, 10]).eval(), input_data)
-    verify_model(torch.nn.MaxPool2d(kernel_size=[4, 4], padding=2, stride=2).eval(), input_data)
-
-    # A functional variant (default strides = None case)
-    class MaxPool2D(Module):
-        def forward(self, *args):
-            return torch.nn.functional.max_pool2d(args[0], kernel_size=[10, 10])
-
-    verify_model(MaxPool2D(), input_data=input_data)
-
-    class MaxPool2DWithIndices(Module):
-        def __init__(self):
-            super().__init__()
-            self.pool = torch.nn.MaxPool2d(kernel_size=[1, 1], return_indices=True)
-
-        def forward(self, *args):
-            output, _ = self.pool(args[0])
-            return output
-
-    class MaxPool2DWithIntStrides(Module):
-        def forward(self, *args):
-            # Makes kernel_size and strides a Relay expr to test converting back to int
-            x_shape = args[0].shape
-            # kernel_size = [torch.tensor(x_shape[1]).int(), torch.tensor(x_shape[1]).int()]
-            strides = [torch.tensor(x_shape[0]).int(), torch.tensor(x_shape[0]).int()]
-            return torch.nn.functional.max_pool2d(args[0], kernel_size=[4, 4], stride=strides)
-
-    verify_model(MaxPool2DWithIndices().float().eval(), input_data=input_data)
-    verify_model(MaxPool2DWithIntStrides().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_maxpool1d():
-    """test_forward_maxpool1d"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10]
-    input_data = torch.rand(input_shape).float()
-
-    verify_model(torch.nn.MaxPool1d(kernel_size=1).eval(), input_data)
-    verify_model(torch.nn.MaxPool1d(kernel_size=2, dilation=[1]).eval(), input_data)
-    verify_model(torch.nn.MaxPool1d(kernel_size=10).eval(), input_data)
-    verify_model(torch.nn.MaxPool1d(kernel_size=4, padding=2, stride=2).eval(), input_data)
-
-    # A functional variant (default strides = None case)
-    class MaxPool1D(Module):
-        def forward(self, *args):
-            return torch.nn.functional.max_pool1d(args[0], kernel_size=10)
-
-    verify_model(MaxPool1D(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_maxpool3d():
-    """test_forward_maxpool3d"""
-    torch.set_grad_enabled(False)
-    for input_shape in [(1, 3, 10, 10, 10), (3, 10, 10, 10)]:
-        input_data = torch.rand(input_shape).float()
-
-        verify_model(torch.nn.MaxPool3d(kernel_size=[1, 1, 1]).eval(), input_data)
-        verify_model(
-            torch.nn.MaxPool3d(kernel_size=[2, 2, 2], dilation=[1, 2, 3]).eval(), input_data
-        )
-        verify_model(torch.nn.MaxPool3d(kernel_size=[10, 10, 10]).eval(), input_data)
-        verify_model(
-            torch.nn.MaxPool3d(kernel_size=[4, 4, 4], padding=2, stride=2).eval(), input_data
-        )
-
-    # A functional variant (default strides = None case)
-    class MaxPool3D(Module):
-        def forward(self, *args):
-            return torch.nn.functional.max_pool3d(args[0], kernel_size=[10, 10, 10])
-
-    verify_model(MaxPool3D(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_split():
-    """test_forward_split"""
-    torch.set_grad_enabled(False)
-    input_shape = [4, 10]
-
-    class Split(Module):
-        def __init__(self, split_size_or_sections, dim):
-            super().__init__()
-            self.split_size_or_sections = split_size_or_sections
-            self.dim = dim
-
-        def forward(self, *args):
-            return torch.split(args[0], self.split_size_or_sections, self.dim)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Split(2, 0).float().eval(), input_data=input_data)
-    verify_model(Split(3, 1).float().eval(), input_data=input_data)
-    verify_model(Split(4, 1).float().eval(), input_data=input_data)
-    verify_model(Split([2, 3, 5], 1).float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_tensor_split():
-    """test_forward_tensor_split"""
-    torch.set_grad_enabled(False)
-    input_shape = [4, 10]
-
-    class Tensor_Split(Module):
-        def __init__(self, split_size_or_sections, dim):
-            super().__init__()
-            self.split_size_or_sections = split_size_or_sections
-            self.dim = dim
-
-        def forward(self, *args):
-            return torch.tensor_split(args[0], self.split_size_or_sections, self.dim)
-
-    # tensor_split was introduced when torch > 1.7.1
-    if package_version.parse(torch.__version__) > package_version.parse("1.7.1"):
-        input_data = torch.rand(input_shape).float()
-        verify_model(Tensor_Split(2, 0).float().eval(), input_data=input_data)
-        verify_model(Tensor_Split(torch.tensor(3), 1).float().eval(), input_data=input_data)
-        verify_model(Tensor_Split([2, 3, 5], 1).float().eval(), input_data=input_data)
-        verify_model(Tensor_Split((2, 3, 5), 1).float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_avgpool1d():
-    """test_forward_avgpool1d"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10]
-
-    class AvgPool1D2(Module):
-        def forward(self, *args):
-            return torch.nn.functional.avg_pool1d(args[0], kernel_size=[10])
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.AvgPool1d(kernel_size=[10]).eval(), input_data=input_data)
-    verify_model(AvgPool1D2().float().eval(), input_data=input_data)
-    verify_model(
-        torch.nn.AvgPool1d(kernel_size=[5], stride=2, padding=2).eval(), input_data=input_data
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_avgpool2d():
-    """test_forward_avgpool2d"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class AvgPool2D2(Module):
-        def forward(self, *args):
-            return torch.nn.functional.avg_pool2d(args[0], kernel_size=[10, 10])
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.AvgPool2d(kernel_size=[10, 10]).eval(), input_data=input_data)
-    verify_model(AvgPool2D2().float().eval(), input_data=input_data)
-    verify_model(
-        torch.nn.AvgPool2d(kernel_size=5, stride=2, padding=2).eval(), input_data=input_data
-    )
-
-    input_shape = [1, 1, 1, 9]
-    input_data = torch.rand(input_shape).float()
-    verify_model(
-        torch.nn.AvgPool2d(
-            kernel_size=[1, 2], stride=[1, 2], ceil_mode=True, count_include_pad=True
-        ).eval(),
-        input_data=input_data,
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_avgpool3d():
-    """test_forward_avgpool3d"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10, 10]
-
-    class AvgPool3D1(Module):
-        def forward(self, *args):
-            return torch.nn.functional.avg_pool3d(args[0], kernel_size=[10, 10, 10])
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.AvgPool3d(kernel_size=[10, 10, 10]).eval(), input_data=input_data)
-    verify_model(AvgPool3D1().float().eval(), input_data=input_data)
-    verify_model(
-        torch.nn.AvgPool3d(kernel_size=5, stride=2, padding=2).eval(), input_data=input_data
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_hardtanh():
-    """test_forward_hardtanh"""
-    torch.set_grad_enabled(False)
-    input_shape = [10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.Hardtanh().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_conv():
-    """test_forward_conv"""
-    torch.set_grad_enabled(False)
-    conv1d_input_shape = [1, 3, 10]
-    conv2d_input_shape = [1, 3, 10, 10]
-
-    class Conv2D1(Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv2d(3, 6, 7, bias=True)
-            self.softmax = torch.nn.Softmax()
-
-        def forward(self, *args):
-            return self.softmax(self.conv(args[0]))
-
-    class Conv2D2(Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv2d(3, 6, 7, bias=False)
-            self.softmax = torch.nn.Softmax()
-
-        def forward(self, *args):
-            return self.softmax(self.conv(args[0]))
-
-    class Conv2D3(Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv2d(3, 6, 7, groups=3, bias=False)
-            self.softmax = torch.nn.Softmax()
-
-        def forward(self, *args):
-            return self.softmax(self.conv(args[0]))
-
-    class Conv1D1(Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv1d(3, 6, 7)
-            self.softmax = torch.nn.Softmax()
-
-        def forward(self, *args):
-            return self.softmax(self.conv(args[0]))
-
-    class Conv1D2(Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv1d(3, 6, 7, bias=False)
-            self.softmax = torch.nn.Softmax()
-
-        def forward(self, *args):
-            return self.softmax(self.conv(args[0]))
-
-    class Conv1D3(Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv1d(3, 6, 7, groups=3, bias=False)
-            self.softmax = torch.nn.Softmax()
-
-        def forward(self, *args):
-            return self.softmax(self.conv(args[0]))
-
-    conv2d_input_data = torch.rand(conv2d_input_shape).float()
-    verify_model(Conv2D1().float().eval(), input_data=conv2d_input_data)
-    verify_model(Conv2D2().float().eval(), input_data=conv2d_input_data)
-    # depth wise conv with channel mult 2
-    verify_model(Conv2D3().float().eval(), input_data=conv2d_input_data)
-    # group conv
-    verify_model(
-        torch.nn.Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), groups=2).eval(),
-        input_data=torch.randn((1, 8, 16, 16)),
-    )
-
-    conv1d_input_data = torch.rand(conv1d_input_shape).float()
-    verify_model(Conv1D1().float().eval(), input_data=conv1d_input_data)
-    verify_model(Conv1D2().float().eval(), input_data=conv1d_input_data)
-    verify_model(Conv1D3().float().eval(), input_data=conv1d_input_data)
-
-
-@tvm.testing.uses_gpu
-@pytest.mark.parametrize("in_channels", [3], ids=lambda x: "in_channels=" + str(x))
-@pytest.mark.parametrize("out_channels", [5], ids=lambda x: "out_channels=" + str(x))
-@pytest.mark.parametrize("kernel_size", [3], ids=lambda x: "kernel_size=" + str(x))
-@pytest.mark.parametrize("output_padding", [0, 1, 2], ids=lambda x: "output_padding=" + str(x))
-@pytest.mark.parametrize("groups", [1], ids=lambda x: "groups=" + str(x))
-@pytest.mark.parametrize("bias", [True, False], ids=lambda x: "bias=" + str(x))
-def test_forward_conv_transpose(
-    in_channels, out_channels, kernel_size, output_padding, bias, groups
-):
-    """test_forward_conv_transpose"""
-    # Note we do not test with groups  > 1 because that is not supported
-    # in tvm for conv transpose operations
-
-    # Output padding must be smaller than either stride or dilation so we
-    # opt to make the stride 1 + output padding
-    stride = output_padding + 1
-
-    # Conv 3D Transpose Tests
-    conv3d_input_shape = [1, in_channels, 16, 16, 16]
-    conv3d_input_data = torch.rand(conv3d_input_shape).float()
-    conv3d_transpose = torch.nn.ConvTranspose3d(
-        in_channels=in_channels,
-        out_channels=out_channels,
-        kernel_size=kernel_size,
-        stride=stride,
-        output_padding=output_padding,
-        groups=groups,
-        bias=bias,
-    ).eval()
-    verify_model(conv3d_transpose, conv3d_input_data)
-
-    # Conv 2D Transpose Tests
-    conv2d_input_shape = [1, in_channels, 128, 256]
-    conv2d_input_data = torch.rand(conv2d_input_shape).float()
-    conv2d_transpose = torch.nn.ConvTranspose2d(
-        in_channels=in_channels,
-        out_channels=out_channels,
-        kernel_size=kernel_size,
-        stride=stride,
-        output_padding=output_padding,
-        groups=groups,
-        bias=bias,
-    ).eval()
-    verify_model(conv2d_transpose, conv2d_input_data)
-
-    # # Conv 1D Transpose Tests
-    conv1d_input_shape = [1, in_channels, 10]
-    conv1d_input_data = torch.rand(conv1d_input_shape).float()
-    conv1d_transpose = torch.nn.ConvTranspose1d(
-        in_channels=in_channels,
-        out_channels=out_channels,
-        kernel_size=kernel_size,
-        stride=stride,
-        output_padding=output_padding,
-        groups=groups,
-        bias=bias,
-    ).eval()
-    verify_model(conv1d_transpose, conv1d_input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_conv2d_transpose_group():
-    """test_forward_conv2d_transpose_group"""
-    # https://github.com/apache/tvm/issues/10223
-
-    class ModulatedConvTranspose2D(torch.nn.Module):
-        """ModulatedConvTranspose2D module"""
-
-        def forward(self, x, w, s):
-            """forward"""
-            B, C, H, W = x.shape
-            I, O, KH, KW = w.shape
-
-            # weight is different for each input in batch (this is why we want grouped conv
-            # transpose)
-            w = w.unsqueeze(0) * s.reshape(B, 1, 1, 1, 1)
-            w = w.reshape(B * I, O, KH, KW)
-            x = x.reshape(1, B * C, H, W)
-            x = torch.nn.functional.conv_transpose2d(
-                x, w, stride=(2, 2), padding=(1, 1), output_padding=(1, 1), groups=B
-            )
-            return x.reshape(B, O, H * 2, W * 2)
-
-    b, c, h, w, k = 4, 512, 8, 16, 3
-    inputs = torch.rand(b, c, h, w)
-    weights = torch.rand(c, c // 2, k, k)
-    styles = torch.rand(b)
-
-    # cuda not supported for group > 1 conv2d_transpose
-    targets = ["llvm"]
-
-    if cudnn.exists():
-        targets.append("cuda -libs=cudnn")
-
-    verify_trace_model(ModulatedConvTranspose2D().eval(), [inputs, weights, styles], targets)
-
-
-def test_forward_deform_conv():
-    """test_forward_deform_conv"""
-    torch.set_grad_enabled(False)
-
-    def test_run(
-        batch_size,
-        in_channels,
-        out_channels,
-        in_height,
-        in_width,
-        out_height,
-        out_width,
-        offset_groups,
-        kh,
-        kw,
-        groups,
-    ):
-        input_shape = [batch_size, in_channels, in_height, in_width]
-        offset_shape = [batch_size, 2 * offset_groups * kh * kw, out_height, out_width]
-        weight_shape = [out_channels, in_channels // groups, kh, kw]
-        input_data = torch.rand(input_shape)
-        offset_data = torch.rand(offset_shape)
-        weight_data = torch.rand(weight_shape)
-
-        class DeformConv2D(Module):
-            def forward(self, *args):
-                return torchvision.ops.deform_conv2d(args[0], args[1], args[2])
-
-        verify_model(
-            DeformConv2D().float().eval(),
-            input_data=[input_data, offset_data, weight_data],
-            rtol=1e-4,
-            atol=1e-4,
-        )
-
-    batch_size = 4
-    in_channels, out_channels = 4, 6
-    in_height, in_width = 10, 10
-    out_height, out_width = 8, 8
-    offset_groups = 2
-    kh, kw = 3, 3
-    groups = 1
-
-    test_run(
-        batch_size,
-        in_channels,
-        out_channels,
-        in_height,
-        in_width,
-        out_height,
-        out_width,
-        offset_groups,
-        kh,
-        kw,
-        groups,
-    )
-
-    batch_size = 5
-    in_channels, out_channels = 4, 6
-    in_height, in_width = 10, 10
-    out_height, out_width = 8, 8
-    offset_groups = 1
-    kh, kw = 3, 3
-    groups = 1
-
-    test_run(
-        batch_size,
-        in_channels,
-        out_channels,
-        in_height,
-        in_width,
-        out_height,
-        out_width,
-        offset_groups,
-        kh,
-        kw,
-        groups,
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_threshold():
-    """test_forward_threshold"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.Threshold(0, 0).float().eval(), input_data=input_data)
-    input_data = torch.tensor([[-1.0, 2.0]], dtype=torch.float32)
-    verify_model(torch.nn.Threshold(1, 1).float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_contiguous():
-    """test_forward_contiguous"""
-    torch.set_grad_enabled(False)
-    input_shape = [10]
-
-    class Contiguous1(Module):
-        def forward(self, *args):
-            return args[0].contiguous()
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Contiguous1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_batchnorm():
-    """test_forward_batchnorm"""
-
-    def init_weight(m):
-        torch.nn.init.normal_(m.weight, 0, 0.01)
-        torch.nn.init.normal_(m.bias)
-
-    inp_2d = torch.rand((1, 16, 10, 10))
-    inp_3d = torch.rand((1, 16, 10, 10, 10))
-
-    class BatchNorm(Module):
-        def __init__(self, weight, bias):
-            super().__init__()
-            self.weight = weight
-            self.bias = bias
-
-        def forward(self, *args):
-            return torch.nn.functional.batch_norm(
-                args[0],
-                running_mean=torch.zeros(args[0].shape[1]),
-                running_var=torch.ones(args[0].shape[1]),
-                weight=self.weight,
-                bias=self.bias,
-            )
-
-    for bn, inp in [(torch.nn.BatchNorm2d(16), inp_2d), (torch.nn.BatchNorm3d(16), inp_3d)]:
-        init_weight(bn.eval())
-        verify_model(bn.eval(), input_data=inp)
-        verify_model(BatchNorm(bn.weight, None).eval(), input_data=inp)
-        verify_model(BatchNorm(bn.weight, bn.bias).eval(), input_data=inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_instancenorm():
-    """test_forward_instancenorm"""
-    inp_2d = torch.rand((1, 16, 10, 10))
-    inp_3d = torch.rand((1, 16, 10, 10, 10))
-
-    for ins_norm, inp in [
-        (torch.nn.InstanceNorm2d(16), inp_2d),
-        (torch.nn.InstanceNorm3d(16), inp_3d),
-        (torch.nn.InstanceNorm2d(16, track_running_stats=True), inp_2d),
-        (torch.nn.InstanceNorm3d(16, track_running_stats=True), inp_3d),
-    ]:
-        verify_model(ins_norm.eval(), input_data=inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_layernorm():
-    """test_forward_layernorm"""
-
-    def init_weight(m):
-        torch.nn.init.normal_(m.weight, 0, 0.01)
-        torch.nn.init.normal_(m.bias, 0.02)
-
-    inp_2d = torch.rand((1, 16, 10, 10))
-    inp_3d = torch.rand((1, 16, 10, 10, 10))
-    for ln, inp in [(torch.nn.LayerNorm(10), inp_2d), (torch.nn.LayerNorm(10), inp_3d)]:
-        init_weight(ln.eval())
-        verify_model(ln.eval(), input_data=inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_groupnorm():
-    """test_forward_groupnorm"""
-    input_shape = [10, 6, 5, 5]
-    input_data = torch.rand(input_shape).float()
-
-    # Separate 6 channels into 3 groups
-    verify_model(torch.nn.GroupNorm(3, 6).eval(), input_data=input_data)
-
-    # Put all 6 channels into a single group (equivalent with LayerNorm)
-    verify_model(torch.nn.GroupNorm(1, 6).eval(), input_data=input_data)
-
-    # Separate 6 channels into 6 groups (equivalent with InstanceNorm)
-    verify_model(torch.nn.GroupNorm(6, 6).eval(), input_data=input_data)
-
-    input_shape = [1, 10, 4, 7]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.GroupNorm(1, 10).eval(), input_data=input_data)
-    verify_model(torch.nn.GroupNorm(2, 10).eval(), input_data=input_data)
-    verify_model(torch.nn.GroupNorm(5, 10).eval(), input_data=input_data)
-    verify_model(torch.nn.GroupNorm(10, 10).eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_reshape():
-    """test_forward_reshape"""
-    torch.set_grad_enabled(False)
-    input_shape = [2, 1, 10, 1, 10]
-    new_shape = [2, 1, 10, 10]
-
-    class Reshape1(Module):
-        def forward(self, *args):
-            return args[0].reshape(new_shape)
-
-    class Reshape2(Module):
-        def forward(self, *args):
-            return args[0].reshape([-1])
-
-    class Reshape3(torch.nn.Module):
-        def forward(self, x):
-            x_shape = x.shape
-            return x.reshape((x_shape[0] * x_shape[1], x_shape[2]))
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Reshape1(), input_data=input_data)
-    verify_model(Reshape2(), input_data=input_data)
-    verify_model(Reshape3(), input_data=torch.randn(2, 3, 4))
-
-
-@tvm.testing.uses_gpu
-def test_forward_reshape_as():
-    """test_forward_reshape_as"""
-
-    def test_func(input_tensor, other_tensor):
-        return input_tensor.reshape_as(other_tensor)
-
-    input_data = [torch.rand([2, 1, 10, 1, 10]), torch.rand([2, 1, 10, 10])]
-
-    verify_model_with_input(test_func, input_data, input_dict={"input0": input_data[0]})
-
-
-@tvm.testing.uses_gpu
-def test_flatten():
-    """test_flatten"""
-
-    def _test_flatten(start_dim, end_dim):
-        return lambda inp: torch.flatten(inp, start_dim, end_dim)
-
-    inp = torch.rand((3, 5, 2, 2))
-
-    # [3, 5, 2, 2] -> [60]
-    verify_model(_test_flatten(0, -1), inp)
-    verify_model(_test_flatten(0, 3), inp)
-    verify_model(_test_flatten(-4, 3), inp)
-    verify_model(_test_flatten(-4, -1), inp)
-
-    # [3, 5, 2, 2] -> [3, 5, 2, 2]
-    verify_model(_test_flatten(3, -1), inp)
-    verify_model(_test_flatten(-1, -1), inp)
-    verify_model(_test_flatten(0, -4), inp)
-    verify_model(_test_flatten(-4, -4), inp)
-
-    # [3, 5, 2, 2] -> [3, 10, 2]
-    verify_model(_test_flatten(1, 2), inp)
-    verify_model(_test_flatten(1, -2), inp)
-    verify_model(_test_flatten(-3, 2), inp)
-    verify_model(_test_flatten(-3, -2), inp)
-
-
-@tvm.testing.uses_gpu
-def test_unflatten():
-    """test_unflatten"""
-
-    def _test_unflatten(dim, unflattened_size):
-        return lambda inp: torch.unflatten(inp, dim, unflattened_size)
-
-    inp = torch.rand(60)
-
-    # [60] -> [3, 5, 2, 2]
-    verify_model(_test_unflatten(0, (3, 5, 2, 2)), inp)
-    verify_model(_test_unflatten(0, (-1, 5, 2, 2)), inp)
-    verify_model(_test_unflatten(0, (3, -1, 2, 2)), inp)
-    verify_model(_test_unflatten(0, (3, 5, -1, 2)), inp)
-    verify_model(_test_unflatten(0, (3, 5, 2, -1)), inp)
-    verify_model(_test_unflatten(-1, (3, 5, 2, 2)), inp)
-    verify_model(_test_unflatten(-1, (-1, 5, 2, 2)), inp)
-    verify_model(_test_unflatten(-1, (3, -1, 2, 2)), inp)
-    verify_model(_test_unflatten(-1, (3, 5, -1, 2)), inp)
-    verify_model(_test_unflatten(-1, (3, 5, 2, -1)), inp)
-
-    inp = torch.rand(3, 4, 1)
-
-    # [3, 4, 1] -> [3, 2, 2, 1]
-    verify_model(_test_unflatten(1, (2, 2)), inp)
-    verify_model(_test_unflatten(1, (-1, 2)), inp)
-
-    inp = torch.rand(5, 12, 3)
-
-    # [5, 12, 3] -> [5, 2, 2, 3, 1, 1, 3]
-    verify_model(_test_unflatten(1, (2, 2, 3, 1, 1)), inp)
-    verify_model(_test_unflatten(-2, (2, 2, 3, 1, 1)), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_transpose():
-    """test_forward_transpose"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Transpose1(Module):
-        def forward(self, *args):
-            return args[0].transpose(2, 3)
-
-    class Transpose2(Module):
-        def forward(self, *args):
-            return args[0].transpose(-2, -1)
-
-    class Transpose3(Module):
-        def forward(self, *args):
-            return args[0].permute(0, 2, 3, 1)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Transpose1().float().eval(), input_data=input_data)
-    verify_model(Transpose2().float().eval(), input_data=input_data)
-    verify_model(Transpose3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_numpy_T():
-    """test_forward_numpy_T"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    def test_fn(x):
-        return x.T
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(test_fn, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_size():
-    """test_forward_size"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3]
-
-    class Size1(Module):
-        def forward(self, *args):
-            return float(args[0].size(0)) * args[0]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Size1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_type_as():
-    """test_type_as"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3]
-
-    def _create_module(dtype):
-        class TypeAs(Module):
-            def forward(self, *args):
-                expected_type_tensor = torch.zeros(1, 3, dtype=dtype)
-                return args[0].type_as(expected_type_tensor)
-
-        return TypeAs()
-
-    input_data = torch.randn(input_shape).float()
-    verify_model(_create_module(torch.float64), input_data=input_data)
-    verify_model(_create_module(torch.float32), input_data=input_data)
-    verify_model(_create_module(torch.int64), input_data=input_data)
-    verify_model(_create_module(torch.int32), input_data=input_data)
-    verify_model(_create_module(torch.int16), input_data=input_data)
-    verify_model(_create_module(torch.int8), input_data=input_data)
-
-    if torch.cuda.is_available():
-        check_fp16 = False
-        try:
-            # Only check half precision on supported hardwares.
-            if have_fp16(tvm.cuda(0).compute_version):
-                check_fp16 = True
-        # pylint: disable=broad-except
-        except Exception:
-            # If GPU is not enabled in TVM, skip the fp16 test.
-            pass
-
-        # Temporary disable fp16 test
-        check_fp16 = False
-
-        if check_fp16:
-            verify_model(_create_module(torch.float16), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_view():
-    """test_forward_view"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class View1(Module):
-        def forward(self, *args):
-            return args[0].view((1, 3 * 10 * 10))
-
-    class View2(Module):
-        def forward(self, *args):
-            return args[0].view(args[0].shape[0], -1)
-
-    class View3(Module):
-        def forward(self, *args):
-            d1 = torch.tensor(3) * torch.tensor(10) * torch.tensor(10)
-            return args[0].view(args[0].shape[0], d1)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(View1().float().eval(), input_data=input_data)
-    verify_model(View2().float().eval(), input_data=input_data)
-    verify_model(View3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_view_as():
-    """test_forward_view_as"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10]
-
-    class ViewAs1(Module):
-        def forward(self, *args):
-            t1 = torch.ones((1 * 3 * 10))
-            return args[0].view_as(t1)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(ViewAs1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_select():
-    """test_forward_select"""
-    torch.set_grad_enabled(False)
-    input_shape = [5, 3, 10, 10]
-
-    class Select1(Module):
-        def forward(self, *args):
-            return args[0].select(1, 1)
-
-    class IndexedSelect(Module):
-        def __init__(self, inp, dim):
-            super().__init__()
-            self.inp = inp
-            self.dim = dim
-            if torch.cuda.is_available():
-                self.inp = self.inp.cuda()
-
-        def forward(self, index):
-            return torch.index_select(self.inp, self.dim, index)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Select1().float().eval(), input_data=input_data)
-
-    # test negative indexing
-    verify_model(lambda x: x[-1], input_data=input_data)
-
-    x = torch.randn(3, 4)
-    indices = torch.tensor([0, 2])
-    verify_model(IndexedSelect(x, 0).eval(), input_data=indices)
-    verify_model(IndexedSelect(x, 1).eval(), input_data=indices)
-
-
-@tvm.testing.uses_gpu
-def test_forward_clone():
-    """test_forward_clone"""
-    torch.set_grad_enabled(False)
-    input_shape = [10]
-
-    class Clone1(Module):
-        def forward(self, *args):
-            return args[0].clone()
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Clone1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_gather():
-    """test_forward_gather"""
-    torch.set_grad_enabled(False)
-
-    class Gather1(Module):
-        def forward(self, *args):
-            return torch.gather(args[0], 0, args[1])
-
-    class Gather2(Module):
-        def forward(self, *args):
-            return torch.gather(args[0], 1, args[1])
-
-    class Gather3(Module):
-        def forward(self, *args):
-            return torch.gather(args[0], 2, args[1])
-
-    input_data = torch.rand((4,)).float()
-    index = torch.tensor([1])
-    verify_model(Gather1().float().eval(), input_data=[input_data, index])
-
-    input_data = torch.rand((2, 2)).float()
-    index = torch.tensor([[1, 0], [0, 1]])
-    verify_model(Gather1().float().eval(), input_data=[input_data, index])
-
-    input_data = torch.tensor([[1, 2], [3, 4]])
-    index = torch.tensor([[0, 0], [1, 0]])
-    verify_model(Gather2().float().eval(), input_data=[input_data, index])
-
-    input_data = torch.rand((2, 2)).float()
-    index = torch.tensor([[1, 0], [0, 1]])
-    verify_model(Gather2().float().eval(), input_data=[input_data, index])
-
-    input_data = torch.rand((3, 3, 3)).float()
-    index = torch.tensor(
-        [
-            [[1, 0, 0], [1, 0, 1], [0, 1, 1]],
-            [[1, 1, 1], [1, 2, 1], [1, 0, 1]],
-            [[1, 2, 1], [1, 2, 1], [1, 2, 1]],
-        ]
-    )
-    verify_model(Gather3().float().eval(), input_data=[input_data, index])
-
-
-@tvm.testing.uses_gpu
-def test_forward_logsoftmax():
-    """test_forward_logsoftmax"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class LogSoftmax1(Module):
-        def forward(self, *args):
-            return torch.nn.LogSoftmax(dim=1)(args[0][0, 0])
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(LogSoftmax1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_norm():
-    """test_forward_norm"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Norm1(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float("inf"), dim=None, keepdim=False)
-
-    class Norm2(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float("-inf"), dim=None, keepdim=False)
-
-    class Norm3(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float("-inf"), dim=None, keepdim=True)
-
-    class Norm4(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float("inf"), dim=(1, 2), keepdim=False)
-
-    class Norm5(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float("inf"), dim=(1), keepdim=True)
-
-    class Norm6(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float(0.5), dim=(1), keepdim=True)
-
-    class Norm7(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float(1), dim=None, keepdim=False)
-
-    class Norm8(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float(2.0), dim=(1), keepdim=True)
-
-    class Norm9(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float(-0.5), dim=(1, 2), keepdim=True)
-
-    class Norm10(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p=float(-2), dim=(1), keepdim=False)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Norm1().float().eval(), input_data=input_data)
-    verify_model(Norm2().float().eval(), input_data=input_data)
-    verify_model(Norm3().float().eval(), input_data=input_data)
-    verify_model(Norm4().float().eval(), input_data=input_data)
-    verify_model(Norm5().float().eval(), input_data=input_data)
-    verify_model(Norm6().float().eval(), input_data=input_data)
-    verify_model(Norm7().float().eval(), input_data=input_data)
-    verify_model(Norm8().float().eval(), input_data=input_data)
-    verify_model(Norm9().float().eval(), input_data=input_data)
-    verify_model(Norm10().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_frobenius_norm():
-    """test_forward_frobenius_norm"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class FroNorm1(Module):
-        def forward(self, *args):
-            return torch.norm(args[0])
-
-    class FroNorm2(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p="fro", dim=None, keepdim=True)
-
-    class FroNorm3(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], p="fro", dim=(1), keepdim=True)
-
-    class FroNorm4(Module):
-        def forward(self, *args):
-            return torch.norm(args[0], dim=None, keepdim=False)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(FroNorm1().float().eval(), input_data=input_data)
-    verify_model(FroNorm2().float().eval(), input_data=input_data)
-    verify_model(FroNorm3().float().eval(), input_data=input_data)
-    verify_model(FroNorm4().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_sigmoid():
-    """test_forward_sigmoid"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.Sigmoid().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_dense():
-    """test_forward_dense"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Dense1(Module):
-        def __init__(self):
-            super().__init__()
-            self.linear = torch.nn.Linear(10, 7, bias=True)
-
-        def forward(self, *args):
-            return self.linear(args[0][0, 0])
-
-    class Dense2(Module):
-        def __init__(self):
-            super().__init__()
-            self.linear = torch.nn.Linear(10, 7, bias=False)
-
-        def forward(self, *args):
-            return self.linear(args[0][0, 0])
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Dense1().float().eval(), input_data=input_data)
-    verify_model(Dense2().float().eval(), input_data=input_data)
-
-    trace = torch.jit.trace(Dense1(), [input_data])
-    mod, _ = relay.frontend.from_pytorch(
-        trace,
-        [("input", input_shape)],
-    )
-    assert not any(list(op.name == "multiply" for op in list_ops(mod["main"])))
-
-
-@tvm.testing.uses_gpu
-def test_forward_linear():
-    """test_forward_linear"""
-    torch.set_grad_enabled(False)
-
-    class Linear(Module):
-        def forward(self, inputs, weight, bias):
-            return F.linear(inputs, weight, bias)
-
-    class LinearNoBias(Module):
-        def forward(self, inputs, weight):
-            return F.linear(inputs, weight)
-
-    class LinearNested(Module):
-        def forward(self, x, y, z):
-            return F.linear(x, F.linear(y, z))
-
-    input1d = torch.rand([2]).float()
-    input2d = torch.rand([2, 2]).float()
-    input3d = torch.rand([4, 3, 2]).float()
-    weight1d = torch.rand([2]).float()
-    weight2d = torch.rand([2, 2]).float()
-    weight3x2 = torch.rand([3, 2]).float()
-    bias0d = torch.rand([]).float()
-    bias1d = torch.rand([2]).float()
-    bias2d = torch.rand([2, 2]).float()
-    # 2D input, 2D weight, 1D bias
-    verify_model(Linear(), input_data=[input2d, weight2d, bias1d])
-    # 2D input, 2D weight, 2D bias
-    verify_model(Linear(), input_data=[input2d, weight2d, bias2d])
-    # 2D input, 2D weight, no bias
-    verify_model(LinearNoBias(), input_data=[input2d, weight2d])
-    verify_model(LinearNoBias(), input_data=[input2d, weight3x2])
-    # 2D input, 1D weight, 1D bias is not supported by torch.linear()
-    # 2D input, 1D weight, no bias
-    verify_model(LinearNoBias(), input_data=[input2d, weight1d])
-    # 3D input, 2D weight, no bias
-    verify_model(LinearNoBias(), input_data=[input3d, weight3x2])
-    # 3D input, 2D weight, 1D bias
-    verify_model(Linear(), input_data=[input3d, weight2d, bias1d])
-
-    verify_model(LinearNested(), input_data=[torch.randn(10, 10) for _ in range(3)])
-
-    # 1D input, 2D weight, 1D bias
-    verify_model(Linear(), input_data=[input1d, weight2d, bias1d])
-    # 1D input, 2D weight, no bias
-    verify_model(LinearNoBias(), input_data=[input1d, weight2d])
-    # 1D input, 1D weight, scalar bias
-    verify_model(Linear(), input_data=[input1d, weight1d, bias0d])
-    # 1D input, 1D weight, no bias
-    verify_model(LinearNoBias(), input_data=[input1d, weight1d])
-
-
-@tvm.testing.uses_gpu
-def test_forward_dropout():
-    """test_forward_dropout"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(torch.nn.Dropout(p=0.5).eval(), input_data=input_data[0, 0])
-    verify_model(torch.nn.Dropout2d(p=0.5).eval(), input_data=input_data[0])
-    verify_model(torch.nn.Dropout3d(p=0.5).eval(), input_data=input_data)
-    verify_model(torch.nn.AlphaDropout(p=0.5).eval(), input_data=input_data[0, 0])
-
-
-@tvm.testing.uses_gpu
-def test_forward_slice():
-    """test_forward_slice"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Slice1(Module):
-        def forward(self, *args):
-            return args[0][:, :, :, :3]
-
-    class Slice2(Module):
-        def forward(self, *args):
-            return args[0][0, :, :-3, :]
-
-    class Slice3(Module):
-        def forward(self, *args):
-            x0 = torch.tensor(2) - torch.tensor(1)
-            x1 = torch.tensor(3) + torch.tensor(1)
-            return args[0][:, x0:, 1:x1, :]
-
-    class SliceWithStride(torch.nn.Module):
-        def forward(self, x):
-            return x[..., 0::2] + x[..., 1::2]
-
-    class SliceWithStride2(torch.nn.Module):
-        def forward(self, x):
-            return x[0::2, 0::2] + x[1::2, 1::2]
-
-    class DynamicLengthSlice(torch.nn.Module):
-        def forward(self, values, length):
-            return values[0:length]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Slice1(), input_data=input_data)
-    verify_model(Slice2(), input_data=input_data)
-    verify_model(Slice3(), input_data=input_data)
-    verify_model(SliceWithStride(), input_data=torch.randn(1, 4))
-    verify_model(SliceWithStride2(), input_data=torch.randn(4, 4))
-
-    inp = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
-    slice_len = torch.tensor(2)
-    targets = ["llvm", "cuda"]
-    verify_trace_model(DynamicLengthSlice(), [inp, slice_len], targets)
-
-
-@tvm.testing.uses_gpu
-def test_forward_narrow():
-    """test_forward_narrow"""
-    torch.set_grad_enabled(False)
-    input_shape = [3, 3]
-
-    class Narrow1(Module):
-        def forward(self, *args):
-            return torch.narrow(args[0], 0, 0, 2)
-
-    class Narrow2(Module):
-        def forward(self, *args):
-            return torch.narrow(args[0], 1, 1, 2)
-
-    class Narrow3(Module):
-        def forward(self, *args):
-            begin = torch.tensor(2) - torch.tensor(1)
-            length = torch.tensor(1) * torch.tensor(2)
-            return torch.narrow(args[0], 1, begin, length)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Narrow1(), input_data=input_data)
-    verify_model(Narrow2(), input_data=input_data)
-    verify_model(Narrow3(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_mean():
-    """test_forward_mean"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Mean1(Module):
-        def forward(self, *args):
-            return args[0].mean(2)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Mean1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_expand():
-    """test_forward_expand"""
-    torch.set_grad_enabled(False)
-
-    class Expand1(Module):
-        def forward(self, *args):
-            return args[0].expand((3, -1, -1, -1))
-
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(Expand1().float().eval(), input_data=input_data)
-
-    class Expand2(Module):
-        def forward(self, *args):
-            return args[0].expand((3, 3, 3, 1))
-
-    input_shape = [3, 1]
-    input_data = torch.rand(input_shape).float()
-    verify_model(Expand2().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_broadcast_tensors():
-    """test_forward_broadcast_tensors"""
-    torch.set_grad_enabled(False)
-
-    class BroadCastTensors1(Module):
-        def forward(self, x, y):
-            return torch.broadcast_tensors(x, y)
-
-    x = torch.arange(3).view(1, 1, 3)
-    y = torch.arange(2).view(1, 2, 1)
-    verify_model(BroadCastTensors1().float().eval(), input_data=[x, y])
-
-    class BroadCastTensors2(Module):
-        def forward(self, x, y, z):
-            return torch.broadcast_tensors(x, y, z)
-
-    x = torch.arange(3).view(1, 1, 3)
-    y = torch.arange(2).view(1, 2, 1)
-    z = torch.arange(4).view(4, 1, 1)
-    verify_model(BroadCastTensors2().float().eval(), input_data=[x, y, z])
-
-
-@tvm.testing.uses_gpu
-def test_forward_broadcast_to():
-    """test_forward_broadcast_to"""
-    torch.set_grad_enabled(False)
-
-    class BroadCastTo1(Module):
-        def forward(self, x):
-            return torch.broadcast_to(x, (3, 3))
-
-    x = torch.tensor([1, 2, 3])
-    verify_model(BroadCastTo1().float().eval(), input_data=[x])
-
-    class BroadCastTo2(Module):
-        def __init__(self):
-            super().__init__()
-            self.y = torch.tensor(1)
-            self.z = torch.tensor(2)
-
-        def forward(self, x):
-            return torch.broadcast_to(x, (self.y + self.z, 3))
-
-    x = torch.tensor([1, 2, 3])
-    verify_model(BroadCastTo2().float().eval(), input_data=[x])
-
-
-@tvm.testing.uses_gpu
-def test_forward_pow():
-    """test_forward_pow"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Pow1(Module):
-        def forward(self, *args):
-            return args[0] ** 2
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Pow1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_chunk():
-    """test_forward_chunk"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 14, 14]
-
-    class Chunk1(Module):
-        def forward(self, *args):
-            chunks = args[0].chunk(7, 2)
-            return torch.cat(chunks, 2)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Chunk1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_upsample():
-    """test_upsample"""
-
-    class Upsample(Module):
-        def __init__(self, size=None, scale=None, mode="nearest", align_corners=None):
-            super().__init__()
-            self.size = size
-            self.scale = scale
-            self.mode = mode
-            self.align_corners = align_corners
-
-        def forward(self, x):
-            return torch.nn.functional.interpolate(
-                x,
-                size=self.size,
-                scale_factor=self.scale,
-                mode=self.mode,
-                align_corners=self.align_corners,
-            )
-
-    inp = torch.rand((1, 3, 32, 32))
-    verify_model(Upsample(size=(64, 64), mode="nearest"), inp)
-    verify_model(Upsample(scale=2, mode="nearest"), inp)
-    verify_model(Upsample(size=(50, 50), mode="nearest"), inp)
-    verify_model(Upsample(size=(64, 64), mode="bilinear", align_corners=True), inp)
-    verify_model(Upsample(scale=2, mode="bilinear", align_corners=True), inp)
-    verify_model(Upsample(size=(50, 50), mode="bilinear", align_corners=True), inp)
-    verify_model(Upsample(size=(64, 64), mode="bicubic", align_corners=True), inp)
-    verify_model(Upsample(scale=2, mode="bicubic", align_corners=True), inp)
-    verify_model(Upsample(size=(50, 50), mode="bicubic", align_corners=True), inp)
-
-
-@tvm.testing.uses_gpu
-def test_to():
-    """test for aten::to(...)"""
-
-    class ToCPU(Module):
-        def forward(self, x):
-            return x.to("cpu")
-
-    class ToFloat(Module):
-        def forward(self, x):
-            return x.float()
-
-    class ToInt(Module):
-        def forward(self, x):
-            return x.int()
-
-    class ToLong(Module):
-        def forward(self, x):
-            return x.long()
-
-    class ToDouble(Module):
-        def forward(self, x):
-            return x.double()
-
-    class ToFloat16(Module):
-        def forward(self, x):
-            return x.to(torch.float16)
-
-    verify_model(ToCPU().eval(), torch.rand((1, 3, 32, 32)))
-    verify_model(ToFloat().eval(), torch.zeros((1, 3, 32, 32), dtype=torch.int))
-    verify_model(ToFloat().eval(), torch.tensor(2, dtype=torch.int))
-    verify_model(ToInt().eval(), torch.zeros((1, 3, 32, 32)))
-    verify_model(ToInt().eval(), torch.tensor(0.8))
-    verify_model(ToLong().eval(), torch.tensor(0.8))
-    verify_model(ToDouble().eval(), torch.tensor(0.8))
-    verify_model(ToFloat16().eval(), torch.tensor(2, dtype=torch.float32))
-    verify_model(ToFloat16().eval(), torch.zeros((1, 3, 32, 32), dtype=torch.int))
-
-
-@tvm.testing.uses_gpu
-def test_adaptive_pool3d():
-    """test_adaptive_pool3d"""
-    for ishape in [(1, 32, 16, 16, 16), (1, 32, 9, 15, 15), (1, 32, 13, 7, 7)]:
-        inp = torch.rand(ishape)
-        verify_model(torch.nn.AdaptiveMaxPool3d((1, 1, 1)).eval(), inp)
-        verify_model(torch.nn.AdaptiveMaxPool3d((2, 2, 2)).eval(), inp)
-        verify_model(torch.nn.AdaptiveAvgPool3d((1, 1, 1)).eval(), inp)
-        verify_model(torch.nn.AdaptiveAvgPool3d((2, 2, 2)).eval(), inp)
-        verify_model(torch.nn.AdaptiveAvgPool3d((4, 8, 8)).eval(), inp)
-        verify_model(torch.nn.AdaptiveMaxPool3d((7, 8, 9)).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_functional_pad():
-    """test_forward_functional_pad"""
-    torch.set_grad_enabled(False)
-    pad = (0, 0)
-
-    class Pad1(Module):
-        def forward(self, *args):
-            return torch.nn.functional.pad(args[0], pad, "constant", 0)
-
-    input_data = torch.rand((3, 3, 4, 2))
-    pad = (1, 1)
-    verify_model(Pad1().float().eval(), input_data=input_data)
-
-    pad = (1, 1, 2, 2)
-    verify_model(Pad1().float().eval(), input_data=input_data)
-
-    pad = (0, 1, 2, 1, 3, 3)
-    verify_model(Pad1().float().eval(), input_data=input_data)
-
-    class Pad2(Module):
-        def forward(self, *args):
-            return torch.nn.functional.pad(args[0], pad, "constant", 1)
-
-    input_data = torch.rand((3, 3, 4, 2))
-    pad = (1, 1)
-    verify_model(Pad2().float().eval(), input_data=input_data)
-
-    pad = (1, 1, 2, 2)
-    verify_model(Pad2().float().eval(), input_data=input_data)
-
-    pad = (0, 1, 2, 1, 3, 3)
-    verify_model(Pad2().float().eval(), input_data=input_data)
-
-    class Pad3(Module):
-        def forward(self, *args):
-            return torch.nn.functional.pad(args[0], pad, "constant", 1.0)
-
-    input_data = torch.rand((3, 3, 4, 2))
-    pad = (1, 1)
-    verify_model(Pad3().float().eval(), input_data=input_data)
-
-    pad = (1, 1, 2, 2)
-    verify_model(Pad3().float().eval(), input_data=input_data)
-
-    pad = (0, 1, 2, 1, 3, 3)
-    verify_model(Pad3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_zero_pad2d():
-    """test_forward_zero_pad2d"""
-    inp = torch.rand((1, 1, 3, 3))
-    verify_model(torch.nn.ZeroPad2d(2).eval(), inp)
-    verify_model(torch.nn.ZeroPad2d((1, 1, 2, 0)).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_constant_pad1d():
-    """test_forward_constant_pad1d"""
-    inp = torch.rand((1, 2, 4))
-    verify_model(torch.nn.ConstantPad1d(2, 3.5).eval(), inp)
-
-    inp = torch.rand((1, 2, 3))
-    verify_model(torch.nn.ConstantPad1d((3, 1), 3.5).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_constant_pad2d():
-    """test_forward_constant_pad2d"""
-    inp = torch.rand((1, 2, 2, 2))
-    verify_model(torch.nn.ConstantPad2d(2, 3.5).eval(), inp)
-    verify_model(torch.nn.ConstantPad2d((3, 0, 2, 1), 3.5).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_constant_pad3d():
-    """test_forward_constant_pad3d"""
-    inp = torch.rand((1, 3, 2, 2, 2))
-    verify_model(torch.nn.ConstantPad3d(3, 3.5).eval(), inp)
-    verify_model(torch.nn.ConstantPad3d((3, 4, 5, 6, 0, 1), 3.5).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_reflection_pad1d():
-    """test_forward_reflection_pad1d"""
-    inp = torch.rand((1, 2, 4))
-    verify_model(torch.nn.ReflectionPad1d(2).eval(), inp)
-    verify_model(torch.nn.ReflectionPad1d((3, 1)).eval(), inp)
-
-    inp = torch.rand((2, 4, 5))
-    verify_model(torch.nn.ReflectionPad1d((2, 3)).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_reflection_pad2d():
-    """test_forward_reflection_pad2d"""
-    inp = torch.rand((1, 1, 3, 3))
-    verify_model(torch.nn.ReflectionPad2d(2).eval(), inp)
-    verify_model(torch.nn.ReflectionPad2d((1, 1, 2, 0)).eval(), inp)
-
-    inp = torch.rand((2, 4, 5, 6))
-    verify_model(torch.nn.ReflectionPad2d((1, 3, 2, 4)).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_replication_pad1d():
-    """test_forward_replication_pad1d"""
-    inp = torch.rand((1, 2, 4))
-    verify_model(torch.nn.ReplicationPad1d(2).eval(), inp)
-    verify_model(torch.nn.ReplicationPad1d((3, 1)).eval(), inp)
-
-    inp = torch.rand((2, 4, 5))
-    verify_model(torch.nn.ReplicationPad1d((2, 3)).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_replication_pad2d():
-    """test_forward_replication_pad2d"""
-    inp = torch.rand((1, 1, 3, 3))
-    verify_model(torch.nn.ReplicationPad2d(2).eval(), inp)
-    verify_model(torch.nn.ReplicationPad2d((1, 1, 2, 0)).eval(), inp)
-
-    inp = torch.rand((2, 4, 5, 6))
-    verify_model(torch.nn.ReplicationPad2d((1, 3, 2, 4)).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_replication_pad3d():
-    """test_forward_replication_pad3d"""
-    inp = torch.rand((1, 1, 3, 3, 3))
-    verify_model(torch.nn.ReplicationPad3d(3).eval(), inp)
-    verify_model(torch.nn.ReplicationPad3d((1, 1, 2, 2, 1, 1)).eval(), inp)
-
-    inp = torch.rand((7, 5, 4, 5, 6))
-    verify_model(torch.nn.ReplicationPad3d((2, 3, 2, 5, 1, 4)).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_forward_upsample3d():
-    """test_forward_upsample3d"""
-    inp = torch.arange(1, 9, dtype=torch.float32).view(1, 1, 2, 2, 2)
-    verify_model(torch.nn.Upsample(scale_factor=2, mode="nearest").eval(), inp)
-    verify_model(torch.nn.Upsample(scale_factor=2, mode="trilinear").eval(), inp)
-    verify_model(
-        torch.nn.Upsample(scale_factor=2, mode="trilinear", align_corners=True).eval(), inp
-    )
-
-
-def test_forward_nms():
-    """dynamic Non-Maximum Suppression"""
-    torch.set_grad_enabled(False)
-
-    class NonMaxSupression(Module):
-        def __init__(self, iou_thres):
-            super().__init__()
-            self.iou_threshold = iou_thres
-
-        def forward(self, *args):
-            return torchvision.ops.nms(args[0], args[1], self.iou_threshold)
-
-    # Generate random input data
-    def _gen_rand_inputs(num_boxes):
-        box_len = 4
-        boxes = torch.rand(num_boxes, box_len, dtype=torch.float) * 0.5
-        boxes[:, 2] += boxes[:, 0]
-        boxes[:, 3] += boxes[:, 1]
-        scores = np.linspace(0, 1, num=num_boxes).astype("float32")
-        np.random.shuffle(scores)
-        return boxes, torch.from_numpy(scores)
-
-    targets = ["llvm", "cuda"]
-
-    for num_boxes, iou_thres in [(10, 0.3), (100, 0.5), (500, 0.9)]:
-        in_boxes, in_scores = _gen_rand_inputs(num_boxes)
-        verify_trace_model(NonMaxSupression(iou_thres), [in_boxes, in_scores], targets)
-
-
-def test_forward_roi_align():
-    """ROI align"""
-    torch.set_grad_enabled(False)
-
-    class ROIAlign(Module):
-        def __init__(self, output_sizes, spatial_scale=1.0, sampling_ratio=-1):
-            super().__init__()
-            self.spatial_scale = spatial_scale
-            self.sampling_ratio = sampling_ratio
-            self.output_sizes = output_sizes
-
-        def forward(self, *args):
-            return torchvision.ops.roi_align(
-                args[0],
-                args[1],
-                self.output_sizes,
-                self.spatial_scale,
-                self.sampling_ratio,
-            )
-
-    in_data = torch.Tensor(np.random.uniform(size=(1, 8, 100, 100)))
-    in_boxes = torch.Tensor(np.random.uniform(0.0, 100.0, size=(35, 4)))
-    in_batch = torch.zeros((35, 1), dtype=torch.float)
-    in_boxes = torch.cat([in_batch, in_boxes], dim=1)
-
-    verify_model(ROIAlign(7), [in_data, in_boxes])
-    verify_model(ROIAlign((10, 10), 0.7, 5), [in_data, in_boxes])
-    verify_model(ROIAlign(15, 0.9, 3), [in_data, in_boxes])
-
-
-@tvm.testing.uses_gpu
-def test_conv3d():
-    """test_conv3d"""
-    for ishape in [(1, 32, 16, 16, 16), (1, 32, 9, 15, 15), (1, 32, 13, 7, 7)]:
-        inp = torch.rand(ishape)
-        verify_model(torch.nn.Conv3d(32, 16, (3, 3, 3), padding=(1, 1, 1)).eval(), inp)
-        verify_model(torch.nn.Conv3d(32, 16, (5, 5, 5), padding=(2, 2, 2)).eval(), inp)
-        verify_model(torch.nn.Conv3d(32, 16, kernel_size=1).eval(), inp)
-        # downsample
-        verify_model(torch.nn.Conv3d(32, 16, kernel_size=1, stride=2).eval(), inp)
-
-
-@tvm.testing.uses_gpu
-def test_conv3d_transpose():
-    """test_conv3d_transpose"""
-    for ishape in [(1, 8, 10, 5, 10), (1, 8, 5, 8, 8), (1, 8, 13, 7, 7)]:
-        inp = torch.rand(ishape)
-        verify_model(
-            torch.nn.ConvTranspose3d(
-                in_channels=8, out_channels=33, kernel_size=3, stride=2
-            ).eval(),
-            inp,
-        )
-        verify_model(
-            torch.nn.ConvTranspose3d(
-                in_channels=8,
-                out_channels=20,
-                kernel_size=(3, 5, 2),
-                stride=(2, 1, 1),
-                padding=(0, 4, 2),
-            ).eval(),
-            inp,
-        )
-        verify_model(
-            torch.nn.ConvTranspose3d(in_channels=8, out_channels=20, kernel_size=1).eval(), inp
-        )
-        verify_model(
-            torch.nn.ConvTranspose3d(in_channels=8, out_channels=5, kernel_size=1, stride=2).eval(),
-            inp,
-        )
-
-
-# Model tests
-@tvm.testing.uses_gpu
-def test_resnet18():
-    """test_resnet18"""
-    torch.set_grad_enabled(False)
-    verify_model("resnet18", atol=1e-4, rtol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_squeezenet1_0():
-    """test_squeezenet1_0"""
-    torch.set_grad_enabled(False)
-    verify_model("squeezenet1_0", atol=1e-4, rtol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_squeezenet1_1():
-    """test_squeezenet1_1"""
-    torch.set_grad_enabled(False)
-    verify_model("squeezenet1_1", atol=1e-4, rtol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_densenet121():
-    """test_densenet121"""
-    torch.set_grad_enabled(False)
-    verify_model("densenet121", atol=1e-4, rtol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_inception_v3():
-    """test_inception_v3"""
-    torch.set_grad_enabled(False)
-    verify_model("inception_v3", atol=1e-4, rtol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_googlenet():
-    """test_googlenet"""
-    torch.set_grad_enabled(False)
-    verify_model("googlenet", atol=1e-4, rtol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_mnasnet0_5():
-    """test_mnasnet0_5"""
-    torch.set_grad_enabled(False)
-    verify_model("mnasnet0_5", atol=1e-4, rtol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_mobilenet_v2():
-    """test_mobilenet_v2"""
-    torch.set_grad_enabled(False)
-    verify_model("mobilenet_v2", atol=1e-4, rtol=1e-4)
-
-
-# pylint: disable=pointless-string-statement
-"""
-#TODO: Fix VGG and AlexNet issues (probably due to pooling)
-@tvm.testing.uses_gpu
-def test_alexnet():
-    torch.set_grad_enabled(False)
-    verify_model("alexnet")
-
-@tvm.testing.uses_gpu
-def test_vgg11():
-    torch.set_grad_enabled(False)
-    verify_model("vgg11")
-
-@tvm.testing.uses_gpu
-def test_vgg11_bn():
-    torch.set_grad_enabled(False)
-    verify_model("vgg11_bn")
-"""
-
-
-@tvm.testing.uses_gpu
-def test_custom_conversion_map():
-    """test_custom_conversion_map"""
-
-    def get_roi_align():
-        pool_size = 5
-        n_channels = 2 * (pool_size**2)
-        x = torch.rand(2, n_channels, 10, 10)
-        rois = torch.tensor(
-            [
-                [0, 0, 0, 9, 9],  # format is (xyxy)
-                [0, 0, 5, 4, 9],
-                [0, 5, 5, 9, 9],
-                [1, 0, 0, 9, 9],
-            ],
-            dtype=torch.float,
-        )
-        roi_align = torchvision.ops.RoIAlign(pool_size, spatial_scale=1, sampling_ratio=-1)
-        return roi_align.eval(), [x, rois]
-
-    def convert_roi_align():
-        def _impl(inputs, input_types):
-            spatial_scale = inputs[2]
-            pooled_size = (inputs[3], inputs[4])
-            sampling_ratio = inputs[5]
-            return relay.op.vision.roi_align(
-                inputs[0], inputs[1], pooled_size, spatial_scale, sampling_ratio
-            )
-
-        return _impl
-
-    custom_map = {"torchvision::roi_align": convert_roi_align()}
-    model, inputs = get_roi_align()
-
-    verify_model(model, inputs, custom_map)
-
-
-@tvm.testing.uses_gpu
-def test_segmentation_models():
-    """test_segmentation_models"""
-
-    class SegmentationModelWrapper(Module):
-        def __init__(self, model):
-            super().__init__()
-            self.model = model
-
-        def forward(self, inp):
-            out = self.model(inp)
-            return out["out"]
-
-    fcn = torchvision.models.segmentation.fcn_resnet101(pretrained=True)
-    deeplab = torchvision.models.segmentation.deeplabv3_resnet101(pretrained=True)
-
-    inp = [torch.rand((1, 3, 300, 300), dtype=torch.float)]
-
-    verify_model(SegmentationModelWrapper(fcn.eval()), inp, atol=1e-4, rtol=1e-4)
-    verify_model(SegmentationModelWrapper(deeplab.eval()), inp, atol=1e-4, rtol=1e-4)
-
-
-@tvm.testing.uses_gpu
-def test_3d_models():
-    """test_3d_models"""
-    input_shape = (1, 3, 4, 56, 56)
-    resnet3d = torchvision.models.video.r3d_18(pretrained=True).eval()
-    verify_model(resnet3d, [torch.rand(input_shape)], atol=1e-4, rtol=1e-4)
-
-
-def _get_default_vm_targets():
-    """Get default vm targets"""
-    return ["llvm", "cuda"]
-
-
-def verify_script_model(pt_model, ishapes, targets, idtype=None):
-    """verify_script_model"""
-    script_module = torch.jit.script(pt_model)
-
-    verify_model_vm(script_module, ishapes, idtype=idtype, targets=targets)
-
-
-def verify_trace_model(pt_model, idata, targets):
-    """verify_trace_model"""
-    traced_model = torch.jit.trace(pt_model, idata)
-    ishapes = [data.shape for data in idata]
-    verify_model_vm(traced_model, ishapes, idata=idata, targets=targets)
-
-
-def convert_pt_to_tvm_type(idtype):
-    """Accepts a pytorch dtype and returns string TVM dtype."""
-    # TVM does not support PyTorch complex dtypes
-    if idtype == torch.float64:
-        curr_dtype = "float64"
-    elif idtype == torch.float32:
-        curr_dtype = "float32"
-    elif idtype == torch.float16:
-        curr_dtype = "float16"
-    elif idtype == torch.bfloat16:
-        curr_dtype = "bfloat16"
-    elif idtype == torch.int64:
-        curr_dtype = "int64"
-    elif idtype == torch.int32:
-        curr_dtype = "int32"
-    elif idtype == torch.int16:
-        curr_dtype = "int16"
-    elif idtype == torch.int8:
-        curr_dtype = "int8"
-    elif idtype == torch.uint8:
-        curr_dtype = "uint8"
-    elif idtype == torch.bool:
-        curr_dtype = "bool"
-    else:
-        raise NotImplementedError(f"Unsupported dtype: {idtype}")
-    return curr_dtype
-
-
-def verify_model_vm(input_model, ishapes, idtype=None, idata=None, targets=None):
-    """verify_model_vm"""
-    targets = targets or ["llvm"]
-    if not idtype:
-        idtype = torch.float
-
-    input_names = [f"i{idx}" for idx, _ in enumerate(ishapes)]
-    tvm_dtype = convert_pt_to_tvm_type(idtype)
-    input_dtypes = [tvm_dtype] * len(input_names)
-    input_shapes = list(zip(input_names, list(zip(ishapes, input_dtypes))))
-
-    if idata:
-        input_data = idata
-    # If no input_data provided, generate random data of specified dtype
-    else:
-        if idtype == torch.bool:
-            input_data = [
-                torch.Tensor.bool(torch.randint(low=0, high=2, size=shape)) for shape in ishapes
-            ]
-        # Torch dtype can be float, complex, int, or Bool. Complex not supported,
-        # so if not float or Bool, dtype must be int!
-        elif not idtype.is_floating_point:
-            input_data = [
-                torch.randint(low=0, high=10, size=shape, dtype=idtype) for shape in ishapes
-            ]
-        else:
-            input_data = [torch.randn(shape, dtype=idtype) for shape in ishapes]
-
-    # Compile via VM
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_pytorch(input_model, input_shapes)
-    with tvm.testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_pytorch(input_model, input_shapes)
-    tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-
-    for tgt in targets:
-        if not tvm.testing.device_enabled(tgt):
-            continue
-        print("Running on target", tgt)
-
-        dev = tvm.device(tgt, 0)
-
-        evaluator = relay.create_executor("vm", mod=mod, device=dev, target=tgt).evaluate()
-
-        # Inference
-        for name, inp in zip(input_names, input_data):
-            params[name] = inp.numpy()
-        vm_res = evaluator(**params)
-
-        # Baseline result
-        with torch.no_grad():
-            pt_result = input_model(*input_data)
-
-        # Verify the accuracy
-        if isinstance(pt_result, tuple):
-            # handle multiple outputs
-            for i, pt_result in enumerate(pt_result):
-                tvm_res = vm_res[i].numpy()
-                tvm.testing.assert_allclose(tvm_res, pt_result.numpy(), rtol=1e-5, atol=1e-5)
-        elif not isinstance(pt_result, torch.Tensor):
-            tvm_res = vm_res.numpy().item()
-            assert pt_result == tvm_res
-        else:
-            tvm.testing.assert_allclose(vm_res.numpy(), pt_result.numpy(), rtol=1e-5, atol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_control_flow():
-    """test_control_flow"""
-
-    class SimpleIf(torch.nn.Module):
-        """SimpleIf module"""
-
-        def __init__(self, N, M):
-            super().__init__()
-            self.weight = torch.nn.Parameter(torch.rand(N, M))
-
-        def forward(self, inp):
-            if inp.sum() > 0.0:
-                output = self.weight + inp
-            else:
-                output = self.weight - inp
-            return output
-
-    class NestedIf(torch.nn.Module):
-        """NestedIf module"""
-
-        def __init__(self, N, M):
-            super().__init__()
-            self.weight = torch.nn.Parameter(torch.rand(N, M))
-
-        def forward(self, inp):
-            """forward"""
-            if inp.sum() > 0.0:
-                if inp.mean() > 0.0:
-                    output = self.weight + inp
-                else:
-                    output = self.weight - inp
-            else:
-                if inp.mean() >= 0.0:
-                    output = self.weight * inp
-                else:
-                    output = self.weight / inp
-
-            return output
-
-    class ScalarLoop(torch.nn.Module):
-        """ScalarLoop module"""
-
-        def forward(self, inp):
-            """forward"""
-            a = 0
-            for i in range(inp.size(0)):
-                b = i * i
-                b = b + 1
-                a += b
-            if a != 0:
-                a += 1
-            else:
-                a += 2
-            return a
-
-    class SimpleLoop(torch.nn.Module):
-        def forward(self, inp):
-            a = inp
-            for _ in range(inp.size(0)):
-                b = a * 2.0
-                c = a + b
-                a += c
-            return a
-
-    class LoopWithIf(torch.nn.Module):
-        """LoopWithIf module"""
-
-        def forward(self, inp):
-            a = inp
-            for _ in range(inp.size(0)):
-                b = a * 2.0
-                b = a + b
-                if b.sum() > 0.0:
-                    a += b
-                else:
-                    a -= b
-            return a
-
-    class NestedLoop(torch.nn.Module):
-        def forward(self, inp):
-            a = inp
-            for i in range(inp.size(0)):
-                b = a * float(i)
-                for j in range(inp.size(1)):
-                    a += b * float(j)
-            return a
-
-    class SimpleScalarWhileLoop(torch.nn.Module):
-        """SimpleScalarWhileLoop module"""
-
-        def forward(self, inp):
-            """forward"""
-            a = 1
-            i = 0
-            while i <= inp.size(0):
-                a += i
-                i += 2
-            i = 0
-            # also test constant init cond
-            while i < 10:
-                a += i
-                i += 3
-            return a
-
-    class SimpleWhileLoop(torch.nn.Module):
-        def forward(self, inp):
-            a = inp
-            i = 0
-            while i < inp.size(0):
-                a += a * float(i) * 2.0
-                i += 1
-            return a
-
-    models = [
-        SimpleIf(10, 20),
-        NestedIf(10, 20),
-        ScalarLoop(),
-        SimpleLoop(),
-        LoopWithIf(),
-        SimpleScalarWhileLoop(),
-        SimpleWhileLoop(),
-        NestedLoop(),
-    ]
-
-    for pt_model in models:
-        verify_script_model(pt_model.eval(), [(10, 20)], _get_default_vm_targets())
-
-
-@tvm.testing.uses_gpu
-def test_simple_rnn():
-    """test_simple_rnn"""
-    # The mixed tracing and scripting example from
-    # https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html#mixing-scripting-and-tracing
-    class DecisionGate(torch.nn.Module):
-        def forward(self, x):
-            if x.sum() > 0:
-                return x
-            else:
-                return -x
-
-    class Cell(torch.nn.Module):
-        def __init__(self, dg):
-            super().__init__()
-            self.dg = dg
-            self.linear = torch.nn.Linear(4, 4)
-
-        def forward(self, x, h):
-            new_h = torch.tanh(self.dg(self.linear(x)) + h)
-            return new_h, new_h
-
-    class RNNLoop(torch.nn.Module):
-        """Pytorch RNNLoop module"""
-
-        def __init__(self):
-            super().__init__()
-            x = torch.rand(10, 4, dtype=torch.float)
-            h = torch.rand(10, 4, dtype=torch.float)
-            self.cell = torch.jit.trace(Cell(DecisionGate()), (x, h))
-
-        def forward(self, xs):
-            h = torch.zeros(10, 4, dtype=torch.float)
-            y = torch.zeros(10, 4, dtype=torch.float)
-            for i in range(xs.size(0)):
-                y, h = self.cell(xs[i], h)
-            return y
-
-    verify_script_model(RNNLoop().eval(), [(10, 10, 4)], _get_default_vm_targets())
-
-
-@tvm.testing.uses_gpu
-def test_forward_reduce_sum():
-    """test_forward_reduce_sum"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class ReduceSum1(Module):
-        def forward(self, *args):
-            return args[0].sum(1)
-
-    class ReduceSum2(Module):
-        def forward(self, *args):
-            return args[0].sum(dim=1, keepdim=False)
-
-    class ReduceSum3(Module):
-        def forward(self, *args):
-            return args[0].sum(dim=2, keepdim=True)
-
-    class ReduceSum4(Module):
-        def forward(self, *args):
-            return args[0].sum(dim=(2, 3), keepdim=True)
-
-    class ReduceSum5(Module):
-        def forward(self, *args):
-            return args[0].sum(dim=(2, 3), keepdim=False)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(ReduceSum1().float().eval(), input_data=input_data)
-    verify_model(ReduceSum2().float().eval(), input_data=input_data)
-    verify_model(ReduceSum3().float().eval(), input_data=input_data)
-    verify_model(ReduceSum4().float().eval(), input_data=input_data)
-    verify_model(ReduceSum5().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_reduce_prod():
-    """test_forward_reduce_prod"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class ReduceProd1(Module):
-        def forward(self, *args):
-            return args[0].prod(1)
-
-    class ReduceProd2(Module):
-        def forward(self, *args):
-            return args[0].prod(dim=1, keepdim=False)
-
-    class ReduceProd3(Module):
-        def forward(self, *args):
-            return args[0].prod(dim=2, keepdim=True)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(ReduceProd1().float().eval(), input_data=input_data)
-    verify_model(ReduceProd2().float().eval(), input_data=input_data)
-    verify_model(ReduceProd3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_argmin():
-    """test_forward_argmin"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class ArgMin1(Module):
-        def forward(self, *args):
-            return args[0].argmin(1)
-
-    class ArgMin2(Module):
-        def forward(self, *args):
-            return args[0].argmin(dim=1, keepdim=False)
-
-    class ArgMin3(Module):
-        def forward(self, *args):
-            return args[0].argmin(dim=2, keepdim=True)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(ArgMin1().float().eval(), input_data=input_data)
-    verify_model(ArgMin2().float().eval(), input_data=input_data)
-    verify_model(ArgMin3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_argmax():
-    """test_forward_argmax"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class ArgMax1(Module):
-        def forward(self, *args):
-            return args[0].argmax(1)
-
-    class ArgMax2(Module):
-        def forward(self, *args):
-            return args[0].argmax(dim=1, keepdim=False)
-
-    class ArgMax3(Module):
-        def forward(self, *args):
-            return args[0].argmax(dim=2, keepdim=True)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(ArgMax1().float().eval(), input_data=input_data)
-    verify_model(ArgMax2().float().eval(), input_data=input_data)
-    verify_model(ArgMax3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_std():
-    """test_forward_std"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Std1(Module):
-        def forward(self, *args):
-            return args[0].std(1, unbiased=False)
-
-    class Std2(Module):
-        def forward(self, *args):
-            return args[0].std(dim=1, keepdim=False, unbiased=False)
-
-    class Std3(Module):
-        def forward(self, *args):
-            return args[0].std(dim=2, keepdim=True, unbiased=False)
-
-    class Std4(Module):
-        def forward(self, *args):
-            return args[0].std(dim=(2, 3), keepdim=True, unbiased=False)
-
-    class Std5(Module):
-        def forward(self, *args):
-            return args[0].std(dim=(2, 3), keepdim=False, unbiased=False)
-
-    class Std6(Module):
-        def forward(self, *args):
-            return args[0].std(unbiased=False)
-
-    class Std7(Module):
-        def forward(self, *args):
-            return args[0].std(dim=1, keepdim=False, unbiased=True)
-
-    class Std8(Module):
-        def forward(self, *args):
-            return args[0].std(dim=(2, 3), keepdim=True, unbiased=True)
-
-    class Std9(Module):
-        def forward(self, *args):
-            return args[0].std(unbiased=True)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Std1().float().eval(), input_data=input_data)
-    verify_model(Std2().float().eval(), input_data=input_data)
-    verify_model(Std3().float().eval(), input_data=input_data)
-    verify_model(Std4().float().eval(), input_data=input_data)
-    verify_model(Std5().float().eval(), input_data=input_data)
-    verify_model(Std6().float().eval(), input_data=input_data)
-    verify_model(Std7().float().eval(), input_data=input_data)
-    verify_model(Std8().float().eval(), input_data=input_data)
-    verify_model(Std9().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_var_mean():
-    """test_forward_var_mean"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class VarMean1(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], 1, unbiased=False)
-
-    class VarMean2(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], dim=1, keepdim=False, unbiased=False)
-
-    class VarMean3(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], dim=2, keepdim=True, unbiased=False)
-
-    class VarMean4(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], dim=(2, 3), keepdim=True, unbiased=False)
-
-    class VarMean5(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], dim=(2, 3), keepdim=False, unbiased=False)
-
-    class VarMean6(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], unbiased=False)
-
-    class VarMean7(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], dim=1, keepdim=False, unbiased=True)
-
-    class VarMean8(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], dim=(2, 3), keepdim=True, unbiased=True)
-
-    class VarMean9(Module):
-        def forward(self, *args):
-            return torch.var_mean(args[0], unbiased=True)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(VarMean1().float().eval(), input_data=input_data)
-    verify_model(VarMean2().float().eval(), input_data=input_data)
-    verify_model(VarMean3().float().eval(), input_data=input_data)
-    verify_model(VarMean4().float().eval(), input_data=input_data)
-    verify_model(VarMean5().float().eval(), input_data=input_data)
-    verify_model(VarMean6().float().eval(), input_data=input_data)
-    verify_model(VarMean7().float().eval(), input_data=input_data)
-    verify_model(VarMean8().float().eval(), input_data=input_data)
-    verify_model(VarMean9().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_variance():
-    """test_forward_variance"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Variance1(Module):
-        def forward(self, *args):
-            return args[0].var(1, unbiased=False)
-
-    class Variance2(Module):
-        def forward(self, *args):
-            return args[0].var(dim=1, keepdim=False, unbiased=False)
-
-    class Variance3(Module):
-        def forward(self, *args):
-            return args[0].var(dim=2, keepdim=True, unbiased=False)
-
-    class Variance4(Module):
-        def forward(self, *args):
-            return args[0].var(dim=(2, 3), keepdim=True, unbiased=False)
-
-    class Variance5(Module):
-        def forward(self, *args):
-            return args[0].var(dim=(2, 3), keepdim=False, unbiased=False)
-
-    class Variance6(Module):
-        def forward(self, *args):
-            return args[0].var(unbiased=False)
-
-    class Variance7(Module):
-        def forward(self, *args):
-            return args[0].var(dim=1, keepdim=False, unbiased=True)
-
-    class Variance8(Module):
-        def forward(self, *args):
-            return args[0].var(dim=(2, 3), keepdim=True, unbiased=True)
-
-    class Variance9(Module):
-        def forward(self, *args):
-            return args[0].var(unbiased=True)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Variance1().float().eval(), input_data=input_data)
-    verify_model(Variance2().float().eval(), input_data=input_data)
-    verify_model(Variance3().float().eval(), input_data=input_data)
-    verify_model(Variance4().float().eval(), input_data=input_data)
-    verify_model(Variance5().float().eval(), input_data=input_data)
-    verify_model(Variance6().float().eval(), input_data=input_data)
-    verify_model(Variance7().float().eval(), input_data=input_data)
-    verify_model(Variance8().float().eval(), input_data=input_data)
-    verify_model(Variance9().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_rsub():
-    """test_forward_rsub"""
-    torch.set_grad_enabled(False)
-
-    class Rsub1(Module):
-        def forward(self, *args):
-            return torch.rsub(args[0], args[1])
-
-    class Rsub2(Module):
-        def forward(self, *args):
-            return torch.rsub(args[0], args[1], alpha=0.5)
-
-    d1 = torch.rand([1, 3]).float()
-    d2 = torch.rand([1, 3]).float()
-    d3 = torch.rand([1, 3]).int()
-    verify_model(Rsub1().float().eval(), input_data=[d1, d2])
-    verify_model(Rsub1().float().eval(), input_data=[d1, d3])
-    verify_model(Rsub2().float().eval(), input_data=[d1, d2])
-    verify_model(Rsub2().float().eval(), input_data=[d1, d3])
-
-    d1 = torch.rand([1, 3]).half()
-    d2 = torch.rand([1, 3]).half()
-    verify_model(Rsub1().half().eval(), input_data=[d1, d2])
-    verify_model(Rsub1().half().eval(), input_data=[d1, d3])
-    verify_model(Rsub2().half().eval(), input_data=[d1, d2])
-    verify_model(Rsub2().half().eval(), input_data=[d1, d3])
-
-
-@tvm.testing.uses_gpu
-def test_forward_embedding():
-    """test_forward_embedding"""
-    torch.set_grad_enabled(False)
-
-    input_data = torch.randint(0, 10, [2, 4]).long()
-    verify_model(torch.nn.Embedding(10, 3).float().eval(), input_data=input_data)
-
-    input_data = torch.randint(0, 4, [2, 3, 4]).long()
-    verify_model(torch.nn.Embedding(4, 5, sparse=False).float().eval(), input_data=input_data)
-
-    input_data = torch.randint(0, 4, [2, 3, 4]).long()
-    verify_model(torch.nn.Embedding(4, 5, sparse=True).float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_onehot():
-    """test_forward_onehot"""
-    torch.set_grad_enabled(False)
-
-    class OneHot1(Module):
-        def forward(self, *args):
-            return torch.nn.functional.one_hot(args[0], num_classes=3)
-
-    class OneHot2(Module):
-        def forward(self, *args):
-            return torch.nn.functional.one_hot(args[0], num_classes=5)
-
-    input_data = torch.arange(0, 5) % 3
-    verify_model(OneHot1().float().eval(), input_data=input_data)
-
-    input_data = torch.arange(0, 5) % 4
-    verify_model(OneHot2().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_isfinite():
-    """test_forward_isfinite"""
-    torch.set_grad_enabled(False)
-
-    class IsFinite1(Module):
-        def forward(self, *args):
-            return torch.isfinite(args[0])
-
-    input_data = torch.tensor([1, float("inf"), 2, float("-inf"), float("nan")]).float()
-    verify_model(IsFinite1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_isnan():
-    """test_forward_isnan"""
-    torch.set_grad_enabled(False)
-
-    class IsNan1(Module):
-        def forward(self, *args):
-            return torch.isnan(args[0])
-
-    input_data = torch.tensor([1, float("inf"), 2, float("-inf"), float("nan")]).float()
-    verify_model(IsNan1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_isinf():
-    """test_forward_isinf"""
-    torch.set_grad_enabled(False)
-
-    class IsInf1(Module):
-        def forward(self, *args):
-            return torch.isinf(args[0])
-
-    input_data = torch.tensor([1, float("inf"), 2, float("-inf"), float("nan")]).float()
-    verify_model(IsInf1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_clamp():
-    """test_forward_clamp"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class Clamp1(Module):
-        def forward(self, *args):
-            return torch.clamp(args[0], min=-0.5, max=0.5)
-
-    class Clamp2(Module):
-        def forward(self, *args):
-            return torch.clamp(args[0], min=-0.3)
-
-    class Clamp3(Module):
-        def forward(self, *args):
-            return torch.clamp(args[0], max=1.0)
-
-    class Clamp_MinExpr_MaxConstant(Module):
-        def forward(self, *args):
-            h, w = args[0].shape[2:]
-            amin = h / 100.0
-            return torch.clamp(args[0], min=amin, max=w)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Clamp1().float().eval(), input_data=input_data)
-    verify_model(Clamp2().float().eval(), input_data=input_data)
-    verify_model(Clamp3().float().eval(), input_data=input_data)
-    verify_model(Clamp_MinExpr_MaxConstant().float().eval(), input_data=input_data)
-
-    verify_model(lambda inp: torch.clamp_min(inp, 0.5), input_data)
-    inp_uint8 = torch.randint(low=0, high=256, size=(100, 100), dtype=torch.uint8)
-    verify_model(lambda inp: torch.clamp_max(inp, 125), inp_uint8)
-
-
-@tvm.testing.uses_gpu
-def test_forward_clamp_():
-    """test_forward_clamp_"""
-    torch.set_grad_enabled(False)
-
-    class ClampInPlace(Module):
-        def __init__(self, i_min, i_max):
-            super().__init__()
-            self.min = i_min
-            self.max = i_max
-
-        def forward(self, *args):
-            return torch.clamp_(args[0], self.min, self.max)
-
-    for ishape, i_min, i_max in (([4, 8], 0.1, 0.9), ([7, 6], 0.2, 0.5)):
-        input_data = torch.rand(ishape).float()
-        verify_model(ClampInPlace(i_min, i_max).float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_ones():
-    """test_forward_ones"""
-    torch.set_grad_enabled(False)
-
-    class Ones1(Module):
-        def forward(self, *args):
-            return torch.ones(2, 3)
-
-    verify_model(Ones1().float().eval(), input_data=[])
-
-
-@tvm.testing.uses_gpu
-def test_forward_ones_like():
-    """test_forward_ones_like"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class OnesLike1(Module):
-        def forward(self, *args):
-            return torch.ones_like(args[0])
-
-    class OnesLike2(Module):
-        def forward(self, *args):
-            return torch.ones_like(args[0], dtype=torch.int8)
-
-    class OnesLike3(Module):
-        def forward(self, *args):
-            return torch.ones_like(args[0], dtype=torch.float)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(OnesLike1().float().eval(), input_data=input_data)
-    verify_model(OnesLike2().float().eval(), input_data=input_data)
-    verify_model(OnesLike3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_new_ones():
-    """test_forward_new_ones"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    def test_func(input_tensor):
-        return input_tensor.new_ones([3, 10, 10])
-
-    verify_model_with_input(test_func, [torch.rand(input_shape).float()])
-
-
-@tvm.testing.uses_gpu
-def test_forward_zeros():
-    """test_forward_zeros"""
-    torch.set_grad_enabled(False)
-
-    class Zeros1(Module):
-        def forward(self, *args):
-            return torch.zeros(2, 3)
-
-    verify_model(Zeros1().float().eval(), input_data=[])
-
-
-def test_forward_zero_():
-    def test_func(x):
-        return x.zero_()
-
-    verify_model_with_input(test_func, [torch.rand([1, 3, 10, 10]).float()])
-
-
-@tvm.testing.uses_gpu
-def test_forward_zeros_like():
-    """test_forward_zeros_like"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class ZerosLike1(Module):
-        def forward(self, *args):
-            return torch.zeros_like(args[0])
-
-    class ZerosLike2(Module):
-        def forward(self, *args):
-            return torch.zeros_like(args[0], dtype=torch.int32)
-
-    class ZerosLike3(Module):
-        def forward(self, *args):
-            return torch.zeros_like(args[0], dtype=torch.float)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(ZerosLike1().float().eval(), input_data=input_data)
-    verify_model(ZerosLike2().float().eval(), input_data=input_data)
-    verify_model(ZerosLike3().float().eval(), input_data=input_data)
-
-
-def test_forward_new_zeros():
-    def test_func(x):
-        return x.new_zeros((2, 3))
-
-    verify_model_with_input(test_func, [torch.rand([1, 3, 10, 10]).float()])
-
-
-@tvm.testing.uses_gpu
-def test_forward_full():
-    """test_forward_full"""
-    torch.set_grad_enabled(False)
-
-    class Full1(Module):
-        def forward(self, *args):
-            return torch.full((2, 3), 3.14)
-
-    class Full2(Module):
-        def forward(self, *args):
-            return torch.full((1, 2, 3), 1.0, dtype=torch.int32)
-
-    verify_model(Full1().float().eval(), input_data=[])
-    verify_model(Full2().float().eval(), input_data=[])
-
-
-@tvm.testing.uses_gpu
-def test_forward_adaptive_max_pool1d():
-    """test_forward_adaptive_max_pool1d"""
-    torch.set_grad_enabled(False)
-    input_data = [torch.randn([2, 2, 4], dtype=torch.float32)]
-    m = torch.nn.AdaptiveMaxPool1d(3)
-
-    verify_model(m.float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_instance_norm():
-    """test_forward_instance_norm"""
-
-    class instance_norm(Module):
-        def forward(self, *args):
-            return torch.nn.functional.instance_norm(args[0], use_input_stats=True)
-
-    m = instance_norm().float().eval()
-    input_data = torch.randn([1, 1, 1, 2], dtype=torch.float64)
-
-    verify_model(m.float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_full_like():
-    """test_forward_full_like"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    class FullLike1(Module):
-        def forward(self, *args):
-            return torch.full_like(args[0], 3.14)
-
-    class FullLike2(Module):
-        def forward(self, *args):
-            return torch.full_like(args[0], 22.22, dtype=torch.int32)
-
-    class FullLike3(Module):
-        def forward(self, *args):
-            return torch.full_like(args[0], 1.4, dtype=torch.float)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(FullLike1().float().eval(), input_data=input_data)
-    verify_model(FullLike2().float().eval(), input_data=input_data)
-    verify_model(FullLike3().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_new_full():
-    """test_forward_new_full"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    def test_func(input_tensor):
-        return input_tensor.new_full([2, 3], 1)
-
-    verify_model_with_input(test_func, [torch.rand(input_shape).float()])
-
-
-def test_forward_fill_():
-    def test_func(x):
-        return x.fill_(3)
-
-    verify_model_with_input(test_func, [torch.rand([1, 3, 10, 10]).float()])
-
-
-def test_forward_fill_with_div():
-    """test_forward_fill_with_div"""
-
-    def test_func(x):
-        y = torch.div(torch.tensor(6.0), torch.tensor(2.0))
-        return x.fill_(y)
-
-    verify_model_with_input(test_func, [torch.rand([1, 3, 10, 10]).float()])
-
-
-@tvm.testing.uses_gpu
-def test_forward_linspace():
-    """test_forward_linspace"""
-    torch.set_grad_enabled(False)
-
-    class Linspace1(Module):
-        def forward(self, *args):
-            return torch.linspace(5, 10, steps=100)
-
-    class Linspace2(Module):
-        def forward(self, *args):
-            return torch.linspace(-10, 10, steps=5)
-
-    class Linspace3(Module):
-        def forward(self, *args):
-            return torch.linspace(start=-10, end=10, steps=5)
-
-    class Linspace4(Module):
-        def forward(self, *args):
-            return torch.linspace(start=-10, end=10, steps=1)
-
-    class Linspace5(Module):
-        def forward(self, *args):
-            return torch.linspace(1, 2, 1, dtype=torch.int32)
-
-    class Linspace6(Module):
-        def forward(self, *args):
-            return torch.linspace(start=1, end=6, steps=2)
-
-    class Linspace7(Module):
-        def forward(self, *args):
-            return torch.linspace(1, 4, steps=100, dtype=torch.float32)
-
-    class Linspace8(Module):
-        def forward(self, *args):
-            return torch.linspace(1, 2, 1, dtype=torch.int16)
-
-    class Linspace9(Module):
-        def forward(self, *args):
-            return torch.linspace(0, 8, 10)
-
-    verify_model(Linspace1().float().eval())
-    verify_model(Linspace2().float().eval())
-    verify_model(Linspace3().float().eval())
-    verify_model(Linspace4().float().eval())
-    verify_model(Linspace5().float().eval())
-    verify_model(Linspace6().float().eval())
-    verify_model(Linspace7().float().eval())
-    verify_model(Linspace8().float().eval())
-    verify_model(Linspace9().float().eval())
-
-
-@tvm.testing.uses_gpu
-def test_forward_take():
-    """test_forward_take"""
-    torch.set_grad_enabled(False)
-
-    class Take1(Module):
-        def forward(self, *args):
-            indices = torch.tensor([[0, 0], [1, 0]])
-            if torch.cuda.is_available():
-                indices = indices.cuda()
-            return torch.take(args[0], indices)
-
-    class Take2(Module):
-        def forward(self, *args):
-            return torch.take(args[0], args[1])
-
-    input_data = torch.tensor([[1, 2], [3, 4]])
-    verify_model(Take1().float().eval(), input_data=input_data)
-    indices = torch.tensor([[0, 0], [1, 0]])
-    verify_model(Take2().float().eval(), input_data=[input_data, indices])
-    indices = torch.tensor([0, -1])
-    verify_model(Take2().float().eval(), input_data=[input_data, indices])
-
-
-@tvm.testing.uses_gpu
-def test_forward_topk():
-    """test_forward_topk"""
-    torch.set_grad_enabled(False)
-
-    class Topk1(Module):
-        def forward(self, *args):
-            return torch.topk(args[0], k=3)
-
-    class Topk2(Module):
-        def forward(self, *args):
-            return torch.topk(args[0], k=3, dim=-2)
-
-    class Topk3(Module):
-        def forward(self, *args):
-            return torch.topk(args[0], k=3, dim=3)
-
-    class Topk4(Module):
-        def forward(self, *args):
-            return torch.topk(args[0], k=3, largest=True)
-
-    class Topk5(Module):
-        def forward(self, *args):
-            return torch.topk(args[0], k=3, largest=False)
-
-    class Topk6(Module):
-        def forward(self, *args):
-            return torch.topk(args[0], k=3, sorted=True)
-
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(Topk1().float().eval(), input_data=input_data)
-    verify_model(Topk2().float().eval(), input_data=input_data)
-    verify_model(Topk3().float().eval(), input_data=input_data)
-    verify_model(Topk4().float().eval(), input_data=input_data)
-    verify_model(Topk5().float().eval(), input_data=input_data)
-    verify_model(Topk6().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_logical_not():
-    """test_forward_logical_not"""
-    torch.set_grad_enabled(False)
-
-    class LogicalNot1(Module):
-        def forward(self, *args):
-            return torch.logical_not(args[0])
-
-    input_data = torch.tensor([True, False])
-    verify_model(LogicalNot1().float().eval(), input_data=input_data)
-
-    input_data = torch.tensor([0, 1, -10], dtype=torch.int8)
-    verify_model(LogicalNot1().float().eval(), input_data=input_data)
-
-    input_data = torch.tensor([0.0, 1.5, -10.0], dtype=torch.double)
-    verify_model(LogicalNot1().float().eval(), input_data=input_data)
-
-    input_data = torch.tensor([0.0, 1.0, -10.0], dtype=torch.int32)
-    verify_model(LogicalNot1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_bitwise_not():
-    """test_forward_bitwise_not"""
-    torch.set_grad_enabled(False)
-
-    class BitwiseNot1(Module):
-        def forward(self, *args):
-            return torch.bitwise_not(args[0])
-
-    input_data = torch.tensor([0, 1, -10], dtype=torch.int8)
-    verify_model(BitwiseNot1().float().eval(), input_data=input_data)
-
-    input_data = torch.tensor([0.0, 1.0, -10.0], dtype=torch.int32)
-    verify_model(BitwiseNot1().float().eval(), input_data=input_data)
-
-    input_data = torch.tensor([True, False])
-    verify_model(BitwiseNot1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_bitwise_xor():
-    """test_forward_bitwise_xor"""
-    torch.set_grad_enabled(False)
-
-    class BitwiseXor1(Module):
-        def forward(self, *args):
-            return torch.bitwise_xor(args[0], args[1])
-
-    class BitwiseXor2(Module):
-        def forward(self, *args):
-            rhs = torch.tensor([1, 0, 3], dtype=torch.int8)
-            if torch.cuda.is_available():
-                rhs = rhs.cuda()
-            return torch.bitwise_xor(args[0], rhs)
-
-    lhs = torch.tensor([-1, -2, 3], dtype=torch.int8)
-    rhs = torch.tensor([1, 0, 3], dtype=torch.int8)
-    verify_model(BitwiseXor1().float().eval(), input_data=[lhs, rhs])
-
-    lhs = torch.tensor([True, True, False])
-    rhs = torch.tensor([False, True, False])
-    verify_model(BitwiseXor1().float().eval(), input_data=[lhs, rhs])
-
-    lhs = torch.tensor([-1, -2, 3], dtype=torch.int8)
-    verify_model(BitwiseXor2().float().eval(), input_data=[lhs])
-
-
-def test_forward_bitwise_and():
-    """test_forward_bitwise_and"""
-    torch.set_grad_enabled(False)
-
-    class BitwiseAnd1(Module):
-        def forward(self, *args):
-            return torch.bitwise_and(args[0], args[1])
-
-    class BitwiseAnd2(Module):
-        def forward(self, *args):
-            rhs = torch.tensor([1, 0, 3], dtype=torch.int8)
-            if torch.cuda.is_available():
-                rhs = rhs.cuda()
-            return torch.bitwise_and(args[0], rhs)
-
-    lhs = torch.tensor([-1, -2, 3], dtype=torch.int8)
-    rhs = torch.tensor([1, 0, 3], dtype=torch.int8)
-    verify_model(BitwiseAnd1().float().eval(), input_data=[lhs, rhs])
-
-    lhs = torch.tensor([True, True, False])
-    rhs = torch.tensor([False, True, False])
-    verify_model(BitwiseAnd1().float().eval(), input_data=[lhs, rhs])
-
-    lhs = torch.tensor([-1, -2, 3], dtype=torch.int8)
-    verify_model(BitwiseAnd2().float().eval(), input_data=[lhs])
-
-
-@tvm.testing.uses_gpu
-def test_forward_logical_xor():
-    """test_forward_logical_xor"""
-    torch.set_grad_enabled(False)
-
-    class LogicalXor1(Module):
-        def forward(self, *args):
-            return torch.logical_xor(args[0], args[1])
-
-    class LogicalXor2(Module):
-        def forward(self, *args):
-            rhs = torch.tensor([1, 0, 3], dtype=torch.int8)
-            if torch.cuda.is_available():
-                rhs = rhs.cuda()
-            return torch.logical_xor(args[0], rhs)
-
-    lhs = torch.tensor([-1, -2, 3], dtype=torch.int8)
-    rhs = torch.tensor([1, 0, 3], dtype=torch.int8)
-    verify_model(LogicalXor1().float().eval(), input_data=[lhs, rhs])
-
-    lhs = torch.tensor([True, True, False])
-    rhs = torch.tensor([False, True, False])
-    verify_model(LogicalXor1().float().eval(), input_data=[lhs, rhs])
-
-    lhs = torch.tensor([-1, -2, 3], dtype=torch.int8)
-    verify_model(LogicalXor2().float().eval(), input_data=[lhs])
-
-
-@tvm.testing.uses_gpu
-def test_forward_unary():
-    """test_forward_unary"""
-    torch.set_grad_enabled(False)
-
-    class Sqrt1(Module):
-        def forward(self, *args):
-            return torch.sqrt(args[0])
-
-    class RSqrt1(Module):
-        def forward(self, *args):
-            return torch.rsqrt(args[0])
-
-    class Ceil1(Module):
-        def forward(self, *args):
-            return torch.ceil(args[0])
-
-    class Floor1(Module):
-        def forward(self, *args):
-            return torch.floor(args[0])
-
-    class Round1(Module):
-        def forward(self, *args):
-            return torch.round(args[0])
-
-    class Cos1(Module):
-        def forward(self, *args):
-            return torch.cos(args[0])
-
-    class Sin1(Module):
-        def forward(self, *args):
-            return torch.sin(args[0])
-
-    class Tan1(Module):
-        def forward(self, *args):
-            return torch.tan(args[0])
-
-    class Tanh1(Module):
-        def forward(self, *args):
-            return torch.tanh(args[0])
-
-    class Acos1(Module):
-        def forward(self, *args):
-            return torch.acos(args[0])
-
-    class Asin1(Module):
-        def forward(self, *args):
-            return torch.asin(args[0])
-
-    class Atan1(Module):
-        def forward(self, *args):
-            return torch.atan(args[0])
-
-    class Log1(Module):
-        def forward(self, *args):
-            return torch.log(args[0])
-
-    class Exp1(Module):
-        def forward(self, *args):
-            return torch.exp(args[0])
-
-    class Erf1(Module):
-        def forward(self, *args):
-            return torch.erf(args[0])
-
-    class Trunc1(Module):
-        def forward(self, *args):
-            return torch.trunc(args[0])
-
-    class Sign1(Module):
-        def forward(self, *args):
-            return torch.sign(args[0])
-
-    class Neg1(Module):
-        def forward(self, *args):
-            return torch.neg(args[0])
-
-    class Sinh1(Module):
-        def forward(self, *args):
-            return torch.sinh(args[0])
-
-    class Cosh1(Module):
-        def forward(self, *args):
-            return torch.cosh(args[0])
-
-    class Log2_1(Module):
-        def forward(self, *args):
-            return torch.log2(args[0])
-
-    class Log10_1(Module):
-        def forward(self, *args):
-            return torch.log10(args[0])
-
-    class Log1p_1(Module):
-        def forward(self, *args):
-            return torch.log1p(args[0])
-
-    class Square(Module):
-        def forward(self, *args):
-            return torch.square(args[0])
-
-    input_shape = [1, 3, 10, 10]
-    input_data = torch.rand(input_shape).float()
-    verify_model(Square().float().eval(), input_data=input_data)
-    verify_model(Sqrt1().float().eval(), input_data=input_data)
-    verify_model(RSqrt1().float().eval(), input_data=input_data)
-    verify_model(Ceil1().float().eval(), input_data=input_data)
-    verify_model(Floor1().float().eval(), input_data=input_data)
-    verify_model(Round1().float().eval(), input_data=input_data)
-    verify_model(Cos1().float().eval(), input_data=input_data)
-    verify_model(Cosh1().float().eval(), input_data=input_data)
-    verify_model(Sin1().float().eval(), input_data=input_data)
-    verify_model(Sinh1().float().eval(), input_data=input_data)
-    verify_model(Tan1().float().eval(), input_data=input_data)
-    verify_model(Tanh1().float().eval(), input_data=input_data)
-    verify_model(Acos1().float().eval(), input_data=input_data)
-    verify_model(Asin1().float().eval(), input_data=input_data)
-    verify_model(Atan1().float().eval(), input_data=input_data)
-    verify_model(Log1().float().eval(), input_data=input_data)
-    verify_model(Log2_1().float().eval(), input_data=input_data)
-    verify_model(Log10_1().float().eval(), input_data=input_data)
-    verify_model(Log1p_1().float().eval(), input_data=input_data)
-    verify_model(Exp1().float().eval(), input_data=input_data)
-    verify_model(Erf1().float().eval(), input_data=input_data)
-    verify_model(Trunc1().float().eval(), input_data=input_data)
-    verify_model(Sign1().float().eval(), input_data=input_data)
-    verify_model(Neg1().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_tril():
-    """test_forward_tril"""
-    torch.set_grad_enabled(False)
-
-    def test_func(input_data):
-        return torch.tril(input_data)
-
-    input_data = torch.rand([3, 3]).float()
-    verify_model(test_func, input_data=input_data)
-    input_data = torch.rand([1, 3, 10, 10]).float()
-    verify_model(test_func, input_data=input_data)
-
-    def test_func1(input_data):
-        return torch.tril(input_data, 1)
-
-    input_data = torch.rand([3, 3]).float()
-    verify_model(test_func1, input_data=input_data)
-    input_data = torch.rand([1, 3, 10, 10]).float()
-    verify_model(test_func1, input_data=input_data)
-
-    def test_func2(input_data):
-        return torch.tril(input_data, -1)
-
-    input_data = torch.rand([3, 3]).float()
-    verify_model(test_func2, input_data=input_data)
-    input_data = torch.rand([1, 3, 10, 10]).float()
-    verify_model(test_func2, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_triu():
-    """test_forward_triu"""
-    torch.set_grad_enabled(False)
-
-    def test_func(input_data):
-        return torch.triu(input_data)
-
-    input_data = torch.rand([3, 3]).float()
-    verify_model(test_func, input_data=input_data)
-    input_data = torch.rand([1, 3, 10, 10]).float()
-    verify_model(test_func, input_data=input_data)
-
-    def test_func1(input_data):
-        return torch.triu(input_data, 1)
-
-    input_data = torch.rand([3, 3]).float()
-    verify_model(test_func1, input_data=input_data)
-    input_data = torch.rand([1, 3, 10, 10]).float()
-    verify_model(test_func1, input_data=input_data)
-
-    def test_func2(input_data):
-        return torch.triu(input_data, -1)
-
-    input_data = torch.rand([3, 3]).float()
-    verify_model(test_func2, input_data=input_data)
-    input_data = torch.rand([1, 3, 10, 10]).float()
-    verify_model(test_func2, input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_where():
-    """test_forward_where"""
-    torch.set_grad_enabled(False)
-
-    class Where1(Module):
-        def forward(self, *args):
-            y = torch.ones([3, 2])
-            if torch.cuda.is_available():
-                y = y.cuda()
-            return torch.where(args[0] > 0, args[0], y)
-
-    class Where2(Module):
-        def forward(self, *args):
-            return torch.where(args[0] > 0, args[0], args[1])
-
-    class Where3(Module):
-        def forward(self, *args):
-            return torch.where(args[0])[0]
-
-    x = torch.rand([3, 2]).float()
-    verify_model(Where1(), input_data=[x])
-    y = torch.rand([3, 2])
-    verify_model(Where2(), input_data=[x, y])
-
-    # a single argument variant, equivalent to torch.nonzero(..., as_tuple=True)
-    inp = torch.rand([10])
-    inp[3:8] = 0
-    verify_trace_model(Where3(), [inp], ["llvm"])
-
-
-@tvm.testing.uses_gpu
-def test_forward_addcdiv():
-    """test_forward_addcdiv"""
-    torch.set_grad_enabled(False)
-
-    class Addcdiv1(Module):
-        def forward(self, *args):
-            t1 = torch.ones([3, 1])
-            t2 = torch.ones([1, 3])
-            if torch.cuda.is_available():
-                t1 = t1.cuda()
-                t2 = t2.cuda()
-            return torch.addcdiv(args[0], 0.1, t1, t2)
-
-    class Addcdiv2(Module):
-        def forward(self, *args):
-            return torch.addcdiv(args[0], 0.5, args[1], args[2])
-
-    input_data = torch.rand([1, 3]).float()
-    verify_model(Addcdiv1().float().eval(), input_data=input_data)
-    t1 = torch.rand([3, 1]).float()
-    t2 = torch.rand([1, 3]).float()
-    verify_model(Addcdiv2().float().eval(), input_data=[input_data, t1, t2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_addcmul():
-    """test_forward_addcmul"""
-    torch.set_grad_enabled(False)
-
-    class Addcmul1(Module):
-        def forward(self, *args):
-            t1 = torch.ones([3, 1])
-            t2 = torch.ones([1, 3])
-            if torch.cuda.is_available():
-                t1 = t1.cuda()
-                t2 = t2.cuda()
-            return torch.addcmul(args[0], 0.1, t1, t2)
-
-    class Addcmul2(Module):
-        def forward(self, *args):
-            return torch.addcmul(args[0], 0.5, args[1], args[2])
-
-    input_data = torch.rand([1, 3]).float()
-    verify_model(Addcmul1().float().eval(), input_data=input_data)
-    t1 = torch.rand([3, 1]).float()
-    t2 = torch.rand([1, 3]).float()
-    verify_model(Addcmul2().float().eval(), input_data=[input_data, t1, t2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_true_divide():
-    """test_forward_true_divide"""
-    if package_version.parse(torch.__version__) < package_version.parse("1.5.0"):
-        return
-    torch.set_grad_enabled(False)
-
-    class TrueDivide(Module):
-        def forward(self, *args):
-            return torch.true_divide(args[0], args[1])
-
-    dividend = torch.rand([5, 3]).float()
-    # divisor could be either tensor or scalar
-    divisor_tensor = torch.rand([5, 3]).float() + 0.5
-    divisor_scalar = torch.tensor(1.0, dtype=torch.float32)
-    verify_model(
-        TrueDivide().float().eval(), input_data=[dividend, divisor_tensor], atol=1e-4, rtol=1e-4
-    )
-    verify_model(
-        TrueDivide().float().eval(), input_data=[dividend, divisor_scalar], atol=1e-4, rtol=1e-4
-    )
-
-
-@tvm.testing.uses_gpu
-def test_forward_is_floating_point():
-    """test_forward_is_floating_point"""
-    torch.set_grad_enabled(False)
-
-    class IsFloatingPoint(Module):
-        def forward(self, arg):
-            # `torch.jit.trace` cannot accept something that outputs
-            # a Bool, so `torch.jit.script` will be used instead
-            return torch.is_floating_point(arg)
-
-    targets = _get_default_vm_targets()
-    verify_script_model(IsFloatingPoint(), [(1, 1)], targets, idtype=torch.float64)
-    verify_script_model(IsFloatingPoint(), [(1, 1)], targets, idtype=torch.float32)
-    verify_script_model(IsFloatingPoint(), [(1, 1)], targets, idtype=torch.float16)
-    # todo(dvisnty): Run the test for bfloat16 when full bfloat16 support is implemented
-    # verify_script_model(IsFloatingPoint(), [(1,1)], targets, idtype=torch.bfloat16)
-    verify_script_model(IsFloatingPoint(), [(1, 1)], targets, idtype=torch.int64)
-    verify_script_model(IsFloatingPoint(), [(1, 1)], targets, idtype=torch.int32)
-    verify_script_model(IsFloatingPoint(), [(1, 1)], targets, idtype=torch.int16)
-    verify_script_model(IsFloatingPoint(), [(1, 1)], targets, idtype=torch.int8)
-    verify_script_model(IsFloatingPoint(), [(1, 1)], targets, idtype=torch.uint8)
-
-
-@tvm.testing.uses_gpu
-def test_forward_traced_function():
-    """test_forward_traced_function"""
-
-    def fn(t1, t2):
-        return t1 + t2
-
-    tensor1 = torch.randn(3, 4)
-    tensor2 = torch.randn(3, 4)
-    verify_model(fn, input_data=[tensor1, tensor2])
-
-
-@tvm.testing.uses_gpu
-def test_forward_dtypes():
-    """test_forward_dtypes"""
-
-    def fn(t1, t2):
-        return 2.5 * t1 + t2
-
-    for dt in [torch.int32, torch.int64, torch.double]:
-        tensor1 = torch.randn(3, 4).to(dtype=dt)
-        tensor2 = torch.randn(3, 4).to(dtype=dt)
-        verify_model(fn, input_data=[tensor1, tensor2])
-
-    class ModuleWithIntParameters(Module):
-        def __init__(self, arr):
-            super().__init__()
-            self.param = torch.nn.Parameter(torch.LongTensor(arr), requires_grad=False)
-
-        def forward(self, x):
-            return x.long() + self.param
-
-    shape = (10, 10)
-    param = torch.ones(shape, dtype=torch.long)
-    inp = torch.ones(shape, dtype=torch.int)
-    verify_model(ModuleWithIntParameters(param), input_data=inp)
-
-
-@tvm.testing.uses_gpu
-def test_weight_names():
-    tm = torch.jit.trace(torch.nn.Linear(3, 4), [torch.randn(2, 3)])
-    _, params = relay.frontend.from_pytorch(tm, [("input", (2, 3))])
-    keys = [key.split(".")[-1] for key in params.keys()]
-    assert set(keys) == set(n for n, p in tm.named_parameters())
-
-
-@tvm.testing.uses_gpu
-def test_duplicate_weight_use():
-    """test_duplicate_weight_use"""
-    # The test cases doesn't make any sense as a neural network,
-    # the issue popped up in shared input/output embeddings of bert,
-    # but this is quicker
-    class Test(Module):
-        def __init__(self):
-            super().__init__()
-            self.lin = torch.nn.Linear(5, 3)
-
-        def forward(self, x):
-            x = self.lin(x)
-            x = x @ self.lin.weight
-            return x
-
-    verify_model(Test(), input_data=[torch.randn(5, 5)])
-
-
-@tvm.testing.uses_gpu
-def test_forward_matmul():
-    """test_forward_matmul"""
-    torch.set_grad_enabled(False)
-
-    class MatMul1(Module):
-        def forward(self, *args):
-            return torch.matmul(args[0], args[1])
-
-    # vector x vector - 1D x 1D
-    tensor1 = torch.randn(4)
-    tensor2 = torch.randn(4)
-    verify_model(MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.dense"])
-
-    # vector x matrix - 1D x 2D
-    tensor1 = torch.randn(4)
-    tensor2 = torch.randn(4, 3)
-    verify_model(MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.dense"])
-
-    # vector x batched_matrix - 1D x ND
-    tensor1 = torch.randn(5)
-    tensor2 = torch.randn(2, 3, 5, 4)
-    verify_model(
-        MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.batch_matmul"]
-    )
-
-    # matrix x vector - 2D - 1D
-    tensor1 = torch.randn(3, 4)
-    tensor2 = torch.randn(4)
-    verify_model(MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.dense"])
-
-    # matrix x matrix - 2D x 2D
-    tensor1 = torch.randn(10, 4)
-    tensor2 = torch.randn(4, 10)
-    verify_model(MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.dense"])
-
-    # broadcasted matrix x batched matrix - 2D x ND
-    tensor1 = torch.randn(10, 4)
-    tensor2 = torch.randn(2, 3, 4, 5)
-    verify_model(
-        MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.batch_matmul"]
-    )
-
-    # batched matrix x vector - ND x 1D
-    tensor1 = torch.randn(2, 3, 4, 5)
-    tensor2 = torch.randn(5)
-    verify_model(
-        MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.batch_matmul"]
-    )
-
-    # batched matrix x broadcasted matrix - ND x 2D
-    tensor1 = torch.randn(10, 3, 4)
-    tensor2 = torch.randn(4, 5)
-    verify_model(
-        MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.batch_matmul"]
-    )
-
-    # batched matrix x batched matrix - ND x ND
-    tensor1 = torch.randn(2, 10, 3, 4)
-    tensor2 = torch.randn(2, 10, 4, 5)
-    verify_model(
-        MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.batch_matmul"]
-    )
-
-    # batched matrix x broadcasted matrix - ND x ND
-    tensor1 = torch.randn(2, 5, 3, 4)
-    tensor2 = torch.randn(2, 1, 4, 5)
-    verify_model(
-        MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.batch_matmul"]
-    )
-
-    # broadcasted matrix x batched matrix - ND x ND
-    tensor1 = torch.randn(2, 1, 5, 4)
-    tensor2 = torch.randn(2, 5, 4, 3)
-    verify_model(
-        MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.batch_matmul"]
-    )
-
-    # broadcasted matrix x broadcasted matrix - ND x ND
-    tensor1 = torch.randn(3, 2, 3, 1, 5, 4)
-    tensor2 = torch.randn(2, 1, 5, 4, 3)
-    verify_model(
-        MatMul1().float().eval(), input_data=[tensor1, tensor2], expected_ops=["nn.batch_matmul"]
-    )
-
-
-@pytest.mark.skip(reason="unsupported op aten::lift_fresh")
-def test_forward_index():
-    """test_forward_index"""
-    torch.set_grad_enabled(False)
-    input_shape = [3, 4, 5, 6]
-
-    class Index0(Module):
-        def forward(self, x):
-            return x[[0, 1], [0, 2], :2, 4]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Index0().eval(), input_data=input_data)
-
-    class Index1(Module):
-        def forward(self, x):
-            return x[[0], [1, 2, 3, 0], [3, 1, 2, 2], [4, 2, 1, 0]]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Index1().eval(), input_data=input_data)
-
-    class Index2(Module):
-        def forward(self, x):
-            return x[None, [2, 2]]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Index2().eval(), input_data=input_data)
-
-    class Index3(Module):
-        def forward(self, x):
-            return x[None, [0, 1, 2], 1, [2, 3, 4]]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Index3().eval(), input_data=input_data)
-
-    class Index4(Module):
-        def forward(self, x):
-            return x[None, [0, 0], None, np.array([[0], [1], [2]]), None]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Index4().eval(), input_data=input_data)
-
-    class Index5(Module):
-        def forward(self, x):
-            return x[None, None, [0, 0], np.array([[0], [1], [2]]), None]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Index5().eval(), input_data=input_data)
-
-    class Index6(Module):
-        def forward(self, x):
-            return x[None, 1, None, [1, 2, 3]]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Index6().eval(), input_data=input_data)
-
-    def test_fn_bool_mask():
-        return lambda data, mask: data[0, mask]
-
-    data = torch.tensor([[1, 2, 3], [4, 5, 6]])
-    mask = torch.tensor([True, True, False])
-
-    verify_trace_model(test_fn_bool_mask(), [data, mask], ["llvm", "cuda"])
-
-
-def test_logsumexp():
-    """test_logsumexp"""
-
-    class Logsumexp(Module):
-        def __init__(self, dim, keepdim=False):
-            super().__init__()
-            self.dim = dim
-            self.keepdim = keepdim
-
-        def forward(self, x):
-            return torch.logsumexp(x, self.dim, self.keepdim)
-
-    input_shape = (100, 100)
-    input_data = torch.rand(input_shape)
-
-    verify_model(Logsumexp(0), input_data=input_data)
-    verify_model(Logsumexp(0, keepdim=True), input_data=input_data)
-    # Also test on double
-    verify_model(Logsumexp(1, keepdim=True), input_data=input_data.double())
-
-
-def test_stack():
-    """test_stack"""
-
-    class Stack(torch.nn.Module):
-        def __init__(self, axis=0):
-            super().__init__()
-            self.axis = axis
-
-        def forward(self, x):
-            return torch.stack((x, x), dim=self.axis)
-
-    inp = torch.randn(8, 8, 8)
-    verify_model(Stack(), input_data=inp)
-    verify_model(Stack(axis=-1), input_data=inp)
-    verify_model(Stack(axis=3), input_data=inp)
-    verify_model(Stack(axis=-4), input_data=inp)
-
-
-def test_stack_dynamic():
-    """test_stack_dynamic"""
-
-    class Stack(torch.nn.Module):
-        def forward(self, x):
-            tensor_list = []
-            for i in range(x.size(0)):
-                # this is a workaround to avoid generating impure aten::append op
-                tensor_list += [x[i]]
-            # relay tensor array only supports stacking on the first axis
-            return torch.stack(tensor_list, dim=0)
-
-    verify_script_model(Stack(), [(8, 8, 8)], _get_default_vm_targets())
-
-
-def test_forward_unbind():
-    """test_forward_unbind"""
-
-    class Unbind(torch.nn.Module):
-        def __init__(self, axis=0):
-            super().__init__()
-            self.axis = axis
-
-        def forward(self, x):
-            return torch.unbind(x, self.axis)
-
-    inp = torch.randn(8, 8, 8)
-    verify_model(Unbind(0), input_data=inp)
-    verify_model(Unbind(1), input_data=inp)
-    verify_model(Unbind(2), input_data=inp)
-
-
-def test_forward_nonzero():
-    """test_forward_nonzero"""
-
-    class Nonzero(Module):
-        def __init__(self, as_tuple=False):
-            super().__init__()
-            self.as_tuple = as_tuple
-
-        def forward(self, data):
-            return torch.nonzero(data, as_tuple=self.as_tuple)
-
-    inp = torch.Tensor(np.array([[0, 1, 0], [2, 0, 9], [-1, -1, 0]]).astype("float32"))
-    verify_trace_model(Nonzero(), [inp], ["llvm"])
-    verify_trace_model(Nonzero(as_tuple=True), [inp], ["llvm"])
-
-
-def test_forward_scatter():
-    """test_forward_scatter"""
-    # integer cannot be traced
-    def test_fn_scatter(dim):
-        return lambda data, index, src: torch.scatter(data, dim=dim, index=index, src=src)
-
-    def test_fn_scatter_add(dim):
-        return lambda data, index, src: torch.scatter_add(data, dim=dim, index=index, src=src)
-
-    in_data = torch.zeros(3, 5)
-    in_index = torch.tensor([[0, 1, 2, 0, 0], [2, 0, 0, 1, 2]])
-    in_src = torch.rand(2, 5)
-
-    targets = ["llvm", "cuda"]
-    verify_trace_model(test_fn_scatter(0), [in_data, in_index, in_src], targets)
-    verify_trace_model(test_fn_scatter_add(0), [in_data, in_index, in_src], targets)
-
-    in_data = torch.zeros(2, 4)
-    in_index = torch.tensor([[2], [3]])
-    in_src = torch.rand(2, 1)
-
-    verify_trace_model(test_fn_scatter(1), [in_data, in_index, in_src], targets)
-    verify_trace_model(test_fn_scatter_add(1), [in_data, in_index, in_src], targets)
-
-    # Check empty indices
-    in_data = torch.zeros(2, 4)
-    in_index = torch.empty((0,))
-    in_src = torch.rand(2, 1)
-    verify_trace_model(test_fn_scatter(0), [in_data, in_index, in_src], targets)
-    verify_trace_model(test_fn_scatter_add(0), [in_data, in_index, in_src], targets)
-
-    # Check scalar source
-    # TODO(vvchernov): Scalar source is supported on TVM side, but torch failes with
-    # input Tuple(Tensor, Tensor, float). What does scalar mean for torch in this case?
-
-
-def test_forward_scatter_reduce():
-    """test_forward_scatter_reduce"""
-    # integer cannot be traced
-    def test_fn_scatter_reduce(dim, reduce):
-        return lambda data, index, src: torch.scatter_reduce(
-            data, dim=dim, index=index, src=src, reduce=reduce
-        )
-
-    in_data = torch.rand(3, 5) - 1
-    in_index = torch.tensor([[0, 1, 2, 0, 0], [2, 0, 0, 1, 2]])
-    in_src = torch.rand(2, 5) - 1
-
-    targets = ["llvm", "cuda"]
-    for reduce in ["sum", "prod", "amin", "amax", "mean"]:
-        verify_trace_model(test_fn_scatter_reduce(0, reduce), [in_data, in_index, in_src], targets)
-
-    in_data = torch.rand(2, 4) - 1
-    in_index = torch.tensor([[2], [3]])
-    in_src = torch.rand(2, 1) - 1
-
-    for reduce in ["sum", "prod", "amin", "amax", "mean"]:
-        verify_trace_model(test_fn_scatter_reduce(1, reduce), [in_data, in_index, in_src], targets)
-
-
-def test_forward_index_put():
-    """test_forward_index_put"""
-    # torch.index_put for 2D tensor and default accumulate (False)
-    def test_fn_index_put2():
-        return lambda data, xidx, yidx, values: torch.index_put(
-            data, indices=[xidx, yidx], values=values
-        )
-
-    # torch.index_put for 3D tensor and accumulate=True
-    def test_fn_index_put3a():
-        return lambda data, xidx, yidx, zidx, values: torch.index_put(
-            data, indices=[xidx, yidx, zidx], values=values, accumulate=True
-        )
-
-    shape = (3, 5)
-    in_data = torch.zeros(shape)
-    xidx = torch.tensor([0, 1, 2, 2])
-    yidx = torch.tensor([0, 1, 3, 4])
-    values = torch.tensor([2.0, 4.0, 7.0, 9.0])
-
-    targets = ["llvm", "cuda"]
-    verify_trace_model(test_fn_index_put2(), [in_data, xidx, yidx, values], targets)
-
-    shape = (3, 5, 3)
-    in_data = torch.zeros(shape)
-    xidx = torch.tensor([0, 1, 2, 2, 0])
-    yidx = torch.tensor([0, 1, 3, 4, 0])
-    zidx = torch.tensor([0, 1, 1, 2, 0])
-    values = torch.tensor([2.0, 4.0, 7.0, 9.0, 1.0])
-
-    verify_trace_model(test_fn_index_put3a(), [in_data, xidx, yidx, zidx, values], targets)
-
-
-def test_numel():
-    """test_numel"""
-
-    class Numel(Module):
-        def forward(self, data):
-            return torch.tensor(torch.numel(data))
-
-    targets = _get_default_vm_targets()
-    verify_script_model(Numel(), [(1,)], targets)
-    verify_script_model(Numel(), [(3, 5)], targets)
-    verify_script_model(Numel(), [(3, 5, 8)], targets)
-
-
-def test_empty():
-    """Test for aten::empty"""
-
-    def test_func():
-        return torch.empty([1, 3, 10, 10])
-
-    verify_model_with_input(test_func, [], assert_shape_only=True)
-
-
-def test_empty_like():
-    """Test for aten::empty_like"""
-
-    def test_func(data):
-        return torch.empty_like(data)
-
-    verify_model_with_input(test_func, [torch.rand([1, 3, 10, 10]).float()], assert_shape_only=True)
-
-
-@tvm.testing.uses_gpu
-def test_new_empty():
-    """test_forward_new_ones"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3, 10, 10]
-
-    def test_func(input_tensor):
-        return input_tensor.new_empty([3, 10, 10])
-
-    verify_model_with_input(test_func, [torch.rand(input_shape).float()], assert_shape_only=True)
-
-    def test_func1(input_tensor):
-        return input_tensor.new_empty([3, 10, 10], dtype=torch.int32)
-
-    verify_model_with_input(test_func1, [torch.rand(input_shape).float()], assert_shape_only=True)
-
-
-def test_randn():
-    """Test for aten::randn"""
-
-    def test_func():
-        return torch.randn([1, 3, 10, 10])
-
-    verify_model_with_input(test_func, [], assert_shape_only=True, validate_structural_equal=False)
-
-    def test_func1():
-        return torch.randn(1, 3, 10, 10)
-
-    verify_model_with_input(test_func1, [], assert_shape_only=True, validate_structural_equal=False)
-
-
-def test_forward_pretrained_bert_base_uncased():
-    ######################################################################
-    # This is an example how to run BERT models using TVM
-    # ---------------------------------------------------
-    """
-    Refer the bert example given in https://pypi.org/project/pytorch-pretrained-bert
-
-    # To get started, pretrained bert package needs to be installed as prerequisite.
-
-    .. code-block:: bash
-
-        # install bert package
-        pip install pytorch_pretrained_bert==0.6.2 --user
-    """
-    # pylint: disable=import-outside-toplevel
-    try:
-        from pytorch_pretrained_bert import BertForMaskedLM, BertTokenizer
-    except ImportError:
-        print("Torch pretrained bert package must be installed to run this script.")
-        return
-
-    ######################################################################
-    # Load the tokenizer and tokenize the input
-    # -----------------------------------------
-
-    # Load pre-trained model tokenizer (vocabulary)
-    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
-
-    # Tokenized input
-    text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-    tokenized_text = tokenizer.tokenize(text)
-
-    # Mask a token that we will try to predict back with `BertForMaskedLM`
-    masked_index = 8
-    tokenized_text[masked_index] = "[MASK]"
-    assert tokenized_text == [
-        "[CLS]",
-        "who",
-        "was",
-        "jim",
-        "henson",
-        "?",
-        "[SEP]",
-        "jim",
-        "[MASK]",
-        "was",
-        "a",
-        "puppet",
-        "##eer",
-        "[SEP]",
-    ]
-
-    # Convert token to vocabulary indices
-    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-    # Define sentence A and B indices associated to 1st and 2nd sentences (see paper)
-    segments_ids = [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]
-
-    # Convert inputs to PyTorch tensors
-    tokens_tensor = torch.tensor([indexed_tokens])
-    segments_tensors = torch.tensor([segments_ids])
-
-    ######################################################################
-    # Load a pretrained PyTorch model bert-base-uncased
-    # -------------------------------------------------
-
-    # Bert Model with a language modeling
-    model = BertForMaskedLM.from_pretrained("bert-base-uncased")
-    model.eval()
-
-    ######################################################################
-    # Predict all tokens with pytorch
-    # -------------------------------
-
-    with torch.no_grad():
-        torch_preds = model(tokens_tensor, segments_tensors)
-
-    ######################################################################
-    # Make TorchScripted model via jit trace
-    # --------------------------------------
-
-    scripted_model = torch.jit.trace(model, (tokens_tensor, segments_tensors)).eval()
-
-    ######################################################################
-    # Import the graph to Relay
-    # -------------------------
-    # Convert PyTorch graph to Relay graph. The input name can be arbitrary.
-
-    input_1 = "input_ids"
-    input_2 = "input.2"
-    shape_list = [(input_1, list(tokens_tensor.shape)), (input_2, list(segments_tensors.shape))]
-
-    mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
-
-    ######################################################################
-    # Compile the model with relay
-    # ----------------------------
-
-    target = "llvm"
-    with tvm.transform.PassContext(opt_level=3):
-        relay_graph, relay_lib, relay_params = relay.build(mod, target=target, params=params)
-
-    ######################################################################
-    # Execute on TVM
-    # --------------
-
-    dev = tvm.device(target, 0)
-    relay_model = graph_executor.create(relay_graph, relay_lib, dev)
-    relay_model.set_input(**relay_params)
-    relay_model.set_input(input_1, tokens_tensor)
-    relay_model.set_input(input_2, segments_tensors)
-    relay_model.run()
-    compiled_output = relay_model.get_output(0).numpy()
-
-    ######################################################################
-    # Validate the outputs
-    # --------------------
-    # Compare the torch and tvm outputs
-
-    tvm.testing.assert_allclose(torch_preds, compiled_output, rtol=1e-3, atol=1e-3)
-
-    ######################################################################
-    # Process the output
-    # ------------------
-    # Process the model output to token.
-
-    # Torch output to token
-    torch_pred_idx = torch.argmax(torch_preds[0, masked_index]).item()
-    torch_pred_token = tokenizer.convert_ids_to_tokens([torch_pred_idx])[0]
-
-    # TVM output to token
-    tvm_pred_idx = compiled_output[0, masked_index].argmax()
-    tvm_pred_token = tokenizer.convert_ids_to_tokens([tvm_pred_idx])[0]
-
-    assert torch_pred_idx == tvm_pred_idx
-    assert torch_pred_token == tvm_pred_token
-
-    # Print the outputs
-    print(f"Torch top-1 id: {torch_pred_idx}, token: {torch_pred_idx}")
-    print(f"TVM   top-1 id: {tvm_pred_idx}, token: {tvm_pred_token}")
-
-
-@pytest.mark.skipif(
-    platform.machine() == "aarch64",
-    reason="Currently failing on AArch64",
-)
-def test_convert_torch_script_with_input_types():
-    """test_convert_torch_script_with_input_types"""
-
-    def model_fn(x, y):
-        x = x.to(dtype=torch.int32)
-        y = x + y
-        return y
-
-    ishape = (4, 5)
-    input_x = torch.rand(ishape, dtype=torch.float32)
-    input_y = torch.randint(low=0, high=100, size=ishape, dtype=torch.int32)
-    inputs = [input_x, input_y]
-
-    verify_model(model_fn, input_data=inputs)
-
-
-def test_bincount():
-    """test_bincount"""
-
-    def test_fn(x, weights=None):
-        return torch.bincount(x, weights=weights)
-
-    inp = torch.randint(0, 100, (10000,), dtype=torch.int64)
-    weights = torch.linspace(0, 100, steps=10000)
-
-    targets = ["llvm", "cuda"]
-    verify_trace_model(test_fn, [inp], targets)
-    verify_trace_model(test_fn, [inp, weights], targets)
-
-
-def test_hard_swish():
-    """test_hard_swish"""
-    examples = [torch.rand(8).float(), torch.rand(8, 10).float(), torch.rand(1, 1, 10).float()]
-    for input_data in examples:
-        verify_model(torch.nn.Hardswish().eval(), input_data=input_data)
-        verify_model(torch.nn.Hardswish(inplace=True).eval(), input_data=input_data)
-
-
-def test_hard_sigmoid():
-    """test_hard_sigmoid"""
-    examples = [torch.rand(8).float(), torch.rand(8, 10).float(), torch.rand(1, 1, 10).float()]
-    for input_data in examples:
-        verify_model(torch.nn.Hardsigmoid().eval(), input_data=input_data)
-        verify_model(torch.nn.Hardsigmoid(inplace=True).eval(), input_data=input_data)
-
-
-def test_cumsum():
-    """test_cumsum"""
-
-    def test_fn(dim, dtype=None):
-        return lambda x: torch.cumsum(x, dim=dim, dtype=dtype)
-
-    inp = torch.randint(0, 100, (10000,), dtype=torch.int32)
-    verify_model(test_fn(0), [inp])
-    verify_model(test_fn(0), [inp.to(torch.int64)])
-    verify_model(test_fn(0, dtype=torch.int64), [inp.to(torch.int64)])
-
-    inp = torch.randn((100, 100), dtype=torch.float32)
-    verify_model(test_fn(dim=0, dtype=torch.float64), [inp])
-    verify_model(test_fn(dim=1), [inp])
-
-    inp = torch.randn((100, 100), dtype=torch.float32) > 0.5
-    verify_model(test_fn(dim=0, dtype=torch.int32), [inp])
-
-
-def test_masked_fill():
-    """test_transformer"""
-
-    def test_fn(x, mask):
-        return torch.masked_fill(x, mask, 0.0)
-
-    inp = torch.randn(100, 100)
-    verify_model(test_fn, [inp, inp > 0.5])
-    verify_model(test_fn, [inp.to(torch.float64), inp > 0.5])
-
-
-def test_transformer():
-    """test_transformer"""
-    model = torch.nn.Transformer(d_model=256, nhead=8, num_encoder_layers=6, num_decoder_layers=6)
-    model = model.eval()
-    src = torch.rand((10, 32, 256))
-    tgt = torch.rand((20, 32, 256))
-    verify_model(model.eval(), input_data=[src, tgt])
-
-
-def test_argsort():
-    """test_argsort"""
-
-    def test_fn(dim, descending):
-        return lambda x: torch.argsort(x, dim=dim, descending=descending)
-
-    inp = torch.randn(100)
-    verify_model(test_fn(0, True), [inp])
-    verify_model(test_fn(0, False), [inp])
-
-    inp = torch.randn(100, 100)
-    verify_model(test_fn(0, True), [inp])
-    verify_model(test_fn(0, False), [inp])
-    verify_model(test_fn(1, True), [inp])
-    verify_model(test_fn(1, False), [inp])
-
-
-def test_sort():
-    """test_sort"""
-
-    def test_fn(dim, descending):
-        return lambda x: torch.sort(x, dim=dim, descending=descending)
-
-    inp = torch.randn(100)
-    verify_model(test_fn(0, True), [inp])
-    verify_model(test_fn(-1, False), [inp])
-
-    inp = torch.randn(100, 100)
-    verify_model(test_fn(0, True), [inp])
-    verify_model(test_fn(-2, False), [inp])
-    verify_model(test_fn(1, True), [inp])
-    verify_model(test_fn(-1, False), [inp])
-
-
-def test_logical_and():
-    """test_logical_and"""
-
-    def test_fn(x, y):
-        return torch.logical_and(x, y)
-
-    a = torch.tensor([0, 1, 10, 0], dtype=torch.int8)
-    b = torch.tensor([4, 0, 1, 0], dtype=torch.int8)
-    verify_model(test_fn, [a, b])
-
-    a = torch.tensor([True, False, True])
-    b = torch.tensor([True, False, False])
-    verify_model(test_fn, [a, b])
-
-
-def test_logical_or():
-    """test_logical_or"""
-
-    def test_fn(x, y):
-        return torch.logical_or(x, y)
-
-    a = torch.tensor([0, 1, 10, 0], dtype=torch.int8)
-    b = torch.tensor([4, 0, 1, 0], dtype=torch.int8)
-    verify_model(test_fn, [a, b])
-
-    a = torch.tensor([True, False, True])
-    b = torch.tensor([True, False, False])
-    verify_model(test_fn, [a, b])
-
-
-def test_masked_select():
-    """test_masked_select"""
-
-    def test_fn(x, mask):
-        return torch.masked_select(x, mask)
-
-    for shape in [(10,), (3, 4), (16, 32, 64)]:
-        x = torch.randn(*shape)
-        mask = x.ge(0.5)
-        verify_trace_model(test_fn, [x, mask], ["llvm", "cuda"])
-
-
-def test_unique():
-    """test_unique"""
-
-    def test_fn(is_sorted, return_inverse, return_counts):
-        return lambda x: torch.unique(x, is_sorted, return_inverse, return_counts)
-
-    in_data = torch.randint(0, 20, (10,), dtype=torch.int32)
-    targets = ["llvm", "cuda"]
-    verify_trace_model(test_fn(True, True, True), [in_data], targets)
-    verify_trace_model(test_fn(True, False, True), [in_data], targets)
-    verify_trace_model(test_fn(True, True, False), [in_data], targets)
-    verify_trace_model(test_fn(True, False, True), [in_data], targets)
-    in_data = torch.randint(0, 20, (20,), dtype=torch.int64)
-    verify_trace_model(test_fn(True, True, True), [in_data], targets)
-    verify_trace_model(test_fn(True, False, True), [in_data], targets)
-    verify_trace_model(test_fn(True, True, False), [in_data], targets)
-    verify_trace_model(test_fn(True, False, True), [in_data], targets)
-
-
-def test_forward_nll_loss():
-    """test_forward_nll_loss"""
-    torch.set_grad_enabled(False)
-    N, C = 10, 3
-    predictions = torch.rand((N, C)).float()
-    targets = torch.randint(0, 3, (N,))
-    weights = torch.tensor([1, 2, 3]).float()
-    verify_model(torch.nn.NLLLoss().eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.NLLLoss(weight=weights).eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.NLLLoss(ignore_index=1).eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.NLLLoss(reduction="sum").eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.NLLLoss(reduction="none").eval(), input_data=[predictions, targets])
-
-    # multidimension nll loss (aten::nll_loss2d)
-    d1, d2 = 2, 3
-    predictions = torch.rand((N, C, d1, d2)).float()
-    targets = torch.randint(0, 3, (N, d1, d2))
-    verify_model(torch.nn.NLLLoss().eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.NLLLoss(weight=weights).eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.NLLLoss(ignore_index=1).eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.NLLLoss(reduction="sum").eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.NLLLoss(reduction="none").eval(), input_data=[predictions, targets])
-
-
-def test_cross_entropy_loss():
-    """test_cross_entropy_loss"""
-    torch.set_grad_enabled(False)
-    N, C = 10, 3
-    # class indices
-    predictions = torch.rand((N, C)).float()
-    targets = torch.randint(0, 3, (N,))
-    weights = torch.tensor([1, 2, 3]).float()
-    verify_model(torch.nn.CrossEntropyLoss().eval(), input_data=[predictions, targets])
-    verify_model(
-        torch.nn.CrossEntropyLoss(weight=weights).eval(), input_data=[predictions, targets]
-    )
-
-    # class probabilities
-    predictions = torch.randn(N, C).float()
-    targets = torch.randn(N, C)
-    verify_model(torch.nn.CrossEntropyLoss().eval(), input_data=[predictions, targets])
-
-
-def test_forward_l1_loss():
-    """test_forward_l1_loss"""
-    torch.set_grad_enabled(False)
-    N, C = 10, 3
-    predictions = torch.rand((N, C)).float()
-    targets = torch.rand((N, C)).float()
-    verify_model(torch.nn.L1Loss().eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.L1Loss(reduction="sum").eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.L1Loss(reduction="none").eval(), input_data=[predictions, targets])
-
-    # multidimension l1 loss
-    d1, d2 = 2, 3
-    predictions = torch.rand((N, C, d1, d2)).float()
-    targets = torch.rand((N, C, d1, d2)).float()
-    verify_model(torch.nn.L1Loss().eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.L1Loss(reduction="sum").eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.L1Loss(reduction="none").eval(), input_data=[predictions, targets])
-
-
-def test_forward_mse_loss():
-    """test_forward_mse_loss"""
-    torch.set_grad_enabled(False)
-    N, C = 10, 3
-    predictions = torch.rand((N, C)).float()
-    targets = torch.rand((N, C)).float()
-    verify_model(torch.nn.MSELoss().eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.MSELoss(reduction="sum").eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.MSELoss(reduction="none").eval(), input_data=[predictions, targets])
-
-    # multidimension mse loss
-    d1, d2 = 2, 3
-    predictions = torch.rand((N, C, d1, d2)).float()
-    targets = torch.rand((N, C, d1, d2)).float()
-    verify_model(torch.nn.MSELoss().eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.MSELoss(reduction="sum").eval(), input_data=[predictions, targets])
-    verify_model(torch.nn.MSELoss(reduction="none").eval(), input_data=[predictions, targets])
-
-
-@tvm.testing.uses_gpu
-def test_forward_flip():
-    """Test for aten::flip"""
-    torch.set_grad_enabled(False)
-
-    class Flip(Module):
-        def __init__(self, axis=0):
-            super().__init__()
-            self.axis = axis
-
-        def forward(self, x):
-            return x.flip(self.axis)
-
-    input_t = torch.randn(2, 3, 4)
-    verify_model(Flip(axis=[0]), input_data=input_t)
-    verify_model(Flip(axis=[1]), input_data=input_t)
-    verify_model(Flip(axis=[2]), input_data=input_t)
-    verify_model(Flip(axis=[-1]), input_data=input_t)
-    verify_model(Flip(axis=[0, 1]), input_data=input_t)
-
-
-def test_annotate_span():
-    """test_annotate_span"""
-    model = torchvision.models.resnet18().eval()
-    inp = torch.randn([1, 3, 224, 224])
-    trace = torch.jit.trace(model, inp).eval()
-    mod, _ = relay.frontend.from_pytorch(
-        trace, [("input", inp.shape)], use_parser_friendly_name=True
-    )
-    relay.transform.AnnotateSpans()(mod)
-
-
-@tvm.testing.uses_gpu
-def test_all_any():
-    """test_all_any"""
-
-    def test_fn(f, dim=None, keepdim=False):
-        return lambda x: f(x, dim=dim, keepdim=keepdim)
-
-    def test_fn_no_arg(f):
-        return lambda x: f(x)  # pylint: disable=unnecessary-lambda
-
-    for f in [torch.all, torch.any]:
-        verify_model(test_fn(f, 0), [torch.rand(1, 2).bool()])
-        verify_model(test_fn(f, 0), [torch.arange(0, 3).to(torch.uint8)])
-        verify_model(test_fn(f, 1), [torch.rand(4, 2).bool()])
-        verify_model(test_fn(f, 0, keepdim=True), [torch.rand(4, 2).bool()])
-        verify_model(test_fn_no_arg(f), [torch.rand(1, 2).bool()])
-        verify_model(test_fn_no_arg(f), [torch.arange(0, 3).to(torch.uint8)])
-
-
-@tvm.testing.uses_gpu
-def test_searchsorted():
-    """test_searchsorted"""
-
-    def test_fn(out_int32=False, right=False):
-        return lambda x, y: torch.searchsorted(x, y, out_int32=out_int32, right=right)
-
-    sorted_sequence = torch.tensor([[1, 3, 5, 7, 9], [2, 4, 6, 8, 10]])
-    values = torch.tensor([[3, 6, 9], [3, 6, 9]])
-    verify_model(test_fn(), [sorted_sequence, values])
-    verify_model(test_fn(out_int32=True), [sorted_sequence[0], values[0]])
-    verify_model(test_fn(right=True), [sorted_sequence, values])
-
-    sorted_sequence_1d = torch.tensor([1, 3, 5, 7, 9])
-    values = torch.tensor([[3, 6, 9], [4, 2, 7]])
-    verify_model(test_fn(), [sorted_sequence_1d, values])
-
-    verify_model(test_fn(), [sorted_sequence_1d, torch.tensor(6)])
-
-
-@tvm.testing.uses_gpu
-def test_bucketize():
-    """test_bucketize"""
-
-    def test_fn(out_int32=False, right=False):
-        return lambda x, y: torch.bucketize(x, y, out_int32=out_int32, right=right)
-
-    boundaries = torch.tensor([1, 3, 5, 7, 9])
-    values = torch.tensor([3, 6, 9])
-
-    verify_model(test_fn(), [values, boundaries])
-    verify_model(test_fn(out_int32=True, right=True), [values, boundaries])
-
-
-@tvm.testing.uses_gpu
-def test_roll():
-    """Test for aten::roll"""
-
-    def test_fn(shifts, dims):
-        return lambda x: torch.roll(x, shifts, dims)
-
-    x = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8]).view(4, 2)
-    verify_model(test_fn(1, 0), [x])
-    verify_model(test_fn(-1, 0), [x])
-    verify_model(test_fn(shifts=(2, 1), dims=(0, 1)), [x])
-
-
-@tvm.testing.uses_gpu
-def test_einsum():
-    """test_einsum"""
-
-    def test_fn(equation):
-        return lambda *x: torch.einsum(equation, *x)
-
-    x = torch.ones([2, 3])
-    y = torch.ones([3, 4])
-    z = torch.ones([4, 5])
-    verify_model(test_fn("ij,jk"), [x, y])
-    verify_model(test_fn("ij,jk,km->im"), [x, y, z])
-
-
-def test_stft():
-    """test_stft"""
-
-    def test_fn(n_fft, hop_length, win_length, center, pad_mode, normalized, onesided):
-        return lambda input, window=None: torch.stft(
-            input=input,
-            n_fft=n_fft,
-            hop_length=hop_length,
-            win_length=win_length,
-            window=window,
-            center=center,
-            pad_mode=pad_mode,
-            normalized=normalized,
-            onesided=onesided,
-            return_complex=False,
-        )
-
-    input_t = torch.rand([1, 12]).float()
-    window = torch.tensor([2, 3, 4], dtype=torch.int32)
-    targets = ["llvm", "cuda"]
-    verify_trace_model(test_fn(3, 3, 3, False, "constant", False, True), [input_t, window], targets)
-    verify_trace_model(test_fn(3, 3, 3, True, "constant", False, True), [input_t, window], targets)
-    verify_trace_model(test_fn(3, 3, 3, False, "reflect", False, True), [input_t, window], targets)
-    verify_trace_model(test_fn(3, 3, 3, True, "reflect", False, True), [input_t, window], targets)
-    verify_trace_model(test_fn(3, 3, 3, True, "reflect", True, True), [input_t, window], targets)
-    verify_trace_model(test_fn(3, 3, 3, True, "reflect", False, False), [input_t, window], targets)
-    input_t = torch.rand([2, 12]).float()
-    window = torch.tensor([2, 3, 4], dtype=torch.int32)
-    verify_trace_model(test_fn(3, 3, 3, False, "reflect", False, True), [input_t, window], targets)
-    window = torch.tensor([1, 3], dtype=torch.int32)
-    verify_trace_model(test_fn(2, 1, 2, False, "reflect", False, True), [input_t, window], targets)
-    verify_trace_model(test_fn(2, 1, 2, False, "reflect", False, True), [input_t], targets)
-
-
-@tvm.testing.uses_gpu
-def test_dot():
-    """Test for aten::dot"""
-
-    def test_fn(x):
-        return x.dot(x)
-
-    x = torch.randn([4])
-    verify_model(test_fn, [x])
-
-
-@tvm.testing.uses_gpu
-def test_mv():
-    """Test for aten::mv"""
-
-    def test_fn(m, v):
-        return m.mv(v)
-
-    verify_model(test_fn, [torch.randn(4, 4), torch.randn(4)])
-    verify_model(test_fn, [torch.randn(2, 2), torch.randn(2)])
-    verify_model(test_fn, [torch.randn(3, 8), torch.randn(8)])
-
-
-def test_grid_sample():
-    """test_grid_sample"""
-
-    class Grid_sample(Module):
-        def __init__(self, method, padding_mode, align_corners):
-            super().__init__()
-            self._method = method
-            self._padding_mode = padding_mode
-            self._align_corners = align_corners
-
-        def forward(self, x, y):
-            return torch.nn.functional.grid_sample(
-                input=x,
-                grid=y,
-                mode=self._method,
-                padding_mode=self._padding_mode,
-                align_corners=self._align_corners,
-            )
-
-    methods = ["nearest", "bilinear", "bicubic"]
-    padding_modes = ["zeros", "border", "reflection"]
-    align_corners = [True, False]
-
-    data_2D = torch.rand([4, 4, 8, 8]).float()
-    grid_2D = torch.rand([4, 16, 16, 2]).float()
-    # choosing smaller sizes to be testable on weaker GPUs
-    data_3D = torch.rand([4, 4, 4, 4, 4]).float()
-    grid_3D = torch.rand([4, 8, 8, 8, 3]).float()
-
-    for _method in methods:
-        # bicubic was introduced when pytorch > 1.7.1
-        torch_version = package_version.parse(torch.__version__)
-        if _method == "bicubic" and torch_version <= package_version.parse("1.7.1"):
-            continue
-        for _padding in padding_modes:
-            for _align in align_corners:
-                # ATTENTION:
-                #   "nearest" + "reflection" result may be different with pytorch on cpu device,
-                #   because pytorch's cpu result is different with gpu result,
-                #   and gpu result used here as baseline in tvm topi.image.grid_sample.
-                model = Grid_sample(_method, _padding, _align)
-                verify_model(model, input_data=[data_2D, grid_2D])
-
-                # 3D "bicubic"(tricubic) is not supported in pytorch
-                if _method != "bicubic":
-                    verify_model(model, input_data=[data_3D, grid_3D])
-
-
-def test_list_tuple():
-    """test compilation error for a Python list followed by a prim::TupleConstruct."""
-
-    class List_tuple(Module):
-        """List_tuple"""
-
-        def forward(self, x):
-            """forward"""
-            merged = []
-            mask_list = []
-            for i in range(3):
-                w0 = torch.sigmoid(x)
-                merged.append((w0, w0))
-                mask_list.append(x)
-
-            for i in range(3):
-                merged[i] = merged[i][0] + merged[i][1]
-            return mask_list[2], merged
-
-    x = torch.rand([4, 4, 16, 32]).float()
-    script_module = torch.jit.trace(List_tuple(), x, strict=False).eval()
-    relay.frontend.from_pytorch(script_module, [("x", x.shape)])
-
-
-# pylint: disable=unnecessary-dunder-call
-@tvm.testing.uses_gpu
-def test_binary_bitwise():
-    """Test for binary bitwise"""
-
-    def test_ior(x, y):
-        return x.__ior__(y)
-
-    def test_iand(x, y):
-        return x.__iand__(y)
-
-    def test_ixor(x, y):
-        return x.__ixor__(y)
-
-    x = torch.tensor([7, 49, 16, 1, 2, 3], dtype=torch.uint8)
-    y = torch.tensor([39, 128, 99, 228, 63, 17], dtype=torch.uint8)
-
-    for test_fn in [test_ior, test_iand, test_ixor]:
-        verify_model(test_fn, [x, y])
-
-
-@tvm.testing.uses_gpu
-def test_shift():
-    """Test for aten::__lshift__, aten::__rshift__"""
-
-    def test_lshift(x, y):
-        return x << y
-
-    def test_rshift(x, y):
-        return x >> y
-
-    x = torch.tensor([39, 128, 99, 228, 63, 17], dtype=torch.int32)
-    y = torch.tensor([3, 2, 7, 4, 5, 9], dtype=torch.int32)
-
-    for test_fn in [test_lshift, test_rshift]:
-        verify_model(test_fn, [x, y])
-
-
-@tvm.testing.uses_gpu
-def test_mod():
-    """Test for aten::fmod"""
-
-    def test_fmod(x, y):
-        return torch.fmod(x, y)
-
-    def test_remainder(x, y):
-        return torch.remainder(x, y)
-
-    for test_fn in [test_fmod, test_remainder]:
-        verify_model(test_fn, [torch.tensor([-3.0, -2, -1, 1, 2, 3]), torch.tensor(2)])
-        verify_model(test_fn, [torch.tensor([1, 2, 3, 4, 5]), torch.tensor(-1.5)])
-
-
-def test_softmax_fuse():
-    """test_softmax_fuse"""
-    # https://github.com/apache/tvm/issues/12001
-    class Model(torch.nn.Module):
-        """Pytorch model module"""
-
-        def __init__(self, nchwc_post_op=False) -> None:
-            super().__init__()
-            self.conv = torch.nn.Conv2d(3, 3, (1, 1), 1)
-            self.nchwc_post_op = nchwc_post_op
-
-        @torch.no_grad()
-        def forward(self, x):
-            """forward"""
-            t0a = self.conv(x)
-            t0b = torch.floor(x)
-            t2b = torch.softmax(t0a, dim=2)
-
-            if self.nchwc_post_op:
-                t3a = t0a - t0b
-                t4a = t2b - t0b
-                t6a = t3a + t4a
-                return t6a
-
-            return t2b + 1
-
-    sh = [3, 3, 10, 1]
-    inp = torch.ones(*sh, dtype=torch.float32)
-
-    for model in [Model(nchwc_post_op=False).eval(), Model(nchwc_post_op=True).eval()]:
-        output_torch = model(inp).numpy()
-
-        mod, params = relay.frontend.from_pytorch(torch.jit.trace(model, inp), [("inp0", sh)])
-
-        with tvm.transform.PassContext(opt_level=4):
-            out = (
-                relay.create_executor("graph", mod, params=params)
-                .evaluate()(inp0=inp.numpy())
-                .numpy()
-            )
-
-        tvm.testing.assert_allclose(out, output_torch, rtol=1e-5, atol=1e-5)
-
-
-@tvm.testing.uses_gpu
-def test_lerp():
-    """test_lerp"""
-
-    def test_fn(x, y, w):
-        return torch.lerp(x, y, w)
-
-    input_shape = [16]
-    x = torch.rand(input_shape).float()
-    y = torch.rand(input_shape).float()
-    w = torch.rand(input_shape).float()
-
-    # weight can be tensor or scalar
-    verify_model(test_fn, [x, y, w])
-    verify_model(test_fn, [x, y, w[0]])
-
-
-def test_trilu():
-    def _test_trilu(op, diagonal):
-        return lambda inp: op(inp, diagonal)
-
-    for op in [torch.triu, torch.tril]:
-        verify_model(_test_trilu(op, 0), [torch.rand(size=[3, 3])])
-        verify_model(_test_trilu(op, 1), [torch.rand(size=[6, 6])])
-        verify_model(_test_trilu(op, -2), [torch.rand(size=[6, 6])])
-
-
-def test_multinomial():
-    """test_multinomial"""
-
-    def _test_multinomial(num_samples):
-        return lambda inp: torch.multinomial(inp, num_samples=num_samples, replacement=True)
-
-    # Dont check output since it's random. Instead we'll just make sure shapes are right.
-    verify_model(
-        _test_multinomial(2),
-        [torch.rand(size=[3]).float()],
-        cpu_only=True,
-        check_correctness=False,
-        validate_structural_equal=False,
-    )
-    verify_model(
-        _test_multinomial(1),
-        [torch.rand(size=[4, 5]).float()],
-        cpu_only=True,
-        check_correctness=False,
-        validate_structural_equal=False,
-    )
-
-
-def test_weight_norm():
-    """Test for atten::_weight_norm"""
-    in_channels = 32
-    out_channels = 64
-    input_data_conv = torch.rand((1, in_channels, 32, 32)).float()
-
-    conv_wn = torch.nn.utils.weight_norm(torch.nn.Conv2d(in_channels, out_channels, kernel_size=3))
-    verify_model(conv_wn.eval().float(), input_data_conv)
-
-    conv_wn_groups = torch.nn.utils.weight_norm(
-        torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, groups=2)
-    )
-    verify_model(conv_wn_groups.eval().float(), input_data_conv)
-
-    conv_wn = torch.nn.utils.weight_norm(
-        torch.nn.Conv2d(in_channels, out_channels, kernel_size=3), dim=1
-    )
-    verify_model(conv_wn.eval().float(), input_data_conv)
-
-    linear_wn = torch.nn.utils.weight_norm(torch.nn.Linear(in_channels, out_channels))
-    input_data_linear = torch.rand((128, in_channels)).float()
-    verify_model(linear_wn.eval().float(), input_data_linear)
-
-
-@tvm.testing.uses_gpu
-def test_addmm():
-    def test_fn(alpha, beta):
-        return lambda inp, batch1, batch2: torch.addmm(inp, batch1, batch2, beta=beta, alpha=alpha)
-
-    M = torch.randn(3, 5)
-    batch1 = torch.randn(3, 4)
-    batch2 = torch.randn(4, 5)
-
-    verify_model(test_fn(0.4, 0.8), [M, batch1, batch2])
-
-
-@tvm.testing.uses_gpu
-def test_baddbmm():
-    def test_fn(alpha, beta):
-        return lambda inp, batch1, batch2: torch.baddbmm(
-            inp, batch1, batch2, beta=beta, alpha=alpha
-        )
-
-    M = torch.randn(10, 3, 5)
-    batch1 = torch.randn(10, 3, 4)
-    batch2 = torch.randn(10, 4, 5)
-
-    verify_model(test_fn(0.5, 1.0), [M, batch1, batch2])
-
-
-def test_exporting_renamed_c_graph():
-    """test exproting model when export_renamed_model is set"""
-
-    # model definition
-    class Conv2D(Module):
-        def __init__(self):
-            super(Conv2D, self).__init__()
-            self.conv = torch.nn.Conv2d(3, 6, 3, bias=True)
-
-        def forward(self, *args):
-            return self.conv(args[0])
-
-    input_name, input_shape = "input", [1, 3, 10, 10]
-    shape_list = [(input_name, input_shape)]
-    temp_dir = utils.tempdir().path
-    script_module = torch.jit.trace(Conv2D(), [torch.rand(input_shape)])
-    _, _ = relay.frontend.from_pytorch(
-        script_module, shape_list, export_renamed_c_graph_path=temp_dir
-    )
-
-    exported_c_graph_name = os.listdir(temp_dir)[0]
-    assert "tvm_exported_c_graph_" in exported_c_graph_name
-
-    # make sure the renamed output variable presents in the restored _C.Graph
-    with open(f"{temp_dir}/{exported_c_graph_name}", "r") as f:
-        graph = f.read()
-        assert "%aten::_convolution_0" in graph
-
-
-def test_inplace_copy():
-    class SimpleInplaceCopy(torch.nn.Module):
-        def forward(self, x):
-            x[:5, 0, 5:] = x[:5, 0, 5:] + 1
-            return x
-
-    class NegativeSliceInplaceCopy(torch.nn.Module):
-        def forward(self, x):
-            x[5:-1, -1, :] = x[5:-1, -1, :] + 1
-            return x
-
-    class PartialDimensionInplaceCopy(torch.nn.Module):
-        def forward(self, x):
-            x[:5] = x[:5] + 1
-            x[0:5, ...] = x[0:5, ...] + 1
-            x[0:5, ..., -1] = x[0:5, ..., -1] + 1
-            return x
-
-    inputs = torch.randn(10, 10, 10)
-    verify_model(SimpleInplaceCopy(), [inputs])
-    inputs = torch.randn(10, 10, 10)
-    verify_model(NegativeSliceInplaceCopy(), [inputs])
-    inputs = torch.randn(10, 10, 10)
-    verify_model(PartialDimensionInplaceCopy(), [inputs])
-
-
-@tvm.testing.uses_gpu
-def test_swapaxes():
-    """test_swapaxes"""
-    torch.set_grad_enabled(False)
-    input_shape = [2, 3, 10, 5]
-
-    class Swapaxes1(Module):
-        def forward(self, *args):
-            return args[0].swapaxes(2, 3)
-
-    class Swapaxes2(Module):
-        def forward(self, *args):
-            return args[0].swapaxes(-2, -1)
-
-    class Swapaxes3(Module):
-        def forward(self, *args):
-            return args[0].swapaxes(1, 1)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Swapaxes1().float().eval(), input_data=input_data)
-    verify_model(Swapaxes2().float().eval(), input_data=input_data)
-    verify_model(Swapaxes3().float().eval(), input_data=input_data)
-
-
-def test_linalg_vector_norm():
-    """test_linalg_vector_norm"""
-    torch.set_grad_enabled(False)
-
-    def test_fn(order):
-        return lambda x: torch.linalg.vector_norm(x, ord=order)
-
-    input_shape = [3, 3]
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(test_fn(order=2), input_data=input_data)
-    verify_model(test_fn(order=3.5), input_data=input_data)
-    verify_model(test_fn(order=np.inf), input_data=input_data)
-    verify_model(test_fn(order=-np.inf), input_data=input_data)
-    verify_model(test_fn(order=0), input_data=input_data)
-
-    # Also test on double
-    input_data = torch.rand(input_shape).double()
-    verify_model(test_fn(order=2), input_data=input_data)
-    verify_model(test_fn(order=3.5), input_data=input_data)
-    verify_model(test_fn(order=np.inf), input_data=input_data)
-    verify_model(test_fn(order=-np.inf), input_data=input_data)
-    verify_model(test_fn(order=0), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_scaled_dot_product_attention():
-    """test_scaled_dot_product_attention"""
-    torch.set_grad_enabled(False)
-
-    def test_fn(attn_mask=None, is_causal=False):
-        return lambda query, key, value: torch.nn.functional.scaled_dot_product_attention(
-            query, key, value, attn_mask=attn_mask, is_causal=is_causal
-        )
-
-    L, S, E, Ev = 5, 7, 11, 13
-    query_4d = torch.randn(2, 3, L, E)
-    query_3d = torch.randn(3, L, E)
-    key_4d = torch.randn(2, 3, S, E)
-    key_3d = torch.randn(3, S, E)
-    value_4d = torch.randn(2, 3, S, Ev)
-    value_3d = torch.randn(3, S, Ev)
-
-    verify_model(test_fn(), [query_4d, key_4d, value_4d])
-    verify_model(test_fn(), [query_4d, key_4d, value_3d])
-    verify_model(test_fn(), [query_4d, key_3d, value_4d])
-    verify_model(test_fn(), [query_4d, key_3d, value_3d])
-    verify_model(test_fn(), [query_3d, key_4d, value_4d])
-    verify_model(test_fn(), [query_3d, key_4d, value_3d])
-    verify_model(test_fn(), [query_3d, key_3d, value_4d])
-    verify_model(test_fn(), [query_3d, key_3d, value_3d])
-
-    verify_model(test_fn(is_causal=True), [query_4d, key_4d, value_4d])
-    verify_model(test_fn(is_causal=True), [query_4d, key_4d, value_3d])
-    verify_model(test_fn(is_causal=True), [query_4d, key_3d, value_4d])
-    verify_model(test_fn(is_causal=True), [query_4d, key_3d, value_3d])
-    verify_model(test_fn(is_causal=True), [query_3d, key_4d, value_4d])
-    verify_model(test_fn(is_causal=True), [query_3d, key_4d, value_3d])
-    verify_model(test_fn(is_causal=True), [query_3d, key_3d, value_4d])
-    verify_model(test_fn(is_causal=True), [query_3d, key_3d, value_3d])
-
-    # Test with explicit attn_mask
-    attn_mask = torch.ones((L, S), dtype=torch.bool).tril(diagonal=0)
-    if torch.cuda.is_available():
-        attn_mask = attn_mask.cuda()
-    verify_model(test_fn(attn_mask=attn_mask), [query_4d, key_4d, value_4d])
-    verify_model(test_fn(attn_mask=attn_mask), [query_4d, key_4d, value_3d])
-    verify_model(test_fn(attn_mask=attn_mask), [query_4d, key_3d, value_4d])
-    verify_model(test_fn(attn_mask=attn_mask), [query_4d, key_3d, value_3d])
-    verify_model(test_fn(attn_mask=attn_mask), [query_3d, key_4d, value_4d])
-    verify_model(test_fn(attn_mask=attn_mask), [query_3d, key_4d, value_3d])
-    verify_model(test_fn(attn_mask=attn_mask), [query_3d, key_3d, value_4d])
-    verify_model(test_fn(attn_mask=attn_mask), [query_3d, key_3d, value_3d])
-
-    # Test with float64
-    query_4d = torch.randn(2, 3, L, E, dtype=torch.float64)
-    query_3d = torch.randn(3, L, E, dtype=torch.float64)
-    key_4d = torch.randn(2, 3, S, E, dtype=torch.float64)
-    key_3d = torch.randn(3, S, E, dtype=torch.float64)
-    value_4d = torch.randn(2, 3, S, Ev, dtype=torch.float64)
-    value_3d = torch.randn(3, S, Ev, dtype=torch.float64)
-    verify_model(test_fn(), [query_4d, key_4d, value_4d])
-    verify_model(test_fn(), [query_4d, key_4d, value_3d])
-    verify_model(test_fn(), [query_4d, key_3d, value_4d])
-    verify_model(test_fn(), [query_4d, key_3d, value_3d])
-    verify_model(test_fn(), [query_3d, key_4d, value_4d])
-    verify_model(test_fn(), [query_3d, key_4d, value_3d])
-    verify_model(test_fn(), [query_3d, key_3d, value_4d])
-    verify_model(test_fn(), [query_3d, key_3d, value_3d])
-
-    # Test with larger tensors
-    L, S, E, Ev = 128, 128, 64, 64
-    query_4d = torch.randn(32, 8, L, E)
-    query_3d = torch.randn(8, L, E)
-    key_4d = torch.randn(32, 8, S, E)
-    key_3d = torch.randn(8, S, E)
-    value_4d = torch.randn(32, 8, S, Ev)
-    value_3d = torch.randn(8, S, Ev)
-    verify_model(test_fn(), [query_4d, key_4d, value_4d])
-    verify_model(test_fn(), [query_4d, key_4d, value_3d])
-    verify_model(test_fn(), [query_4d, key_3d, value_4d])
-    verify_model(test_fn(), [query_4d, key_3d, value_3d])
-    verify_model(test_fn(), [query_3d, key_4d, value_4d])
-    verify_model(test_fn(), [query_3d, key_4d, value_3d])
-    verify_model(test_fn(), [query_3d, key_3d, value_4d])
-    verify_model(test_fn(), [query_3d, key_3d, value_3d])
-
-
-def test_parameterlist():
-    """test_parameterlist"""
-    torch.set_grad_enabled(False)
-
-    class ParamListModel(torch.nn.Module):
-        def __init__(self, num_layer=2):
-            super().__init__()
-            self.biases = torch.nn.ParameterList([torch.randn(10)] * num_layer)
-            self.weights = torch.nn.ParameterList([torch.randn(10, 10)] * num_layer)
-
-        def forward(self, x):
-            for i in range(len(self.weights) - 1):
-                x = torch.addmm(self.biases[i], x, self.weights[i])
-            return torch.addmm(self.biases[-1], x, self.weights[-1])
-
-    input_data = torch.randn(20, 10)
-    verify_model(ParamListModel().float().eval(), input_data=input_data)
-
-
-@tvm.testing.uses_gpu
-def test_forward_tile():
-    """test_forward_repeat"""
-    torch.set_grad_enabled(False)
-    input_shape = [1, 3]
-
-    class Tile1(Module):
-        def forward(self, *args):
-            return args[0].tile(1, 1)
-
-    class Tile2(Module):
-        def forward(self, *args):
-            return args[0].tile(4, 2)
-
-    class Tile3(Module):
-        def forward(self, *args):
-            return args[0].tile(4, 2, 1)
-
-    input_data = torch.rand(input_shape).float()
-    verify_model(Tile1().float().eval(), input_data=input_data)
-    verify_model(Tile2().float().eval(), input_data=input_data)
-    verify_model(Tile3().float().eval(), input_data=input_data)
-
-
-class TestSetSpan:
-    """test structural equal between translated / hand-crafted relay IR with span tagged."""
-
-    def _verify(self, res_fptr, golden_fptr):
-        with tvm.testing.enable_span_filling():
-            with_span = res_fptr()
-        with tvm.testing.disable_span_filling():
-            without_span = res_fptr()
-        tvm.ir.assert_structural_equal(with_span, without_span)
-        _verify_structural_equal_with_span(with_span, golden_fptr())
-
-    def test_conv2d_bias_add(self):
-        ker_sz, in_chs, out_chs = 7, 3, 6
-        input_shape = [1, 3, 10, 10]
-
-        def _res():
-            # model definition
-            class Conv2D(Module):
-                def __init__(self):
-                    super(Conv2D, self).__init__()
-                    self.conv = torch.nn.Conv2d(in_chs, out_chs, ker_sz, bias=True)
-
-                def forward(self, *args):
-                    return self.conv(args[0])
-
-            # get frontend model
-            mod = gen_ir_module(Conv2D(), [torch.rand(input_shape)])
-            return mod["main"]
-
-        def _golden():
-            conv_si = "aten::_convolution_0"
-            input_name = "input0"
-            input_0 = relay.var(
-                input_name,
-                shape=tuple(input_shape),
-                span=_create_span(f"{conv_si}.{input_name}"),
-            )
-            weight_name = f"{conv_si}.weight"
-            conv_weight = relay.var(
-                weight_name,
-                shape=(out_chs, in_chs, ker_sz, ker_sz),
-                span=_create_span(weight_name),
-            )
-            bias_name = f"{conv_si}.bias"
-            conv_bias = relay.var(
-                bias_name,
-                shape=(out_chs,),
-                span=_create_span(bias_name),
-            )
-            conv_out = _set_span(
-                relay.nn.conv2d(
-                    input_0,
-                    conv_weight,
-                    padding=[0] * 4,
-                    channels=out_chs,
-                    kernel_size=[ker_sz] * 2,
-                ),
-                conv_si,
-            )
-            bias_out = _set_span(relay.nn.bias_add(conv_out, conv_bias), conv_si)
-            return relay.Function([input_0, conv_weight, conv_bias], bias_out)
-
-        self._verify(_res, _golden)
-
-    def test_batchnorm_span(self):
-        features = 16
-        input_shape = [1, 16, 10, 10]
-
-        def _res():
-            # model definition
-            bn_2d = torch.nn.BatchNorm2d(features)
-
-            # get frontend model
-            mod = gen_ir_module(bn_2d, [torch.rand(input_shape)])
-            return mod["main"]
-
-        def _golden():
-            bn_si = "aten::batch_norm_0"
-            input_name = "input0"
-            input_0 = relay.var(
-                input_name,
-                shape=tuple(input_shape),
-                span=_create_span(f"{bn_si}.{input_name}"),
-            )
-            weight_name = f"{bn_si}.weight"
-            bn_weight = relay.var(
-                weight_name,
-                shape=(features,),
-                span=_create_span(weight_name),
-            )
-            bias_name = f"{bn_si}.bias"
-            bn_bias = relay.var(
-                bias_name,
-                shape=(features,),
-                span=_create_span(bias_name),
-            )
-            rm_name = f"{bn_si}.running_mean"
-            bn_rm = relay.var(
-                rm_name,
-                shape=(features,),
-                span=_create_span(rm_name),
-            )
-            rv_name = f"{bn_si}.running_var"
-            bn_rv = relay.var(
-                rv_name,
-                shape=(features,),
-                span=_create_span(rv_name),
-            )
-            bn_out = _set_span(
-                relay.nn.batch_norm(input_0, bn_weight, bn_bias, bn_rm, bn_rv),
-                bn_si,
-            )
-            bn_tuple_get_item = _set_span(relay.TupleGetItem(bn_out.tuple_value, 0), bn_si)
-            return relay.Function([input_0, bn_weight, bn_bias, bn_rm, bn_rv], bn_tuple_get_item)
-
-        self._verify(_res, _golden)
-
-    def test_reshape_span(self):
-        input_shape = [2, 1, 10, 1, 10]
-        new_shape = [2, 1, 10, 10]
-
-        def _res():
-            # model definition
-            class Reshape(Module):
-                def forward(self, *args):
-                    return args[0].reshape(new_shape)
-
-            # get frontend model
-            mod = gen_ir_module(Reshape(), [torch.rand(input_shape)])
-            return mod["main"]
-
-        def _golden():
-            reshape_si = "aten::reshape_0"
-            input_name = "input0"
-            input_0 = relay.var(
-                input_name,
-                shape=tuple(input_shape),
-                span=_create_span(f"{reshape_si}.{input_name}"),
-            )
-            reshape_out = _set_span(
-                relay.reshape(input_0, newshape=new_shape),
-                reshape_si,
-            )
-            return relay.Function([input_0], reshape_out)
-
-        self._verify(_res, _golden)
-
-    def test_dense_bias_add(self):
-        in_f, out_f = 10, 7
-        input_shape = [in_f, in_f]
-
-        def _res():
-            # model definition
-            class Dense(Module):
-                def __init__(self):
-                    super(Dense, self).__init__()
-                    self.linear = torch.nn.Linear(in_f, out_f, bias=True)
-
-                def forward(self, *args):
-                    return self.linear(args[0])
-
-            # get frontend model
-            mod = gen_ir_module(Dense(), [torch.rand(input_shape)])
-            return mod["main"]
-
-        def _golden():
-            dense_si = "aten::linear_0"
-            input_name = "input0"
-            input_0 = relay.var(
-                input_name,
-                shape=tuple(input_shape),
-                span=_create_span(f"{dense_si}.{input_name}"),
-            )
-            weight_name = f"{dense_si}.weight"
-            dense_weight = relay.var(
-                weight_name,
-                shape=(out_f, in_f),
-                span=_create_span(weight_name),
-            )
-            bias_name = f"{dense_si}.bias"
-            dense_bias = relay.var(
-                bias_name,
-                shape=(out_f,),
-                span=_create_span(bias_name),
-            )
-            dense_out = _set_span(
-                relay.nn.dense(input_0, dense_weight),
-                dense_si,
-            )
-            bias_out = _set_span(
-                relay.nn.bias_add(dense_out, dense_bias, axis=-1),
-                dense_si,
-            )
-            return relay.Function([input_0, dense_weight, dense_bias], bias_out)
-
-        self._verify(_res, _golden)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/pytorch/test_fx_quant.py b/tests/python/frontend/pytorch/test_fx_quant.py
deleted file mode 100644
index 8ed6e1a74797..000000000000
--- a/tests/python/frontend/pytorch/test_fx_quant.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-""" Tests on fx-quantized torch model conversion """
-import torch
-import torchvision
-import pytest
-import numpy as np
-from torch.quantization import get_default_qconfig
-from torch.quantization.quantize_fx import prepare_fx, convert_fx
-from torchvision.models.efficientnet import efficientnet_b4
-from torchvision.models.resnet import resnet50
-from tvm import relay
-import tvm.testing
-
-
-def quantize(model, example_inputs):
-    qconfig = get_default_qconfig("fbgemm")
-    qconfig_dict = {"": qconfig}
-    return convert_fx(prepare_fx(model, qconfig_dict, example_inputs))
-
-
-def quantize_and_build(model, in_size):
-    inp = torch.rand(1, 3, in_size, in_size)
-    input_name = "inp"
-    qmodel = quantize(model, inp)
-
-    with torch.no_grad():
-        script_module = torch.jit.trace(qmodel, inp)
-        with tvm.testing.disable_span_filling():
-            mod, _ = relay.frontend.from_pytorch(script_module, [(input_name, inp.shape)])
-        with tvm.testing.enable_span_filling():
-            mod_with_span, _ = relay.frontend.from_pytorch(script_module, [(input_name, inp.shape)])
-        tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-        mod = relay.transform.InferType()(mod)
-
-        # Make sure that the model is quantized
-        assert "qnn.conv2d" in mod.astext(show_meta_data=False)
-
-        # Skip building since it is slow on CI
-        # relay.build(mod, params=params, target="llvm")
-
-
-@pytest.mark.skip(reason="unsupported op aten::linalg_vector_norm")
-def test_ssd_vgg():
-    class TraceWrapper(torch.nn.Module):
-        def __init__(self, model):
-            super().__init__()
-            self.model = model
-
-        def forward(self, inp):
-            features = self.model.backbone(inp)
-            features = list(features.values())
-            out = self.model.head(features)
-            return out["bbox_regression"], out["cls_logits"]
-
-    model_func = torchvision.models.detection.ssd300_vgg16
-    model = TraceWrapper(model_func(num_classes=50, pretrained_backbone=True)).eval()
-    quantize_and_build(model, 300)
-
-
-def test_deeplab_v3():
-    class TraceWrapper(torch.nn.Module):
-        def __init__(self, model):
-            super().__init__()
-            self.model = model
-
-        def forward(self, inp):
-            out = self.model(inp)
-            return out["out"]
-
-    deeplabv3 = torchvision.models.segmentation.deeplabv3_mobilenet_v3_large(pretrained=True)
-    model = TraceWrapper(deeplabv3.eval()).eval()
-    quantize_and_build(model, 300)
-
-
-@pytest.mark.skip(
-    reason="Model binary isn't uploaded to S3. See https://github.com/apache/tvm/pull/17397"
-)
-def test_imagenet():
-    for model_func in [resnet50, efficientnet_b4]:
-        quantize_and_build(model_func(pretrained=True).eval(), 224)
diff --git a/tests/python/frontend/pytorch/test_lstm.py b/tests/python/frontend/pytorch/test_lstm.py
deleted file mode 100644
index da4e1ae96e03..000000000000
--- a/tests/python/frontend/pytorch/test_lstm.py
+++ /dev/null
@@ -1,372 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-""" Tests on torch lstm model conversion """
-# originally from https://github.com/pytorch/pytorch/blob/master/benchmarks/fastrnns/custom_lstms.py
-# described in https://pytorch.org/blog/optimizing-cuda-rnn-with-torchscript/
-import numpy as np
-import torch
-import torch.nn as nn
-from torch.nn import Parameter
-import torch.jit as jit
-from typing import List, Tuple
-from torch import Tensor
-
-import tvm
-import tvm.testing
-from tvm import relay
-from tvm.relay.frontend.pytorch import from_pytorch
-from tvm.relay.prelude import Prelude
-from tvm.runtime.container import ADT, tuple_object
-
-
-class LayerNormLSTMCell(jit.ScriptModule):
-    def __init__(self, input_size, hidden_size):
-        super().__init__()
-        self.input_size = input_size
-        self.hidden_size = hidden_size
-        self.weight_ih = Parameter(torch.randn(4 * hidden_size, input_size))
-        self.weight_hh = Parameter(torch.randn(4 * hidden_size, hidden_size))
-
-        ln = nn.LayerNorm
-
-        self.layernorm_i = ln(4 * hidden_size)
-        self.layernorm_h = ln(4 * hidden_size)
-        self.layernorm_c = ln(hidden_size)
-
-    @jit.script_method
-    def forward(self, input, state):
-        # type: (Tensor, Tuple[Tensor, Tensor]) -> Tuple[Tensor, Tuple[Tensor, Tensor]]
-        hx, cx = state
-        igates = self.layernorm_i(torch.mm(input, self.weight_ih.t()))
-        hgates = self.layernorm_h(torch.mm(hx, self.weight_hh.t()))
-        gates = igates + hgates
-        ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
-
-        ingate = torch.sigmoid(ingate)
-        forgetgate = torch.sigmoid(forgetgate)
-        cellgate = torch.tanh(cellgate)
-        outgate = torch.sigmoid(outgate)
-
-        cy = self.layernorm_c((forgetgate * cx) + (ingate * cellgate))
-        hy = outgate * torch.tanh(cy)
-
-        return hy, (hy, cy)
-
-
-class LSTMLayer(jit.ScriptModule):
-    def __init__(self, cell, *cell_args):
-        super().__init__()
-        self.cell = cell(*cell_args)
-
-    @jit.script_method
-    def forward(self, input, state):
-        # type: (Tensor, Tuple[Tensor, Tensor]) -> Tuple[Tensor, Tuple[Tensor, Tensor]]
-        outputs = []
-        for i in range(input.size(0)):
-            out, state = self.cell(input[i], state)
-            outputs += [out]
-        return torch.stack(outputs), state
-
-
-class ReverseLSTMLayer(jit.ScriptModule):
-    def __init__(self, cell, *cell_args):
-        super(ReverseLSTMLayer, self).__init__()
-        self.cell = cell(*cell_args)
-
-    @jit.script_method
-    def forward(self, inputs, state):
-        # type: (Tensor, Tuple[Tensor, Tensor]) -> Tuple[Tensor, Tuple[Tensor, Tensor]]
-        outputs = jit.annotate(List[Tensor], [])
-        seq_len = inputs.size(0)
-        for i in range(seq_len):
-            out, state = self.cell(inputs[seq_len - i - 1], state)
-            # workaround for the lack of list rev support
-            outputs = [out] + outputs
-        return torch.stack(outputs), state
-
-
-class BidirLSTMLayer(jit.ScriptModule):
-    __constants__ = ["directions"]
-
-    def __init__(self, cell, *cell_args):
-        super(BidirLSTMLayer, self).__init__()
-        self.directions = nn.ModuleList(
-            [
-                LSTMLayer(cell, *cell_args),
-                ReverseLSTMLayer(cell, *cell_args),
-            ]
-        )
-
-    @jit.script_method
-    def forward(self, input, states):
-        # type: (Tensor, List[Tuple[Tensor, Tensor]]) -> Tuple[Tensor, List[Tuple[Tensor, Tensor]]]
-        # List[LSTMState]: [forward LSTMState, backward LSTMState]
-        outputs = jit.annotate(List[Tensor], [])
-        output_states = jit.annotate(List[Tuple[Tensor, Tensor]], [])
-        for (i, direction) in enumerate(self.directions):
-            state = states[i]
-            out, out_state = direction(input, state)
-            outputs += [out]
-            output_states += [out_state]
-        # tensor array concat assumes axis == 0 for now
-        # return torch.cat(outputs, -1), output_states
-        return torch.cat(outputs, 0), output_states
-
-
-def init_stacked_lstm(num_layers, layer, first_layer_args, other_layer_args):
-    layers = [layer(*first_layer_args)] + [layer(*other_layer_args) for _ in range(num_layers - 1)]
-    return nn.ModuleList(layers)
-
-
-class StackedLSTM(jit.ScriptModule):
-    __constants__ = ["layers"]  # Necessary for iterating through self.layers
-
-    def __init__(self, num_layers, layer, first_layer_args, other_layer_args):
-        super().__init__()
-        self.layers = init_stacked_lstm(num_layers, layer, first_layer_args, other_layer_args)
-
-    @jit.script_method
-    def forward(self, input, states):
-        # type: (Tensor, List[Tuple[Tensor, Tensor]]) -> Tuple[Tensor, List[Tuple[Tensor, Tensor]]]
-        # List[LSTMState]: One state per layer
-        output_states = jit.annotate(List[Tuple[Tensor, Tensor]], [])
-        output = input
-        for (i, rnn_layer) in enumerate(self.layers):
-            state = states[i]
-            output, out_state = rnn_layer(output, state)
-            output_states += [out_state]
-        return output, output_states
-
-
-class StackedBidirLSTM(jit.ScriptModule):
-    __constants__ = ["layers"]  # Necessary for iterating through self.layers
-
-    def __init__(self, num_layers, layer, first_layer_args, other_layer_args):
-        super(StackedBidirLSTM, self).__init__()
-        self.layers = init_stacked_lstm(num_layers, layer, first_layer_args, other_layer_args)
-
-    @jit.script_method
-    def forward(self, input, states):
-        # type: (Tensor, List[List[Tuple[Tensor, Tensor]]]) -> Tuple[Tensor, List[List[Tuple[Tensor, Tensor]]]]
-        # List[List[LSTMState]]: The outer list is for layers,
-        #                        inner list is for directions.
-        output_states = jit.annotate(List[List[Tuple[Tensor, Tensor]]], [])
-        output = input
-        for (i, rnn_layer) in enumerate(self.layers):
-            state = states[i]
-            output, out_state = rnn_layer(output, state)
-            output_states += [out_state]
-        return output, output_states
-
-
-def lstm(input_size, hidden_size):
-    return LSTMLayer(LayerNormLSTMCell, input_size, hidden_size)
-
-
-def stacked_lstm(input_size, hidden_size, num_layers):
-    return StackedLSTM(
-        num_layers,
-        LSTMLayer,
-        first_layer_args=[LayerNormLSTMCell, input_size, hidden_size],
-        other_layer_args=[LayerNormLSTMCell, hidden_size, hidden_size],
-    )
-
-
-def bidir_lstm(input_size, hidden_size):
-    return BidirLSTMLayer(LayerNormLSTMCell, input_size, hidden_size)
-
-
-def stacked_bidir_lstm(input_size, hidden_size, num_layers):
-    return StackedBidirLSTM(
-        num_layers,
-        BidirLSTMLayer,
-        first_layer_args=[LayerNormLSTMCell, input_size, hidden_size],
-        other_layer_args=[LayerNormLSTMCell, hidden_size, hidden_size],
-    )
-
-
-def vmobj_to_list(o, dtype="float32"):
-    if isinstance(o, tvm.nd.NDArray):
-        return [o]
-    elif isinstance(o, tvm.runtime.container.ADT):
-        result = []
-        for f in o:
-            result.extend(vmobj_to_list(f, dtype))
-        return result
-    else:
-        raise RuntimeError("Unknown object type: %s" % type(o))
-
-
-def assert_equal(tvm_result, torch_result):
-    if isinstance(torch_result, (tuple, list)):
-        assert isinstance(tvm_result, list)
-        for tvm_res, pt_res in zip(tvm_result, torch_result):
-            assert_equal(tvm_res, pt_res)
-    elif isinstance(torch_result, torch.Tensor):
-        tvm.testing.assert_allclose(tvm_result.numpy(), torch_result.numpy(), rtol=1e-4, atol=1e-4)
-
-
-def run_and_compare(mod, params, pt_result, target, device):
-    exec_res = relay.create_executor("vm", mod=mod, device=device, target=target).evaluate()(
-        **params
-    )
-
-    def flatten(nested):
-        res = []
-        for r in nested:
-            if isinstance(r, torch.Tensor):
-                res.append(r)
-            else:
-                res.extend(flatten(r))
-        return res
-
-    if isinstance(exec_res, tvm.runtime.container.ADT):
-        assert not isinstance(pt_result, torch.Tensor)
-        tvm_res = vmobj_to_list(exec_res)
-        torch_res = flatten(pt_result)
-    else:
-        tvm_res = exec_res
-        torch_res = pt_result
-
-    assert_equal(tvm_res, torch_res)
-
-
-def convert_list_to_vmobj(py_lst):
-    def wrap_nd_array(arr):
-        return tvm.nd.array(arr, device=tvm.cpu(0))
-
-    mod = tvm.IRModule()
-    prelude = Prelude(mod)
-    list, cons, nil = mod.get_type("List")
-    adt_lst = ADT(nil.tag, [])
-    for elem in reversed(py_lst):
-        if isinstance(elem, np.ndarray):
-            vmobj = wrap_nd_array(elem)
-        elif isinstance(elem, tuple):
-            vmobj = tuple_object([wrap_nd_array(e) for e in elem])
-        elif isinstance(elem, list):
-            vmobj = convert_list_to_vmobj(elem)
-        adt_lst = ADT(cons.tag, [vmobj, adt_lst])
-    return adt_lst
-
-
-@tvm.testing.uses_gpu
-def test_custom_lstm():
-    input_name = "input"
-    states_name = "states"
-    seq_len = 5
-    batch = 2
-    input_size = 3
-    hidden_size = 4
-    num_layers = 3
-    state_tensor_shape = (batch, hidden_size)
-
-    torch.manual_seed(1)
-
-    inp = torch.randn(seq_len, batch, input_size)
-
-    input_shapes = [
-        (input_name, (seq_len, batch, input_size)),
-        (states_name, (state_tensor_shape, state_tensor_shape)),
-    ]
-
-    input_shapes_stacked = [
-        (input_name, (seq_len, batch, input_size)),
-        (
-            states_name,
-            [(state_tensor_shape, state_tensor_shape), (state_tensor_shape, state_tensor_shape)],
-        ),
-    ]
-
-    input_shapes_stacked_bidir = [
-        (input_name, (seq_len, batch, input_size)),
-        (
-            states_name,
-            [
-                [(state_tensor_shape, state_tensor_shape) for _ in range(2)]
-                for _ in range(num_layers)
-            ],
-        ),
-    ]
-
-    states = [
-        (torch.randn(state_tensor_shape), torch.randn(state_tensor_shape))
-        for _ in range(num_layers)
-    ]
-
-    bidir_states = [
-        (torch.randn(state_tensor_shape), torch.randn(state_tensor_shape)) for _ in range(2)
-    ]
-
-    stacked_bidir_states = [
-        [(torch.randn(state_tensor_shape), torch.randn(state_tensor_shape)) for _ in range(2)]
-        for _ in range(num_layers)
-    ]
-
-    models = [
-        ("lstm", lstm(input_size, hidden_size).eval(), states[0], input_shapes),
-        (
-            "stacked",
-            stacked_lstm(input_size, hidden_size, num_layers).eval(),
-            states,
-            input_shapes_stacked,
-        ),
-        ("bidir", bidir_lstm(input_size, hidden_size).eval(), bidir_states, input_shapes_stacked),
-        # TODO(masahi): stacked bidir seems to have a rare accuracy issue
-        # (
-        #     "stacked_bidir",
-        #     stacked_bidir_lstm(input_size, hidden_size, num_layers).eval(),
-        #     stacked_bidir_states,
-        #     input_shapes_stacked_bidir,
-        # ),
-    ]
-
-    for (name, raw_model, states, input_shapes) in models:
-        script_module = torch.jit.script(raw_model)
-        with tvm.testing.disable_span_filling():
-            mod, params = from_pytorch(script_module, input_shapes)
-        with tvm.testing.enable_span_filling():
-            mod_with_span, _ = from_pytorch(script_module, input_shapes)
-        tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-
-        with torch.no_grad():
-            pt_result = raw_model(inp.clone(), states)
-
-        params[input_name] = inp.numpy()
-
-        if isinstance(states, tuple):
-            states_np = tuple(st.numpy() for st in states)
-        elif isinstance(states, list) and isinstance(states[0], torch.Tensor):
-            states_np = [st.numpy() for st in states]
-        elif isinstance(states, list) and isinstance(states[0], tuple):
-            states_np = [tuple(st.numpy() for st in states[i]) for i in range(len(states))]
-        elif isinstance(states, list) and isinstance(states[0], list):
-            states_np = [
-                [tuple(st.numpy() for st in states) for states in states[layer]]
-                for layer in range(num_layers)
-            ]
-        else:
-            assert False
-
-        if isinstance(states_np, list):
-            params[states_name] = convert_list_to_vmobj(states_np)
-        else:
-            params[states_name] = states_np
-
-        for tgt, dev in tvm.testing.enabled_targets():
-            print("Running %s on target %s" % (name, tgt))
-            run_and_compare(mod, params, pt_result, target=tgt, device=dev)
diff --git a/tests/python/frontend/pytorch/test_object_detection.py b/tests/python/frontend/pytorch/test_object_detection.py
deleted file mode 100644
index 9dd336f7e9d2..000000000000
--- a/tests/python/frontend/pytorch/test_object_detection.py
+++ /dev/null
@@ -1,167 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument
-"""Test torch vision fasterrcnn and maskrcnn models"""
-import numpy as np
-import cv2
-
-import torch
-import torchvision
-
-import tvm
-
-import tvm.testing
-from tvm import relay
-from tvm.runtime.vm import VirtualMachine
-from tvm.relay.frontend.pytorch_utils import (
-    rewrite_nms_to_batched_nms,
-    rewrite_batched_nms_with_max_out_size,
-    rewrite_scatter_to_gather,
-)
-from tvm.contrib.download import download
-
-in_size = 300
-
-
-def process_image(img):
-    img = cv2.imread(img).astype("float32")
-    img = cv2.resize(img, (in_size, in_size))
-    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-    img = torch.from_numpy(img / 255.0).permute(2, 0, 1).float()
-    img = torch.unsqueeze(img, axis=0)
-
-    return img
-
-
-def do_trace(model, inp, in_size=in_size):
-    model_trace = torch.jit.trace(model, inp)
-    model_trace.eval()
-    return model_trace
-
-
-def dict_to_tuple(out_dict):
-    if "masks" in out_dict.keys():
-        return out_dict["boxes"], out_dict["scores"], out_dict["labels"], out_dict["masks"]
-    return out_dict["boxes"], out_dict["scores"], out_dict["labels"]
-
-
-class TraceWrapper(torch.nn.Module):
-    def __init__(self, model):
-        super().__init__()
-        self.model = model
-
-    def forward(self, inp):
-        out = self.model(inp)
-        return dict_to_tuple(out[0])
-
-
-def generate_jit_model(index):
-    model_funcs = [
-        torchvision.models.detection.fasterrcnn_resnet50_fpn,
-        torchvision.models.detection.maskrcnn_resnet50_fpn,
-    ]
-
-    model_func = model_funcs[index]
-    model = TraceWrapper(model_func(pretrained=True, rpn_pre_nms_top_n_test=1000))
-
-    model.eval()
-    inp = torch.Tensor(np.random.uniform(0.0, 250.0, size=(1, 3, in_size, in_size)))
-
-    with torch.no_grad():
-        out = model(inp)
-
-        script_module = do_trace(model, inp)
-        script_out = script_module(inp)
-
-        assert len(out[0]) > 0 and len(script_out[0]) > 0
-        return script_module
-
-
-def test_detection_models():
-    img = "test_street_small.jpg"
-    img_url = (
-        "https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/detection/street_small.jpg"
-    )
-    download(img_url, img)
-
-    input_shape = (1, 3, in_size, in_size)
-
-    input_name = "input0"
-    shape_list = [(input_name, input_shape)]
-
-    scripted_model = generate_jit_model(1)
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
-    with tvm.testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_pytorch(scripted_model, shape_list)
-    tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-
-    data = process_image(img)
-    data_np = data.detach().numpy()
-
-    with torch.no_grad():
-        pt_res = scripted_model(data)
-
-    def compile_and_run_vm(mod, params, data_np, target):
-        with tvm.transform.PassContext(opt_level=3):
-            vm_exec = relay.vm.compile(mod, target=target, params=params)
-
-        dev = tvm.device(target, 0)
-        vm = VirtualMachine(vm_exec, dev)
-        vm.set_input("main", **{input_name: data_np})
-        return vm.run()
-
-    for target in ["llvm"]:
-        tvm_res = compile_and_run_vm(mod, params, data_np, target)
-
-        # Bounding boxes
-        tvm.testing.assert_allclose(
-            pt_res[0].cpu().numpy(), tvm_res[0].numpy(), rtol=1e-5, atol=1e-5
-        )
-        # Scores
-        tvm.testing.assert_allclose(
-            pt_res[1].cpu().numpy(), tvm_res[1].numpy(), rtol=1e-5, atol=1e-5
-        )
-        # Class ids
-        np.testing.assert_equal(pt_res[2].cpu().numpy(), tvm_res[2].numpy())
-
-        score_threshold = 0.9
-        print("Num boxes:", pt_res[0].cpu().numpy().shape[0])
-        print("Num valid boxes:", np.sum(pt_res[1].cpu().numpy() >= score_threshold))
-
-    before = mod["main"]
-    mod = rewrite_nms_to_batched_nms(mod)
-    after = mod["main"]
-    assert not tvm.ir.structural_equal(after, before)
-
-    # TODO(masahi): It seems this rewrite causes flaky segfaults on CI
-    # See https://github.com/apache/tvm/issues/7363
-    # before = mod["main"]
-    # mod = rewrite_batched_nms_with_max_out_size(mod)
-    # after = mod["main"]
-    # assert not tvm.ir.structural_equal(after, before)
-
-    before = mod["main"]
-    mod = rewrite_scatter_to_gather(mod, 4)  # num_scales is 4 for maskrcnn_resnet50_fpn
-    after = mod["main"]
-    assert not tvm.ir.structural_equal(after, before)
-
-    tvm_res_after_rewrite = compile_and_run_vm(mod, params, data_np, "llvm")
-
-    # Results should be equivalent after rewriting
-    for res1, res2 in zip(tvm_res, tvm_res_after_rewrite):
-        tvm.testing.assert_allclose(res1.numpy(), res2.numpy())
diff --git a/tests/python/frontend/pytorch/test_rnns.py b/tests/python/frontend/pytorch/test_rnns.py
deleted file mode 100644
index b43af58d69a3..000000000000
--- a/tests/python/frontend/pytorch/test_rnns.py
+++ /dev/null
@@ -1,521 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import torch
-import tvm
-import tvm.testing
-import onnx
-import io
-import sys
-
-from tvm import relay
-from tvm.contrib import graph_executor
-
-from torch import nn
-
-## LSTM parameters
-lstm_feature_size = 16
-lstm_hidden_size = 32
-lstm_projection_size = 20
-
-## GRU parameters
-gru_feature_size = 8
-gru_hidden_size = 16
-
-num_layers = 2
-seqs_length = 2
-batch_size = 2
-
-##RNN parameters
-rnn_feature_size = 8
-rnn_hidden_size = 16
-
-
-class RNN_Model(nn.Module):
-    """
-    It is base class for RNN layer classes.
-    It contains some common fields and methods for child classes.
-    """
-
-    def __init__(
-        self,
-    ):
-        super().__init__()
-
-        # model is defined in child class
-        self.model = None
-
-    def forward(self, input, hidden_init=None):
-        """
-        Computes the output tensor after input inference along RNN layer.
-
-        :param input: batch of data as a tensor of shape (seqs_length, batch_size, feature_size) or (batch_size, seqs_length, feature_size) if self.batch_first = True
-        :param hidden_init: initial hidden state(s) of the RNN as a tensor(s) of shape (num_layers, batch_size, hidden_size). Will default to a tensor of zeros if None.
-        :return: the output tensor of shape (batch_size, hidden_size)
-        """
-        if self.model is None:
-            raise NotImplementedError("self.model must be defined in subclasses!")
-        out, _ = self.model(input, hidden_init)
-
-        return out
-
-    def gen_rnd_weights(self):
-        """
-        Generate random weigths for the model
-        """
-        if self.model is None:
-            raise NotImplementedError("self.model must be defined in subclasses!")
-        with torch.no_grad():
-            for weight_group in self.model.all_weights:
-                for weight in weight_group:
-                    weight.data = torch.rand(weight.shape)
-
-    def get_dummy_inputs(self):
-        raise NotImplementedError("subclasses must override get_dummy_inputs()!")
-
-    def get_input_names(self):
-        raise NotImplementedError("subclasses must override get_input_names()!")
-
-    def get_shape_desc(self, frontend_type):
-        raise NotImplementedError("subclasses must override get_shape_desc(frontend_type)!")
-
-    def get_tvm_inputs(self, dtype):
-        raise NotImplementedError("subclasses must override get_tvm_inputs(dtype)!")
-
-
-class RNN_Model_Impl(RNN_Model):
-    def __init__(
-        self,
-        seq_len=seqs_length,
-        batch_size=batch_size,
-        feature_size=rnn_feature_size,
-        hidden_size=rnn_hidden_size,
-        batch_first=False,
-        layer_num=1,
-        bidirectional=False,
-        use_bias=True,
-        rnd_weights_init=False,
-        nonlinearity="tanh",
-        dropout=0.0,
-    ):
-        super().__init__()
-        # Shapes
-        self.shape = [seq_len, batch_size, feature_size]
-        if batch_first:
-            self.shape = [batch_size, seq_len, feature_size]
-        layers_num = 2 * layer_num if bidirectional else layer_num
-        self.h0_shape = [layers_num, batch_size, hidden_size]
-        # Dummy inputs
-        self.dummy_inputs = (torch.rand(self.shape), torch.zeros(self.h0_shape))
-
-        self.model = nn.RNN(
-            input_size=feature_size,
-            hidden_size=hidden_size,
-            num_layers=layer_num,
-            nonlinearity=nonlinearity,
-            bias=use_bias,
-            batch_first=batch_first,
-            dropout=dropout,
-            bidirectional=bidirectional,
-        )
-
-        if rnd_weights_init:
-            self.gen_rnd_weights()
-
-    def gen_rnd_weights(self):
-        super().gen_rnd_weights()
-
-    def get_dummy_inputs(self):
-        return self.dummy_inputs
-
-    def get_input_names(self):
-        return ["input", "h0"]
-
-    def get_shape_desc(self, frontend_type):
-        shape_desc = None
-        if frontend_type == "pt":  # PyTorch
-            shape_desc = [("input", self.shape)]
-        elif frontend_type == "onnx":  # ONNX
-            shape_desc = {
-                "input": self.shape,
-                "h0": self.h0_shape,
-            }
-        return shape_desc
-
-    def get_tvm_inputs(self, dtype):
-        return {
-            "input": tvm.nd.array(self.dummy_inputs[0].numpy().astype(dtype)),
-            "h0": tvm.nd.array(self.dummy_inputs[1].numpy().astype(dtype)),
-        }
-
-
-class GRU_Model(RNN_Model):
-    def __init__(
-        self,
-        seq_len=seqs_length,
-        batch_size=batch_size,
-        feature_size=gru_feature_size,
-        hidden_size=gru_hidden_size,
-        batch_first=False,
-        layer_num=1,
-        bidirectional=False,
-        use_bias=True,
-        rnd_weights_init=False,
-    ):
-        super().__init__()
-
-        # Shapes
-        self.shape = [seq_len, batch_size, feature_size]
-        if batch_first:
-            self.shape = [batch_size, seq_len, feature_size]
-        layers_num = 2 * layer_num if bidirectional else layer_num
-        self.h0_shape = [layers_num, batch_size, hidden_size]
-        # Dummy inputs
-        self.dummy_inputs = (torch.rand(self.shape), torch.zeros(self.h0_shape))
-
-        self.model = nn.GRU(
-            input_size=feature_size,
-            hidden_size=hidden_size,
-            num_layers=layer_num,
-            bidirectional=bidirectional,
-            batch_first=batch_first,
-            bias=use_bias,
-        )
-
-        if rnd_weights_init:
-            self.gen_rnd_weights()
-
-    def gen_rnd_weights(self):
-        """
-        Generate random weigths for the model with biases
-        For first uni- and bidirectional weights group:
-            Wi (3*hidden_size, feature_size)
-            Wh (3*hidden_size, hidden_size)
-            Bi (3*hidden_size)
-            Bh (3*hidden_size)
-        For other weights group:
-            Wi (3*hidden_size, hidden_size)
-            Wh (3*hidden_size, hidden_size)
-            Bi (3*hidden_size)
-            Bh (3*hidden_size)
-        For generation of random weigths for the model without biases the Bi and Bh weights are skipped
-        """
-        super().gen_rnd_weights()
-
-    def get_dummy_inputs(self):
-        return self.dummy_inputs
-
-    def get_input_names(self):
-        return ["input", "h0"]
-
-    def get_shape_desc(self, frontend_type):
-        shape_desc = None
-        if frontend_type == "pt":  # PyTorch
-            shape_desc = [("input", self.shape)]
-        elif frontend_type == "onnx":  # ONNX
-            shape_desc = {
-                "input": self.shape,
-                "h0": self.h0_shape,
-            }
-        return shape_desc
-
-    def get_tvm_inputs(self, dtype):
-        return {
-            "input": tvm.nd.array(self.dummy_inputs[0].numpy().astype(dtype)),
-            "h0": tvm.nd.array(self.dummy_inputs[1].numpy().astype(dtype)),
-        }
-
-
-def check_torch_version_for_proj_in_lstm():
-    """
-    proj_size parameter is supported in torch.nn.LSTM layer started from 1.8.0 torch version
-    """
-    me = False
-
-    version = torch.__version__
-    major, minor, micro = version.split(".")
-
-    if int(major) > 1:
-        me = True
-    elif int(major) == 1:
-        if int(minor) >= 8:
-            me = True
-
-    return me
-
-
-class LSTM_Model(RNN_Model):
-    def __init__(
-        self,
-        seq_len=seqs_length,
-        batch_size=batch_size,
-        feature_size=lstm_feature_size,
-        hidden_size=lstm_hidden_size,
-        batch_first=False,
-        layer_num=1,
-        bidirectional=False,
-        proj_size=0,
-        use_bias=True,
-        rnd_weights_init=False,
-    ):
-        super().__init__()
-
-        # Shapes
-        self.shape = [seq_len, batch_size, feature_size]
-        if batch_first:
-            self.shape = [batch_size, seq_len, feature_size]
-        layers_num = 2 * layer_num if bidirectional else layer_num
-        self.h0_shape = [layers_num, batch_size, hidden_size]
-        if proj_size > 0:
-            self.h0_shape = [layers_num, batch_size, proj_size]
-        self.c0_shape = [layers_num, batch_size, hidden_size]
-        # Dummy inputs
-        self.dummy_inputs = (
-            torch.rand(self.shape),
-            (torch.zeros(self.h0_shape), torch.zeros(self.c0_shape)),
-        )
-
-        if check_torch_version_for_proj_in_lstm():
-            self.model = nn.LSTM(
-                input_size=lstm_feature_size,
-                hidden_size=lstm_hidden_size,
-                num_layers=layer_num,
-                bidirectional=bidirectional,
-                proj_size=proj_size,
-                batch_first=batch_first,
-                bias=use_bias,
-            )
-        else:
-            if proj_size > 0:
-                print(
-                    "WARNING: projection is not supported for torch version less than 1.8.0! ",
-                    "LSTM was constructed without projection!",
-                )
-                # sys.exit()
-            self.model = nn.LSTM(
-                input_size=lstm_feature_size,
-                hidden_size=lstm_hidden_size,
-                num_layers=layer_num,
-                bidirectional=bidirectional,
-                batch_first=batch_first,
-                bias=use_bias,
-            )
-
-        if rnd_weights_init:
-            self.gen_rnd_weights()
-
-    def gen_rnd_weights(self):
-        """
-        Generate random weigths for the model with biases
-        Without projection:
-            For first weights group:
-                Wi (4*lstm_hidden_size, lstm_feature_size)
-                Wh (4*lstm_hidden_size, lstm_hidden_size)
-                Bi (4*lstm_hidden_size)
-                Bh (4*lstm_hidden_size)
-            For first bidirectional weights group:
-                Wi (4*lstm_hidden_size, lstm_feature_size)
-                Wh (4*lstm_hidden_size, lstm_hidden_size)
-                Bi (4*lstm_hidden_size)
-                Bh (4*lstm_hidden_size)
-            For other weights group:
-                Wi (4*lstm_hidden_size, lstm_hidden_size)
-                Wh (4*lstm_hidden_size, lstm_hidden_size)
-                Bi (4*lstm_hidden_size)
-                Bh (4*lstm_hidden_size)
-        With projection:
-            For first weights group:
-                Wi (4*lstm_hidden_size, lstm_feature_size)
-                Wh (4*lstm_hidden_size, proj_size)
-                Bi (4*lstm_hidden_size)
-                Bh (4*lstm_hidden_size)
-                P  (proj_size, lstm_hidden_size)
-            For first bidirectional weights group:
-                Wi (4*lstm_hidden_size, lstm_feature_size)
-                Wh (4*lstm_hidden_size, proj_size)
-                Bi (4*lstm_hidden_size)
-                Bh (4*lstm_hidden_size)
-                P  (proj_size, lstm_hidden_size)
-            For other weights group:
-                Wi (4*lstm_hidden_size, proj_size * num_directions)
-                Wh (4*lstm_hidden_size, proj_size)
-                Bi (4*lstm_hidden_size)
-                Bh (4*lstm_hidden_size)
-                P  (proj_size, lstm_hidden_size)
-        For generation of random weigths for the model without biases Bi and Bh are skipped
-        """
-        super().gen_rnd_weights()
-
-    def get_dummy_inputs(self):
-        return self.dummy_inputs
-
-    def get_input_names(self):
-        return ["input", "h0", "c0"]
-
-    def get_shape_desc(self, frontend_type):
-        shape_desc = None
-        if frontend_type == "pt":  # PyTorch
-            shape_desc = [("input", self.shape)]
-        elif frontend_type == "onnx":  # ONNX
-            shape_desc = {
-                "input": self.shape,
-                "h0": self.h0_shape,
-                "c0": self.c0_shape,
-            }
-        return shape_desc
-
-    def get_tvm_inputs(self, dtype):
-        return {
-            "input": tvm.nd.array(self.dummy_inputs[0].numpy().astype(dtype)),
-            "h0": tvm.nd.array(self.dummy_inputs[1][0].numpy().astype(dtype)),
-            "c0": tvm.nd.array(self.dummy_inputs[1][1].numpy().astype(dtype)),
-        }
-
-
-def compare(input, gold_data, rtol=1e-5, atol=1e-5):
-    tvm.testing.assert_allclose(input, gold_data, rtol=rtol, atol=atol)
-
-
-def check_rnn(rnn_type, rnn_mod, target=tvm.target.Target("llvm -mcpu=core-avx2"), dev=tvm.cpu(0)):
-    def get_model(
-        rnn_type,
-        rnn_mod,
-        args,
-    ):
-        # Fill args
-        if "b" in rnn_mod:
-            args["bidirectional"] = True
-        if "s" in rnn_mod:
-            args["layer_num"] = num_layers
-        if "tanh" in rnn_mod:
-            args["nonlinearity"] = "tanh"
-        if "relu" in rnn_mod:
-            args["nonlinearity"] = "relu"
-
-        if rnn_type == "GRU":
-            RNN_Model_selector = GRU_Model
-        elif rnn_type == "LSTM":
-            RNN_Model_selector = LSTM_Model
-            if "p" in rnn_mod:
-                args["proj_size"] = lstm_projection_size
-        elif rnn_type == "RNN":
-            RNN_Model_selector = RNN_Model_Impl
-
-        return RNN_Model_selector(**args)
-
-    def get_onnx_model(model):
-        onnx_io = io.BytesIO()
-        with torch.no_grad():
-            input_names = model.get_input_names()
-            inputs = model.get_dummy_inputs()
-
-            # default export (without dynamic input)
-            torch.onnx.export(model, inputs, onnx_io, input_names=input_names)
-
-        onnx_io.seek(0, 0)
-        return onnx.load_model(onnx_io)
-
-    model = None
-    dtype = "float32"
-    device = torch.device("cpu")
-    for batch_first in (True, False):
-        for use_bias in (True, False):
-            for rnd_weights in [True]:  # (True, False):
-                model_inputs = {
-                    "batch_first": batch_first,
-                    "use_bias": use_bias,
-                    "rnd_weights_init": rnd_weights,
-                }
-                model = get_model(rnn_type, rnn_mod, model_inputs)
-                model.to(device)
-                model.eval()
-
-                # Get golden output from original model
-                dummy_inputs = model.get_dummy_inputs()
-                golden_output = model.forward(dummy_inputs[0].to(device)).detach().cpu().numpy()
-
-                tvm_output = None
-                for format in ["pt"]:  # ["pt", "onnx"]:
-                    shape_desc = model.get_shape_desc(format)
-                    if format == "pt":
-                        # Use torch.jit.trace to generate a torch.jit.ScriptModule via tracing.
-                        traced_script_module = torch.jit.trace(model, dummy_inputs[0]).eval()
-
-                        # Import model to Relay
-                        with tvm.testing.disable_span_filling():
-                            mod, params = relay.frontend.from_pytorch(
-                                traced_script_module, shape_desc
-                            )
-                        with tvm.testing.enable_span_filling():
-                            mod_with_span, _ = relay.frontend.from_pytorch(
-                                traced_script_module, shape_desc
-                            )
-                        tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-                    elif format == "onnx":
-                        try:
-                            onnx_model = get_onnx_model(model)
-                        except:
-                            print(
-                                "WARNING: torch.onnx.export does not support conversion LSTM with projection "
-                                "from pytorch! TODO: waiting for the support and correct test after that."
-                            )
-                            continue
-
-                        # Import model to Relay
-                        with tvm.testing.disable_span_filling():
-                            mod, params = relay.frontend.from_onnx(onnx_model, shape_desc)
-                        with tvm.testing.enable_span_filling():
-                            mod_with_span, _ = relay.frontend.from_onnx(onnx_model, shape_desc)
-                        tvm.ir.assert_structural_equal(mod, mod_with_span, map_free_vars=True)
-
-                    # Model compilation by tvm
-                    with tvm.transform.PassContext(opt_level=3):
-                        lib = relay.build(mod, target=target, params=params)
-
-                    # Inference of the model with given input data
-                    m = graph_executor.GraphModule(lib["default"](dev))
-
-                    # Set inputs
-                    tvm_inputs = model.get_tvm_inputs(dtype)
-                    m.set_input(**tvm_inputs)
-                    # Execute
-                    m.run()
-                    # Get outputs (converted to numpy array)
-                    tvm_output = m.get_output(0).numpy()
-
-                    compare(tvm_output, golden_output)
-
-
-@tvm.testing.uses_gpu
-def test_rnns():
-    for target, dev in tvm.testing.enabled_targets():
-        # RNN types: GRU, LSTM
-        # GRU modifications: unidirectional, stacked, bidirectional, stacked bidirectional
-        for mod_type in ["uni", "s", "b", "sb"]:
-            check_rnn("GRU", mod_type, target, dev)
-        # LSTM modifications: unidirectional, stacked, bidirectional, stacked bidirectional,
-        # and all these types with projection ("p", "sp", "bp", "sbp")
-        # The latter are skiped for test acceleration
-        for mod_type in ["uni", "s", "b", "sb"]:
-            check_rnn("LSTM", mod_type, target, dev)
-
-        for mod_type in ["uni", "s", "b", "sb", "tanh", "relu"]:
-            check_rnn("RNN", mod_type, target, dev)
-
-
-if __name__ == "__main__":
-    test_rnns()
diff --git a/tests/python/frontend/pytorch/test_span_naming.py b/tests/python/frontend/pytorch/test_span_naming.py
deleted file mode 100644
index fb39ddf4f061..000000000000
--- a/tests/python/frontend/pytorch/test_span_naming.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, too-many-lines, len-as-condition, no-else-return, unused-variable, too-many-nested-blocks
-# pylint: disable=consider-iterating-dictionary, invalid-name, unused-argument, unused-variable, broad-except
-# pylint: disable=import-outside-toplevel, simplifiable-if-expression, cell-var-from-loop, unnecessary-lambda
-# pylint: disable=missing-function-docstring, redefined-builtin, use-implicit-booleaness-not-comparison
-"""Tests to ensure span names are correctly populated when importing Pytorch"""
-from torch import nn
-import torch
-import tvm
-
-
-class NestedConvModule(nn.Module):
-    """Module that performs Conv2d and relu activation"""
-
-    def __init__(self, in_channels, out_channels):
-        super().__init__()
-        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
-        self.relu = nn.ReLU()
-
-    def forward(self, x):
-        x = self.relu(self.conv(x))
-        return x
-
-
-class NestedFinalModule(nn.Module):
-    """Simple module that adds 2 inputs"""
-
-    def forward(self, x, y):
-        return x + y
-
-
-class SimpleTwoConvModule(nn.Module):
-    """
-    ML model that performs 2 convolutions and adds them together.
-    All operations are inside nested modules to make scope names interesting.
-    """
-
-    def __init__(self):
-        super().__init__()
-        # First convolutional module
-        self.image_block1 = NestedConvModule(in_channels=3, out_channels=64)
-        # Second convolutional module
-        self.image_block2 = NestedConvModule(in_channels=64, out_channels=64)
-        self.final_block = NestedFinalModule()
-
-    def forward(self, x):
-        # Forward pass through the first convolutional module
-        x1 = self.image_block1(x)
-        # Forward pass through the second convolutional module
-        x2 = self.image_block2(x1)
-        # Add the outputs of the two convolutional modules
-        return self.final_block(x1, x2)
-
-
-def test_pytorch_scope_based_span_names():
-    model = SimpleTwoConvModule()
-    sample_input = torch.zeros((1, 3, 64, 64), dtype=torch.float32)
-    with torch.no_grad():
-        traced_torch_model = torch.jit.trace(model, sample_input)
-    import_input = [("model_input", (1, 3, 64, 64))]
-    relay_model_ir, relay_model_params = tvm.relay.frontend.from_pytorch(
-        traced_torch_model, import_input, preserve_pytorch_scopes=True
-    )
-    # If specified, we are preserving the pytorch named spans
-    for block in [1, 2]:
-        for key in ["weight", "bias"]:
-            assert f"image_block{block}.conv.{key}" in relay_model_params.keys()
-    # Manually check all span names since asserting structural equality is not sufficient
-    current_call = relay_model_ir["main"].body
-    assert current_call.op.name == "add"
-    assert current_call.span is not None and current_call.span.source_name.name == "final_block"
-    current_call = current_call.args[1]
-    for block in [2, 1]:
-        assert current_call.op.name == "nn.relu"
-        assert (
-            current_call.span is not None
-            and current_call.span.source_name.name == f"image_block{block}.relu"
-        )
-        current_call = current_call.args[0]
-        assert current_call.op.name == "nn.bias_add"
-        assert (
-            current_call.span is not None
-            and current_call.span.source_name.name == f"image_block{block}.conv"
-        )
-        current_call = current_call.args[0]
-        assert current_call.op.name == "nn.conv2d"
-        assert (
-            current_call.span is not None
-            and current_call.span.source_name.name == f"image_block{block}.conv"
-        )
-        current_call = current_call.args[0]
diff --git a/tests/python/frontend/tensorflow/test_bn_dynamic.py b/tests/python/frontend/tensorflow/test_bn_dynamic.py
deleted file mode 100644
index 99d8f790028c..000000000000
--- a/tests/python/frontend/tensorflow/test_bn_dynamic.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-BatchNorm without given mean and variance given testcases
-====================
-This is a test script to test fused_batch_norm operators
-in TensorFlow frontend when mean and variance are not given.
-"""
-import tvm
-import tvm.testing
-import numpy as np
-
-try:
-    import tensorflow.compat.v1 as tf
-
-    tf.disable_v2_behavior()
-except ImportError:
-    import tensorflow as tf
-from tvm import relay
-from tensorflow.python.framework import graph_util
-
-
-def verify_fused_batch_norm(shape):
-    g = tf.Graph()
-    with g.as_default():
-        input_tensor = tf.placeholder(tf.float32, shape=shape, name="input")
-        alpha = tf.constant(
-            np.random.rand(
-                shape[-1],
-            ),
-            dtype=tf.float32,
-            name="alpha",
-        )
-        beta = tf.constant(
-            np.random.rand(
-                shape[-1],
-            ),
-            dtype=tf.float32,
-            name="beta",
-        )
-        bn = tf.nn.fused_batch_norm(x=input_tensor, offset=beta, scale=alpha, name="bn")
-        out = tf.identity(bn[0], name="output")
-    data = np.random.rand(*shape)
-    with tf.Session(graph=out.graph) as sess:
-        sess.run([tf.global_variables_initializer()])
-        tf_out = sess.run(out, feed_dict={input_tensor: data})
-        constant_graph = graph_util.convert_variables_to_constants(sess, sess.graph_def, ["output"])
-
-    for device in ["llvm"]:
-        dev = tvm.device(device, 0)
-        if not tvm.testing.device_enabled(device):
-            print("Skip because %s is not enabled" % device)
-            continue
-        with tvm.testing.disable_span_filling():
-            mod, params = relay.frontend.from_tensorflow(constant_graph, outputs=["output"])
-        with tvm.testing.enable_span_filling():
-            mod_with_span, _ = relay.frontend.from_tensorflow(constant_graph, outputs=["output"])
-        tvm.ir.assert_structural_equal(mod["main"], mod_with_span["main"])
-        with tvm.transform.PassContext(opt_level=3):
-            graph, lib, params = relay.build(mod, target=device, params=params)
-        from tvm.contrib import graph_executor
-
-        m = graph_executor.create(graph, lib, dev)
-        m.set_input(**params)
-        m.set_input("input", data)
-        m.run()
-        tvm_out = m.get_output(0)
-        tvm.testing.assert_allclose(
-            tvm_out.numpy(), tf_out.astype(tvm_out.dtype), atol=1e-3, rtol=1e-3
-        )
-
-
-def test_fused_batch_norm():
-    verify_fused_batch_norm(shape=(1, 12, 12, 32))
-    verify_fused_batch_norm(shape=(1, 24, 24, 64))
-    verify_fused_batch_norm(shape=(1, 64, 64, 128))
-    verify_fused_batch_norm(shape=(8, 12, 12, 32))
-    verify_fused_batch_norm(shape=(16, 12, 12, 32))
-    verify_fused_batch_norm(shape=(32, 12, 12, 32))
-
-
-if __name__ == "__main__":
-    test_fused_batch_norm()
diff --git a/tests/python/frontend/tensorflow/test_control_flow.py b/tests/python/frontend/tensorflow/test_control_flow.py
deleted file mode 100644
index 494deb46835f..000000000000
--- a/tests/python/frontend/tensorflow/test_control_flow.py
+++ /dev/null
@@ -1,473 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Unit tests for converting TensorFlow control flow op to Relay."""
-import pytest
-
-try:
-    import tensorflow.compat.v1 as tf
-
-    tf.disable_v2_behavior()
-except ImportError:
-    import tensorflow as tf
-from tensorflow.python.ops import control_flow_ops
-import numpy as np
-from tvm import nd, relay, ir, testing
-from tvm.relay.frontend.tensorflow import from_tensorflow
-
-
-def check_equal(graph, tf_out, input_map=None):
-    with testing.disable_span_filling():
-        mod, params = from_tensorflow(graph.as_graph_def(add_shapes=True))
-    with testing.enable_span_filling():
-        mod_with_span, _ = from_tensorflow(graph.as_graph_def(add_shapes=True))
-    assert ir.structural_equal(mod["main"], mod_with_span["main"])
-
-    if input_map is not None:
-        params.update(input_map)
-    relay_out = relay.create_executor("vm", mod=mod).evaluate()(**params)
-    if isinstance(relay_out, nd.NDArray):
-        np.testing.assert_allclose(tf_out, relay_out.numpy())
-    else:
-        if not isinstance(tf_out, (list, tuple)):
-            tf_out = [tf_out]
-        for x, y in zip(tf_out, [r.numpy() for r in relay_out]):
-            np.testing.assert_allclose(x, y)
-
-
-def test_vanilla_loop():
-    graph = tf.Graph()
-    with graph.as_default():
-        i = tf.constant(0, name="while/constant")
-
-        def c(i):
-            return tf.less(i, 10)
-
-        def b(i):
-            return tf.add(i, 1)
-
-        r = tf.while_loop(c, b, [i])
-
-        with tf.Session() as sess:
-            tf_out = sess.run(r)
-
-        check_equal(graph, tf_out)
-
-
-def test_callnode_loop_vars():
-    graph = tf.Graph()
-    with graph.as_default():
-        i = tf.add(tf.constant(0), 1)
-
-        def c(i):
-            return tf.less(i, 10)
-
-        def b(i):
-            return tf.add(i, 1)
-
-        r = tf.while_loop(c, b, [i])
-
-        with tf.Session() as sess:
-            tf_out = sess.run(r)
-
-        check_equal(graph, tf_out)
-
-
-def test_loop_2_vars():
-    graph = tf.Graph()
-    with graph.as_default():
-        i0 = tf.constant(0)
-        j0 = tf.ones([2, 2])
-
-        def c(i, j):
-            return i < 10
-
-        def b(i, j):
-            return [tf.add(i, 1), j]
-
-        i1, i2 = tf.while_loop(c, b, loop_vars=[i0, j0])
-        i1 += tf.constant(1337)
-
-        with tf.Session() as sess:
-            tf_out = sess.run(i1)
-
-    check_equal(graph, tf_out)
-
-
-def test_loop_3_vars():
-    graph = tf.Graph()
-    with graph.as_default():
-        i0 = tf.constant(1)
-        j0 = tf.constant(2)
-        k0 = tf.constant(4)
-
-        def c(i, j, k):
-            return i < 10
-
-        def b(i, j, k):
-            return [i + 1, j * k, k + i]
-
-        r = tf.while_loop(c, b, loop_vars=[i0, j0, k0])
-
-        with tf.Session() as sess:
-            tf_out = sess.run(r)
-
-    check_equal(graph, tf_out)
-
-
-def test_loop_conditions():
-    graph = tf.Graph()
-    with graph.as_default():
-        i = tf.constant(1)
-        j = tf.constant(1)
-        k = tf.constant(5)
-
-        def c(i, j, k):
-            return tf.equal(
-                tf.not_equal(tf.less(i + j, 10), tf.less(j * k, 100)), tf.greater_equal(k, i + j)
-            )
-
-        def b(i, j, k):
-            return [i + j, j + k, k + 1]
-
-        r = tf.while_loop(c, b, loop_vars=[i, j, k])
-        with tf.Session() as sess:
-            tf_out = sess.run(r)
-
-    check_equal(graph, tf_out)
-
-
-@pytest.mark.skip
-def test_loop_bodies():
-    graph = tf.Graph()
-    with graph.as_default():
-
-        def body(x):
-            a = tf.constant(np.array([[5, 6], [7, 8]]), dtype=tf.int32)
-            b = tf.constant(np.array([[1, 2], [3, 4]]), dtype=tf.int32)
-            c = a + b
-            return tf.nn.relu(x + c)
-
-        def condition(x):
-            return tf.reduce_sum(x) < 100
-
-        x = tf.constant(0, shape=[2, 2])
-        r = tf.while_loop(condition, body, [x])
-        with tf.Session() as sess:
-            tf_out = sess.run(r)
-
-    check_equal(graph, tf_out)
-
-
-def test_nested_loop():
-    graph = tf.Graph()
-    with graph.as_default():
-
-        def body(x):
-            def nest_body(c):
-                return tf.multiply(c, 2)
-
-            def cd(c):
-                return tf.less(c, 10)
-
-            c = tf.constant(2)
-            res = tf.while_loop(cd, nest_body, loop_vars=[c])
-            return tf.nn.relu(x + res)
-
-        def condition(x):
-            return tf.greater(x, 100)
-
-        x = tf.constant(3)
-        r = tf.while_loop(condition, body, loop_vars=[x])
-
-        with tf.Session() as sess:
-            tf_out = sess.run(r)
-
-    check_equal(graph, tf_out)
-
-
-def test_vanilla_cond():
-    graph = tf.Graph()
-    with graph.as_default():
-        i = tf.constant(1)
-        j = tf.constant(4)
-
-        def f1():
-            return tf.multiply(1, 17)
-
-        def f2():
-            return tf.add(4, 23)
-
-        r = tf.cond(tf.less(i, j), f1, f2)
-
-    with tf.Session(graph=graph) as sess:
-        tf_out = sess.run(r)
-
-    check_equal(graph, tf_out)
-
-
-def test_multiple_cond_vars():
-    graph = tf.Graph()
-    with graph.as_default():
-        x1 = tf.constant(7)
-        x2 = tf.constant(12)
-        z = tf.constant(20)
-        r = tf.cond(tf.less(tf.add(x1, x2), 10), lambda: tf.add(10, 2), lambda: tf.square(5))
-
-        with tf.Session() as sess:
-            tf_out = sess.run(r)
-
-    check_equal(graph, tf_out)
-
-
-def test_cond_fn_parameters():
-    graph = tf.Graph()
-    with graph.as_default():
-
-        def fn1(x, y):
-            return tf.multiply(5, 6)
-
-        def fn2(x, y):
-            return tf.add(3, 4)
-
-        i = tf.constant(1)
-        j = tf.constant(2)
-        k = tf.constant(3)
-        r = tf.cond(tf.less(i, j), lambda: fn1(i, k), lambda: fn2(j, k))
-
-        with tf.Session() as sess:
-            tf_out = sess.run(r, feed_dict={i: 1, j: 2, k: 3})
-
-    check_equal(graph, tf_out)
-
-
-def test_nested_cond():
-    graph = tf.Graph()
-    with graph.as_default():
-
-        def fn1(a, b):
-            def nest_fn1():
-                return tf.add(1, 2)
-
-            def nest_fn2():
-                return tf.subtract(10, 5)
-
-            res = tf.cond(tf.less(1, 2), nest_fn1, nest_fn2)
-            return tf.multiply(tf.add(87, res), 10)
-
-        def fn2(a, b):
-            return tf.add(10, 10)
-
-        x = tf.constant(5)
-        y = tf.constant(6)
-        z = tf.constant(7)
-        pred = tf.less(x, y)
-        r = tf.cond(pred, lambda: fn1(x, y), lambda: fn2(y, z))
-
-        with tf.Session() as sess:
-            tf_out = sess.run(r, feed_dict={x: 1, y: 2, z: 3, pred: True})
-
-    check_equal(graph, tf_out)
-
-
-def test_loop_in_cond():
-    graph = tf.Graph()
-    with graph.as_default():
-
-        def fn1(a, b):
-            i = tf.constant(0)
-
-            def cd(i):
-                return tf.less(i, 10)
-
-            def bd(i):
-                return tf.add(i, 1)
-
-            res = tf.while_loop(cd, bd, [i])
-            return tf.multiply(tf.add(20, res), 10)
-
-        def fn2(a, b):
-            return tf.add(10, 20)
-
-        x = tf.constant(7)
-        y = tf.constant(20)
-        z = tf.constant(10)
-        pred = tf.less(x, y)
-        r = tf.cond(pred, lambda: fn1(x, y), lambda: fn2(y, z))
-
-        with tf.Session() as sess:
-            tf_out = sess.run(r, feed_dict={x: 1, y: 2, z: 3, pred: True})
-
-    check_equal(graph, tf_out)
-
-
-def test_cond_in_loop():
-    graph = tf.Graph()
-    with graph.as_default():
-
-        def body(x):
-            x = tf.constant(7)
-            z = tf.constant(20)
-            res = tf.cond(tf.less(x, 10), lambda: tf.add(10, 20), lambda: tf.square(10))
-            return tf.multiply(res, x)
-
-        x = tf.constant(21)
-
-        def condition(x):
-            return tf.less(x, 100)
-
-        r = tf.while_loop(condition, body, loop_vars=[x])
-        with tf.Session() as sess:
-            tf_out = sess.run(r)
-
-    check_equal(graph, tf_out)
-
-
-def test_vanilla_loop_bound():
-    graph = tf.Graph()
-    with graph.as_default():
-        dshape = (2, 10)
-        dtype = "float32"
-        dname = "data"
-        np_data = np.random.uniform(size=dshape).astype(dtype)
-        data = tf.placeholder(shape=dshape, dtype=dtype, name=dname)
-        x = tf.slice(data, [1, 4], [1, 4])
-        outer = x + 5.0
-
-        def body(x, y):
-            res = tf.cond(tf.less(y, 10), lambda: tf.add(10.0, 20.0), lambda: tf.square(10.0))
-            z = tf.constant(7)
-            res = tf.cond(tf.less(z, 10), lambda: res * 5, lambda: res + 10)
-            return tf.multiply(res, x * outer), y + 1
-
-        y = tf.constant(0)
-
-        def condition(x, y):
-            return tf.less(y, 20)
-
-        r = tf.while_loop(condition, body, loop_vars=[x, y])
-        with tf.Session() as sess:
-            tf_out = sess.run(r, feed_dict={"%s:0" % dname: np_data})
-
-    check_equal(graph, tf_out, {dname: np_data})
-
-
-def test_nested_loop_bound():
-    graph = tf.Graph()
-    with graph.as_default():
-        dshape = (2, 10)
-        dtype = "float32"
-        dname = "data"
-        np_data = np.random.uniform(size=dshape).astype(dtype)
-        data = tf.placeholder(shape=dshape, dtype=dtype, name=dname)
-        x = tf.slice(data, [1, 4], [1, 4])
-        outer = x + 5.0
-
-        def body(x, y):
-            res = tf.cond(tf.less(y, 10), lambda: tf.add(10.0, 20.0), lambda: tf.square(10.0))
-
-            def nested_body(nx, ny):
-                return nx + 1, res + 2.0
-
-            def nested_cond(nx, ny):
-                return tf.less(nx, 15)
-
-            nx = tf.constant(0)
-            ny = tf.constant(0.0)
-            nested_res = tf.while_loop(nested_cond, nested_body, loop_vars=[nx, ny])
-            res = res + nested_res[1]
-            z = tf.constant(7)
-            res = tf.cond(tf.less(z, 10), lambda: res * 5, lambda: res + 10)
-            return tf.multiply(res, x * outer), y + 1
-
-        y = tf.constant(0)
-
-        def condition(x, y):
-            return tf.less(y, 20)
-
-        r = tf.while_loop(condition, body, loop_vars=[x, y])
-        with tf.Session() as sess:
-            tf_out = sess.run(r, feed_dict={"%s:0" % dname: np_data})
-
-    check_equal(graph, tf_out, {dname: np_data})
-
-
-def test_switch():
-    graph = tf.Graph()
-
-    with graph.as_default():
-        data_np = np.random.uniform(0, 5, size=(2, 4, 5, 1)).astype("float32")
-        dname = "data"
-        flag_name = "flag"
-        data = tf.placeholder(shape=data_np.shape, dtype=data_np.dtype, name=dname)
-        split = tf.split(data, 2, axis=0)
-        flag = tf.placeholder(shape={}, dtype=tf.bool, name=flag_name)
-        output_false, output_true = control_flow_ops.switch(split[1], flag)
-        with tf.Session() as sess:
-            tf_out = sess.run(output_false, feed_dict={data.name: data_np, flag.name: False})
-
-    check_equal(graph, tf_out, {dname: data_np, flag_name: False})
-
-
-def test_loop_tuple_input():
-    graph = tf.Graph()
-
-    with graph.as_default():
-        data_np = np.random.uniform(0, 5, size=(2, 4, 5, 1)).astype("float32")
-        dname = "data"
-        data = tf.placeholder(shape=data_np.shape, dtype=data_np.dtype, name=dname)
-        split = tf.split(data, 2, axis=0)
-
-        def body(x, y):
-            return x + 2, y + 1
-
-        start = tf.constant(0)
-
-        def condition(x, y):
-            return tf.less(y, 20)
-
-        r = tf.while_loop(condition, body, loop_vars=[split[1], start])
-        with tf.Session() as sess:
-            tf_out = sess.run(r, feed_dict={data.name: data_np})
-
-    check_equal(graph, tf_out, {dname: data_np})
-
-
-if __name__ == "__main__":
-    # tf.while_loop
-    test_vanilla_loop()
-    test_loop_2_vars()
-    test_loop_3_vars()
-    test_loop_conditions()
-    # TODO(@jroesch): Need to fix memory alloc to support closure
-    # test_loop_bodies()
-    test_callnode_loop_vars()
-
-    # tf.cond
-    test_vanilla_cond()
-    test_multiple_cond_vars()
-    test_cond_fn_parameters()
-
-    # nested cases
-    test_nested_loop()
-    test_nested_cond()
-    test_loop_in_cond()
-    test_cond_in_loop()
-    test_vanilla_loop_bound()
-    test_nested_loop_bound()
-
-    test_switch()
-    test_loop_tuple_input()
diff --git a/tests/python/frontend/tensorflow/test_debugging.py b/tests/python/frontend/tensorflow/test_debugging.py
deleted file mode 100644
index 0f7c4dd7d65a..000000000000
--- a/tests/python/frontend/tensorflow/test_debugging.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Unit tests for converting TensorFlow debugging ops to Relay."""
-try:
-    import tensorflow.compat.v1 as tf
-
-    tf.disable_v2_behavior()
-except ImportError:
-    import tensorflow as tf
-import numpy as np
-from tvm import relay, ir, testing
-from tvm.relay.frontend.tensorflow import from_tensorflow
-
-
-def run_relay(graph, shape_dict=None, *vars):
-    with testing.disable_span_filling():
-        mod, params = from_tensorflow(graph.as_graph_def(add_shapes=True), shape=shape_dict)
-    with testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_tensorflow(
-            graph.as_graph_def(add_shapes=True), shape=shape_dict
-        )
-    assert ir.structural_equal(mod["main"], mod_with_span["main"])
-
-    return relay.create_executor("debug", mod=mod).evaluate()(*vars)
-
-
-def test_assert_true():
-    g = tf.Graph()
-    shape = (1, 2)
-    with g.as_default():
-        x = tf.placeholder(tf.float32, shape=shape, name="input")
-        assert_op = tf.Assert(tf.reduce_all(tf.less_equal(x, x)), ["it failed"])
-
-        with tf.Session() as sess:
-            x_value = np.random.rand(*shape)
-            assert sess.run(assert_op, feed_dict={x: x_value}) is None
-
-        # In TVM, tf.assert is converted to a no-op which is actually a 0,
-        # though it should probably be none or an empty tuple.
-        #
-        # ToDo: It appears that the frontend converter gets confused here and
-        # entirely eliminates all operands from main(). Likely because x <= x
-        # is always true, so the placeholder can be eliminated. But TF doesn't
-        # do that, it's happening in Relay, and that optimization shouldn't
-        # affect the arity of the main function. We should have to pass in
-        # x_value here.
-        np.testing.assert_allclose(0, run_relay(g, {"input": shape}).numpy())
-
-
-def test_assert_true_var_capture():
-    g = tf.Graph()
-    with g.as_default():
-        x = tf.placeholder(tf.float32, shape=())
-
-        # It turns out that tf.assert() creates a large and complex subgraph if
-        # you capture a variable as part of the error message. So we need to
-        # test that, too.
-        assert_op = tf.Assert(tf.less_equal(x, x), ["it failed", x])
-
-        with tf.Session() as sess:
-            x_value = np.random.rand()
-            assert sess.run(assert_op, feed_dict={x: x_value}) is None
-
-        # TODO: The frontend converter notes the output of
-        # the graph as a boolean, which is not correct - as you can see above,
-        # TF believes that the value of this graph is None.
-        np.testing.assert_allclose(True, run_relay(g, None, x_value).numpy())
-
-
-def test_assert_false():
-    g = tf.Graph()
-    with g.as_default():
-        assert_op = tf.Assert(tf.constant(False), ["it failed"])
-
-        with tf.Session() as sess:
-            try:
-                print(sess.run(assert_op))
-                assert False  # TF should have thrown an exception
-            except tf.errors.InvalidArgumentError as e:
-                assert "it failed" in e.message
-
-        # In TVM, tf.assert is converted to a no-op which is actually a 0,
-        # though it should probably be none or an empty tuple. For the same
-        # reason, there should not be an error here, even though the assertion
-        # argument is false.
-        np.testing.assert_allclose(0, run_relay(g).numpy())
-
-
-if __name__ == "__main__":
-    test_assert_true()
-    test_assert_true_var_capture()
-    test_assert_false()
diff --git a/tests/python/frontend/tensorflow/test_forward.py b/tests/python/frontend/tensorflow/test_forward.py
deleted file mode 100644
index 354ed38a62ce..000000000000
--- a/tests/python/frontend/tensorflow/test_forward.py
+++ /dev/null
@@ -1,6100 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument, ungrouped-imports, wrong-import-order
-"""
-Tensorflow testcases
-====================
-This article is a test script to test tensorflow operator with Relay.
-"""
-from __future__ import print_function
-
-import threading
-import platform
-import os.path
-from packaging import version as package_version
-import numpy as np
-import pytest
-
-from PIL import Image
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import graph_util
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import nn
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.ops import init_ops
-from tensorflow.python.framework import function
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import gen_functional_ops
-from tensorflow.python.client import device_lib
-
-try:
-    import tensorflow.compat.v1 as tf
-
-    tf.disable_v2_behavior()
-except ImportError:
-    import tensorflow as tf
-
-import tvm
-from tvm import relay, ir
-from tvm.runtime.vm import VirtualMachine
-from tvm.relay.frontend.tensorflow import from_tensorflow
-from tvm.contrib import graph_executor
-from tvm.contrib import utils
-import tvm.testing
-import tvm.relay.testing.tf as tf_testing
-from relay.utils.tag_span import _set_span, _create_span, _verify_structural_equal_with_span
-
-
-# Only allow TF to run on half the GPU RAM to save the other half
-# For TVM
-gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
-gpu_sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
-gpu_sess.close()
-
-
-#######################################################################
-# Generic run functions for TVM & tensorflow
-# ------------------------------------------
-
-
-def convert_to_list(x):
-    if not isinstance(x, list):
-        x = [x]
-    return x
-
-
-tf_dtypes = {
-    "float32": tf.float32,
-    "float16": tf.float16,
-    "float64": tf.float64,
-    "int32": tf.int32,
-    "uint8": tf.uint8,
-    "int8": tf.int8,
-    "int16": tf.int16,
-    "uint16": tf.uint16,
-    "int64": tf.int64,
-}
-
-
-def vmobj_to_list(o):
-    """Converts TVM objects returned by VM execution to Python List."""
-    if isinstance(o, tvm.nd.NDArray):
-        return [o.numpy()]
-    elif isinstance(o, tvm.runtime.container.ADT):
-        result = []
-        for f in o:
-            result.extend(vmobj_to_list(f))
-        return result
-    elif isinstance(o, tvm.relay.backend.interpreter.ConstructorValue):
-        if o.constructor.name_hint == "Cons":
-            tl = vmobj_to_list(o.fields[1])
-            hd = vmobj_to_list(o.fields[0])
-            hd.extend(tl)
-            return hd
-        elif o.constructor.name_hint == "Nil":
-            return []
-        elif "tensor_nil" in o.constructor.name_hint:
-            return [0]
-        elif "tensor" in o.constructor.name_hint:
-            return [o.fields[0].numpy()]
-        else:
-            raise RuntimeError(f"Unknown object type: {o.constructor.name_hint}")
-    else:
-        raise RuntimeError(f"Unknown object type: {type(o)}")
-
-
-def run_tvm_graph(
-    graph_def,
-    input_data,
-    input_node,
-    num_output=1,
-    target="llvm",
-    out_names=None,
-    opt_level=3,
-    mode="graph_executor",
-    cuda_layout="NCHW",
-    layout=None,
-    disabled_pass=None,
-    ignore_in_shape=False,
-    serialize=False,
-    convert_config=None,
-):
-    """Generic function to compile on relay and execute on tvm"""
-    input_data = convert_to_list(input_data)
-    input_node = convert_to_list(input_node)
-    if target == "cuda":
-        layout = cuda_layout
-    target_host = None
-    if ignore_in_shape:
-        shape_dict = None
-    else:
-        shape_dict = {
-            e: i.shape if hasattr(i, "shape") else () for e, i in zip(input_node, input_data)
-        }
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_tensorflow(
-            graph_def,
-            layout=layout,
-            shape=shape_dict,
-            outputs=out_names,
-            convert_config=convert_config,
-        )
-    with tvm.testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_tensorflow(
-            graph_def,
-            layout=layout,
-            shape=shape_dict,
-            outputs=out_names,
-            convert_config=convert_config,
-        )
-    tvm.ir.assert_structural_equal(mod["main"], mod_with_span["main"], map_free_vars=True)
-
-    dev = tvm.device(target, 0)
-    if mode == "debug":
-        inputs = []
-        for param in mod["main"].params:
-            found = False
-            for i, n in enumerate(input_node):
-                if n == param.name_hint:
-                    found = True
-                    inputs.append(tvm.nd.array(input_data[i]))
-                    break
-            # Interpreter doesn't bind constants, so still need to find in params
-            if not found:
-                inputs.append(tvm.nd.array(params[param.name_hint]))
-        result = relay.create_executor(mode, mod=mod, device=tvm.cpu(), target="llvm").evaluate()(
-            *inputs
-        )
-        return vmobj_to_list(result)
-    elif mode == "vm":
-        with tvm.transform.PassContext(opt_level=opt_level, disabled_pass=disabled_pass):
-            mod = relay.transform.InferType()(mod)
-            vm_exec = relay.vm.compile(mod, target="llvm", params=params)
-        if serialize:
-            code, lib = vm_exec.save()
-            vm_exec = tvm.runtime.vm.Executable.load_exec(code, lib)
-        vm = VirtualMachine(vm_exec, tvm.cpu())
-        inputs = {}
-        for e, i in zip(input_node, input_data):
-            inputs[e] = tvm.nd.array(i)
-        result = vm.invoke("main", **inputs)
-        return vmobj_to_list(result)
-    else:
-        with tvm.transform.PassContext(opt_level=opt_level, disabled_pass=disabled_pass):
-            target = tvm.target.Target(target, target_host)
-            graph, lib, params = relay.build(mod, target=target, params=params)
-
-        m = graph_executor.create(graph, lib, dev)
-        # set inputs
-        for e, i in zip(input_node, input_data):
-            if e != "":
-                m.set_input(e, tvm.nd.array(i))
-
-        m.set_input(**params)
-        # execute
-        m.run()
-        # get outputs
-        assert out_names is None or num_output == len(
-            out_names
-        ), f"out_names: {out_names} num_output: {num_output}"
-        tvm_output_list = [m.get_output(i).numpy() for i in range(num_output)]
-        return tvm_output_list
-
-
-def run_tf_graph(sess, input_data, input_node, output_node):
-    """Generic function to execute tensorflow"""
-    input_data = convert_to_list(input_data)
-    input_node = convert_to_list(input_node)
-    output_node = convert_to_list(output_node)
-
-    tensor = [sess.graph.get_tensor_by_name(output_name) for output_name in output_node]
-
-    input_dict = {e: input_data[i] for i, e in enumerate(input_node)}
-    if len(input_node) == 1 and input_node[0] == "":
-        output_data = sess.run(tensor)
-    else:
-        output_data = sess.run(tensor, input_dict)
-    return output_data
-
-
-def compare_tf_with_tvm(
-    in_data,
-    in_name,
-    out_name,
-    init_global_variables=False,
-    no_gpu=False,
-    opt_level=3,
-    mode="graph_executor",
-    cuda_layout="NCHW",
-    add_shapes_to_graph_def=True,
-    targets=None,
-    ignore_in_shape=False,
-    convert_config=None,
-    atol=1e-5,
-    rtol=1e-5,
-):
-    """Generic function to generate and compare tensorflow and TVM output"""
-
-    def name_without_num(name):
-        return name.split(":")[0] if ":" in name else name
-
-    out_name = convert_to_list(out_name)
-    out_node = [name_without_num(name) for name in out_name]
-
-    in_data = convert_to_list(in_data)
-    in_name = convert_to_list(in_name)
-    in_node = [name_without_num(name) for name in in_name]
-    with tf.Session() as sess:
-        if init_global_variables:
-            sess.run(variables.global_variables_initializer())
-        final_graph_def = (
-            tf_testing.AddShapesToGraphDef(sess, out_node)
-            if add_shapes_to_graph_def
-            else tf.get_default_graph().as_graph_def()
-        )
-
-        tf_output = run_tf_graph(sess, in_data, in_name, out_name)
-
-        devices = targets if targets else ["llvm", "cuda"]
-
-        for device in devices:
-            _ = tvm.device(device, 0)
-            if not tvm.testing.device_enabled(device):
-                print(f"Skip because {device} is not enabled")
-                continue
-            if no_gpu and device == "cuda":
-                continue
-            if "cublas" in device and not tvm.get_global_func("tvm.contrib.cublas.matmul", True):
-                print(f"Skip because cublas is not enabled: {device}")
-                continue
-
-            tvm_output = run_tvm_graph(
-                final_graph_def,
-                in_data,
-                in_node,
-                target=device,
-                out_names=out_name,
-                num_output=len(out_name),
-                opt_level=opt_level,
-                mode=mode,
-                cuda_layout=cuda_layout,
-                ignore_in_shape=ignore_in_shape,
-                convert_config=convert_config,
-            )
-            # since the names from tensorflow and relay runs are not exactly same,
-            # first len(tf_output) will be compared
-            for i, tf_out in enumerate(tf_output):
-                if not isinstance(tf_out, np.ndarray):
-                    assert len(tvm_output[i].shape) == 0  # pylint: disable=len-as-condition
-                tvm.testing.assert_allclose(tf_out, tvm_output[i], atol=atol, rtol=rtol)
-
-        sess.close()
-
-
-def is_gpu_available():
-    """Verify gpu is available"""
-    local_device_protos = device_lib.list_local_devices()
-    gpu_list = [x.name for x in local_device_protos if x.device_type == "GPU"]
-    if gpu_list:
-        print("Tensorflow GPU:", gpu_list)
-        return True
-    else:
-        return False
-
-
-#######################################################################
-# Pooling
-# -------
-
-
-def _test_pooling_iteration(input_shape, **kwargs):
-    """One iteration of pool operation with given shapes and attributes"""
-
-    x = -np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) - 1
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=input_shape, dtype="float32")
-        nn_ops.pool(in_data, **kwargs)
-
-        if kwargs["pooling_type"] == "MAX":
-            out_name = "max_pool:0"
-        else:
-            out_name = "avg_pool:0"
-
-        compare_tf_with_tvm(x, "Placeholder:0", out_name)
-
-
-def _test_pooling(input_shape, **kwargs):
-    _test_pooling_iteration(input_shape, **kwargs)
-
-    if is_gpu_available():
-        if len(input_shape) == 4:
-            input_shape = [input_shape[ii] for ii in (0, 3, 1, 2)]
-            if isinstance(kwargs["padding"], list):
-                kwargs["padding"] = [kwargs["padding"][ii] for ii in (0, 3, 1, 2)]
-            kwargs["data_format"] = "NCHW"
-            _test_pooling_iteration(input_shape, **kwargs)
-
-
-def _test_pooling_dynamic(input_shape, np_shape, **kwargs):
-    """Pooling with dynamic height and width dimensions."""
-    x = -np.arange(np.prod(np_shape), dtype=np.float32).reshape(np_shape) - 1
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=input_shape, dtype="float32")
-        nn_ops.pool(in_data, **kwargs)
-
-        if kwargs["pooling_type"] == "MAX":
-            out_name = "max_pool:0"
-        else:
-            out_name = "avg_pool:0"
-
-        compare_tf_with_tvm(x, "Placeholder:0", out_name, mode="vm", ignore_in_shape=True)
-
-
-@tvm.testing.uses_gpu
-def test_forward_pooling():
-    """Pooling"""
-    # TensorFlow only supports NDHWC for max_pool3d on CPU
-    for pool_type in ["AVG", "MAX"]:
-        # NDHWC is the default layout for max_pool3d and avg_pool3d in TensorFlow
-        _test_pooling(
-            input_shape=[1, 3, 32, 32, 32],
-            window_shape=[2, 2, 2],
-            padding="VALID",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1, 1],
-            strides=[2, 2, 2],
-        )
-
-        _test_pooling(
-            input_shape=[1, 3, 32, 32, 32],
-            window_shape=[1, 1, 1],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1, 1],
-            strides=[1, 1, 1],
-        )
-
-        _test_pooling(
-            input_shape=[1, 3, 32, 32, 32],
-            window_shape=[2, 2, 2],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1, 1],
-            strides=[2, 2, 2],
-        )
-
-        _test_pooling_dynamic(
-            input_shape=[1, None, None, 3],
-            np_shape=[1, 32, 32, 3],
-            window_shape=[2, 2],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[1, 1],
-        )
-
-        # test cases for max_pool3d & avg_pool3d with layout NCDHW
-        # TensorFlow pool3d  doesn't support NCDHW on cpu
-        if is_gpu_available():
-            _test_pooling(
-                input_shape=[1, 3, 32, 32, 32],
-                window_shape=[1, 1, 1],
-                padding="SAME",
-                pooling_type=pool_type,
-                dilation_rate=[1, 1, 1],
-                strides=[1, 1, 1],
-                data_format="NCDHW",
-            )
-
-            _test_pooling(
-                input_shape=[1, 3, 32, 32, 32],
-                window_shape=[2, 2, 2],
-                padding="VALID",
-                pooling_type=pool_type,
-                dilation_rate=[1, 1, 1],
-                strides=[2, 2, 2],
-                data_format="NCDHW",
-            )
-
-        _test_pooling(
-            input_shape=[2, 9, 10, 2],
-            window_shape=[1, 1],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[1, 1],
-        )
-
-        _test_pooling(
-            input_shape=[2, 10, 9, 2],
-            window_shape=[1, 1],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[1, 1],
-        )
-
-        _test_pooling(
-            input_shape=[2, 9, 10, 2],
-            window_shape=[2, 1],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[1, 1],
-        )
-
-        _test_pooling(
-            input_shape=[2, 10, 9, 2],
-            window_shape=[2, 3],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[2, 1],
-        )
-
-        # Tests involving SpaceToBatchND
-        _test_pooling(
-            input_shape=[1, 1, 2, 1],
-            window_shape=[1, 1],
-            padding="VALID",
-            pooling_type=pool_type,
-            dilation_rate=[1, 2],
-        )
-
-        _test_pooling(
-            input_shape=[1, 2, 1],
-            window_shape=[1],
-            padding="VALID",
-            pooling_type=pool_type,
-            dilation_rate=[2],
-        )
-    # Explicit padding
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.4.1"):
-        _test_pooling(
-            input_shape=[2, 9, 10, 2],
-            window_shape=[4, 4],
-            padding=[[0, 0], [0, 1], [2, 3], [0, 0]],
-            pooling_type="MAX",
-            dilation_rate=[1, 1],
-            strides=[1, 1],
-        )
-
-
-#######################################################################
-# Convolution
-# -----------
-
-
-def _test_convolution(
-    opname,
-    tensor_in_sizes,
-    filter_in_sizes,
-    dilations,
-    strides,
-    padding,
-    data_format,
-    deconv_output_shape=None,
-    add_shapes_to_graph_def=True,
-):
-    """One iteration of convolution with given shapes and attributes"""
-    deconv_output_shape = deconv_output_shape or []
-    total_size_1 = np.prod(tensor_in_sizes)
-    total_size_2 = np.prod(filter_in_sizes)
-    # Initializes the input tensor with array containing incrementing
-    # numbers from 1.
-    data_array = [f * 1.0 for f in range(1, total_size_1 + 1)]
-    filter_array = [f * 1.0 for f in range(1, total_size_2 + 1)]
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32")
-        in_filter = constant_op.constant(filter_array, shape=filter_in_sizes, dtype="float32")
-        if data_format == "NHWC":
-            strides = [1] + strides + [1]
-            dilations = [1] + dilations + [1]
-        else:
-            strides = [1, 1] + strides
-            dilations = [1, 1] + dilations
-
-        if opname == "conv":
-            nn_ops.conv2d(
-                in_data,
-                in_filter,
-                strides=strides,
-                dilations=dilations,
-                padding=padding,
-                data_format=data_format,
-            )
-
-            compare_tf_with_tvm(
-                np.reshape(data_array, tensor_in_sizes).astype("float32"),
-                "Placeholder:0",
-                "Conv2D:0",
-                add_shapes_to_graph_def=add_shapes_to_graph_def,
-            )
-        elif opname == "conv_transpose":
-            nn_ops.conv2d_transpose(
-                in_data,
-                in_filter,
-                output_shape=deconv_output_shape,
-                strides=strides,
-                padding=padding,
-                data_format=data_format,
-            )
-
-            compare_tf_with_tvm(
-                np.reshape(data_array, tensor_in_sizes).astype("float32"),
-                "Placeholder:0",
-                "conv2d_transpose:0",
-                add_shapes_to_graph_def=add_shapes_to_graph_def,
-            )
-        else:
-            nn_ops.depthwise_conv2d_native(
-                in_data,
-                in_filter,
-                strides=strides,
-                dilations=dilations,
-                padding=padding,
-                data_format=data_format,
-            )
-
-            compare_tf_with_tvm(
-                np.reshape(data_array, tensor_in_sizes).astype("float32"),
-                "Placeholder:0",
-                "DepthwiseConv2dNative:0",
-                add_shapes_to_graph_def=add_shapes_to_graph_def,
-            )
-
-
-@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/10275")
-@tvm.testing.uses_gpu
-def test_forward_convolution():
-    """Convolution"""
-    if is_gpu_available():
-        _test_convolution("conv", [4, 176, 8, 8], [1, 1, 176, 32], [1, 1], [1, 1], "SAME", "NCHW")
-        _test_convolution("conv", [4, 19, 17, 17], [3, 3, 19, 19], [1, 1], [2, 2], "VALID", "NCHW")
-        _test_convolution("conv", [4, 124, 17, 17], [1, 1, 124, 19], [1, 1], [1, 1], "SAME", "NCHW")
-        _test_convolution("conv", [4, 12, 17, 17], [3, 3, 12, 32], [1, 1], [2, 2], "VALID", "NCHW")
-        _test_convolution(
-            "depthwise", [4, 176, 8, 8], [1, 1, 176, 1], [1, 1], [1, 1], "SAME", "NCHW"
-        )
-        _test_convolution(
-            "depthwise", [4, 19, 17, 17], [3, 3, 19, 1], [1, 1], [2, 2], "VALID", "NCHW"
-        )
-        _test_convolution(
-            "depthwise", [4, 124, 17, 17], [1, 1, 124, 1], [1, 1], [1, 1], "SAME", "NCHW"
-        )
-        _test_convolution(
-            "depthwise", [4, 12, 17, 17], [3, 3, 12, 1], [1, 1], [2, 2], "VALID", "NCHW"
-        )
-        _test_convolution(
-            "depthwise", [4, 12, 17, 17], [3, 3, 12, 2], [1, 1], [2, 2], "VALID", "NCHW"
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 32, 8, 8],
-            [1, 1, 176, 32],
-            [1, 1],
-            [1, 1],
-            "SAME",
-            "NCHW",
-            [4, 176, 8, 8],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 32, 8, 8],
-            [2, 2, 176, 32],
-            [1, 1],
-            [1, 1],
-            "SAME",
-            "NCHW",
-            [4, 176, 8, 8],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 32, 8, 8],
-            [2, 2, 176, 32],
-            [1, 1],
-            [2, 2],
-            "SAME",
-            "NCHW",
-            [4, 176, 15, 15],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 32, 8, 8],
-            [3, 3, 176, 32],
-            [1, 1],
-            [1, 1],
-            "SAME",
-            "NCHW",
-            [4, 176, 8, 8],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 32, 8, 8],
-            [3, 3, 176, 32],
-            [1, 1],
-            [2, 2],
-            "SAME",
-            "NCHW",
-            [4, 176, 15, 15],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 32, 8, 8],
-            [3, 3, 176, 32],
-            [1, 1],
-            [2, 2],
-            "SAME",
-            "NCHW",
-            [4, 176, 16, 16],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 19, 8, 8],
-            [3, 3, 19, 19],
-            [1, 1],
-            [2, 2],
-            "VALID",
-            "NCHW",
-            [4, 19, 17, 17],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 19, 17, 17],
-            [1, 1, 124, 19],
-            [1, 1],
-            [1, 1],
-            "SAME",
-            "NCHW",
-            [4, 124, 17, 17],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 19, 17, 17],
-            [3, 3, 124, 19],
-            [1, 1],
-            [1, 1],
-            "SAME",
-            "NCHW",
-            [4, 124, 17, 17],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 32, 8, 8],
-            [3, 3, 12, 32],
-            [1, 1],
-            [2, 2],
-            "VALID",
-            "NCHW",
-            [4, 12, 17, 17],
-        )
-        # kernel 2x2, strides (2,2)
-        _test_convolution(
-            "conv_transpose",
-            [4, 19, 8, 8],
-            [2, 2, 19, 19],
-            [1, 1],
-            [2, 2],
-            "VALID",
-            "NCHW",
-            [4, 19, 16, 16],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 32, 8, 8],
-            [2, 2, 12, 32],
-            [1, 1],
-            [2, 2],
-            "VALID",
-            "NCHW",
-            [4, 12, 16, 16],
-        )
-        # output channel is 1
-        _test_convolution(
-            "conv_transpose",
-            [1, 19, 8, 8],
-            [1, 1, 1, 19],
-            [1, 1],
-            [1, 1],
-            "VALID",
-            "NCHW",
-            [1, 1, 8, 8],
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 19, 8, 8],
-            [2, 2, 66, 19],
-            [1, 1],
-            [2, 2],
-            "VALID",
-            "NCHW",
-            [4, 66, 16, 16],
-        )
-    _test_convolution("conv", [4, 8, 8, 176], [1, 1, 176, 32], [1, 1], [1, 1], "SAME", "NHWC")
-    _test_convolution("conv", [4, 17, 17, 19], [3, 3, 19, 19], [1, 1], [2, 2], "VALID", "NHWC")
-    _test_convolution("conv", [4, 17, 17, 124], [1, 1, 124, 19], [1, 1], [1, 1], "SAME", "NHWC")
-    _test_convolution("conv", [4, 17, 17, 12], [3, 3, 12, 32], [1, 1], [2, 2], "VALID", "NHWC")
-    _test_convolution(
-        "conv",
-        [4, 17, 17, 12],
-        [3, 3, 12, 32],
-        [1, 1],
-        [2, 2],
-        "VALID",
-        "NHWC",
-        add_shapes_to_graph_def=False,
-    )
-    _test_convolution("depthwise", [4, 8, 8, 176], [1, 1, 176, 1], [1, 1], [1, 1], "SAME", "NHWC")
-    _test_convolution("depthwise", [4, 17, 17, 19], [3, 3, 19, 1], [1, 1], [2, 2], "VALID", "NHWC")
-    _test_convolution("depthwise", [4, 17, 17, 124], [1, 1, 124, 1], [1, 1], [1, 1], "SAME", "NHWC")
-    _test_convolution("depthwise", [4, 17, 17, 12], [3, 3, 12, 1], [1, 1], [2, 2], "VALID", "NHWC")
-    _test_convolution("depthwise", [4, 17, 17, 12], [3, 3, 12, 2], [1, 1], [2, 2], "VALID", "NHWC")
-    _test_convolution(
-        "depthwise",
-        [4, 17, 17, 12],
-        [3, 3, 12, 2],
-        [1, 1],
-        [2, 2],
-        "VALID",
-        "NHWC",
-        add_shapes_to_graph_def=False,
-    )
-
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [1, 1, 176, 32],
-        [1, 1],
-        [1, 1],
-        "SAME",
-        "NHWC",
-        [4, 8, 8, 176],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [2, 2, 176, 32],
-        [1, 1],
-        [1, 1],
-        "SAME",
-        "NHWC",
-        [4, 8, 8, 176],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [2, 2, 176, 32],
-        [1, 1],
-        [2, 2],
-        "SAME",
-        "NHWC",
-        [4, 15, 15, 176],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [3, 3, 176, 32],
-        [1, 1],
-        [1, 1],
-        "SAME",
-        "NHWC",
-        [4, 8, 8, 176],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [3, 3, 176, 32],
-        [1, 1],
-        [2, 2],
-        "SAME",
-        "NHWC",
-        [4, 15, 15, 176],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [3, 3, 176, 32],
-        [1, 1],
-        [2, 2],
-        "SAME",
-        "NHWC",
-        [4, 16, 16, 176],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 19],
-        [3, 3, 19, 19],
-        [1, 1],
-        [2, 2],
-        "VALID",
-        "NHWC",
-        [4, 17, 17, 19],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 17, 17, 19],
-        [1, 1, 124, 19],
-        [1, 1],
-        [1, 1],
-        "SAME",
-        "NHWC",
-        [4, 17, 17, 124],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 17, 17, 19],
-        [3, 3, 124, 19],
-        [1, 1],
-        [1, 1],
-        "SAME",
-        "NHWC",
-        [4, 17, 17, 124],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [3, 3, 12, 32],
-        [1, 1],
-        [2, 2],
-        "VALID",
-        "NHWC",
-        [4, 17, 17, 12],
-    )
-    # kernel 2x2, strides (2,2)
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 19],
-        [2, 2, 19, 19],
-        [1, 1],
-        [2, 2],
-        "VALID",
-        "NHWC",
-        [4, 16, 16, 19],
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [2, 2, 12, 32],
-        [1, 1],
-        [2, 2],
-        "VALID",
-        "NHWC",
-        [4, 16, 16, 12],
-    )
-    # output channel is 1
-    _test_convolution(
-        "conv_transpose",
-        [1, 8, 8, 19],
-        [1, 1, 1, 19],
-        [1, 1],
-        [1, 1],
-        "VALID",
-        "NHWC",
-        [1, 8, 8, 1],
-    )
-    # Test without adding shapes to graph def
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 32],
-        [1, 1, 176, 32],
-        [1, 1],
-        [1, 1],
-        "SAME",
-        "NHWC",
-        [4, 8, 8, 176],
-        add_shapes_to_graph_def=False,
-    )
-    _test_convolution(
-        "conv_transpose",
-        [4, 8, 8, 19],
-        [2, 2, 66, 19],
-        [1, 1],
-        [2, 2],
-        "VALID",
-        "NHWC",
-        [4, 16, 16, 66],
-    )
-    # Explicit padding
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.4.1"):
-        _test_convolution(
-            "conv",
-            [4, 8, 8, 16],
-            [1, 1, 16, 32],
-            [1, 1],
-            [1, 1],
-            [[0, 0], [2, 3], [0, 1], [0, 0]],
-            "NHWC",
-        )
-        _test_convolution(
-            "depthwise",
-            [4, 8, 8, 16],
-            [1, 1, 16, 1],
-            [1, 1],
-            [1, 1],
-            [[0, 0], [2, 3], [0, 1], [0, 0]],
-            "NHWC",
-        )
-        _test_convolution(
-            "conv_transpose",
-            [4, 8, 8, 32],
-            [3, 3, 176, 32],
-            [1, 1],
-            [2, 2],
-            [[0, 0], [1, 0], [1, 0], [0, 0]],
-            "NHWC",
-            [4, 16, 16, 176],
-        )
-
-
-#######################################################################
-# Convolution3D
-# -------------
-
-
-def _test_convolution3d(
-    opname,
-    tensor_in_sizes,
-    filter_in_sizes,
-    dilations,
-    strides,
-    padding,
-    data_format,
-    deconv_output_shape=None,
-    add_shapes_to_graph_def=True,
-):
-    """One iteration of 3D convolution with given shapes and attributes"""
-    deconv_output_shape = deconv_output_shape or []
-    total_size_1 = np.prod(tensor_in_sizes)
-    total_size_2 = np.prod(filter_in_sizes)
-    # Initializes the input tensor with array containing incrementing
-    # numbers from 1.
-    data_array = [f * 1.0 for f in range(1, total_size_1 + 1)]
-    filter_array = [f * 1.0 for f in range(1, total_size_2 + 1)]
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32")
-        in_filter = constant_op.constant(filter_array, shape=filter_in_sizes, dtype="float32")
-        if data_format == "NDHWC":
-            strides = [1] + strides + [1]
-            dilations = [1] + dilations + [1]
-        else:
-            strides = [1, 1] + strides
-            dilations = [1, 1] + dilations
-
-        if opname == "conv":
-            nn_ops.conv3d(
-                in_data,
-                in_filter,
-                strides=strides,
-                dilations=dilations,
-                padding=padding,
-                data_format=data_format,
-            )
-
-            compare_tf_with_tvm(
-                np.reshape(data_array, tensor_in_sizes).astype("float32"),
-                "Placeholder:0",
-                "Conv3D:0",
-                cuda_layout="NCDHW",
-                add_shapes_to_graph_def=add_shapes_to_graph_def,
-            )
-
-
-@tvm.testing.uses_gpu
-def test_forward_convolution3d():
-    """Convolution3d"""
-    if is_gpu_available():
-        _test_convolution3d(
-            "conv", [4, 176, 8, 8, 8], [1, 1, 1, 176, 32], [1, 1, 1], [1, 1, 1], "SAME", "NCDHW"
-        )
-        _test_convolution3d(
-            "conv", [4, 19, 17, 17, 17], [3, 3, 3, 19, 19], [1, 1, 1], [2, 2, 2], "VALID", "NCDHW"
-        )
-        _test_convolution3d(
-            "conv", [4, 124, 17, 17, 17], [1, 1, 1, 124, 19], [1, 1, 1], [1, 1, 1], "SAME", "NCDHW"
-        )
-        _test_convolution3d(
-            "conv", [4, 12, 17, 17, 17], [3, 3, 3, 12, 32], [1, 1, 1], [2, 2, 2], "VALID", "NCDHW"
-        )
-    _test_convolution3d(
-        "conv", [4, 8, 8, 8, 176], [1, 1, 1, 176, 32], [1, 1, 1], [1, 1, 1], "SAME", "NDHWC"
-    )
-    _test_convolution3d(
-        "conv", [4, 17, 17, 17, 19], [3, 3, 3, 19, 19], [1, 1, 1], [2, 2, 2], "VALID", "NDHWC"
-    )
-    _test_convolution3d(
-        "conv", [4, 17, 17, 17, 124], [1, 1, 1, 124, 19], [1, 1, 1], [1, 1, 1], "SAME", "NDHWC"
-    )
-    _test_convolution3d(
-        "conv", [4, 17, 17, 17, 12], [3, 3, 3, 12, 32], [1, 1, 1], [2, 2, 2], "VALID", "NDHWC"
-    )
-    # Test without adding shapes to graph def
-    _test_convolution3d(
-        "conv",
-        [4, 17, 17, 17, 12],
-        [3, 3, 3, 12, 32],
-        [1, 1, 1],
-        [2, 2, 2],
-        "VALID",
-        "NDHWC",
-        add_shapes_to_graph_def=False,
-    )
-
-
-#######################################################################
-# Convolution3D Transpose
-# -----------------------
-
-
-def _test_convolution3d_transpose(
-    data_shape,
-    filter_shape,
-    strides,
-    padding,
-    output_shape,
-    data_format="NCDHW",
-    add_shapes_to_graph_def=True,
-):
-    """One iteration of 3D convolution transpose with given shapes and attributes"""
-
-    dtype = "float32"
-    data_array = np.random.uniform(size=data_shape).astype(dtype)
-    filter_array = np.random.uniform(size=filter_shape).astype(dtype)
-    if data_format == "NDHWC":
-        strides = [1] + strides + [1]
-    else:
-        strides = [1, 1] + strides
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data_shape, dtype=dtype)
-        in_filter = constant_op.constant(filter_array, shape=filter_shape, dtype=dtype)
-
-        nn_ops.conv3d_transpose(
-            in_data,
-            in_filter,
-            output_shape=output_shape,
-            strides=strides,
-            padding=padding,
-            data_format=data_format,
-        )
-
-        compare_tf_with_tvm(
-            data_array,
-            "Placeholder:0",
-            "conv3d_transpose:0",
-            cuda_layout="NDHWC",
-            add_shapes_to_graph_def=add_shapes_to_graph_def,
-        )
-
-
-@tvm.testing.uses_gpu
-def test_forward_convolution3d_transpose():
-    """Convolution3d transpose"""
-    if is_gpu_available():
-        _test_convolution3d_transpose(
-            data_shape=[1, 10, 8, 8, 8],
-            filter_shape=[1, 1, 1, 6, 10],
-            strides=[1, 1, 1],
-            padding="VALID",
-            output_shape=[1, 6, 8, 8, 8],
-        )
-
-        _test_convolution3d_transpose(
-            data_shape=[4, 9, 8, 8, 8],
-            filter_shape=[1, 1, 1, 6, 9],
-            strides=[1, 1, 1],
-            padding="VALID",
-            output_shape=[4, 6, 8, 8, 8],
-        )
-
-        _test_convolution3d_transpose(
-            data_shape=[1, 3, 8, 8, 8],
-            filter_shape=[1, 1, 1, 6, 3],
-            strides=[2, 2, 2],
-            padding="SAME",
-            output_shape=[1, 6, 15, 15, 15],
-        )
-
-        _test_convolution3d_transpose(
-            data_shape=[1, 16, 8, 8, 8],
-            filter_shape=[3, 3, 3, 6, 16],
-            strides=[3, 3, 3],
-            padding="VALID",
-            output_shape=[1, 6, 24, 24, 24],
-        )
-
-    _test_convolution3d_transpose(
-        data_shape=[1, 8, 8, 8, 10],
-        filter_shape=[1, 1, 1, 6, 10],
-        strides=[1, 1, 1],
-        padding="VALID",
-        output_shape=[1, 8, 8, 8, 6],
-        data_format="NDHWC",
-    )
-
-    _test_convolution3d_transpose(
-        data_shape=[4, 8, 8, 8, 9],
-        filter_shape=[1, 1, 1, 6, 9],
-        strides=[1, 1, 1],
-        padding="VALID",
-        output_shape=[4, 8, 8, 8, 6],
-        data_format="NDHWC",
-    )
-
-    _test_convolution3d_transpose(
-        data_shape=[1, 8, 8, 8, 3],
-        filter_shape=[1, 1, 1, 6, 3],
-        strides=[2, 2, 2],
-        padding="SAME",
-        output_shape=[1, 15, 15, 15, 6],
-        data_format="NDHWC",
-    )
-
-    _test_convolution3d_transpose(
-        data_shape=[1, 8, 8, 8, 16],
-        filter_shape=[3, 3, 3, 6, 16],
-        strides=[3, 3, 3],
-        padding="VALID",
-        output_shape=[1, 24, 24, 24, 6],
-        data_format="NDHWC",
-    )
-
-    # Test without adding shapes to graph def
-    _test_convolution3d_transpose(
-        data_shape=[1, 8, 8, 8, 16],
-        filter_shape=[3, 3, 3, 6, 16],
-        strides=[3, 3, 3],
-        padding="VALID",
-        output_shape=[1, 24, 24, 24, 6],
-        data_format="NDHWC",
-        add_shapes_to_graph_def=False,
-    )
-
-
-#######################################################################
-# BiasAdd
-# -----------
-
-
-def _test_biasadd(tensor_in_sizes, data_format):
-    """One iteration of biasadd with given shapes and attributes"""
-
-    total_size_1 = 1
-    for s in tensor_in_sizes:
-        total_size_1 *= s
-    tensor_bias_sizes = [tensor_in_sizes[1]] if data_format == "NCHW" else [tensor_in_sizes[3]]
-    total_size_2 = tensor_bias_sizes[0]
-    # Initializes the input tensor with array containing incrementing
-    # numbers from 1.
-    data_array = [f * 1.0 for f in range(1, total_size_1 + 1)]
-    bias_array = [f * 1.0 for f in range(1, total_size_2 + 1)]
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32")
-        in_bias = constant_op.constant(bias_array, shape=tensor_bias_sizes, dtype="float32")
-        nn_ops.bias_add(in_data, in_bias, data_format=data_format)
-
-        compare_tf_with_tvm(
-            np.reshape(data_array, tensor_in_sizes).astype("float32"), "Placeholder:0", "BiasAdd:0"
-        )
-
-
-@tvm.testing.uses_gpu
-def test_forward_biasadd():
-    """Bias add"""
-    if is_gpu_available():
-        _test_biasadd([4, 176, 8, 8], "NCHW")
-        _test_biasadd([1, 100, 1, 1], "NCHW")
-        _test_biasadd([4, 19, 17, 17], "NCHW")
-        _test_biasadd([4, 124, 3, 3], "NCHW")
-
-    _test_biasadd([4, 8, 8, 176], "NHWC")
-    _test_biasadd([1, 1, 1, 100], "NHWC")
-    _test_biasadd([4, 17, 17, 19], "NHWC")
-    _test_biasadd([4, 3, 3, 124], "NHWC")
-
-
-def _test_forward_where(input_shape):
-    with tf.Graph().as_default():
-        dtype = tf.float32
-        t = tf.constant(
-            np.random.choice([0, 1, -2, 3, -1, 0.1, -0.2], size=input_shape).astype(dtype.name)
-        )
-        out = tf.where(t)
-        compare_tf_with_tvm([], [], out.name, mode="debug")
-        compare_tf_with_tvm([], [], out.name, mode="vm")
-
-
-def test_forward_argwhere():
-    _test_forward_where((5,))
-    _test_forward_where((5, 5))
-    _test_forward_where((5, 5, 5))
-    _test_forward_where((5, 5, 5, 5))
-    _test_forward_where((5, 5, 5, 5, 5))
-
-
-def _test_forward_where_with_broadcast(in_shape, cond_shape):
-    choice_list = list(np.arange(10).astype("float32"))
-    t1 = np.random.choice(choice_list, size=cond_shape)
-    t2 = np.random.choice(choice_list, size=cond_shape)
-    x = np.random.choice(choice_list, size=in_shape)
-    y = np.random.choice(choice_list, size=in_shape)
-
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=cond_shape, dtype="float32", name="in1")
-        in2 = tf.placeholder(shape=cond_shape, dtype="float32", name="in2")
-        condition = math_ops.less(in1, in2, name="less")
-        lhs = tf.placeholder(shape=in_shape, dtype="float32", name="x")
-        rhs = tf.placeholder(shape=in_shape, dtype="float32", name="y")
-        out = tf.where(condition, lhs, rhs)
-        compare_tf_with_tvm([t1, t2, x, y], ["in1:0", "in2:0", "x:0", "y:0"], out.name)
-
-
-def test_forward_where_with_broadcast():
-    _test_forward_where_with_broadcast((5, 2), (5,))
-    _test_forward_where_with_broadcast((5, 7), (5,))
-    _test_forward_where_with_broadcast((3, 2, 5), (3,))
-
-
-#######################################################################
-# SpaceToBatchND
-# --------------
-
-
-def _test_space_to_batch_nd(input_shape, block_shape, paddings, dtype="int32"):
-    data = np.random.uniform(0, 5, size=input_shape).astype(dtype)
-
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(shape=input_shape, dtype=dtype)
-        out = tf.space_to_batch_nd(in_data, block_shape, paddings)
-
-        compare_tf_with_tvm(data, in_data.name, out.name)
-
-
-def _test_space_to_batch_nd_infer_paddings(input_shape, block_shape, dtype="int32"):
-    data = np.random.uniform(0, 5, size=input_shape).astype(dtype)
-    padding_np = np.array([0, 1]).astype(np.int32).reshape((1, 2))
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(shape=input_shape, dtype=dtype)
-        const1 = tf.constant(padding_np, dtype=tf.int32)
-        # make paddings an input to tf.transpose, but not an input to the graph,
-        # so it can be extracted with infer_value_simulated
-        paddings = tf.reverse(const1, axis=[-1])
-        out = tf.space_to_batch_nd(in_data, block_shape, paddings)
-        compare_tf_with_tvm(data, in_data.name, out.name)
-
-
-def test_forward_space_to_batch_nd():
-    """SpaceToBatchNd"""
-    # test cases: https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/space-to-batch-n-d
-    _test_space_to_batch_nd(input_shape=[1, 2, 2, 1], block_shape=[2, 2], paddings=[[0, 0], [0, 0]])
-
-    _test_space_to_batch_nd(input_shape=[1, 2, 2, 3], block_shape=[2, 2], paddings=[[0, 0], [0, 0]])
-
-    _test_space_to_batch_nd(input_shape=[1, 4, 4, 1], block_shape=[2, 2], paddings=[[0, 0], [0, 0]])
-
-    _test_space_to_batch_nd(
-        input_shape=[2, 2, 4, 1], block_shape=[2, 2], paddings=[[0, 0], [2, 0]], dtype="int64"
-    )
-
-    # pylint: disable=line-too-long
-    # https://github.com/tensorflow/tensorflow/blob/24f578/tensorflow/python/kernel_tests/spacetobatch_op_test.py
-    _test_space_to_batch_nd(input_shape=[2, 3], block_shape=[2], paddings=[[1, 0]], dtype="float32")
-
-    _test_space_to_batch_nd(
-        input_shape=[2, 3, 2], block_shape=[2], paddings=[[1, 0]], dtype="float64"
-    )
-
-    _test_space_to_batch_nd_infer_paddings(input_shape=[2, 3, 2], block_shape=[2])
-
-
-#######################################################################
-# BatchToSpaceND
-# --------------
-
-
-def _test_batch_to_space_nd(input_shape, block_shape, crops, dtype="int32"):
-    data = np.random.uniform(0, 5, size=input_shape).astype(dtype)
-
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(shape=input_shape, dtype=dtype)
-        out = tf.batch_to_space_nd(in_data, block_shape, crops)
-
-        compare_tf_with_tvm(data, in_data.name, out.name)
-
-
-def test_forward_batch_to_space_nd():
-    """BatchToSpaceNd"""
-    # test cases: https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/batch-to-space-n-d
-    _test_batch_to_space_nd(input_shape=[4, 1, 1, 1], block_shape=[2, 2], crops=[[0, 0], [0, 0]])
-
-    _test_batch_to_space_nd(input_shape=[4, 1, 1, 3], block_shape=[2, 2], crops=[[0, 0], [0, 0]])
-
-    _test_batch_to_space_nd(input_shape=[4, 2, 2, 1], block_shape=[2, 2], crops=[[0, 0], [0, 0]])
-
-    _test_batch_to_space_nd(
-        input_shape=[8, 1, 3, 1], block_shape=[2, 2], crops=[[0, 0], [2, 0]], dtype="int64"
-    )
-
-    # pylint: disable=line-too-long
-    # https://github.com/tensorflow/tensorflow/blob/24f578/tensorflow/python/kernel_tests/batchtospace_op_test.py
-    _test_batch_to_space_nd(
-        input_shape=[18, 2, 1, 2], block_shape=[2, 3], crops=[[1, 1], [0, 0]], dtype="float32"
-    )
-
-    _test_batch_to_space_nd(
-        input_shape=[20, 5, 8, 7], block_shape=[2, 2], crops=[[1, 1], [1, 1]], dtype="float64"
-    )
-
-
-#######################################################################
-# Reshape
-# -------
-
-
-def _test_reshape(data, out_shape):
-    """One iteration of reshape operation with given data and out shape"""
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        array_ops.reshape(in_data, out_shape)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "Reshape:0")
-
-
-def _test_reshape_with_call():
-    """relay.expr.Call as shape"""
-    data = np.zeros((6, 4, 2))
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out_shape = tf.constant([1, 2, 3], dtype="int32")
-        out_shape = tf.multiply(out_shape, 2)
-        array_ops.reshape(in_data, out_shape)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "Reshape:0")
-
-
-def _test_reshape_like(data, shape_like):
-    """A special case for reshape."""
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        in_shape_like = array_ops.placeholder(shape=shape_like.shape, dtype=data.dtype)
-        out_shape = array_ops.shape(in_shape_like)
-        array_ops.reshape(in_data, out_shape)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "Reshape:0")
-
-
-def _test_reshape_symbolic(data, a_data, b_data):
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        a = array_ops.placeholder(shape=a_data.shape, dtype=a_data.dtype)
-        b = array_ops.placeholder(shape=b_data.shape, dtype=b_data.dtype)
-        newshape = tf.add(a, b)
-        out = array_ops.reshape(in_data, newshape)
-
-        for mode in ["debug", "vm"]:
-            compare_tf_with_tvm(
-                [data, a_data, b_data], [in_data.name, a.name, b.name], out.name, mode=mode
-            )
-
-
-def test_forward_reshape():
-    """Reshape"""
-    _test_reshape(np.arange(6.0), [2, 3])
-    _test_reshape(np.arange(6), [-1, 2])
-    _test_reshape(np.arange(6), [3, -1])
-    _test_reshape(np.arange(6), [-1])
-    _test_reshape_with_call()
-    _test_reshape_like(np.zeros((3, 6)), np.zeros((9, 2)))
-    _test_reshape_symbolic(np.arange(6.0), np.array([2, 0]), np.array([0, 3]))
-    _test_reshape_symbolic(np.arange(6), np.array([-1, 0]), np.array([0, 2]))
-    _test_reshape_symbolic(np.arange(6), np.array([3, 0]), np.array([3, -1]))
-    _test_reshape_symbolic(np.arange(6), np.array([0]), np.array([-1]))
-
-
-#######################################################################
-# DepthToSpace
-# ------------
-
-
-def _test_depthtospace(data, block_size):
-    """One iteration of depth_to_space operation with given data and block size"""
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        array_ops.depth_to_space(in_data, block_size)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "DepthToSpace:0")
-
-
-def test_forward_depthtospace():
-    _test_depthtospace(np.random.normal(size=[1, 32, 32, 4]), 2)
-    _test_depthtospace(np.random.normal(size=[1, 16, 8, 32]), 4)
-
-
-#######################################################################
-# SpaceToDepth
-# ------------
-
-
-def _test_spacetodepth(data, block_size):
-    """One iteration of space_to_depth operation with given data and block size"""
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        array_ops.space_to_depth(in_data, block_size)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "SpaceToDepth:0")
-
-
-def test_forward_spacetodepth():
-    _test_spacetodepth(np.random.normal(size=[1, 32, 32, 4]), 2)
-    _test_spacetodepth(np.random.normal(size=[1, 16, 8, 32]), 4)
-
-
-#######################################################################
-# Squeeze
-# -------
-
-
-def _test_squeeze(data, squeeze_dims=None):
-    """One iteration of squeeze"""
-
-    if squeeze_dims is None:
-        squeeze_dims = []
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-
-        if squeeze_dims:
-            array_ops.squeeze(in_data, squeeze_dims)
-        else:
-            array_ops.squeeze(in_data)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "Squeeze:0")
-
-
-def test_forward_squeeze():
-    """Squeeze"""
-
-    # Nothing to squeeze.
-    _test_squeeze(np.arange(2).reshape((2)))
-    _test_squeeze(np.arange(6).reshape((2, 3)))
-
-    # Squeeze the middle element away.
-    _test_squeeze(np.arange(4).reshape((2, 1, 2)))
-
-    # Squeeze on both ends.
-    _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)))
-
-    # Positive squeeze dim index.
-    _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [0])
-    _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [2, 4])
-    _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [0, 4, 2])
-
-    # Negative squeeze dim index.
-    _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-1])
-    _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-3, -5])
-    _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-3, -5, -1])
-
-
-#######################################################################
-# TensorArray
-# -----------
-def test_tensor_array_write_read():
-    """Tensor array write read"""
-
-    def run(dtype_str, infer_shape, element_shape):
-        with tf.Graph().as_default():
-            dtype = tf_dtypes[dtype_str]
-            np_data = np.array([[1.0, 2.0], [3.0, 4.0]]).astype(dtype_str)
-            _ = [np_data, np_data]
-            t1 = tf.constant(np_data, dtype=dtype)
-            t2 = tf.constant(np_data, dtype=dtype)
-            ta1 = tf.TensorArray(
-                dtype=dtype, size=2, infer_shape=infer_shape, element_shape=element_shape
-            )
-            ta2 = ta1.write(0, t1)
-            ta3 = ta2.write(1, t2)
-            _ = ta3.read(0)
-            _ = tf.get_default_graph()
-            compare_tf_with_tvm([], [], "TensorArrayReadV3:0", mode="vm")
-
-    for dtype in ["float32", "int8"]:
-        run(dtype, False, None)
-        run(dtype, False, tf.TensorShape([None, 2]))
-        run(dtype, True, None)
-
-
-def test_tensor_array_scatter():
-    """Tensor array scatter"""
-
-    def run(dtype_str, infer_shape):
-        with tf.Graph().as_default():
-            dtype = tf_dtypes[dtype_str]
-            if infer_shape:
-                element_shape = tf.TensorShape([tf.Dimension(None)])
-            else:
-                element_shape = None
-            ta0 = _construct_scatter(dtype, dtype_str, element_shape, infer_shape, 3)
-            _ = ta0.read(0)
-            _ = ta0.read(1)
-            _ = ta0.read(2)
-            ta1 = _construct_scatter(dtype, dtype_str, element_shape, infer_shape, 4)
-            out4 = ta1.read(0)
-            _ = tf.get_default_graph()
-            compare_tf_with_tvm([], [], ["TensorArrayReadV3:0"], mode="vm")
-            compare_tf_with_tvm([], [], ["TensorArrayReadV3_1:0"], mode="vm")
-            compare_tf_with_tvm([], [], ["TensorArrayReadV3_2:0"], mode="vm")
-            compare_tf_with_tvm([], [], ["TensorArrayReadV3_2:0", out4.name], mode="vm")
-
-    def _construct_scatter(dtype, dtype_str, element_shape, infer_shape, size):
-        arr = [[float(i)] for i in range(size)]  # pylint: disable=unnecessary-comprehension
-        indices_arr = list(range(size - 1, -1, -1))
-
-        t = tf.constant(np.array(arr).astype(dtype_str), dtype=dtype)
-        indices = tf.constant(indices_arr)
-        ta1 = tf.TensorArray(
-            dtype=dtype, size=size, infer_shape=infer_shape, element_shape=element_shape
-        )
-        ta2 = ta1.scatter(indices, t)
-        return ta2
-
-    for dtype in ["float32", "int8"]:
-        run(dtype, False)
-        run(dtype, True)
-
-
-def test_tensor_array_gather():
-    """tensor array gather"""
-
-    def run(dtype_str, infer_shape):
-        with tf.Graph().as_default():
-            dtype = tf_dtypes[dtype_str]
-            t = tf.constant(np.array([[1.0], [2.0], [3.0]]).astype(dtype_str))
-            scatter_indices = tf.constant([2, 1, 0])
-            gather_indices = tf.constant([1, 2])
-            ta1 = tf.TensorArray(dtype=dtype, size=3, infer_shape=infer_shape)
-            ta2 = ta1.scatter(scatter_indices, t)
-            _ = ta2.gather(gather_indices)
-            _ = tf.get_default_graph()
-            compare_tf_with_tvm([], [], ["TensorArrayGatherV3:0"], mode="vm")
-
-    for dtype in ["float32", "int8"]:
-        run(dtype, True)
-
-
-def test_tensor_array_split():
-    """tensor array split"""
-
-    def run(dtype_str, infer_shape):
-        with tf.Graph().as_default():
-            dtype = tf_dtypes[dtype_str]
-            t = tf.constant(
-                np.array([[1.0], [2.0], [3.0], [4.0], [5.0], [6.0], [7.0], [8.0]]).astype(
-                    dtype_str
-                ),
-                dtype=dtype,
-            )
-            split_length = tf.constant([2, 2, 2, 2], dtype=tf.int32)
-            ta1 = tf.TensorArray(dtype=dtype, size=4, infer_shape=infer_shape)
-            ta2 = ta1.split(t, split_length)
-            _ = ta2.read(0)
-            _ = ta2.read(1)
-            _ = ta2.read(2)
-            _ = ta2.read(3)
-            _ = tf.get_default_graph()
-            compare_tf_with_tvm([], [], ["TensorArrayReadV3:0"], mode="debug")
-            compare_tf_with_tvm([], [], ["TensorArrayReadV3_1:0"], mode="debug")
-            compare_tf_with_tvm([], [], ["TensorArrayReadV3_2:0"], mode="debug")
-            compare_tf_with_tvm([], [], ["TensorArrayReadV3_3:0"], mode="debug")
-
-    for dtype in ["float32", "int8"]:
-        run(dtype, False)
-        run(dtype, True)
-
-
-def test_tensor_array_concat():
-    """Tensor array concat"""
-
-    def run(dtype_str, infer_shape):
-        with tf.Graph().as_default():
-            dtype = tf_dtypes[dtype_str]
-            t = tf.constant(
-                np.array([[1.0], [2.0], [3.0], [4.0], [5.0], [6.0], [7.0], [8.0]]).astype(
-                    dtype_str
-                ),
-                dtype=dtype,
-            )
-            split_length = tf.constant([2, 2, 2, 2], dtype=tf.int32)
-            ta1 = tf.TensorArray(dtype=dtype, size=4, infer_shape=infer_shape)
-            ta2 = ta1.split(t, split_length)
-            t = ta2.concat()
-            _ = tf.identity(t)
-            compare_tf_with_tvm([], [], ["Identity:0"], mode="debug")
-
-    for dtype in ["float32", "int8"]:
-        run(dtype, False)
-        run(dtype, True)
-
-
-def test_tensor_array_size():
-    """Tensor array size"""
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"):
-        pytest.skip("Needs fixing for tflite >= 1.15.0")
-
-    def run(dtype_str, infer_shape):
-        with tf.Graph().as_default():
-            dtype = tf_dtypes[dtype_str]
-            np_data = np.array([[1.0, 2.0], [3.0, 4.0]]).astype(dtype_str)
-            _ = [np_data, np_data]
-            t1 = tf.constant(np_data, dtype=dtype)
-            t2 = tf.constant(np_data, dtype=dtype)
-            ta1 = tf.TensorArray(dtype=dtype, size=2, infer_shape=infer_shape)
-            ta2 = ta1.write(0, t1)
-            ta3 = ta2.write(1, t2)
-            _ = ta3.size()
-            _ = tf.get_default_graph()
-            compare_tf_with_tvm([], [], "TensorArraySizeV3:0", mode="debug")
-
-    for dtype in ["float32", "int8"]:
-        run(dtype, False)
-        run(dtype, True)
-
-
-def test_tensor_array_stack():
-    """Tensor array stack"""
-
-    def run(dtype_str, infer_shape):
-        if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"):
-            pytest.skip("Needs fixing for tflite >= 1.15.0")
-
-        with tf.Graph().as_default():
-            dtype = tf_dtypes[dtype_str]
-            t = tf.constant(np.array([[1.0], [2.0], [3.0]]).astype(dtype_str))
-            scatter_indices = tf.constant([2, 1, 0])
-            ta1 = tf.TensorArray(dtype=dtype, size=3, infer_shape=infer_shape)
-            ta2 = ta1.scatter(scatter_indices, t)
-            t1 = ta2.stack()
-            print(t1)
-            _ = tf.get_default_graph()
-
-            compare_tf_with_tvm([], [], ["TensorArrayStack/TensorArrayGatherV3:0"], mode="vm")
-
-    for dtype in ["float32", "int8"]:
-        run(dtype, True)
-
-
-def test_tensor_array_unstack():
-    """Tensor array unstack"""
-
-    def run(dtype_str, input_shape, infer_shape):
-        if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"):
-            pytest.skip("Needs fixing for tflite >= 1.15.0")
-
-        with tf.Graph().as_default():
-            dtype = tf_dtypes[dtype_str]
-            t = tf.constant(np.random.choice([0, 1, 2, 3], size=input_shape).astype(dtype.name))
-            ta1 = tf.TensorArray(dtype=dtype, infer_shape=infer_shape, size=input_shape[0])
-            ta2 = ta1.unstack(t)
-            _ = ta2.size()
-            _ = ta2.read(0)
-            compare_tf_with_tvm([], [], "TensorArraySizeV3:0", mode="debug")
-            compare_tf_with_tvm([], [], "TensorArrayReadV3:0", mode="debug")
-
-    for dtype in ["float32", "int8"]:
-        run(dtype, (5,), False)
-        run(dtype, (5, 5), True)
-        run(dtype, (5, 5, 5), False)
-        run(dtype, (5, 5, 5, 5), True)
-
-
-#######################################################################
-# ConcatV2
-# --------
-
-
-def _test_concat_v2(shape1, shape2, dim):
-    """One iteration of ConcatV2"""
-
-    with tf.Graph().as_default():
-        dtype = "float32"
-        in1 = tf.placeholder(shape=shape1, dtype=dtype, name="in1")
-        in2 = tf.placeholder(shape=shape2, dtype=dtype, name="in2")
-        array_ops.concat_v2([in1, in2], dim)
-
-        np_data1 = np.random.uniform(size=shape1).astype(dtype)
-        np_data2 = np.random.uniform(size=shape2).astype(dtype)
-
-        compare_tf_with_tvm([np_data1, np_data2], ["in1:0", "in2:0"], "ConcatV2:0")
-
-
-def test_forward_concat_v2():
-    if package_version.parse(tf.__version__) < package_version.parse("1.4.1"):
-        return
-
-    _test_concat_v2([2, 3], [2, 3], 0)
-    _test_concat_v2([10, 3, 5], [2, 3, 5], 0)
-    _test_concat_v2([2, 3], [2, 3], 1)
-    _test_concat_v2([5, 8], [5, 4], 1)
-    _test_concat_v2([2, 8, 5], [2, 8, 6], -1)
-
-
-#######################################################################
-# Sigmoid
-# -------
-
-
-def _test_sigmoid(data):
-    """One iteration of sigmoid"""
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        _ = math_ops.sigmoid(in_data)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "Sigmoid:0")
-
-
-def test_forward_sigmoid():
-    """Sigmoid"""
-
-    _test_sigmoid(np.random.uniform(size=(3, 4, 4, 3)).astype("float32"))
-
-
-#######################################################################
-# Argmin/Argmax
-# -------------
-
-
-def _test_argx(func, data, **kwargs):
-
-    with tf.Graph().as_default():
-        inp = array_ops.placeholder(shape=data.shape, dtype=data.dtype, name="c0")
-        func(inp, name="argx0", **kwargs)
-        compare_tf_with_tvm(data, "c0:0", "argx0:0")
-
-
-def test_forward_argminmax():
-    for output_type in [tf.int64, tf.int32]:
-        for axis in [None, 0, 1, 2]:
-            data = np.random.uniform(size=(8, 4, 9)).astype("float32")
-            _test_argx(tf.argmax, data=data, axis=axis, output_type=output_type)
-            _test_argx(tf.argmin, data=data, axis=axis, output_type=output_type)
-
-
-#######################################################################
-# Variable
-# --------
-
-
-def _test_variable(data):
-    """One iteration of a variable"""
-
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        input_op = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        input_tensor = array_ops.reshape(input_op, data.shape)
-
-        size = input_tensor.shape.dims[1]
-        with variable_scope.variable_scope("linear", reuse=None):
-            w = variable_scope.get_variable("w", shape=[size, size], dtype=input_tensor.dtype)
-        math_ops.matmul(input_tensor, w)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "MatMul:0", init_global_variables=True)
-
-
-def test_forward_variable():
-    """Variable type op test"""
-    _test_variable(np.random.uniform(size=(32, 100)).astype("float32"))
-
-
-@tvm.testing.parametrize_targets("llvm", "cuda")
-def test_read_variable_op(target, dev):
-    """Read Variable op test"""
-
-    tf.reset_default_graph()
-    data = np.random.uniform(size=(32, 100)).astype("float32")
-    input_tensor = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-
-    size = input_tensor.shape.dims[1]
-    var_data = np.random.uniform(-5, 5, size=[size, size]).astype(np.float32)
-    input_var = tf.Variable(var_data, name="var1", use_resource=True)
-    math_ops.matmul(input_tensor, input_var)
-
-    out_name = ["MatMul:0"]
-    out_node = ["MatMul"]
-    in_name = ["Placeholder:0"]
-    in_node = ["Placeholder"]
-    in_data = [data]
-
-    with tf.Session() as sess:
-        sess.run(variables.global_variables_initializer())
-
-        final_graph_def = sess.graph.as_graph_def(add_shapes=True)
-        tf_output = run_tf_graph(sess, in_data, in_name, out_name)
-
-        shape_dict = {e: i.shape for e, i in zip(in_name, in_data)}
-        with pytest.raises(Exception) as execinfo:
-            with tvm.testing.disable_span_filling():
-                mod, _ = relay.frontend.from_tensorflow(
-                    final_graph_def, layout=None, shape=shape_dict, outputs=None
-                )
-            with tvm.testing.enable_span_filling():
-                mod_with_span, _ = relay.frontend.from_tensorflow(
-                    final_graph_def, layout=None, shape=shape_dict, outputs=None
-                )
-            tvm.ir.assert_structural_equal(mod["main"], mod_with_span["main"])
-
-        assert execinfo.value.args[0].startswith("Graph is not frozen. Provide a frozen graph")
-
-        # Now convert the variables to constant and run inference on the converted graph
-        final_graph_def = tf.graph_util.convert_variables_to_constants(
-            sess,
-            sess.graph.as_graph_def(add_shapes=True),
-            out_node,
-        )
-
-        tvm_output = run_tvm_graph(
-            final_graph_def,
-            in_data,
-            in_node,
-            target=target,
-            out_names=out_name,
-            num_output=len(out_name),
-        )
-        for i, tf_out in enumerate(tf_output):
-            tvm.testing.assert_allclose(tf_out, tvm_output[i], atol=1e-4, rtol=1e-5)
-
-        sess.close()
-
-
-#######################################################################
-# MatMul, BatchMatMul, BatchMatMulV2
-# ----------------------------------
-
-
-def _test_matmul(i, j, k, dtype, outer=None):
-    """One iteration of matmul"""
-
-    A_shape_init = [i, j]
-    B_shape_init = [j, k]
-
-    for transpose_a in [False, True]:
-        for transpose_b in [False, True]:
-            outer = outer or []
-            A_shape = outer + (A_shape_init[::-1] if transpose_a else A_shape_init)
-            B_shape = outer + (B_shape_init[::-1] if transpose_b else B_shape_init)
-
-            with tf.Graph().as_default():
-                A = tf.placeholder(shape=A_shape, dtype=dtype, name="A")
-                B = tf.placeholder(shape=B_shape, dtype=dtype, name="B")
-                result = tf.matmul(A, B, transpose_a=transpose_a, transpose_b=transpose_b)
-
-                A_np = np.random.uniform(high=5.0, size=A_shape).astype(dtype)
-                B_np = np.random.uniform(high=5.0, size=B_shape).astype(dtype)
-                compare_tf_with_tvm(
-                    [A_np, B_np], [A.name, B.name], result.name, convert_config={"use_dense": True}
-                )
-                compare_tf_with_tvm(
-                    [A_np, B_np], [A.name, B.name], result.name, convert_config={"use_dense": False}
-                )
-
-
-def test_forward_matmul():
-    """MatMul op test"""
-    _test_matmul(1, 3, 6, "int32")
-    _test_matmul(5, 3, 1, "float64")
-
-
-def _test_batch_matmul(A_shape, B_shape, dtype, adjoint_a=False, adjoint_b=False):
-
-    with tf.Graph().as_default():
-        A = tf.placeholder(shape=A_shape, dtype=dtype, name="A")
-        B = tf.placeholder(shape=B_shape, dtype=dtype, name="B")
-        result = tf.matmul(A, B, adjoint_a=adjoint_a, adjoint_b=adjoint_b, name="batchmatmul")
-
-        A_np = np.random.uniform(high=5.0, size=A_shape).astype(dtype)
-        B_np = np.random.uniform(high=5.0, size=B_shape).astype(dtype)
-        compare_tf_with_tvm(
-            [A_np, B_np],
-            [A.name, B.name],
-            result.name,
-            convert_config={"use_nt_batch_matmul": True},
-        )
-        compare_tf_with_tvm(
-            [A_np, B_np],
-            [A.name, B.name],
-            result.name,
-            convert_config={"use_nt_batch_matmul": False},
-        )
-
-
-def _test_batch_matmul_dynamic(
-    A_shape, B_shape, A_np_shape, B_np_shape, dtype, adjoint_a=False, adjoint_b=False
-):
-    with tf.Graph().as_default():
-        A = tf.placeholder(shape=A_shape, dtype=dtype, name="A")
-        B = tf.placeholder(shape=B_shape, dtype=dtype, name="B")
-        result = tf.matmul(A, B, adjoint_a=adjoint_a, adjoint_b=adjoint_b, name="batchmatmul")
-
-        A_np = np.random.uniform(high=5.0, size=A_np_shape).astype(dtype)
-        B_np = np.random.uniform(high=5.0, size=B_np_shape).astype(dtype)
-        # for now, in TOPI, only llvm & cublas's implementation support dynamic shape
-        # TODO add more backends support in TOPI
-        compare_tf_with_tvm(
-            [A_np, B_np],
-            [A.name, B.name],
-            result.name,
-            mode="vm",
-            targets=["llvm", "cuda -libs=cublas"],
-            convert_config={"use_nt_batch_matmul": True},
-        )
-        compare_tf_with_tvm(
-            [A_np, B_np],
-            [A.name, B.name],
-            result.name,
-            mode="vm",
-            targets=["llvm", "cuda -libs=cublas"],
-            convert_config={"use_nt_batch_matmul": False},
-        )
-
-
-def test_forward_batch_matmul():
-    """TF op BatchMatMul, BatchMatMulV2 test"""
-    _test_batch_matmul((3, 5, 4), (3, 4, 5), "int32")
-    _test_batch_matmul((3, 5, 4), (3, 4, 5), "float32", True, True)
-    _test_batch_matmul((3, 5, 4), (3, 5, 4), "int32", True, False)
-    _test_batch_matmul((3, 5, 4), (3, 5, 4), "float32", False, True)
-    _test_batch_matmul((2, 3, 4, 5, 6), (2, 3, 4, 6, 5), "int32")
-    _test_batch_matmul((1, 2, 3, 4, 5, 6), (1, 2, 3, 4, 6, 5), "float32", True, True)
-    _test_batch_matmul((3, 4, 5, 6), (3, 4, 5, 6), "int32", True, False)
-    _test_batch_matmul((2, 3, 4, 2, 3, 4, 5, 6), (2, 3, 4, 2, 3, 4, 5, 6), "float32", False, True)
-    _test_batch_matmul((1, 8, 64, 2), (2, 1), "float32", False, False)
-    _test_batch_matmul((1, 8, 8, 64), (64, 1), "float32", False, False)
-    _test_batch_matmul((1, 8, 64), (64, 1), "float32", False, False)
-
-
-def test_forward_batch_matmul_dynamic():
-    """Dynamic batch matmul"""
-    _test_batch_matmul_dynamic((None, 5, 4), (None, 4, 5), (3, 5, 4), (3, 4, 5), "int32")
-    _test_batch_matmul_dynamic(
-        (None, 5, 4), (None, 4, 5), (3, 5, 4), (3, 4, 5), "float32", True, True
-    )
-    _test_batch_matmul_dynamic(
-        (None, 5, 4), (None, 5, 4), (3, 5, 4), (3, 5, 4), "int32", True, False
-    )
-    _test_batch_matmul_dynamic(
-        (None, 5, 4), (None, 5, 4), (3, 5, 4), (3, 5, 4), "float32", False, True
-    )
-    _test_batch_matmul_dynamic(
-        (None, 4, 5, 6), (None, 4, 6, 5), (3, 4, 5, 6), (3, 4, 6, 5), "float32"
-    )
-    _test_batch_matmul_dynamic(
-        (None, None, 5, 6), (None, None, 6, 5), (3, 4, 5, 6), (3, 4, 6, 5), "float32"
-    )
-    _test_batch_matmul_dynamic(
-        (None, None, None, 5, 6),
-        (None, None, None, 6, 5),
-        (2, 3, 4, 5, 6),
-        (2, 3, 4, 6, 5),
-        "float32",
-    )
-    _test_batch_matmul_dynamic(
-        (None, None, None, 5, 6),
-        (6, None),
-        (2, 3, 4, 5, 6),
-        (6, 1),
-        "float32",
-    )
-    _test_batch_matmul_dynamic(
-        (None, 5, 6),
-        (6, None),
-        (24, 5, 6),
-        (6, 1),
-        "float32",
-    )
-
-
-#######################################################################
-# SparseTensorDenseMatMul
-# ----------------------------------
-
-
-def _test_sparse_dense_matmul(indices, values, A_inp_shape, B_inp_shape, dtype, flip=False):
-    """One iteration of sparse_dense_matmul"""
-
-    for adjoint_a in [False, True]:
-        for adjoint_b in [False, True]:
-            A_shape = A_inp_shape[::-1] if adjoint_a else A_inp_shape
-            B_shape = B_inp_shape[::-1] if adjoint_b else B_inp_shape
-
-            with tf.Graph().as_default():
-                A_sp = tf.sparse.SparseTensor(indices=indices, values=values, dense_shape=A_shape)
-                B = tf.placeholder(shape=B_shape, dtype=dtype, name="B")
-
-                if flip:
-                    result = tf.sparse.sparse_dense_matmul(
-                        B, A_sp, adjoint_a=adjoint_b, adjoint_b=adjoint_a
-                    )
-                else:
-                    result = tf.sparse.sparse_dense_matmul(
-                        A_sp, B, adjoint_a=adjoint_a, adjoint_b=adjoint_b
-                    )
-
-                B_np = np.random.uniform(high=5.0, size=B_shape).astype(dtype)
-
-                compare_tf_with_tvm([B_np], [B.name], result.name)
-
-
-def test_forward_sparse_dense_matmul():
-    """sparse_dense_matmul op test"""
-    ###################################################################
-    #
-    # In order to create a SparseTensor, it requires 3 input as below:
-    #    SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
-    #
-    # Above Sparse can be represented in Dense as below :
-    #    [[1, 0, 0, 0]
-    #     [0, 0, 2, 0]
-    #     [0, 0, 0, 0]]
-    #
-    # ------------------------------------------------------------------
-
-    _test_sparse_dense_matmul([[0, 0], [1, 2]], [4.0, 8.0], [3, 4], [4, 3], "float32")
-    _test_sparse_dense_matmul([[0, 0], [1, 2]], [4.0, 8.0], [3, 3], [3, 3], "float32")
-    _test_sparse_dense_matmul([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], [5, 5], "float32")
-    _test_sparse_dense_matmul([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [7, 9], [9, 5], "float32")
-    _test_sparse_dense_matmul([[0, 0], [1, 2]], [4.0, 8.0], [4, 3], [3, 4], "float32", True)
-    _test_sparse_dense_matmul([[0, 0], [1, 2]], [4.0, 8.0], [3, 3], [3, 3], "float32", True)
-    _test_sparse_dense_matmul(
-        [[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], [5, 5], "float32", True
-    )
-    _test_sparse_dense_matmul(
-        [[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [9, 5], [7, 9], "float32", True
-    )
-
-
-#######################################################################
-# SparseFillEmptyRows
-# ------------
-
-
-def _test_sparse_fill_empty_rows(indices_np, values_np, dense_shape_np, default_value_int, use_dyn):
-    with tf.Graph().as_default():
-        if use_dyn:
-            indices = tf.placeholder(shape=(None, None), dtype=indices_np.dtype, name="indices")
-            values = tf.placeholder(shape=(None), dtype=values_np.dtype, name="values")
-            dense_shape = tf.placeholder(
-                shape=(None), dtype=dense_shape_np.dtype, name="dense_shape"
-            )
-        else:
-            indices = tf.placeholder(shape=indices_np.shape, dtype=indices_np.dtype, name="indices")
-            values = tf.placeholder(shape=values_np.shape, dtype=values_np.dtype, name="values")
-            dense_shape = tf.placeholder(
-                shape=dense_shape_np.shape, dtype=dense_shape_np.dtype, name="dense_shape"
-            )
-
-        default_value = tf.placeholder(shape=(), dtype=values_np.dtype, name="default_value")
-        sp_input = tf.sparse.SparseTensor(indices=indices, values=values, dense_shape=dense_shape)
-        _ = tf.sparse.fill_empty_rows(sp_input, default_value, name="sparse_fill_empty_rows")
-        compare_tf_with_tvm(
-            [indices_np, values_np, dense_shape_np, default_value_int],
-            [indices.name, values.name, dense_shape.name, default_value.name],
-            [
-                "sparse_fill_empty_rows/SparseFillEmptyRows:0",
-                "sparse_fill_empty_rows/SparseFillEmptyRows:1",
-                "sparse_fill_empty_rows/SparseFillEmptyRows:2",
-            ],
-            mode="vm",
-        )
-
-
-@pytest.mark.parametrize(
-    "sparse_indices_np, sparse_values_np, dense_shape_np, default_value_int",
-    [
-        (
-            np.array([[1, 1], [0, 3], [0, 1], [2, 0], [3, 1]], dtype=np.int64),
-            np.array([1, 2, 3, 4, 5], dtype=np.int64),
-            np.array([5, 6], dtype=np.int64),
-            10,
-        ),
-        (
-            np.array([[1, 1], [0, 3], [2, 0], [3, 1]], dtype=np.int64),
-            np.array([1, 2, 3, 4], dtype=np.int64),
-            np.array([5, 6], dtype=np.int64),
-            10,
-        ),
-        (
-            np.array([[0, 1], [0, 3], [2, 0], [3, 1]], dtype=np.int64),
-            np.array([1, 2, 3, 4], dtype=np.int64),
-            np.array([5, 6], dtype=np.int64),
-            10,
-        ),
-        (
-            np.array([[1, 1, 1], [1, 3, 1], [2, 0, 5], [3, 1, 6]], dtype=np.int64),
-            np.array([1, 2, 3, 4], dtype=np.int64),
-            np.array([7, 7, 7], dtype=np.int64),
-            5,
-        ),
-        (
-            np.array([[1], [2]], dtype=np.int64),
-            np.array([7, 8], dtype=np.int64),
-            np.array([5], dtype=np.int64),
-            4,
-        ),
-        (
-            np.ones((0, 1), dtype=np.int64),
-            np.array([], dtype=np.int64),
-            np.array([5], dtype=np.int64),
-            4,
-        ),
-        (
-            np.ones((0, 3), dtype=np.int64),
-            np.array([], dtype=np.int64),
-            np.array([9, 3, 7], dtype=np.int64),
-            100,
-        ),
-    ],
-)
-@pytest.mark.parametrize("use_dyn", [True, False])
-def test_forward_sparse_fill_empty_rows(
-    sparse_indices_np, sparse_values_np, dense_shape_np, default_value_int, use_dyn
-):
-    """sparse_fill_empty_rows op test"""
-    ###################################################################
-    #
-    # In order to create a SparseTensor, it requires 3 input as below:
-    #    SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
-    #
-    # Above Sparse can be represented in Dense as below :
-    #    [[1, 0, 0, 0]
-    #     [0, 0, 2, 0]
-    #     [0, 0, 0, 0]]
-    #
-    # ------------------------------------------------------------------
-    _test_sparse_fill_empty_rows(
-        sparse_indices_np, sparse_values_np, dense_shape_np, default_value_int, use_dyn
-    )
-
-
-#######################################################################
-# SparseReshape
-# ------------
-
-
-def _test_sparse_reshape(indices_np, values_np, prev_shape_np, new_shape_np, use_dyn=False):
-    with tf.Graph().as_default():
-        if use_dyn:
-            indices = tf.placeholder(shape=(None, None), dtype=indices_np.dtype, name="indices")
-            values = tf.placeholder(shape=(None), dtype=values_np.dtype, name="values")
-            prev_shape = tf.placeholder(shape=(None), dtype=prev_shape_np.dtype, name="prev_shape")
-            new_shape = tf.placeholder(shape=(None), dtype=new_shape_np.dtype, name="new_shape")
-        else:
-            indices = tf.placeholder(shape=indices_np.shape, dtype=indices_np.dtype, name="indices")
-            values = tf.placeholder(shape=values_np.shape, dtype=values_np.dtype, name="values")
-            prev_shape = tf.placeholder(
-                shape=prev_shape_np.shape, dtype=prev_shape_np.dtype, name="prev_shape"
-            )
-            new_shape = tf.placeholder(
-                shape=new_shape_np.shape, dtype=new_shape_np.dtype, name="new_shape"
-            )
-        sp_input = tf.sparse.SparseTensor(indices=indices, values=values, dense_shape=prev_shape)
-
-        _ = tf.sparse.reshape(sp_input, new_shape, name="sparse_reshape")
-        compare_tf_with_tvm(
-            [indices_np, values_np, prev_shape_np, new_shape_np],
-            [indices.name, values.name, prev_shape.name, new_shape.name],
-            ["sparse_reshape:0", "sparse_reshape:1", "sparse_reshape/Identity:0"],
-            mode="vm",
-        )
-
-
-@pytest.mark.parametrize(
-    "sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np",
-    [
-        (
-            np.ones((0, 1), dtype=np.int64),
-            np.array([], dtype=np.int64),
-            np.array([4], dtype=np.int64),
-            np.array([2, -1], dtype=np.int64),
-        ),
-        (
-            np.ones((0, 1), dtype=np.int64),
-            np.array([], dtype=np.int64),
-            np.array([4], dtype=np.int64),
-            np.array([2, 2], dtype=np.int64),
-        ),
-        (
-            np.ones((0, 2), dtype=np.int64),
-            np.array([], dtype=np.int64),
-            np.array([3, 6], dtype=np.int64),
-            np.array([-1, 2], dtype=np.int64),
-        ),
-        (
-            np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [1, 2, 3]], dtype=np.int64),
-            np.array([7, 5, 6, 3, 9], dtype=np.int64),
-            np.array([2, 3, 6], dtype=np.int64),
-            np.array([-1, 9], dtype=np.int64),
-        ),
-        (
-            np.array(
-                [
-                    [0, 0, 0, 0, 0],
-                    [0, 0, 1, 2, 3],
-                    [0, 1, 0, 3, 5],
-                    [1, 0, 0, 4, 6],
-                    [1, 2, 3, 6, 8],
-                ],
-                dtype=np.int64,
-            ),
-            np.array([7, 5, 6, 3, 9], dtype=np.int64),
-            np.array([2, 3, 6, 7, 9], dtype=np.int64),
-            np.array([9, -1, 7], dtype=np.int64),
-        ),
-        (
-            np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64),
-            np.array([7, 5, 6, 3, 9], dtype=np.int64),
-            np.array([9, 4], dtype=np.int64),
-            np.array([-1], dtype=np.int64),
-        ),
-        (
-            np.array([[0], [5], [10], [20], [24]], dtype=np.int64),
-            np.array([7, 5, 6, 3, 9], dtype=np.int64),
-            np.array([25], dtype=np.int64),
-            np.array([5, 5], dtype=np.int64),
-        ),
-        (
-            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
-            np.array([7, 5, 6, 3, 9], dtype=np.int64),
-            np.array([500, 20], dtype=np.int64),
-            np.array([500, 20], dtype=np.int64),
-        ),
-        (
-            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
-            np.array([7, 5, 6, 3, 9], dtype=np.int64),
-            np.array([500, 20], dtype=np.int64),
-            np.array([500, -1], dtype=np.int64),
-        ),
-        (
-            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
-            np.array([7, 5, 6, 3, 9], dtype=np.int64),
-            np.array([500, 20], dtype=np.int64),
-            np.array([250, 40], dtype=np.int64),
-        ),
-    ],
-)
-@pytest.mark.parametrize("use_dyn", [True, False])
-def test_forward_sparse_reshape(
-    sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np, use_dyn
-):
-    """sparse_reshape op test"""
-    ###################################################################
-    #
-    # In order to create a SparseTensor, it requires 3 input as below:
-    #    SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
-    #
-    # Above Sparse can be represented in Dense as below :
-    #    [[1, 0, 0, 0]
-    #     [0, 0, 2, 0]
-    #     [0, 0, 0, 0]]
-    #
-    # ------------------------------------------------------------------
-    _test_sparse_reshape(sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np, use_dyn)
-
-
-#######################################################################
-# Sparse Segment Variants
-# ------------
-
-
-def _test_sparse_segment_variant(
-    tf_op, data_np, indices_np, segment_ids_np, num_segments, use_dyn=False
-):
-    with tf.Graph().as_default():
-        if use_dyn:
-            data = tf.placeholder(
-                shape=[None for _ in data_np.shape], dtype=data_np.dtype, name="data"
-            )
-            indices = tf.placeholder(shape=[None], dtype=indices_np.dtype, name="indices")
-            segment_ids = tf.placeholder(
-                shape=(None), dtype=segment_ids_np.dtype, name="segment_ids"
-            )
-        else:
-            data = tf.placeholder(shape=data_np.shape, dtype=data_np.dtype, name="data")
-            indices = tf.placeholder(shape=indices_np.shape, dtype=indices_np.dtype, name="indices")
-            segment_ids = tf.placeholder(
-                shape=segment_ids_np.shape, dtype=segment_ids_np.dtype, name="segment_ids"
-            )
-
-        _ = tf_op(
-            data, indices, segment_ids, num_segments=num_segments, name="sparse_segment_variant"
-        )
-        compare_tf_with_tvm(
-            [data_np, indices_np, segment_ids_np],
-            [data.name, indices.name, segment_ids.name],
-            ["sparse_segment_variant:0"],
-            mode="vm",
-        )
-
-
-@pytest.mark.parametrize(
-    "data_np, indices_np, segment_ids_np, num_segments",
-    [
-        (
-            np.array([5, 1, 7, 2, 3, 4], dtype=np.float32),
-            np.array([0, 3, 4], dtype=np.int32),
-            np.array([0, 1, 1], dtype=np.int32),
-            None,
-        ),
-        (
-            np.array([[1, 2, 3, 4], [-1, -2, -3, -4], [5, 6, 7, 8]], dtype=np.float64),
-            np.array([0, 1], dtype=np.int32),
-            np.array([0, 2], dtype=np.int32),
-            4,
-        ),
-        (
-            np.random.random((6, 4, 5)),
-            np.array([0, 2, 4, 3, 1], dtype=np.int32),
-            np.array([0, 0, 1, 5, 5], dtype=np.int32),
-            100,
-        ),
-        (
-            np.random.random((6, 4, 5)),
-            np.array([0, 2, 4, 3, 1], dtype=np.int32),
-            np.array([0, 0, 1, 5, 5], dtype=np.int32),
-            None,
-        ),
-        (
-            np.array([[[1, 7]], [[3, 8]], [[2, 9]]], dtype=np.float64),
-            np.array([0, 1, 2], dtype=np.int32),
-            np.array([0, 0, 1], dtype=np.int32),
-            None,
-        ),
-        (
-            np.random.random((9, 4, 5, 7)),
-            np.array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=np.int32),
-            np.array([0, 0, 1, 3, 5, 6, 7, 7, 8], dtype=np.int32),
-            9,
-        ),
-        (
-            np.random.random((9, 4, 5, 7)),
-            np.array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=np.int32),
-            np.array([0, 0, 1, 3, 5, 6, 7, 7, 8], dtype=np.int32),
-            None,
-        ),
-        (
-            np.array([[1, 2, 3, 4], [-1, -2, -3, -4], [5, 6, 7, 8]], dtype=np.float64),
-            np.array([0, 1], dtype=np.int32),
-            np.array([0, 2], dtype=np.int32),
-            None,
-        ),
-        (
-            np.random.random((9, 4, 5, 7)),
-            np.array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=np.int32),
-            np.array([0, 0, 1, 3, 5, 5, 5, 5, 5], dtype=np.int32),
-            6,
-        ),
-    ],
-)
-@pytest.mark.parametrize("use_dyn", [True, False])
-@pytest.mark.parametrize(
-    "tf_op",
-    [
-        tf.sparse.segment_sum,
-        tf.sparse.segment_sqrt_n,
-        tf.sparse.segment_mean,
-    ],
-)
-def test_forward_sparse_segment_sum_variants(
-    tf_op,
-    data_np,
-    indices_np,
-    segment_ids_np,
-    num_segments,
-    use_dyn,
-):
-    """sparse segment sum variants tests"""
-    _test_sparse_segment_variant(tf_op, data_np, indices_np, segment_ids_np, num_segments, use_dyn)
-
-
-#######################################################################
-# Math SegmentSum
-# ------------
-
-
-def _test_math_segment_sum(data_np, segment_ids_np, use_dyn=False):
-    with tf.Graph().as_default():
-        if use_dyn:
-            data = tf.placeholder(
-                shape=[None for _ in data_np.shape], dtype=data_np.dtype, name="data"
-            )
-            segment_ids = tf.placeholder(
-                shape=(None), dtype=segment_ids_np.dtype, name="segment_ids"
-            )
-        else:
-            data = tf.placeholder(shape=data_np.shape, dtype=data_np.dtype, name="data")
-            segment_ids = tf.placeholder(
-                shape=segment_ids_np.shape, dtype=segment_ids_np.dtype, name="segment_ids"
-            )
-
-        _ = tf.math.segment_sum(data, segment_ids, name="segment_sum")
-        compare_tf_with_tvm(
-            [data_np, segment_ids_np],
-            [data.name, segment_ids.name],
-            ["segment_sum:0"],
-            mode="vm",
-        )
-
-
-@pytest.mark.parametrize(
-    "data_np, segment_ids_np",
-    [
-        (
-            np.array([5, 1, 7, 2, 3, 4], dtype=np.float32),
-            np.array([0, 0, 0, 1, 1, 1], dtype=np.int32),
-        ),
-        (
-            np.array([[1, 2, 3, 4], [-1, -2, -3, -4], [5, 6, 7, 8]], dtype=np.float64),
-            np.array([0, 0, 1], dtype=np.int32),
-        ),
-        (
-            np.random.random((6, 4, 5)),
-            np.array([0, 0, 1, 2, 2, 3], dtype=np.int64),
-        ),
-        (
-            np.array([[[1, 7]], [[3, 8]], [[2, 9]]], dtype=np.float32),
-            np.array([0, 0, 1], dtype=np.int32),
-        ),
-        (
-            np.random.random((9, 4, 5, 7)),
-            np.array([0, 0, 0, 1, 2, 3, 4, 4, 5], dtype=np.int64),
-        ),
-    ],
-)
-@pytest.mark.parametrize("use_dyn", [True, False])
-def test_forward_math_segment_sum(data_np, segment_ids_np, use_dyn):
-    """math segment sum test"""
-    _test_math_segment_sum(data_np, segment_ids_np, use_dyn)
-
-
-# tensorflow.compat.v1.sparse_to_dense
-# ---------------
-def _test_sparse_to_dense(sparse_indices, sparse_values, default_value, output_shape):
-    with tf.Graph().as_default():
-        indices = tf.placeholder(
-            shape=sparse_indices.shape, dtype=str(sparse_indices.dtype), name="indices"
-        )
-        values = tf.placeholder(
-            shape=sparse_values.shape, dtype=str(sparse_values.dtype), name="values"
-        )
-        oshape = tf.constant(output_shape, shape=output_shape.shape, dtype=str(output_shape.dtype))
-
-        # Output shape depends on a dynamic input, use VM.
-        if default_value is None:
-            output = tf.sparse_to_dense(indices, oshape, values)
-            compare_tf_with_tvm(
-                [sparse_indices, sparse_values], ["indices:0", "values:0"], output.name, mode="vm"
-            )
-        else:
-            dv = tf.placeholder(shape=(), dtype=str(default_value.dtype), name="default_value")
-            output = tf.sparse_to_dense(indices, oshape, values, dv)
-            compare_tf_with_tvm(
-                [sparse_indices, sparse_values, default_value],
-                ["indices:0", "values:0", "default_value:0"],
-                output.name,
-                mode="vm",
-            )
-
-
-def test_forward_sparse_to_dense():
-    """Sparse to dense"""
-    # scalar
-    _test_sparse_to_dense(
-        sparse_indices=np.int32(1),
-        sparse_values=np.int32(3),
-        default_value=np.int32(0),
-        output_shape=np.array([5]).astype("int32"),
-    )
-
-    # vector
-    _test_sparse_to_dense(
-        sparse_indices=np.array([0, 1, 4]).astype("int32"),
-        sparse_values=np.array([3, 3, 3]).astype("int32"),
-        default_value=np.int32(0),
-        output_shape=np.array([5]).astype("int32"),
-    )
-
-    # vector nXd
-    _test_sparse_to_dense(
-        sparse_indices=np.array([[0, 0], [1, 2]]).astype("int32"),
-        sparse_values=np.array([1, 2]).astype("int32"),
-        default_value=np.int32(0),
-        output_shape=np.array([3, 4]).astype("int32"),
-    )
-
-    _test_sparse_to_dense(
-        sparse_indices=np.array([[0, 0, 0], [1, 2, 3]]).astype("int32"),
-        sparse_values=np.array([1, 2]).astype("int32"),
-        default_value=np.int32(4),
-        output_shape=np.array([2, 3, 4]).astype("int32"),
-    )
-
-    # floats
-    _test_sparse_to_dense(
-        sparse_indices=np.array([0, 1, 4]).astype("int32"),
-        sparse_values=np.array([3.1, 3.1, 3.1]).astype("float32"),
-        default_value=np.float32(3.5),
-        output_shape=np.array([5]).astype("int32"),
-    )
-
-    # default value not specified
-    _test_sparse_to_dense(
-        sparse_indices=np.array([0, 1, 4]).astype("int32"),
-        sparse_values=np.array([3.1, 3.1, 3.1]).astype("float32"),
-        default_value=None,
-        output_shape=np.array([5]).astype("int32"),
-    )
-
-
-#######################################################################
-# tensorflow.sparse.to_dense
-# ---------------
-def _test_sparse_to_dense_v2(indices, values, A_shape, dtype, default_value=None):
-    with tf.Graph().as_default():
-        A_sp = tf.sparse.SparseTensor(indices=indices, values=values, dense_shape=A_shape)
-
-        result = tf.sparse.to_dense(A_sp, default_value=default_value)
-
-        # The output shape depends on a dynamic input, use VM.
-        compare_tf_with_tvm([], [], result.name, mode="vm")
-
-
-def test_forward_sparse_to_dense_v2():
-    _test_sparse_to_dense_v2([[1]], [3.0], [5], "float32")
-    _test_sparse_to_dense_v2([[1]], [3.0], [5], "float32", 0.3)
-    _test_sparse_to_dense_v2([[0, 0], [1, 2]], [4.0, 8.0], [3, 4], "float32")
-    _test_sparse_to_dense_v2([[0, 0], [1, 2]], [4.0, 8.0], [3, 4], "float32", 1.3)
-    _test_sparse_to_dense_v2([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], "float32")
-    _test_sparse_to_dense_v2([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], "float32", 1.9)
-
-
-#######################################################################
-# tensorflow.sparse.add
-# ----------------------------------
-
-
-def _test_sparse_add(indices, values, A_shape, B_shape, dtype, flip=False):
-    """One iteration of tf.sparse.add"""
-
-    # TODO(ANSHUMAN87): support cuda
-    # TODO(ANSHUMAN87): support both sparse input case
-
-    with tf.Graph().as_default():
-        A_sp = tf.sparse.SparseTensor(
-            indices=indices, values=np.array(values).astype(dtype), dense_shape=A_shape
-        )
-        B = tf.placeholder(shape=B_shape, dtype=dtype, name="B")
-
-        # TODO(ANSHUMAN87): support user input threashold values
-        if flip:
-            if package_version.parse(tf.VERSION) < package_version.parse("1.13.0"):
-                result = tf.sparse.add(B, A_sp, thresh=0)
-            else:
-                result = tf.sparse.add(B, A_sp, threshold=0)
-        else:
-            if package_version.parse(tf.VERSION) < package_version.parse("1.13.0"):
-                result = tf.sparse.add(A_sp, B, thresh=0)
-            else:
-                result = tf.sparse.add(A_sp, B, threshold=0)
-
-        B_np = np.random.uniform(high=5.0, size=B_shape).astype(dtype)
-
-        compare_tf_with_tvm([B_np], [B.name], result.name, no_gpu=True)
-
-
-def test_sparse_add():
-    """sparse.add op test"""
-    ###################################################################
-    #
-    # In order to create a SparseTensor, it requires 3 input as below:
-    #    SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
-    #
-    # Above Sparse can be represented in Dense as below :
-    #    [[1, 0, 0, 0]
-    #     [0, 0, 2, 0]
-    #     [0, 0, 0, 0]]
-    #
-    # ------------------------------------------------------------------
-    for dtype_inp in ["float32", "float64", "int32"]:
-        _test_sparse_add([[0, 0], [1, 2]], [4.0, 8.0], [3, 4], [3, 4], dtype_inp)
-        _test_sparse_add([[0, 0], [1, 2]], [4.0, 8.0], [3, 4], [3, 4], dtype_inp, True)
-        _test_sparse_add([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], [5, 5], dtype_inp)
-        _test_sparse_add([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], [5, 5], dtype_inp, True)
-
-
-#######################################################################
-# StridedSlice
-# ------------
-
-
-def _test_stridedslice(
-    ip_shape,
-    begin,
-    end,
-    stride,
-    dtype,
-    begin_mask=0,
-    end_mask=0,
-    new_axis_mask=0,
-    shrink_axis_mask=0,
-    ellipsis_mask=0,
-):
-    """One iteration of a Stridedslice"""
-
-    tf.reset_default_graph()
-    np_data = np.random.uniform(size=ip_shape).astype(dtype)
-    with tf.Graph().as_default():
-        if len(ip_shape) == 0:  # pylint: disable=len-as-condition
-            in_data = tf.constant(np_data, dtype)
-        else:
-            in_data = tf.placeholder(dtype, ip_shape, name="in_data")
-        tf.strided_slice(
-            in_data,
-            begin,
-            end,
-            stride,
-            begin_mask=begin_mask,
-            end_mask=end_mask,
-            new_axis_mask=new_axis_mask,
-            shrink_axis_mask=shrink_axis_mask,
-            ellipsis_mask=ellipsis_mask,
-            name="strided_slice",
-        )
-        if len(ip_shape) == 0:  # pylint: disable=len-as-condition
-            compare_tf_with_tvm(None, "", "strided_slice:0")
-        else:
-            compare_tf_with_tvm(np_data, "in_data:0", "strided_slice:0")
-
-
-def test_forward_stridedslice():
-    """test StridedSlice"""
-
-    _test_stridedslice([], [0], [0], [1], "float32", new_axis_mask=1)
-    _test_stridedslice([2], [1], [1], [1], "float32", shrink_axis_mask=1)
-    _test_stridedslice([4], [-1], [0], [1], "float32", shrink_axis_mask=1)
-    _test_stridedslice([2, 1], [0], [1], [1], "float32", shrink_axis_mask=1)
-    _test_stridedslice([2, 3, 4], [-2], [0], [1], "float32", shrink_axis_mask=8)
-    _test_stridedslice([2, 3, 4], [0], [1], [1], "float32", shrink_axis_mask=8)
-    _test_stridedslice([3, 4, 3], [1, -1, 0], [4, -5, 3], [2, -1, 1], "float32")
-    _test_stridedslice([3, 4, 3], [1, 0], [4, 3], [2, 1], "float32", ellipsis_mask=8)
-    _test_stridedslice([3, 4, 3], [1, 0], [4, 2], [2, 1], "float32", ellipsis_mask=2)
-    _test_stridedslice([3, 4, 5, 3], [1, 0], [4, 2], [2, 1], "float32", ellipsis_mask=2)
-    _test_stridedslice([3, 4, 5, 3], [1, 0, 1], [4, 2, 2], [2, 1, 1], "float32", ellipsis_mask=2)
-    _test_stridedslice([3, 4, 3], [1, 1, 0], [4, 4, 2], [2, 1, 1], "float32", new_axis_mask=5)
-    _test_stridedslice(
-        [3, 4, 3], [1, 1, 1], [4, 4, 1], [2, 1, 1], "float32", ellipsis_mask=2, new_axis_mask=4
-    )
-    _test_stridedslice(
-        [6, 4, 5], [1, 1, 1], [6, 3, 4], [2, 1, 1], "float32", ellipsis_mask=2, new_axis_mask=5
-    )
-    _test_stridedslice(
-        [3, 4, 3], [1, 1, 2], [4, 4, 3], [2, 1, 1], "float32", ellipsis_mask=4, new_axis_mask=2
-    )
-    _test_stridedslice(
-        [3, 4, 3], [1, 1, 2], [4, 4, 3], [2, 1, 1], "float32", ellipsis_mask=2, new_axis_mask=3
-    )
-    _test_stridedslice(
-        [3, 4, 3], [1, 1, 0], [4, 4, 1], [2, 1, 1], "float32", ellipsis_mask=2, new_axis_mask=3
-    )
-    _test_stridedslice(
-        [3, 4, 3], [1, 1, 2], [4, 4, 3], [2, 1, 1], "float32", ellipsis_mask=2, new_axis_mask=2
-    )
-    _test_stridedslice((3, 4), [1, 0], [4, 4], [1, 1], "float32", shrink_axis_mask=2)
-    _test_stridedslice(
-        [3, 4, 3], [1, 1, 0], [4, 4, 3], [2, 1, 1], "float32", shrink_axis_mask=2, new_axis_mask=2
-    )
-    _test_stridedslice(
-        [3, 4, 3], [1, 1, 0], [4, 4, 3], [2, 1, 1], "float32", shrink_axis_mask=1, new_axis_mask=2
-    )
-    _test_stridedslice(
-        [3, 4, 3], [1, 1, 0], [4, 4, 3], [2, 1, 1], "float32", shrink_axis_mask=2, new_axis_mask=1
-    )
-    _test_stridedslice(
-        [3, 4, 5, 4, 5, 6], [0, 0], [2, 3], [1, 1], "float32", shrink_axis_mask=5, new_axis_mask=1
-    )
-    _test_stridedslice(
-        [3, 4, 5, 4, 5, 6],
-        [0, 0, 1, 2, 1],
-        [2, 3, 4, 5, 3],
-        [1, 1, 2, 2, 1],
-        "float32",
-        shrink_axis_mask=5,
-        new_axis_mask=1,
-        ellipsis_mask=2,
-        begin_mask=8,
-        end_mask=8,
-    )
-    _test_stridedslice(
-        [3, 4, 5, 4, 5, 6],
-        [0, 0, 1, 2, 1],
-        [2, 3, 4, 5, 3],
-        [1, 1, 2, 2, 1],
-        "float32",
-        shrink_axis_mask=8,
-        new_axis_mask=1,
-        ellipsis_mask=2,
-        begin_mask=5,
-        end_mask=5,
-    )
-    _test_stridedslice(
-        [3, 4, 5, 4, 5, 6],
-        [0, 0, 1, 2, 1],
-        [2, 3, 4, 5, 3],
-        [1, 1, 2, 2, 1],
-        "float32",
-        shrink_axis_mask=16,
-        new_axis_mask=1,
-        ellipsis_mask=2,
-        begin_mask=5,
-        end_mask=5,
-    )
-    _test_stridedslice(
-        [3, 4, 5, 4, 5, 6],
-        [1, 2, 0, -3],
-        [4, 5, 3, 3],
-        [2, 2, 1, 1],
-        "float32",
-        shrink_axis_mask=8,
-        new_axis_mask=1,
-        ellipsis_mask=2,
-        begin_mask=5,
-        end_mask=8,
-    )
-    _test_stridedslice(
-        [1, 13, 13, 3, 2],
-        [0, 0],
-        [1, 1],
-        [1, -1],
-        "float32",
-        ellipsis_mask=1,
-        begin_mask=2,
-        end_mask=2,
-    )
-
-
-#######################################################################
-# FloorDiv, RealDiv
-# -----------------
-def _test_forward_divide(ip_shape, dtype):
-    np_numer = np.random.uniform(-100, 100, size=ip_shape).astype(dtype)
-    np_denomin = np.random.uniform(1, 100, size=ip_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        numerator = tf.placeholder(dtype, ip_shape, name="numer")
-        denominator = tf.placeholder(dtype, ip_shape, name="denomin")
-        tf.math.divide(numerator, denominator, name="RealDiv")
-        compare_tf_with_tvm([np_numer, np_denomin], ["numer:0", "denomin:0"], "RealDiv:0")
-
-
-def _test_forward_floordiv(ip_shape, dtype):
-    np_numer = np.random.uniform(1, 100, size=ip_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        numerator = tf.placeholder(dtype, ip_shape, name="numer")
-        tf.math.floordiv(numerator, tf.constant(5, dtype=dtype), name="FloorDiv")
-        compare_tf_with_tvm([np_numer], ["numer:0"], "FloorDiv:0")
-
-
-def test_forward_divide():
-    """test FloorDiv, RealDiv"""
-    _test_forward_divide((4,), "int32")
-    _test_forward_divide((4, 3, 7), "float32")
-    _test_forward_floordiv((4, 3, 7), "float32")
-    _test_forward_floordiv((4, 3, 7), "int32")
-
-
-#######################################################################
-# FloorMod
-# --------
-def _test_forward_floormod(in_shape, if_shape, dtype):
-    np_numer = np.random.uniform(1, 100, size=in_shape).astype(dtype)
-    np_factor = np.random.uniform(1, 100, size=if_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        numerator = tf.placeholder(dtype, in_shape, name="numer")
-        factor = tf.placeholder(dtype, if_shape, name="factor")
-        tf.floormod(numerator, factor, name="FloorMod")
-        compare_tf_with_tvm([np_numer, np_factor], ["numer:0", "factor:0"], "FloorMod:0")
-
-
-def test_forward_floormod():
-    """test FloorMod"""
-    _test_forward_floormod((10,), (10,), "float32")
-    _test_forward_floormod((8, 2), (1,), "float32")
-    _test_forward_floormod((4, 3, 7), (4, 3, 7), "float32")
-    _test_forward_floormod((4, 3, 7), (4, 3, 7), "int32")
-
-
-#######################################################################
-# TruncateMod
-# -----------
-def _test_forward_truncatemod(ip_shape, dtype):
-    np_data_1 = np.random.uniform(-100, 100, size=ip_shape).astype(dtype)
-    np_data_2 = np.random.uniform(1, 10, size=ip_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data_1 = tf.placeholder(dtype, ip_shape, name="in_data_1")
-        in_data_2 = tf.placeholder(dtype, ip_shape, name="in_data_2")
-        tf.truncatemod(in_data_1, in_data_2, name="truncatemod")
-        compare_tf_with_tvm([np_data_1, np_data_2], ["in_data_1:0", "in_data_2:0"], "truncatemod:0")
-
-
-def test_forward_truncatemod():
-    """test TruncateMod"""
-    _test_forward_truncatemod((4, 3, 7), "int32")
-
-
-#######################################################################
-# Gather, GatherV2
-# --------------------------
-
-
-def _test_gather(ip_shape, indice_shape, indice_value, axis, batch_dims, dtype):
-    """One iteration of a GatherV2"""
-
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, ip_shape, name="in_data")
-        indices = tf.placeholder("int32", indice_shape, name="indices")
-        out = tf.gather(in_data, indices, axis=axis, batch_dims=batch_dims)
-        np_data = np.random.uniform(1, 10, size=ip_shape).astype(dtype)
-
-        def _fill_indices(indice_value):
-            indices = np.array(ip_shape, dtype=dtype)
-            if isinstance(indice_value, int):
-                indices = np.array([indice_value], dtype="int32")
-            else:
-                indices = np.asarray(indice_value, dtype="int32")
-            return indices
-
-        np_indices = _fill_indices(indice_value)
-        compare_tf_with_tvm([np_data, np_indices], ["in_data:0", "indices:0"], out.name)
-
-
-def test_forward_gather():
-    """test Gather/GatherV2 layer"""
-    _test_gather((4,), (1,), 1, 0, 1, "int32")
-    _test_gather((4,), (1,), 1, 0, 0, "float32")
-    _test_gather((1, 4), (1,), [0], 0, 0, "int32")
-    _test_gather((4,), (1, 2, 2), [[[1, 0], [0, 1]]], 0, 0, "float32")
-    _test_gather((2, 2), (1, 2, 2), [[[1, 0], [0, 1]]], 0, 0, "int32")
-    _test_gather((2, 2), (1, 2, 2), [[[1, 0], [0, 1]]], 1, 0, "int32")
-    _test_gather((2, 2), (1, 2, 2), [[[1, 0], [0, 1]]], 0, 0, "float32")
-    _test_gather((3, 3, 3), (1, 1, 2), [[[1, 0]]], 0, 0, "int32")
-    _test_gather((3, 3, 3), (1, 1, 2), [[[1, 0]]], 2, 0, "int32")
-    _test_gather((4, 3, 5, 6), (1, 4), [[2, 1, 0, 0]], 0, 0, "float32")
-    _test_gather((2, 2), (2, 2), [[0, 0], [0, 0]], 1, 1, "float32")
-    _test_gather(
-        (2, 2, 3, 6), (2, 2, 3), [[[1, 1, 0], [0, 0, 1]], [[0, 1, 0], [1, 0, 1]]], 2, 2, "float32"
-    )
-    _test_gather(
-        (2, 2, 3, 6), (2, 2, 3), [[[1, 1, 0], [0, 0, 1]], [[0, 1, 0], [1, 0, 1]]], 3, 1, "float32"
-    )
-    _test_gather(
-        (2, 2, 3, 6), (2, 2, 3), [[[1, 1, 0], [0, 0, 1]], [[0, 1, 0], [1, 0, 1]]], 3, 2, "float32"
-    )
-    _test_gather(
-        (2, 2, 3, 6), (2, 2, 3), [[[1, 1, 0], [0, 0, 1]], [[0, 1, 0], [1, 0, 1]]], 3, 0, "float32"
-    )
-
-
-#######################################################################
-# GatherND
-# --------------------------
-
-
-def _test_gather_nd(ip_shape, indice_value, dtype):
-    """test operator GatherNd"""
-    np_data = np.random.uniform(1, 100, size=ip_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, ip_shape, name="in_data")
-        tf.gather_nd(in_data, indices=indice_value, name="gather_nd")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "gather_nd:0")
-
-
-def test_forward_gather_nd():
-    """test operator GatherNd"""
-    _test_gather_nd((2, 2), [[0, 0], [1, 1]], "float32")
-    _test_gather_nd((2, 2, 2), [[1, 0, 0], [0, 0, 0]], "float32")
-    _test_gather_nd((4,), [1], "float32")
-    _test_gather_nd((4,), [1], "int32")
-    _test_gather_nd((1, 4), [0, 3], "int32")
-    _test_gather_nd((2, 2), [[[1, 0], [0, 1]]], "int32")
-    _test_gather_nd((2, 2), [[[1, 0], [0, 1]]], "float32")
-    _test_gather_nd((3, 3, 3), [[[1, 0]]], "int32")
-    _test_gather_nd((3, 3, 3), [[[1, 0]]], "int32")
-    _test_gather_nd((4, 3, 5, 6), [[2, 1, 0, 0]], "float32")
-    _test_gather_nd((3, 3, 3), [[[2, 1]]], "int32")
-
-
-#######################################################################
-# BiasAdd
-# -------
-def test_forward_bias_add():
-    """test Op BiasAdd"""
-
-    def check_bias_add(lh_shpae, rh_shape, dtype):
-        tf.reset_default_graph()
-        lh_data = np.random.uniform(size=lh_shpae).astype(dtype)
-        rh_data = np.random.uniform(size=rh_shape).astype(dtype)
-        with tf.Graph().as_default():
-            lft_data = tf.placeholder(dtype, name="lft_data")
-            rgt_data = tf.placeholder(dtype, name="rgt_data")
-            tf.nn.bias_add(lft_data, rgt_data, name="BiasAdd")
-            compare_tf_with_tvm([lh_data, rh_data], ["lft_data:0", "rgt_data:0"], "BiasAdd:0")
-
-    check_bias_add((10, 8, 16, 32), (32,), dtype="int32")
-    check_bias_add((10, 20), (20,), dtype="float32")
-
-
-#######################################################################
-# Split
-# -----
-
-
-def _test_split(in_shape, axis, num_or_size_splits, dtype):
-    """One iteration of a Split"""
-    np_data = np.random.uniform(-5, 5, size=in_shape).astype(dtype)
-
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, in_shape, name="in_data")
-        _ = len(num_or_size_splits) if isinstance(num_or_size_splits, list) else num_or_size_splits
-        split = tf.split(in_data, num_or_size_splits, axis=axis)
-        relu = [tf.nn.relu(i) for i in split]
-
-        compare_tf_with_tvm([np_data], ["in_data:0"], [n.name for n in relu])
-
-    # and now test together with concat
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, in_shape, name="in_data")
-        splitted = tf.split(in_data, num_or_size_splits, axis=axis)
-        concat = tf.concat(splitted, axis)
-        compare_tf_with_tvm([np_data], "in_data:0", concat.name)
-
-
-def test_forward_split():
-    """test split layer"""
-    # rank 1
-    _test_split((3,), 0, 1, "float32")
-    _test_split((3,), 0, 3, "float32")
-    _test_split((6,), 0, 3, "float32")
-    # rank 2
-    _test_split((6, 2), 0, 3, "float32")
-    _test_split((2, 6), 1, 6, "float32")
-    # rank 3
-    _test_split((6, 2, 4), 0, 2, "int32")
-    _test_split((2, 6, 4), 1, 3, "float32")
-    _test_split((2, 4, 6), 2, 1, "float32")
-    # rank 4
-    _test_split((6, 1, 3, 5), 0, 3, "float32")
-    _test_split((1, 6, 3, 5), 1, 3, "float32")
-    _test_split((1, 3, 6, 5), 2, 3, "float32")
-    _test_split((1, 3, 5, 6), 3, 3, "float32")
-    # split along negative axis
-    _test_split((6, 1, 3, 5), -4, 3, "float32")
-    _test_split((1, 6, 3, 5), -3, 3, "float32")
-    _test_split((1, 3, 6, 5), -2, 3, "float32")
-    _test_split((1, 3, 5, 6), -1, 3, "float32")
-    # size_splits list
-    _test_split((6,), 0, [1, 2, 3], "int32")
-    _test_split((3, 6, 4), -2, [1, 4, 1], "float32")
-
-
-######################################################################
-# TopKV2
-# ------
-
-
-def _test_forward_top_k_v2(in_shape, k):
-    np_data = np.random.uniform(-100, 100, size=in_shape).astype("float32")
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder("float32", in_shape, name="in_data")
-        tf.math.top_k(in_data, k, name="TopK")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "TopK:0")
-
-
-def test_forward_top_k_v2():
-    _test_forward_top_k_v2((3,), 1)
-    _test_forward_top_k_v2((3,), 3)
-    _test_forward_top_k_v2((3, 5, 7), 3)
-    _test_forward_top_k_v2((3, 5, 7), 3)
-
-
-#######################################################################
-# Unstack
-# -------
-
-
-def _test_unstack(ip_shape, axis, dtype):
-    np_data = np.random.uniform(-5, 5, size=ip_shape).astype(dtype)
-
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, ip_shape, name="in_data")
-        unstack = tf.unstack(in_data, axis=axis)
-
-        compare_tf_with_tvm([np_data], ["in_data:0"], [n.name for n in unstack])
-
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, ip_shape, name="in_data")
-        tf.stack(tf.unstack(in_data, axis=axis), axis=axis)
-
-        compare_tf_with_tvm([np_data], ["in_data:0"], "stack:0")
-
-
-def test_forward_unstack():
-    """test unstack layer"""
-    _test_unstack((6,), 0, "int32")
-    _test_unstack((2, 6), 1, "float64")
-    # negative axis
-    _test_unstack((1, 4), -1, "int32")
-    _test_unstack((3, 6, 4), -2, "float32")
-
-
-#######################################################################
-# Tile
-# ----
-
-
-def _test_tile(in_shape, multiples, dtype):
-    np_data = np.random.uniform(-5, 5, size=in_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, in_shape, name="in_data")
-        tf.tile(in_data, multiples=multiples, name="tile")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "tile:0")
-
-
-def test_forward_tile():
-    """test Tile"""
-    _test_tile((2,), (3,), "int32")
-    _test_tile((2, 2), (2, 3), "float32")
-    _test_tile((2, 4, 6), (6, 7, 8), "float64")
-
-
-#######################################################################
-# ClipByValue
-# -----------
-
-
-def _test_forward_clip_by_value(ip_shape, clip_value_min, clip_value_max, dtype):
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, ip_shape, name="in_data")
-        tf.clip_by_value(in_data, clip_value_min, clip_value_max, name="ClipByValue")
-        np_data = np.random.uniform(-100, 100, size=ip_shape).astype(dtype)
-        compare_tf_with_tvm([np_data], ["in_data:0"], "ClipByValue:0")
-
-
-def test_forward_clip_by_value():
-    """test ClipByValue op"""
-    if package_version.parse(tf.__version__) < package_version.parse("1.9"):
-        _test_forward_clip_by_value((4,), 0.1, 5.0, "float32")
-        _test_forward_clip_by_value((4, 4), 1, 5, "int32")
-
-
-#######################################################################
-# Multi Input to graph
-# --------------------
-
-
-def test_forward_multi_input():
-    """Multi Input"""
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(tf.int32, shape=[3, 3], name="in1")
-        in2 = tf.placeholder(tf.int32, shape=[3, 3], name="in2")
-        in3 = tf.placeholder(tf.int32, shape=[3, 3], name="in3")
-        in4 = tf.placeholder(tf.int32, shape=[3, 3], name="in4")
-
-        out1 = tf.add(in1, in2, name="out1")
-        out2 = tf.subtract(in3, in4, name="out2")
-        _ = tf.multiply(out1, out2, name="out")
-        in_data = np.arange(9, dtype="int32").reshape([3, 3])
-
-        compare_tf_with_tvm(
-            [in_data, in_data, in_data, in_data], ["in1:0", "in2:0", "in3:0", "in4:0"], "out:0"
-        )
-
-
-#######################################################################
-# Multi Output to Graph
-# ---------------------
-
-
-def test_forward_multi_output():
-    """Multi Output"""
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(tf.int32, shape=[3, 3], name="in1")
-        in2 = tf.placeholder(tf.int32, shape=[3, 3], name="in2")
-        in3 = tf.placeholder(tf.int32, shape=[3, 3], name="in3")
-        in4 = tf.placeholder(tf.int32, shape=[3, 3], name="in4")
-
-        _ = tf.add(in1, in2, name="out1")
-        _ = tf.subtract(in3, in4, name="out2")
-        in_data = np.arange(9, dtype="int32").reshape([3, 3])
-        in_data = [in_data] * 4
-        in_name = ["in1:0", "in2:0", "in3:0", "in4:0"]
-        out_name = ["out1:0", "out2:0"]
-        out_node = [out.strip(":0") for out in out_name]
-        in_node = [inp.strip(":0") for inp in in_name]
-
-        with tf.Session() as sess:
-            final_graph_def = tf.graph_util.convert_variables_to_constants(
-                sess,
-                sess.graph.as_graph_def(add_shapes=True),
-                out_node,
-            )
-            tf_output = run_tf_graph(sess, in_data, in_name, out_name)
-            tvm_output = run_tvm_graph(
-                final_graph_def, in_data, in_node, target="llvm", out_names=out_node, num_output=2
-            )
-            for i, tf_out in enumerate(tf_output):
-                tvm.testing.assert_allclose(tf_out, tvm_output[i], atol=1e-5, rtol=1e-5)
-
-
-#######################################################################
-# Resize Bilinear, Nearest_Neighbor
-# ---------------------------------
-
-
-def _test_resize_bilinear(in_shape, to_shape, align_corners):
-    """One iteration of resize bilinear"""
-
-    data = np.random.uniform(size=in_shape).astype("float32")
-    shape_data = np.array(to_shape).astype("int32")
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        shape_data = constant_op.constant(
-            shape_data, shape=shape_data.shape, dtype=shape_data.dtype
-        )
-        tf.image.resize_bilinear(in_data, shape_data, align_corners=align_corners)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "ResizeBilinear:0")
-
-
-def _test_resize_bilinear_from_tensor(in_shape, align_corners):
-    """One iteration of resize bilinear with non-constant output shape, requires
-    value inference to get proper output shape."""
-
-    data = np.random.uniform(size=in_shape).astype("float32")
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(
-            shape=[in_shape[0], None, None, in_shape[3]], dtype=data.dtype
-        )
-        to_shape = tf.shape(in_data)[1:3]
-        tf.image.resize_bilinear(in_data, to_shape, align_corners=align_corners)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "ResizeBilinear:0")
-
-
-def _test_resize_nearest_neighbor(in_shape, to_shape):
-    """One iteration of resize nearest neighbor"""
-
-    data = np.random.uniform(size=in_shape).astype("float32")
-    shape_data = np.array(to_shape).astype("int32")
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        shape_data = constant_op.constant(
-            shape_data, shape=shape_data.shape, dtype=shape_data.dtype
-        )
-        tf.image.resize_nearest_neighbor(in_data, shape_data, name="resize_nearest_neighbor")
-
-        compare_tf_with_tvm(data, "Placeholder:0", "resize_nearest_neighbor:0")
-
-
-def _test_resize_nearest_neighbor_dynamic_shape(in_shape, scale):
-    """One iteration of resize nearest neighbor for graph with dynamic input shape"""
-
-    data = np.random.uniform(size=in_shape).astype("float32")
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=None, dtype=data.dtype)
-        # multiply input shape by scale factor
-        new_shape = tf.shape(in_data)[1:3] * tf.constant(scale, dtype=tf.int32)
-        tf.image.resize_nearest_neighbor(in_data, new_shape, name="resize_nearest_neighbor")
-
-        compare_tf_with_tvm(data, "Placeholder:0", "resize_nearest_neighbor:0")
-
-
-def test_forward_resize():
-    """Resize Bilinear, Nearest_Neighbor"""
-    # TF default layout is NHWC
-    _test_resize_bilinear((4, 32, 32, 3), [50, 50], False)
-    _test_resize_bilinear((6, 32, 32, 3), [20, 20], True)
-    _test_resize_bilinear_from_tensor((4, 32, 32, 3), False)
-    _test_resize_bilinear_from_tensor((6, 50, 50, 3), True)
-    _test_resize_nearest_neighbor((6, 32, 32, 3), [20, 20])
-    _test_resize_nearest_neighbor_dynamic_shape((1, 16, 16, 3), scale=[2, 2])
-
-
-#######################################################################
-# BroadcastArgs
-# -----------
-
-
-def _test_broadcast_args(in_shape_1, in_shape_2):
-    """One iteration of broadcast_args"""
-
-    shape_1 = np.array(in_shape_1).astype("int32")
-    shape_2 = np.array(in_shape_2).astype("int32")
-
-    with tf.Graph().as_default():
-        shape_1 = constant_op.constant(shape_1, shape=shape_1.shape, dtype=shape_1.dtype)
-        shape_2 = constant_op.constant(shape_2, shape=shape_2.shape, dtype=shape_2.dtype)
-        tf.raw_ops.BroadcastArgs(s0=shape_1, s1=shape_2)
-
-        compare_tf_with_tvm(None, "", "BroadcastArgs:0", opt_level=0)
-
-
-def test_forward_broadcast_args():
-    """Resize Bilinear"""
-
-    _test_broadcast_args((4, 1, 32, 32), [4, 8, 32, 32])
-    _test_broadcast_args((6, 32, 32, 1), [6, 32, 32, 16])
-    _test_broadcast_args((32, 32, 16), [6, 32, 32, 16])
-
-
-#######################################################################
-# BroadcastTo
-# -----------
-
-
-def _test_broadcast_to(in_shape, to_shape):
-    """One iteration of broadcast_to"""
-
-    data = np.random.uniform(size=in_shape).astype("float32")
-    shape_data = np.array(to_shape).astype("int32")
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        shape_data = constant_op.constant(
-            shape_data, shape=shape_data.shape, dtype=shape_data.dtype
-        )
-        tf.broadcast_to(in_data, shape_data)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "BroadcastTo:0", opt_level=0)
-
-
-def _test_broadcast_to_from_tensor(in_shape):
-    """One iteration of broadcast_to with unknown shape at graph build"""
-
-    data = np.random.uniform(size=in_shape).astype("float32")
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=[None], dtype=data.dtype)
-
-        shape_data = tf.multiply(tf.shape(in_data), 32)
-        tf.broadcast_to(in_data, shape_data)
-
-        compare_tf_with_tvm(data, "Placeholder:0", "BroadcastTo:0")
-
-
-def test_forward_broadcast_to():
-    """Resize Bilinear"""
-
-    _test_broadcast_to((4, 1, 32, 32), [4, 8, 32, 32])
-    _test_broadcast_to((6, 32, 32, 1), [6, 32, 32, 16])
-    _test_broadcast_to_from_tensor((1))
-
-
-#######################################################################
-# Fill
-# ----
-
-
-def _test_fill(in_shape):
-    """Use the fill op to create a tensor of ones with non-constant shape."""
-
-    with tf.Graph().as_default():
-        tf.ones(shape=in_shape, dtype="float32")
-        compare_tf_with_tvm(in_shape, [], "ones:0", opt_level=1)
-
-
-def _test_fill_from_tensor(in_shape):
-    """Use the fill op to create a tensor of ones with non-constant shape.
-    Some extra ops need to be added here to prevent the graph from
-    being fully constant and folded away."""
-
-    data = np.random.uniform(size=in_shape).astype("float32")
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(
-            shape=[in_shape[0], in_shape[1], None, None], dtype=data.dtype
-        )
-
-        x = tf.ones(shape=2 * tf.shape(in_data), dtype=data.dtype)
-        _ = tf.math.add(in_data, tf.reduce_mean(x), name="out1")
-        compare_tf_with_tvm(data, "Placeholder:0", "out1:0")
-
-
-def _test_fill_symbolic_inputs(in_shape_data, in_value_data, dtype):
-    with tf.Graph().as_default():
-        in_shape = tf.placeholder(shape=[in_shape_data.shape[0]], dtype=in_shape_data.dtype)
-        in_value = tf.placeholder(shape=(), dtype=dtype)
-        out = tf.fill(in_shape, in_value)
-        for mode in ["debug", "vm"]:
-            compare_tf_with_tvm(
-                [in_shape_data, in_value_data], [in_shape.name, in_value.name], out.name, mode=mode
-            )
-
-
-def test_forward_fill():
-    """Resize Bilinear"""
-
-    _test_fill((32))
-    _test_fill((6, 32, 64, 64))
-    _test_fill_from_tensor((6, 32, 64, 64))
-    _test_fill_symbolic_inputs(np.array((2,)), np.int32(9), tf.int32)
-    _test_fill_symbolic_inputs(np.array((2, 3)), 9, tf.int64)
-    _test_fill_symbolic_inputs(np.array((2, 3, 4)), np.float32(9.0), tf.float32)
-
-
-#######################################################################
-# Crop to bounding box
-# --------------------
-
-
-def _test_crop(in_shape, off_h, off_w, tar_h, tar_w):
-    """Crop to bounding box"""
-    data = np.random.uniform(size=in_shape).astype("float32")
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        tf.image.crop_to_bounding_box(in_data, off_h, off_w, tar_h, tar_w)
-        compare_tf_with_tvm(data, "Placeholder:0", "crop_to_bounding_box/Slice:0")
-
-
-def test_forward_crop():
-    """Crop to bounding box"""
-    _test_crop((1, 224, 224, 3), 20, 20, 120, 120)
-
-
-#######################################################################
-# CropAndResize
-# -------------
-
-
-def _test_forward_crop_and_resize(
-    img_shape,
-    boxes,
-    box_idx,
-    crop_size,
-    extrapolation_value=0.0,
-    method="bilinear",
-    dtype="float32",
-    atol=1e-4,
-    rtol=1e-4,
-):
-    image = np.random.uniform(0, 10, size=img_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(dtype, image.shape, name="in_data")
-        tf.image.crop_and_resize(
-            in_data,
-            boxes=boxes,
-            box_ind=box_idx,
-            crop_size=crop_size,
-            method=method,
-            extrapolation_value=extrapolation_value,
-            name="crop_and_resize",
-        )
-        compare_tf_with_tvm([image], ["in_data:0"], "crop_and_resize:0", atol=atol, rtol=rtol)
-
-
-def test_forward_crop_and_resize():
-    """CropAndResize"""
-    _test_forward_crop_and_resize([1, 6, 6, 3], [[0, 0, 1, 1]], [0], [3, 3])
-    _test_forward_crop_and_resize([1, 6, 6, 3], [[0, 0, 1, 1]], [0], [3, 3], 0.2)
-    _test_forward_crop_and_resize([1, 6, 6, 3], [[0, 0, 1, 1]], [0], [3, 3], 0.2, "nearest")
-    _test_forward_crop_and_resize([1, 11, 11, 3], [[0.3, 0.3, 1, 1]], [0], [21, 21])
-    _test_forward_crop_and_resize([1, 41, 41, 3], [[0.2, 0.4, 0.8, 0.8]], [0], [21, 11])
-    _test_forward_crop_and_resize([1, 100, 100, 3], [[0, 0, 0.9, 0.9]], [0], [30, 30])
-    _test_forward_crop_and_resize([1, 249, 249, 3], [[0, 0, 1, 1]], [0], [9, 9])
-    _test_forward_crop_and_resize([1, 201, 301, 3], [[0.2, 0.3, 0.7, 0.8]], [0], [51, 51])
-    _test_forward_crop_and_resize(
-        img_shape=[10, 11, 11, 3],
-        boxes=[[0, 0, 0.9, 0.9], [0.2, 0.2, 0.8, 0.8]],
-        box_idx=[0, 1],
-        crop_size=[5, 5],
-    )
-
-    if platform.machine() == "aarch64":
-        pytest.skip("Currently failing on AArch64")
-    _test_forward_crop_and_resize([1, 224, 224, 3], [[0.1, 0.2, 1, 1]], [0], [9, 9])
-    _test_forward_crop_and_resize(
-        img_shape=[20, 576, 576, 3],
-        boxes=[[0, 0, 1, 1], [0, 0, 0.8, 0.8], [0.1, 0.2, 0.9, 1], [0.2, 0, 1, 1]],
-        box_idx=[1, 0, 2, 3],
-        crop_size=[24, 24],
-        extrapolation_value=0.3,
-        atol=1e-3,
-        rtol=1e-3,
-    )
-    _test_forward_crop_and_resize(
-        img_shape=[20, 229, 229, 3],
-        boxes=[[0, 0, 0.9, 0.9], [0.3, 0.3, 1, 1], [0.2, 0.1, 0.7, 0.8], [0, 0, 1, 1]],
-        box_idx=[3, 0, 2, 1],
-        crop_size=[58, 58],
-        extrapolation_value=0.2,
-        method="nearest",
-        atol=1e-3,
-        rtol=1e-3,
-    )
-
-
-#######################################################################
-# Non Max Suppression
-# -------------------
-def _test_forward_nms_v3(
-    bx_shape, score_shape, iou_threshold, score_threshold, out_size, dtype="float32"
-):
-    boxes = np.random.uniform(0, 10, size=bx_shape).astype(dtype)
-    scores = np.random.uniform(size=score_shape).astype(dtype)
-    max_output_size = np.int32(out_size)
-    tf.reset_default_graph()
-    in_data_1 = tf.placeholder(dtype, boxes.shape, name="in_data_1")
-    in_data_2 = tf.placeholder(dtype, scores.shape, name="in_data_2")
-    in_data_3 = tf.placeholder(tf.int32, name="in_data_3")
-    tf.image.non_max_suppression(
-        boxes=in_data_1,
-        scores=in_data_2,
-        max_output_size=in_data_3,
-        iou_threshold=iou_threshold,
-        score_threshold=score_threshold,
-        name="nms",
-    )
-    compare_tf_with_tvm(
-        [boxes, scores, max_output_size],
-        ["in_data_1:0", "in_data_2:0", "in_data_3:0"],
-        "nms/NonMaxSuppressionV3:0",
-        mode="vm",
-    )
-    compare_tf_with_tvm(
-        [boxes, scores, max_output_size],
-        ["in_data_1:0", "in_data_2:0", "in_data_3:0"],
-        "nms/NonMaxSuppressionV3:0",
-        mode="debug",
-    )
-
-
-def _test_forward_nms_v4(
-    bx_shape, score_shape, iou_threshold, score_threshold, out_size, dtype="float32"
-):
-    boxes = np.random.uniform(0, 10, size=bx_shape).astype(dtype)
-    scores = np.random.uniform(size=score_shape).astype(dtype)
-    max_output_size = np.int32(out_size)
-    tf.reset_default_graph()
-    in_data_1 = tf.placeholder(dtype, boxes.shape, name="in_data_1")
-    in_data_2 = tf.placeholder(dtype, scores.shape, name="in_data_2")
-    in_data_3 = tf.placeholder(tf.int32, name="in_data_3")
-    indices_padded, num_valid = tf.image.non_max_suppression_padded(
-        boxes=in_data_1,
-        scores=in_data_2,
-        max_output_size=in_data_3,
-        iou_threshold=iou_threshold,
-        score_threshold=score_threshold,
-        name="nms",
-        pad_to_max_output_size=True,
-    )
-    num_valid = tf.reshape(num_valid, shape=(-1,))
-    indices_padded = tf.reshape(indices_padded, shape=(-1,))
-    tf.slice(indices_padded, tf.constant([0]), num_valid, name="SlicedIndices")
-    compare_tf_with_tvm(
-        [boxes, scores, max_output_size],
-        ["in_data_1:0", "in_data_2:0", "in_data_3:0"],
-        ["nms/NonMaxSuppressionV4:1", "SlicedIndices:0"],
-        mode="vm",
-    )
-    compare_tf_with_tvm(
-        [boxes, scores, max_output_size],
-        ["in_data_1:0", "in_data_2:0", "in_data_3:0"],
-        ["nms/NonMaxSuppressionV4:1", "SlicedIndices:0"],
-        mode="debug",
-    )
-
-
-def _test_forward_nms_v5(
-    bx_shape, score_shape, iou_threshold, score_threshold, out_size, dtype="float32"
-):
-    boxes = np.random.uniform(0, 10, size=bx_shape).astype(dtype)
-    scores = np.random.uniform(size=score_shape).astype(dtype)
-    max_output_size = np.int32(out_size)
-    tf.reset_default_graph()
-    in_data_1 = tf.placeholder(dtype, boxes.shape, name="in_data_1")
-    in_data_2 = tf.placeholder(dtype, scores.shape, name="in_data_2")
-    in_data_3 = tf.placeholder(tf.int32, name="in_data_3")
-    tf.image.non_max_suppression_with_scores(
-        boxes=in_data_1,
-        scores=in_data_2,
-        max_output_size=in_data_3,
-        iou_threshold=iou_threshold,
-        score_threshold=score_threshold,
-        name="nms",
-    )
-    compare_tf_with_tvm(
-        [boxes, scores, max_output_size],
-        ["in_data_1:0", "in_data_2:0", "in_data_3:0"],
-        ["nms/NonMaxSuppressionV5:0", "nms/NonMaxSuppressionV5:1"],
-        mode="vm",
-    )
-
-
-def test_forward_nms():
-    """NonMaxSuppressionV3,5"""
-    for _test_forward_nms in [_test_forward_nms_v3, _test_forward_nms_v5]:
-        _test_forward_nms((5, 4), (5,), 0.7, 0.5, 5)
-        _test_forward_nms((20, 4), (20,), 0.5, 0.6, 10)
-        _test_forward_nms((1000, 4), (1000,), 0.3, 0.7, 1000)
-        _test_forward_nms((2000, 4), (2000,), 0.4, 0.6, 7)
-
-
-def _test_forward_combined_nms(
-    bx_shape,
-    score_shape,
-    iou_threshold,
-    score_threshold,
-    out_size,
-    total_size,
-    clip_boxes=False,
-    dtype="float32",
-):
-    def get_random_scores(size, dtype):
-        size1d = np.prod(size)
-        scores = np.linspace(0, 1, num=size1d)
-        np.random.shuffle(scores)
-        return scores.reshape(size).astype(dtype)
-
-    boxes = np.random.uniform(-1, 2, size=bx_shape).astype(dtype)
-    scores = get_random_scores(score_shape, dtype)
-    max_output_size = np.int32(out_size)
-    tf.reset_default_graph()
-    in_data_1 = tf.placeholder(dtype, boxes.shape, name="in_data_1")
-    in_data_2 = tf.placeholder(dtype, scores.shape, name="in_data_2")
-    in_data_3 = tf.placeholder(tf.int32, name="in_data_3")
-    tf.image.combined_non_max_suppression(
-        boxes=in_data_1,
-        scores=in_data_2,
-        max_output_size_per_class=in_data_3,
-        max_total_size=total_size,
-        iou_threshold=iou_threshold,
-        score_threshold=score_threshold,
-        pad_per_class=False,
-        clip_boxes=clip_boxes,
-        name="nms",
-    )
-    compare_tf_with_tvm(
-        [boxes, scores, max_output_size],
-        ["in_data_1:0", "in_data_2:0", "in_data_3:0"],
-        [
-            "nms/CombinedNonMaxSuppression:0",
-            "nms/CombinedNonMaxSuppression:1",
-            "nms/CombinedNonMaxSuppression:2",
-            "nms/CombinedNonMaxSuppression:3",
-        ],
-    )
-
-
-def test_forward_combined_nms():
-    """CombinedNonMaxSuppression"""
-    _test_forward_combined_nms((1, 64, 1, 4), (1, 64, 1), 0.7, 0.5, 64, 64)
-    _test_forward_combined_nms((1, 32, 1, 4), (1, 32, 1), 0.7, 0.5, 10, 64)
-    _test_forward_combined_nms((1, 32, 1, 4), (1, 32, 2), 0.7, 0.5, 32, 64)
-    _test_forward_combined_nms((1, 64, 1, 4), (1, 64, 20), 0.7, 0.5, 64, 10)
-    # This workload seems flaky on CI.
-    # See https://github.com/apache/tvm/issues/8140
-    # _test_forward_combined_nms((1, 64, 20, 4), (1, 64, 20), 0.7, 0.5, 64, 64, clip_boxes=True)
-    _test_forward_combined_nms((2, 200, 1, 4), (2, 200, 1), 0.4, 0.6, 100, 100)
-    _test_forward_combined_nms((2, 200, 1, 4), (2, 200, 10), 0.4, 0.2, 150, 1000)
-
-
-#######################################################################
-# LSTM
-# ----
-
-
-def _test_lstm_cell(batch_size, num_hidden, num_layers, forget_bias, dtype):
-    """One iteration of a LSTM cell"""
-
-    tf.reset_default_graph()
-    input_size = num_hidden
-    input_data = np.full((batch_size, input_size), 1.0, dtype=dtype)
-    in_state_c = np.full((batch_size, num_hidden), 0.1, dtype=dtype)
-    in_state_h = np.full((batch_size, num_hidden), 0.1, dtype=dtype)
-
-    def _get_tensorflow_output():
-        with tf.Session() as sess:
-            with variable_scope.variable_scope(
-                "root", initializer=init_ops.constant_initializer(0.5)
-            ):
-                m0 = tf.placeholder(dtype, [batch_size, num_hidden], name="m0")
-                m1 = tf.placeholder(dtype, [batch_size, num_hidden], name="m1")
-                x = tf.placeholder(shape=(batch_size, input_size), dtype=dtype, name="input")
-                g, ((out_m0, out_m1)) = tensorflow.contrib.rnn.LSTMBlockCell(
-                    num_hidden, forget_bias=forget_bias
-                )(x, (m0, m1))
-                sess.run([variables.global_variables_initializer()])
-                res = sess.run(
-                    [g, out_m0, out_m1],
-                    {
-                        x.name: np.array([[1.0, 1.0]]),
-                        m0.name: in_state_c,
-                        m1.name: in_state_h,
-                    },
-                )
-            graph_def = sess.graph.as_graph_def(add_shapes=True)
-            final_graph_def = graph_util.convert_variables_to_constants(
-                sess, graph_def, ["root/lstm_cell/LSTMBlockCell"]
-            )
-
-            return final_graph_def, res
-
-    graph_def, tf_out = _get_tensorflow_output()
-    tvm_output = run_tvm_graph(
-        graph_def,
-        [input_data, in_state_c, in_state_h],
-        ["root/input", "root/m0", "root/m1"],
-        num_output=7,
-    )
-    assert isinstance(tvm_output, list)
-
-    tvm.testing.assert_allclose(tf_out[0], tvm_output[6], rtol=1e-3, atol=1e-3)
-    tvm.testing.assert_allclose(tf_out[1], tvm_output[1], rtol=1e-3, atol=1e-3)
-
-
-def test_forward_lstm():
-    """test LSTM block cell"""
-    if package_version.parse(tf.VERSION) < package_version.parse("2.0.0"):
-        # in 2.0, tf.contrib.rnn.LSTMBlockCell is removed
-        _test_lstm_cell(1, 2, 1, 0.5, "float32")
-
-
-#######################################################################
-# Pack
-# ---
-def _test_pack(axis, shape, **kwargs):
-
-    a = np.arange(np.prod(shape), dtype=np.float32).reshape(shape)
-    b = np.arange(np.prod(shape), dtype=np.float32).reshape(shape)
-
-    with tf.Graph().as_default():
-        tf_a = array_ops.placeholder(shape=shape, dtype="float32", name="pl_a")
-        tf_b = array_ops.placeholder(shape=shape, dtype="float32", name="pl_b")
-        tf_c = tf.stack([tf_a, tf_b], axis=axis, **kwargs)
-        assert tf_c.op.op_def.name == "Pack", "tf.stack() is expected to produce 'Pack' operation"
-
-        compare_tf_with_tvm([a, b], ["pl_a:0", "pl_b:0"], "stack:0")
-
-
-def test_forward_pack():
-    for axis in range(-3, 3):
-        _test_pack(axis, [3, 2, 1])
-    for axis in range(-1, 1):
-        _test_pack(axis, [3])
-    _test_pack(0, [])
-
-
-#######################################################################
-# Unpack
-# ------
-def _test_forward_unpack(in_shape, axis, dtype):
-    """test operator Unpack"""
-    np_data = np.random.uniform(-100, 100, size=in_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, in_shape, name="in_data")
-        tf.unstack(in_data, axis=axis, name="Unpack")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "Unpack:0")
-
-
-def test_forward_unpack():
-    _test_forward_unpack((3,), 0, "int32")
-    _test_forward_unpack((3,), -1, "int16")
-    _test_forward_unpack((21, 23, 3), 2, "float32")
-
-
-#######################################################################
-# Range
-# -----
-
-
-def test_forward_range():
-    """test operator Range"""
-    for dtype in [tf.int32, tf.int64]:
-        tf.reset_default_graph()
-        with tf.Graph().as_default():
-            tf.range(1, 18, 3, name="range", dtype=dtype)
-            compare_tf_with_tvm([], [], "range:0")
-
-    # test type assignment for operator Range
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        tf.range(1, 256 + 1, 1, dtype=tf.float32)
-        compare_tf_with_tvm([], [], "range:0")
-
-
-#######################################################################
-# Einsum
-# -----
-
-
-def _test_einsum(equation, dtype, *shape_of_input_tensors):
-    """Test Einsum Op"""
-
-    with tf.Graph().as_default():
-        inputs_placeholders = []
-        input_data = []
-        for idx, shape in enumerate(shape_of_input_tensors):
-            input_name = f"input_{idx}"
-            inputs_placeholders.append(tf.placeholder(shape=shape, dtype=dtype, name=input_name))
-            input_data.append(np.random.normal(size=shape).astype(dtype))
-
-        result = tf.einsum(equation, *inputs_placeholders)
-
-        compare_tf_with_tvm(input_data, [ph.name for ph in inputs_placeholders], result.name)
-
-
-def test_forward_einsum():
-    for dtype in ["float32"]:
-        _test_einsum("ij,jk->ik", dtype, [2, 3], [3, 5])  # Matmul
-        _test_einsum("ij,jk", dtype, [2, 3], [3, 5])  # Matmul
-        _test_einsum("i,i->", dtype, [2], [2])  # Dot product
-        _test_einsum("i,j->ij", dtype, [3], [5])  # Outer produce
-        _test_einsum("ij->ji", dtype, [2, 3])  # Transpose
-        _test_einsum("ii->i", dtype, [3, 3])  # Diag
-        _test_einsum("ii", dtype, [3, 3])  # Trace of a square matrix
-        _test_einsum("bij,bjk->bik", dtype, [7, 5, 3], [7, 3, 2])  # Batch matmul
-
-
-#######################################################################
-# Pad
-# ---
-
-
-def _test_pad(input_shape, paddings, mode, **kwargs):
-    """One iteration of pad operation with given shape"""
-
-    x = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape)
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=input_shape, dtype="float32")
-        pad_values = constant_op.constant(paddings)
-        _ = tf.pad(in_data, paddings=pad_values, mode=mode, **kwargs)
-
-        if mode == "CONSTANT":
-            if "constant_values" in kwargs:
-                out_name = "PadV2:0"
-            else:
-                out_name = "Pad:0"
-        else:
-            out_name = "MirrorPad:0"
-
-        compare_tf_with_tvm(x, "Placeholder:0", out_name)
-
-
-def test_forward_pad():
-    """Pad"""
-    _test_pad((2, 3), [[1, 1], [2, 2]], mode="CONSTANT")
-    _test_pad((2, 3), [[1, 1], [2, 2]], mode="CONSTANT", constant_values=1.0)
-    _test_pad((2, 3), [[1, 1], [2, 2]], mode="SYMMETRIC")
-    _test_pad((2, 3), [[1, 1], [2, 2]], mode="REFLECT")
-
-
-#######################################################################
-# Logical operators
-# --------------------
-
-
-def test_logical_and():
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in1")
-        in2 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in2")
-        _ = tf.logical_and(in1, in2, name="out")
-        in_data1 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool")
-        in_data2 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool")
-        compare_tf_with_tvm([in_data1, in_data2], ["in1:0", "in2:0"], "out:0")
-
-
-def test_logical_or():
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in1")
-        in2 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in2")
-        _ = tf.logical_or(in1, in2, name="out")
-        in_data1 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool")
-        in_data2 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool")
-        compare_tf_with_tvm([in_data1, in_data2], ["in1:0", "in2:0"], "out:0")
-
-
-def test_logical_xor():
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in1")
-        in2 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in2")
-        _ = tf.logical_xor(in1, in2, name="out")
-        in_data1 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool")
-        in_data2 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool")
-        compare_tf_with_tvm([in_data1, in_data2], ["in1:0", "in2:0"], "out:0")
-
-
-def test_logical_not():
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in1")
-        _ = tf.logical_not(in1, name="out")
-        in_data1 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool")
-        compare_tf_with_tvm(in_data1, "in1:0", "out:0")
-
-
-def test_forward_logical():
-    test_logical_and()
-    test_logical_or()
-    test_logical_xor()
-    test_logical_not()
-
-
-#######################################################################
-# Where, Select, SelectV2
-# -------------
-def test_forward_where():
-    """Where: return elements depending on conditions"""
-    with tf.Graph().as_default():
-        with tf.Session() as _:
-            input1 = tf.placeholder(tf.int32, shape=[1, 4, 4, 3], name="input1")
-            input2 = tf.placeholder(tf.int32, shape=[1, 4, 4, 3], name="input2")
-            mask = input1 > input2
-            tf.where(mask, input1 + 1, input2 * 2)
-            in_data1 = np.random.uniform(0, 10, size=(1, 4, 4, 3)).astype("uint32")
-            in_data2 = np.random.uniform(0, 10, size=(1, 4, 4, 3)).astype("uint32")
-            compare_tf_with_tvm([in_data1, in_data2], ["input1:0", "input2:0"], "Select:0")
-
-
-#######################################################################
-# Inception V3
-# ------------
-@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/10275")
-def test_forward_inception_v3():
-    """test inception V3 model"""
-    with tf.Graph().as_default():
-        graph_def = tf_testing.get_workload(
-            "InceptionV3/inception_v3_2016_08_28_frozen-with_shapes.pb"
-        )
-        # Call the utility to import the graph definition into default graph.
-        graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
-        data = np.random.uniform(size=(1, 299, 299, 3)).astype("float32")
-
-        with tf.Session() as sess:
-            tf_output = run_tf_graph(sess, data, "input:0", "InceptionV3/Predictions/Reshape_1:0")
-            tvm_output = run_tvm_graph(graph_def, data, "input")
-            tvm.testing.assert_allclose(tf_output[0], tvm_output[0], rtol=1e-5, atol=1e-5)
-
-
-#######################################################################
-# Inception V1
-# ------------
-
-
-def test_forward_inception_v1():
-    """test inception V1 model"""
-    with tf.Graph().as_default():
-        graph_def = tf_testing.get_workload("InceptionV1/classify_image_graph_def-with_shapes.pb")
-        # Call the utility to import the graph definition into default graph.
-        graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
-        # Build an image from random data.
-        img_array = np.random.uniform(size=(1, 600, 600, 3)).astype("uint8")
-        img = Image.frombuffer("RGB", (600, 600), img_array.tostring(), "raw", "RGB", 0, 1)
-        temp = utils.tempdir()
-        img_path = temp.relpath("tf-test.jpg")
-        img.save(img_path)
-
-        if not tf.gfile.Exists(os.path.join(img_path)):
-            tf.logging.fatal("File does not exist %s", img_path)
-        data = tf.gfile.FastGFile(os.path.join(img_path), "rb").read()
-
-        temp.remove()
-
-        # Extract tensorflow decoded image frame for tvm input
-        with tf.Session() as sess:
-            tvm_data = run_tf_graph(sess, data, "DecodeJpeg/contents:0", "DecodeJpeg:0")
-
-        with tf.Session() as sess:
-            tf_output = run_tf_graph(sess, data, "DecodeJpeg/contents:0", "softmax:0")
-            tvm_output = run_tvm_graph(graph_def, tvm_data, "DecodeJpeg/contents")
-            tvm.testing.assert_allclose(tf_output[0], tvm_output[0], rtol=1e-5, atol=1e-5)
-
-
-#######################################################################
-# Mobilenet
-# ---------
-
-
-def test_forward_mobilenet():
-    """test mobilenet model"""
-    # MobilenetV2
-    with tf.Graph().as_default():
-        graph_def = tf_testing.get_workload(
-            "https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz",
-            "mobilenet_v2_1.4_224_frozen.pb",
-        )
-        # Call the utility to import the graph definition into default graph.
-        graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
-        data = np.random.uniform(size=(1, 224, 224, 3)).astype("float32")
-        out_node = "MobilenetV2/Predictions/Reshape_1"
-
-        with tf.Session() as sess:
-            # Add shapes to the graph.
-            graph_def = tf_testing.AddShapesToGraphDef(sess, out_node)
-            tf_output = run_tf_graph(sess, data, "input:0", out_node + ":0")
-            tvm_output = run_tvm_graph(graph_def, data, "input")
-            tvm.testing.assert_allclose(
-                np.squeeze(tvm_output[0]), np.squeeze(tf_output[0]), rtol=1e-5, atol=1e-5
-            )
-
-
-#######################################################################
-# ResnetV2
-# --------
-
-
-@tvm.testing.requires_gpu
-def test_forward_resnetv2():
-    """test resnet model"""
-    if is_gpu_available():
-        with tf.Graph().as_default():
-            graph_def = tf_testing.get_workload(
-                "ResnetV2/resnet-20180601_resnet_v2_imagenet-shapes.pb"
-            )
-            # Call the utility to import the graph definition into default graph.
-            graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
-            data = np.random.uniform(size=(128, 224, 224, 3)).astype("float32")
-            out_node = "ArgMax"
-
-            with tf.Session() as sess:
-                tf_output = run_tf_graph(sess, data, "input_tensor:0", out_node + ":0")
-                for device in ["llvm", "cuda"]:
-                    _ = tvm.device(device, 0)
-                    if not tvm.testing.device_enabled(device):
-                        print(f"Skip because {device} is not enabled")
-                        continue
-                    tvm_output = run_tvm_graph(
-                        graph_def, data, "input_tensor", len(tf_output), target=device
-                    )
-                    tvm.testing.assert_allclose(
-                        np.squeeze(tvm_output[0]), np.squeeze(tf_output[0]), rtol=1e-5, atol=1e-5
-                    )
-
-
-#######################################################################
-# SSD
-# ---
-
-
-def _test_ssd_impl():
-    """Test SSD with backbone MobileNet V1"""
-    with tf.Graph().as_default():
-        graph_def = tf_testing.get_workload(
-            "object_detection/ssd_mobilenet_v1_ppn_shared_"
-            "box_predictor_300x300_coco14_sync_2018_07_03.pb"
-        )
-        # Call the utility to import the graph definition into default graph.
-        graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
-        data = np.random.uniform(0.0, 255.0, size=(1, 512, 512, 3)).astype("uint8")
-        in_node = "image_tensor"
-        out_node = ["detection_boxes", "detection_scores", "detection_classes"]
-
-        with tf.Session() as sess:
-            tf_output = run_tf_graph(
-                sess, data, f"{in_node}:0", [f"{oname}:0" for oname in out_node]
-            )
-            # TODO(kevinthesun): enable gpu test when VM heterogeneous execution is ready.
-            for device in ["llvm"]:
-                _ = tvm.device(device, 0)
-                if not tvm.testing.device_enabled(device):
-                    print(f"Skip because {device} is not enabled")
-                    continue
-                tvm_output = run_tvm_graph(
-                    graph_def,
-                    data,
-                    in_node,
-                    len(out_node),
-                    target=device,
-                    layout="NCHW",
-                    out_names=out_node,
-                    mode="vm",
-                    disabled_pass=["FoldScaleAxis"],
-                    serialize=True,
-                )
-                for i in range(len(out_node)):
-                    tvm.testing.assert_allclose(tvm_output[i], tf_output[i], rtol=1e-3, atol=1e-3)
-
-
-@pytest.mark.skip(
-    reason="Use of threading module here hides errors, see https://github.com/apache/tvm/pull/10231"
-)
-def test_forward_ssd():
-    run_thread = threading.Thread(target=_test_ssd_impl, args=())
-    old_stack_size = threading.stack_size(100 * 1024 * 1024)
-    run_thread.start()
-    run_thread.join()
-    threading.stack_size(old_stack_size)
-
-
-#######################################################################
-# Placeholder
-# -----------
-
-
-def test_forward_placeholder():
-    """test a simple pb with Placeholder node in the end of GraphDef"""
-    with tf.Graph().as_default():
-        graph_def = tf_testing.get_workload("Custom/placeholder.pb")
-        # Call the utility to import the graph definition into default graph.
-        graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
-        data = np.random.uniform(size=(1, 224, 224, 3)).astype("float32")
-        out_node = "mul"
-
-        with tf.Session() as sess:
-            # Add shapes to the graph.
-            graph_def = tf_testing.AddShapesToGraphDef(sess, out_node)
-            tf_output = run_tf_graph(sess, data, "Placeholder:0", out_node + ":0")
-            tvm_output = run_tvm_graph(graph_def, data, "Placeholder")
-            tvm.testing.assert_allclose(
-                np.squeeze(tvm_output[0]), np.squeeze(tf_output[0]), rtol=1e-5, atol=1e-5
-            )
-
-
-#######################################################################
-# PTB
-# ---
-try:
-    # Load contrib for running ptb model in tf version before 2.0
-    import tensorflow.contrib
-except ImportError:
-    pass
-
-
-def test_forward_ptb():
-    """test ptb model"""
-    config = tf_testing.get_config()
-    num_steps = config.num_steps
-    num_hidden = config.hidden_size
-    num_layers = config.num_layers
-    batch_size = config.batch_size
-    vocab_size = config.vocab_size
-    out_sample_shape = (batch_size, vocab_size)
-    out_state_shape = (batch_size, num_hidden)
-    # Sample input
-    inpt = "we have no useful information on"
-    cnt_sample = 20
-
-    def _pretty_print(items, is_char_model, id2word):
-        if not is_char_model:
-            return " ".join([id2word[x] for x in items])
-        else:
-            return "".join([id2word[x] for x in items]).replace("_", " ")
-
-    def _get_tvm_graph_module(graph_def):
-        # Cell inputs 'c and 'h' consist of all layers values
-        shape_dict = {"Model/Placeholder": (batch_size, num_steps)}
-
-        with tvm.testing.disable_span_filling():
-            mod, params = relay.frontend.from_tensorflow(
-                graph_def,
-                shape=shape_dict,
-                outputs=[
-                    "Model/Softmax:0",
-                    "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell:1",
-                    "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell:6",
-                    "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_1:1",
-                    "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_1:6",
-                ],
-            )
-        with tvm.testing.enable_span_filling():
-            mod_with_span, _ = relay.frontend.from_tensorflow(
-                graph_def,
-                shape=shape_dict,
-                outputs=[
-                    "Model/Softmax:0",
-                    "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell:1",
-                    "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell:6",
-                    "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_1:1",
-                    "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_1:6",
-                ],
-            )
-        tvm.ir.assert_structural_equal(mod["main"], mod_with_span["main"])
-
-        target = "llvm"
-        with tvm.transform.PassContext(opt_level=0):
-            graph, lib, params = relay.build(mod, target, params=params)
-
-        dev = tvm.cpu(0)
-        return params, graph_executor.create(graph, lib, dev)
-
-    def _do_tvm_sample(model, data, in_states, params, num_samples):
-        """Sampled from the model"""
-        samples = []
-        state = in_states
-        sample = None
-
-        def _get_sample(data, state):
-            input_data = np.full((batch_size, num_steps), data, dtype="int32")
-
-            model.set_input("Model/Placeholder", tvm.nd.array(input_data.astype("int32")))
-            model.set_input(
-                "Model/MultiRNNCellZeroState/LSTMBlockCellZeroState/zeros",
-                tvm.nd.array(state[0].astype("float32")),
-            )
-            model.set_input(
-                "Model/MultiRNNCellZeroState/LSTMBlockCellZeroState/zeros_1",
-                tvm.nd.array(state[1].astype("float32")),
-            )
-            model.set_input(
-                "Model/MultiRNNCellZeroState/LSTMBlockCellZeroState_1/zeros",
-                tvm.nd.array(state[2].astype("float32")),
-            )
-            model.set_input(
-                "Model/MultiRNNCellZeroState/LSTMBlockCellZeroState_1/zeros_1",
-                tvm.nd.array(state[3].astype("float32")),
-            )
-            model.set_input(**params)
-            model.run()
-            tvm_output = model.get_output(0, tvm.nd.empty(out_sample_shape, "float32")).numpy()
-
-            state_output = []
-            for i in range(4):
-                state_output.append(
-                    model.get_output(i + 1, tvm.nd.empty(out_state_shape, "float32")).numpy()
-                )
-            sample = tf_testing.pick_from_weight(tvm_output[0])
-
-            return sample, state_output
-
-        for x in data:
-            sample, state = _get_sample(x, state)
-
-        if sample is not None:
-            samples.append(sample)
-        else:
-            samples.append(0)
-
-        k = 1
-        while k < num_samples:
-            sample, state = _get_sample(samples[-1], state)
-            samples.append(sample)
-            k += 1
-        return samples, state
-
-    with tf.Graph().as_default():
-        word_to_id, id_to_word, graph_def = tf_testing.get_workload_ptb()
-        vocab_size = len(word_to_id)
-        # Call the utility to import the graph definition into default graph.
-        graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-        sess = tf.Session()
-
-    # TVM graph module creation
-    params, m = _get_tvm_graph_module(graph_def)
-
-    # Create 10 predicted statments of 20 words
-    cnt_stm = 0
-    while cnt_stm < 10:
-        cnt_stm += 1
-        in_state = [np.full((batch_size, num_hidden), 0, dtype="float32")] * 2 * num_layers
-        seed_for_sample = inpt.split()
-        tvm_samples, _ = _do_tvm_sample(
-            m, [word_to_id[word] for word in seed_for_sample], in_state, params, cnt_sample
-        )
-        tvm_sample_str = _pretty_print(tvm_samples, False, id_to_word)
-        tf_samples, _ = tf_testing.do_tf_sample(
-            sess, [word_to_id[word] for word in seed_for_sample], in_state, cnt_sample
-        )
-        tf_sample_str = _pretty_print(tf_samples, False, id_to_word)
-        inpt = tvm_sample_str
-        tvm.testing.assert_allclose(tf_samples, tvm_samples, rtol=1e-5, atol=1e-5)
-        assert tvm_sample_str == tf_sample_str
-
-
-#######################################################################
-# LRN (Local Response Normalization)
-# ----------------------------------
-
-
-def _test_lrn(ishape, size, axis, bias, alpha, beta):
-    """testing local response normalization"""
-    lrn_depth_radius = size / 2
-
-    inp_array = np.random.uniform(size=ishape).astype(np.float32)
-
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype, name="lrn0_data")
-        nn_ops.local_response_normalization(
-            in1, name="lrn", depth_radius=lrn_depth_radius, bias=bias, alpha=alpha, beta=beta
-        )
-
-        compare_tf_with_tvm(inp_array, "lrn0_data:0", "lrn:0")
-
-
-def test_forward_lrn():
-    _test_lrn((1, 3, 20, 20), 3, 1, 1.0, 1.0, 0.5)
-
-
-#######################################################################
-# l2_normalize
-# ------------
-
-
-def _test_l2_normalize(ishape, eps, axis):
-    """testing l2 normalize (uses max, sum, square, sqrt frontend operators)"""
-
-    inp_array = np.random.uniform(size=ishape).astype(np.float32)
-
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-        nn.l2_normalize(in1, axis=axis, epsilon=eps, name=None, dim=None)
-
-        compare_tf_with_tvm(inp_array, "Placeholder:0", "l2_normalize:0")
-
-
-def test_forward_l2_normalize():
-    _test_l2_normalize((1, 3, 20, 20), 0.001, (0,))
-
-
-#######################################################################
-# transpose
-# ---------
-
-
-def _test_forward_transpose(ishape, axes=None):
-    data = np.random.uniform(size=ishape).astype(np.float32)
-
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=data.shape, dtype=data.dtype, name="transpose_data")
-
-        if axes is None:
-            tf.transpose(in1)
-        else:
-            tf.transpose(in1, perm=axes)
-
-        compare_tf_with_tvm(data, "transpose_data:0", "transpose:0")
-
-
-def _test_forward_tranapose_axes_input(ishape, axes):
-    data = np.random.uniform(size=ishape).astype(np.float32)
-    axes_np = np.array(axes).astype(np.int32)
-
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=data.shape, dtype=data.dtype, name="transpose_data")
-
-        const1 = tf.constant(axes_np, dtype=tf.int32)
-
-        # make axes an input to tf.transpose, but not an input to the graph,
-        # so it can be extracted with infer_value_simulated
-        axes = tf.reverse(const1, axis=[-1])
-        tf.transpose(in1, axes)
-
-        compare_tf_with_tvm([data], ["transpose_data:0"], "transpose:0")
-
-
-def test_forward_transpose():
-    _test_forward_transpose((2, 3, 4), (1, 2, 0))
-    _test_forward_transpose((2, 3, 4))
-    _test_forward_transpose((7, 8, 8, 10))
-    _test_forward_transpose((2, 3, 4), (1, 2, 0))
-    _test_forward_transpose((2, 3, 4), (0, 1, 2))
-    _test_forward_transpose((2, 3, 4, 5), (3, 0, 1, 2))
-    _test_forward_tranapose_axes_input((2, 3, 4), (1, 2, 0))
-    _test_forward_tranapose_axes_input((2, 3, 4, 5), (3, 0, 1, 2))
-
-
-def _test_forward_slice_operation_input(input_value, begin_value, size_value):
-    input_data = np.array(input_value, dtype=np.float32)
-    with tf.Graph().as_default():
-        input_tensor = tf.placeholder(shape=input_data.shape, dtype=input_data.dtype, name="input")
-        tf.slice(input_tensor, begin_value, size_value, name="slice_output")
-        compare_tf_with_tvm([input_data], ["input:0"], "slice_output:0")
-
-
-def test_forward_slice():
-    _test_forward_slice_operation_input([1, 1], [0], [2])
-    _test_forward_slice_operation_input([0, 1, 2, 3], [3], [-1])
-    _test_forward_slice_operation_input(
-        [[0, 1, 2, 3], [4, 5, 6, 7]], begin_value=[0, 1], size_value=[-1, -1]
-    )
-
-
-def test_forward_ceil():
-    ishape = (1, 3, 10, 10)
-    inp_array = np.random.uniform(size=ishape).astype(np.float32)
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-        tf.ceil(in1)
-        compare_tf_with_tvm(inp_array, "Placeholder:0", "Ceil:0")
-
-
-def test_forward_floor():
-    ishape = (1, 3, 10, 10)
-    inp_array = np.random.uniform(size=ishape).astype(np.float32)
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-        tf.floor(in1)
-        compare_tf_with_tvm(inp_array, "Placeholder:0", "Floor:0")
-
-
-def test_forward_relu():
-    ishape = (1, 3, 10, 10)
-    inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
-    for mode in ["graph_executor", "vm"]:
-        with tf.Graph().as_default():
-            in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-            tf.nn.relu(in1)
-            compare_tf_with_tvm(inp_array, "Placeholder:0", "Relu:0", mode=mode)
-
-
-def test_forward_leaky_relu():
-    ishape = (1, 3, 10, 10)
-    inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
-    for mode in ["graph_executor", "vm"]:
-        with tf.Graph().as_default():
-            in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-            tf.nn.leaky_relu(in1, alpha=0.4)
-            compare_tf_with_tvm(inp_array, "Placeholder:0", "LeakyRelu:0", mode=mode)
-
-
-def test_forward_elu():
-    ishape = (1, 3, 10, 10)
-    inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-        tf.nn.elu(in1)
-        compare_tf_with_tvm(inp_array, "Placeholder:0", "Elu:0")
-
-
-def test_forward_selu():
-    ishape = (1, 3, 10, 10)
-    inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-        tf.nn.selu(in1)
-        compare_tf_with_tvm(inp_array, "Placeholder:0", "Selu:0")
-
-
-def test_forward_tanh():
-    ishape = (1, 3, 10, 10)
-    inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-        tf.nn.tanh(in1)
-        compare_tf_with_tvm(inp_array, "Placeholder:0", "Tanh:0")
-
-
-#######################################################################
-# Softmax
-# -------
-def test_forward_softmax():
-    """test operator Softmax"""
-
-    def check_softmax(in_shape, axis, dtype):
-        np_data = np.random.uniform(-100, 100, size=in_shape).astype(dtype)
-        tf.reset_default_graph()
-        with tf.Graph().as_default():
-            in_data = tf.placeholder(dtype, in_shape, name="in_data")
-            tf.nn.softmax(in_data, axis=axis, name="Softmax")
-            compare_tf_with_tvm([np_data], ["in_data:0"], "Softmax:0")
-
-    check_softmax((2, 3, 5), 2, "float32")
-    check_softmax((2, 3, 5), -1, "float32")
-
-
-#######################################################################
-# Tensor
-# ------
-
-
-def test_forward_round():
-    """test Round"""
-    np_data = np.random.uniform(-10, 10, size=(5, 7)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (5, 7), name="in_data")
-        tf.round(in_data, name="round")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "round:0")
-
-
-def test_forward_abs():
-    """test operator Abs"""
-    np_data = np.random.uniform(1, 100, size=(9, 11)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (9, 11), name="in_data")
-        tf.math.abs(in_data, name="abs")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "abs:0")
-
-
-def _test_forward_zeros_like(in_shape, dtype):
-    np_data = np.random.uniform(-10, 10, size=in_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, in_shape, name="in_data")
-        tf.zeros_like(in_data, name="zeros_like")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "zeros_like:0")
-
-
-def test_forward_zeros_like():
-    if package_version.parse(tf.__version__) < package_version.parse("1.2"):
-        _test_forward_zeros_like((2, 3), "int32")
-        _test_forward_zeros_like((2, 3, 5), "int8")
-        _test_forward_zeros_like((2, 3, 5, 7), "uint16")
-        _test_forward_zeros_like((2, 3, 11), "float32")
-        _test_forward_zeros_like((2, 3, 11), "float64")
-
-
-def test_forward_squared_difference():
-    ishape = (1, 3, 10, 14)
-    inp_array_a = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
-    inp_array_b = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array_a.shape, dtype=inp_array_a.dtype, name="in1")
-        in2 = tf.placeholder(shape=inp_array_b.shape, dtype=inp_array_b.dtype, name="in2")
-        out = tf.math.squared_difference(in1, in2)
-        compare_tf_with_tvm([inp_array_a, inp_array_b], [in1.name, in2.name], out.name)
-
-
-def _test_forward_reverse_v2(in_shape, axis, dtype):
-    np_data = np.random.uniform(-10, 10, size=in_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, in_shape, name="in_data")
-        tf.reverse(in_data, axis=[axis], name="reverse")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "reverse:0")
-
-
-def test_forward_reverse_v2():
-    """test ReverseV2"""
-    _test_forward_reverse_v2((2, 3), 0, "int32")
-    _test_forward_reverse_v2((2, 3, 5), 2, "float32")
-    _test_forward_reverse_v2((2, 3, 5, 7), 1, "float32")
-    _test_forward_reverse_v2((2, 3, 5), -1, "float64")
-    _test_forward_reverse_v2((2, 3, 5), -3, "float64")
-
-
-def test_forward_sign():
-    """test Sign"""
-    np_data = np.random.uniform(-10, 10, size=(5, 7, 11)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (5, 7, 11), name="in_data")
-        tf.sign(in_data, name="sign")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "sign:0")
-
-
-def test_forward_square():
-    """test operator Square"""
-    np_data = np.random.uniform(1, 100, size=(2, 3, 5)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (2, 3, 5), name="in_data")
-        tf.square(in_data, name="square")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "square:0")
-
-
-def test_forward_pow_exp():
-    """test Pow and Exp"""
-    np_in1 = np.random.uniform(-2, 2, size=(5, 7, 11)).astype(np.float32)
-    np_in2 = np.random.uniform(-2, 2, size=(5, 7, 11)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(tf.float32, (5, 7, 11), name="in1")
-        in2 = tf.placeholder(tf.float32, (5, 7, 11), name="in2")
-        _ = tf.pow(in1, in2, name="pow")
-        _ = tf.exp(in1, name="exp")
-        compare_tf_with_tvm([np_in1, np_in2], ["in1:0", "in2:0"], "pow:0")
-        compare_tf_with_tvm([np_in1], ["in1:0"], "exp:0")
-
-
-def test_forward_unary():
-    """Unary"""
-
-    def _test_forward_unary(op, a_min=1, a_max=5, dtype=np.float32):
-        """test unary operators"""
-        np_data = np.random.uniform(a_min, a_max, size=(2, 3, 5)).astype(dtype)
-        tf.reset_default_graph()
-        with tf.Graph().as_default():
-            in_data = tf.placeholder(dtype, (2, 3, 5), name="in_data")
-            out = op(in_data)
-            compare_tf_with_tvm([np_data], ["in_data:0"], out.name)
-
-    _test_forward_unary(tf.acos, -1, 1)
-    _test_forward_unary(tf.asin, -1, 1)
-    _test_forward_unary(tf.atanh, -1, 1)
-    _test_forward_unary(tf.sinh)
-    _test_forward_unary(tf.cosh)
-    _test_forward_unary(tf.acosh)
-    _test_forward_unary(tf.asinh)
-    _test_forward_unary(tf.atan)
-    _test_forward_unary(tf.sin)
-    _test_forward_unary(tf.cos)
-    _test_forward_unary(tf.tan)
-    _test_forward_unary(tf.tanh)
-    _test_forward_unary(tf.erf)
-    _test_forward_unary(tf.log)
-    _test_forward_unary(tf.log1p)
-
-
-def test_forward_atan2():
-    """test operator tan"""
-    tf.disable_eager_execution()
-    np_data_1 = np.random.uniform(1, 100, size=(2, 3, 5)).astype(np.float32)
-    np_data_2 = np.random.uniform(1, 100, size=(2, 3, 5)).astype(np.float32)
-    tf.reset_default_graph()
-    in_data_1 = tf.placeholder(tf.float32, (2, 3, 5), name="in_data_1")
-    in_data_2 = tf.placeholder(tf.float32, (2, 3, 5), name="in_data_2")
-    tf.atan2(in_data_1, in_data_2, name="atan2")
-    compare_tf_with_tvm([np_data_1, np_data_2], ["in_data_1:0", "in_data_2:0"], "atan2:0")
-
-
-def test_forward_expm1():
-    """test operator expm1"""
-
-    def _test_forward_expm1(shape):
-        tf.disable_eager_execution()
-        np_data = np.random.uniform(1, 10, size=shape).astype(np.float32)
-        tf.reset_default_graph()
-        in_data = tf.placeholder(tf.float32, shape, name="in_data")
-        tf.expm1(in_data, name="expm1")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "expm1:0")
-
-    _test_forward_expm1([1, 100])
-    _test_forward_expm1([1, 10, 10])
-    _test_forward_expm1([2, 5, 2, 5])
-
-
-def test_forward_softsign():
-    """test operator softsign"""
-
-    def _test_forward_softsign(shape):
-        tf.disable_eager_execution()
-        np_data = np.random.uniform(1, 100, size=shape).astype(np.float32)
-        tf.reset_default_graph()
-        in_data = tf.placeholder(tf.float32, shape, name="in_data")
-        tf.nn.softsign(in_data, name="softsign")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "softsign:0")
-
-    _test_forward_softsign([1, 100])
-    _test_forward_softsign([1, 10, 10])
-    _test_forward_softsign([2, 5, 2, 5])
-
-
-def test_forward_rint():
-    """test operator rint"""
-
-    def _test_forward_rint(shape):
-        tf.disable_eager_execution()
-        np_data = np.random.uniform(-100, 100, size=shape).astype(np.float32)
-        tf.reset_default_graph()
-        in_data = tf.placeholder(tf.float32, shape, name="in_data")
-        tf.math.rint(in_data, name="rint")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "rint:0")
-
-    _test_forward_rint([100])
-    _test_forward_rint([1, 100])
-    _test_forward_rint([1, 10, 10])
-    _test_forward_rint([2, 5, 2, 5])
-
-
-def test_forward_negative():
-    """test tf operator Neg"""
-    np_data = np.random.uniform(-100, 255, size=(224, 224, 3)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (224, 224, 3), name="in_data")
-        tf.negative(in_data, name="negative")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "negative:0")
-
-
-def test_forward_log_softmax():
-    """test operator LogSoftmax"""
-    np_data = np.random.uniform(1, 100, size=(9, 11)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (9, 11), name="in_data")
-        tf.math.log_softmax(in_data, name="LogSoftmax")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "LogSoftmax:0")
-
-
-def test_forward_softplus():
-    """test operator Softplus"""
-    np_data = np.random.uniform(1, 10, size=(2, 3, 5)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (2, 3, 5), name="in_data")
-        tf.nn.softplus(in_data, name="softplus")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "softplus:0")
-
-
-def test_forward_rsqrt():
-    """test Rsqrt"""
-    np_data = np.random.uniform(1, 100, size=(5, 7, 11)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (5, 7, 11), name="in_data")
-        tf.rsqrt(in_data, name="rsqrt")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "rsqrt:0")
-
-
-def test_forward_sqrt():
-    """test Sqrt"""
-    np_data = np.random.uniform(1, 100, size=(5, 7, 11)).astype(np.float32)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, (5, 7, 11), name="in_data")
-        tf.sqrt(in_data, name="sqrt")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "sqrt:0")
-
-
-def _test_forward_right_shift(in_shape, dtype):
-    """test operator RightShift"""
-    lh_data = np.random.randint(1, 3, size=in_shape).astype(dtype)
-    rh_data = np.random.randint(1, 8, size=in_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        lft_data = tf.placeholder(dtype, in_shape, name="lft_data")
-        rgt_data = tf.placeholder(dtype, in_shape, name="rgt_data")
-        tf.bitwise.right_shift(lft_data, rgt_data, name="RightShift")
-        compare_tf_with_tvm([lh_data, rh_data], ["lft_data:0", "rgt_data:0"], "RightShift:0")
-
-
-def test_forward_right_shift():
-    _test_forward_right_shift((7,), "int32")
-    _test_forward_right_shift((3, 11), "int16")
-
-
-def _test_forward_left_shift(in_shape, dtype):
-    """test operator LeftShift"""
-    lh_data = np.random.randint(100, 1000000, size=in_shape).astype(dtype)
-    rh_data = np.random.randint(1, 3, size=in_shape).astype(dtype)
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        lft_data = tf.placeholder(dtype, in_shape, name="lft_data")
-        rgt_data = tf.placeholder(dtype, in_shape, name="rgt_data")
-        tf.bitwise.left_shift(lft_data, rgt_data, name="LeftShift")
-        compare_tf_with_tvm([lh_data, rh_data], ["lft_data:0", "rgt_data:0"], "LeftShift:0")
-
-
-def test_forward_left_shift():
-    _test_forward_left_shift((10,), "int32")
-    _test_forward_left_shift((224, 224, 3), "int16")
-
-
-#######################################################################
-# Mean
-# ----
-
-
-def test_forward_mean():
-    """Mean"""
-
-    def check_mean(ishape, **kwargs):
-        inp_array = np.random.uniform(size=ishape).astype(np.float32)
-        with tf.Graph().as_default():
-            in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
-            tf.keras.backend.mean(in1, **kwargs)
-            compare_tf_with_tvm(inp_array, "Placeholder:0", "Mean:0", no_gpu=True)
-
-    check_mean((10, 8, 16, 32))
-    check_mean((10, 8, 16, 32), axis=(2, 3))
-    check_mean((10, 8, 16, 32), axis=(1, 2), keepdims=True)
-
-
-#######################################################################
-# Size
-# ----
-
-
-def test_forward_size():
-    """Size"""
-
-    def check_size(ishape):
-        np_input = np.random.uniform(size=ishape).astype(np.float32)
-
-        # if all dimensions are constant, TF will optimize away size operator into constant
-        tf_input_shape = list(np_input.shape)
-        tf_input_shape[0] = None
-
-        with tf.Graph().as_default():
-            tf_input = tf.placeholder(shape=tf_input_shape, dtype=np_input.dtype, name="input")
-            tf.size(tf_input, name="size")
-            compare_tf_with_tvm([np_input], ["input:0"], "size:0")
-
-    check_size((10, 8, 16, 32))
-    check_size((10,))
-
-
-#######################################################################
-# All, Any, Max, Min, Prod, variance, std, logsumexp, euclidean_norm
-# ------------------------------------------------------------------
-
-
-def test_forward_reduce():
-    """Reduce"""
-
-    def _check_op(tf_op, ishape, axis, keepdims, dtype="float32"):
-        tf.reset_default_graph()
-        if dtype == "bool":
-            np_data = np.random.choice([True, False], size=ishape)
-        else:
-            np_data = np.random.uniform(size=ishape).astype(dtype)
-        if tf_op == tf.math.reduce_prod:
-            axis = 1
-            np_data = np_data.reshape(1, -1)
-        with tf.Graph().as_default():
-            in_data = tf.placeholder(dtype, name="in_data")
-            reduce_op = tf_op(in_data, axis=axis, keepdims=keepdims, name="reduce_std")
-            compare_tf_with_tvm([np_data], ["in_data:0"], reduce_op.name)
-
-    def _test_math_op(op, d_types=None):
-        d_types = d_types or ["int32", "float32"]
-        for dtype in d_types:
-            _check_op(op, (3, 10), axis=(-1), keepdims=False, dtype=dtype)
-            _check_op(op, (8, 16, 32), axis=(-1), keepdims=False, dtype=dtype)
-            _check_op(op, (1, 8, 8, 3), axis=(2, 3), keepdims=True, dtype=dtype)
-            _check_op(op, (2, 3, 10, 10), axis=(1, 2), keepdims=True, dtype=dtype)
-
-    _test_math_op(tf.math.reduce_all, d_types=["bool"])
-    _test_math_op(tf.math.reduce_any, d_types=["bool"])
-    _test_math_op(tf.math.reduce_max)
-    _test_math_op(tf.math.reduce_min)
-    _test_math_op(tf.math.reduce_prod)
-    _test_math_op(tf.math.reduce_variance, d_types=["float32"])
-    _test_math_op(tf.math.reduce_std, d_types=["float32"])
-    _test_math_op(tf.math.reduce_logsumexp, d_types=["float32"])
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"):
-        _test_math_op(tf.math.reduce_euclidean_norm)
-
-
-#######################################################################
-# All, Max, Min
-# ------------------------------------------------------------------
-
-
-def test_forward_raw_reduce():
-    """Raw reduce"""
-
-    def _check_op(tf_op, ishape, axis, keepdims, range_axis=False, dtype="float32"):
-        tf.reset_default_graph()
-        if dtype == "bool":
-            np_data = np.random.choice([True, False], size=ishape)
-        else:
-            np_data = np.random.uniform(size=ishape).astype(dtype)
-        if tf_op == tf.math.reduce_prod:
-            axis = 1
-            np_data = np_data.reshape(1, -1)
-        with tf.Graph().as_default():
-            if range_axis:
-                axis = tf.range(axis[0], axis[1], axis[2], name="range", dtype="int32")
-            in_data = tf.placeholder(dtype, name="in_data")
-            reduce_op = tf_op(input=in_data, axis=axis, keep_dims=keepdims, name="reduce_std")
-            compare_tf_with_tvm([np_data], ["in_data:0"], reduce_op.name)
-
-    def _test_raw_reduce_op(op, d_types=None):
-        d_types = d_types or ["int32", "float32"]
-        for dtype in d_types:
-            _check_op(op, (3, 10), axis=(-1), keepdims=False, dtype=dtype)
-            _check_op(op, (8, 16, 32), axis=(-1), keepdims=False, dtype=dtype)
-            _check_op(op, (1, 8, 8, 3), axis=(2, 3), keepdims=True, dtype=dtype)
-            _check_op(op, (2, 3, 10, 10), axis=(1, 2), keepdims=True, dtype=dtype)
-            _check_op(op, (1, 8, 8, 3), axis=(2, 4, 1), keepdims=True, range_axis=True, dtype=dtype)
-            _check_op(
-                op, (2, 3, 10, 10), axis=(1, 3, 1), keepdims=True, range_axis=True, dtype=dtype
-            )
-
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.4.1"):
-        _test_raw_reduce_op(tf.raw_ops.All, d_types=["bool"])
-        _test_raw_reduce_op(tf.raw_ops.Max)
-        _test_raw_reduce_op(tf.raw_ops.Min)
-
-
-#######################################################################
-# Relational operators
-# --------------------
-
-
-def _test_forward_rel_op(data, func):
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=data[0].shape, dtype=data[0].dtype, name="in1")
-        in2 = tf.placeholder(shape=data[1].shape, dtype=data[1].dtype, name="in2")
-        op = func(in1, in2, name="op")
-        _ = tf.cast(op, tf.int32, name="out1")
-        compare_tf_with_tvm([data[0], data[1]], ["in1:0", "in2:0"], "out1:0")
-
-
-def test_forward_rel_ops():
-    t1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
-    t2 = np.array([[9, 8, 7], [6, 5, 4], [3, 2, 1]])
-    _test_forward_rel_op([t1, t2], math_ops.less)
-    _test_forward_rel_op([t1, t2], math_ops.greater)
-    _test_forward_rel_op([t1, t2], math_ops.less_equal)
-    _test_forward_rel_op([t1, t2], math_ops.greater_equal)
-    _test_forward_rel_op([t1, t2], math_ops.equal)
-    _test_forward_rel_op([t1, t2], math_ops.not_equal)
-
-
-#######################################################################
-# ExpandDims
-# ----------
-
-
-def _test_forward_expand_dims(data, axis):
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=data.shape, dtype=data.dtype, name="in1")
-        out = tf.expand_dims(in1, axis)
-        compare_tf_with_tvm([data], [in1.name], out.name)
-
-
-def test_forward_expand_dims():
-    _test_forward_expand_dims(np.int32(1), 0)
-    _test_forward_expand_dims(np.array([1]), 0)
-    _test_forward_expand_dims(np.array([1]), -1)
-    _test_forward_expand_dims(np.array([[1], [2]]), 0)
-    _test_forward_expand_dims(np.array([[1], [2]]), 1)
-    _test_forward_expand_dims(np.array([[1], [2]]), -1)
-
-
-#######################################################################
-# Maximum, Minimum
-# ----------------
-def test_forward_maximum():
-    """test Op Maximum"""
-
-    def check_maximum(lh_shape, rh_shape, dtype):
-        tf.reset_default_graph()
-        lh_data = np.random.uniform(size=lh_shape).astype(dtype)
-        rh_data = np.random.uniform(size=rh_shape).astype(dtype)
-        with tf.Graph().as_default():
-            lft_data = tf.placeholder(dtype, name="lft_data")
-            rgt_data = tf.placeholder(dtype, name="rgt_data")
-            tf.math.maximum(lft_data, rgt_data, name="maximum")
-            compare_tf_with_tvm([lh_data, rh_data], ["lft_data:0", "rgt_data:0"], "maximum:0")
-
-    check_maximum((10, 8, 16, 32), (1,), dtype="int32")
-    check_maximum((10, 8, 16, 32), (10, 8, 16, 32), dtype="float32")
-
-
-def test_forward_minimum():
-    """test Op Minimum"""
-
-    def check_minimum(lh_shape, rh_shape, dtype):
-        tf.reset_default_graph()
-        lh_data = np.random.uniform(size=lh_shape).astype(dtype)
-        rh_data = np.random.uniform(size=rh_shape).astype(dtype)
-        with tf.Graph().as_default():
-            lft_data = tf.placeholder(dtype, name="lft_data")
-            rgt_data = tf.placeholder(dtype, name="rgt_data")
-            tf.math.minimum(lft_data, rgt_data, name="minimum")
-            compare_tf_with_tvm([lh_data, rh_data], ["lft_data:0", "rgt_data:0"], "minimum:0")
-
-    check_minimum((10, 8, 16, 32), (1,), dtype="int32")
-    check_minimum((10, 8, 16, 32), (10, 8, 16, 32), dtype="float32")
-
-
-#######################################################################
-# PlaceholderWithDefault
-# ----------------------
-def test_placeholder():
-    """Placeholder"""
-    with tf.Graph().as_default():
-        in_data1 = np.random.uniform(-5, 5, size=(3, 4, 5)).astype(np.float32)
-        var1 = tf.Variable(in_data1, name="in1")
-        var2 = array_ops.placeholder_with_default(var1, None, name="place1")
-
-        in_data2 = np.random.uniform(-5, 5, size=(3, 4, 5)).astype(np.float32)
-        place1 = array_ops.placeholder(shape=in_data1.shape, dtype=in_data1.dtype, name="in2")
-
-        out1 = tf.math.add(var1, var2, name="out1")
-        _ = tf.math.add(out1, place1, name="out2")
-
-        compare_tf_with_tvm(
-            [in_data1, in_data2], ["place1:0", "in2:0"], "out2:0", init_global_variables=True
-        )
-
-
-#######################################################################
-# OneHot
-# ----------------------
-
-
-def _test_forward_one_hot(indices_shape, depth, on_value, off_value, axis, out_dtype):
-    inp_array1 = np.random.randint(0, 5, size=indices_shape)
-    with tf.Graph().as_default():
-        in1 = tf.placeholder(shape=inp_array1.shape, dtype=inp_array1.dtype)
-        out = tf.one_hot(in1, depth, on_value, off_value, axis, dtype=out_dtype)
-        compare_tf_with_tvm(inp_array1, in1.name, out.name)
-
-
-def test_forward_one_hot():
-    _test_forward_one_hot((3,), 3, 1, 0, -1, "int32")
-    _test_forward_one_hot((3,), 3, 1.0, 0.0, -1, "float32")
-    _test_forward_one_hot((2, 2), 5, 2, -2, 0, "int32")
-    _test_forward_one_hot((2, 2), 5, 0.5, -0.5, 1, "float32")
-    _test_forward_one_hot((3, 2, 4, 5), 6, 1, 0, 1, "int32")
-    _test_forward_one_hot((3, 2, 4, 5), 6, 1.0, 0.0, 0, "float32")
-
-
-#######################################################################
-# AddN
-# ----------------------
-
-
-def _test_forward_add_n(inputs):
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        temp = []
-        for each in inputs:
-            temp.append(tf.placeholder(shape=each.shape, dtype=each.dtype))
-        output = tf.add_n(temp)
-        compare_tf_with_tvm(list(inputs), [each.name for each in temp], output.name)
-
-
-def test_forward_add_n():
-    """Add n"""
-    x = np.random.randint(1, 100, size=(3, 3, 3), dtype=np.int32)
-    y = np.random.randint(1, 100, size=(3, 3, 3), dtype=np.int32)
-    z = np.random.randint(1, 100, size=(3, 3, 3), dtype=np.int32)
-    m, n, o = x.astype(np.float32), y.astype(np.float32), z.astype(np.float32)
-    in0 = x
-    in1 = [x, y]
-    in2 = (x, y, z)
-    in3 = m
-    in4 = [m, n]
-    in5 = (m, n, o)
-    _test_forward_add_n(in0)
-    _test_forward_add_n(in1)
-    _test_forward_add_n(in2)
-    _test_forward_add_n(in3)
-    _test_forward_add_n(in4)
-    _test_forward_add_n(in5)
-
-
-#######################################################################
-# Sharing params case
-# ----------------------
-
-
-def test_sharing_node():
-    """Test the sharing params case."""
-    np_data = np.random.uniform(size=(2, 2, 2)).astype("float32")
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(tf.float32, shape=(2, 2, 2), name="in_data")
-        axis = tf.constant([-1], dtype=tf.int32, name="axis")
-        mean0 = tf.reduce_mean(in_data, axis=axis, keepdims=False, name="mean0")
-        mean1 = tf.reduce_mean(in_data, axis=axis, keepdims=False, name="mean1")
-        _ = tf.add(mean0, mean1, name="out")
-        compare_tf_with_tvm([np_data], ["in_data:0"], "out:0")
-
-
-#######################################################################
-# Unravel Index
-# ----------------------
-def _test_forward_unravel_index(inputs):
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        temp = []
-        for each in inputs:
-            temp.append(tf.placeholder(shape=each.shape, dtype=each.dtype))
-        output = tf.unravel_index(temp[0], temp[1])
-        compare_tf_with_tvm(list(inputs), [each.name for each in temp], output.name)
-
-
-def _test_forward_unravel_index_scalar(x, y, dtype="int32"):
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        indices_1 = constant_op.constant(x, dtype=dtype)
-        dims_1 = constant_op.constant(y, dtype=dtype)
-        out_1 = array_ops.unravel_index(indices_1, dims_1)
-        compare_tf_with_tvm([], [], out_1.name)
-
-
-def test_forward_unravel_index():
-    """Unravel index"""
-    x = np.array([0, 1, 2, 3])
-    y = np.array([2, 2])
-    _test_forward_unravel_index([x, y])
-
-    x = np.array([0, 1, 2, 5])
-    y = np.array([2, 3])
-    _test_forward_unravel_index([x, y])
-
-    x = np.array([0, 1, 2, 5])
-    y = np.array([6])
-    _test_forward_unravel_index([x, y])
-
-    x = np.array([102, 300, 16])
-    y = np.array([10, 10, 9, 6])
-    _test_forward_unravel_index([x, y])
-
-    x = np.array([100])
-    y = np.array([10, 10, 9, 6])
-    _test_forward_unravel_index([x, y])
-
-    # Test scalar input
-    _test_forward_unravel_index_scalar(13, [1, 4, 5, 2])
-
-
-#######################################################################
-# Dilation2d
-# ----------------------
-def _test_dilation2d(tensor_in_sizes, filter_in_sizes, strides, dilations, padding):
-    """One iteration of dilation2d with given shapes and attributes"""
-
-    total_size_1 = np.prod(tensor_in_sizes)
-    total_size_2 = np.prod(filter_in_sizes)
-    # Initializes the input tensor with array containing incrementing
-    # numbers from 1.
-    data_array = [f * 1.0 for f in range(1, total_size_1 + 1)]
-    filter_array = [f * 1.0 for f in range(1, total_size_2 + 1)]
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32")
-        in_filter = constant_op.constant(filter_array, shape=filter_in_sizes, dtype="float32")
-
-        nn_ops.dilation2d(in_data, in_filter, strides=strides, rates=dilations, padding=padding)
-
-        compare_tf_with_tvm(
-            np.reshape(data_array, tensor_in_sizes).astype("float32"),
-            "Placeholder:0",
-            "Dilation2D:0",
-            no_gpu=True,
-        )
-
-
-def test_forward_dilation():
-    """Dilation2d"""
-    _test_dilation2d([1, 18, 18, 32], [4, 4, 32], [1, 1, 1, 1], [1, 2, 1, 1], "VALID")
-    _test_dilation2d([1, 15, 15, 32], [4, 4, 32], [1, 1, 1, 1], [1, 2, 1, 1], "SAME")
-    _test_dilation2d([1, 5, 5, 1], [2, 2, 1], [1, 1, 1, 1], [1, 1, 1, 1], "VALID")
-    _test_dilation2d([1, 5, 5, 1], [3, 3, 1], [1, 1, 1, 1], [1, 2, 2, 1], "VALID")
-    _test_dilation2d([1, 5, 5, 3], [3, 3, 3], [1, 1, 1, 1], [1, 1, 1, 1], "SAME")
-    _test_dilation2d([1, 28, 28, 3], [5, 5, 3], [1, 2, 2, 1], [1, 1, 1, 1], "VALID")
-    _test_dilation2d([1, 224, 224, 10], [8, 8, 10], [1, 1, 1, 1], [1, 1, 1, 1], "VALID")
-    _test_dilation2d([1, 18, 18, 32], [4, 4, 32], [1, 1, 1, 1], [1, 2, 1, 1], "SAME")
-    _test_dilation2d([1, 15, 15, 32], [4, 4, 32], [1, 1, 1, 1], [1, 2, 1, 1], "VALID")
-    _test_dilation2d([1, 5, 5, 1], [7, 2, 1], [1, 3, 1, 1], [1, 1, 1, 1], "SAME")
-    _test_dilation2d([1, 5, 5, 1], [3, 4, 1], [1, 2, 1, 1], [1, 2, 2, 1], "SAME")
-    _test_dilation2d([1, 5, 5, 3], [3, 3, 3], [1, 1, 4, 1], [1, 1, 1, 1], "VALID")
-    _test_dilation2d([1, 28, 28, 3], [5, 6, 3], [1, 1, 2, 1], [1, 1, 1, 1], "SAME")
-    _test_dilation2d([1, 224, 224, 10], [8, 8, 10], [1, 3, 1, 1], [1, 1, 1, 1], "SAME")
-    _test_dilation2d([1, 3, 3, 1], [2, 2, 1], [1, 1, 1, 1], [1, 2, 2, 1], "SAME")
-    _test_dilation2d([1, 3, 3, 1], [2, 2, 1], [1, 1, 1, 1], [1, 1, 2, 1], "VALID")
-
-
-def _test_identityn(data_np_list):
-    with tf.Graph().as_default():
-        data_tensors = []
-        data_tensors_name = []
-        for index, data_np in enumerate(data_np_list):
-            tensor_name = f"data_{index}"
-            data_tensors_name.append(tensor_name + ":0")
-            data_tensors.append(
-                tf.placeholder(shape=data_np.shape, dtype=str(data_np.dtype), name=tensor_name)
-            )
-
-        output = tf.identity_n(data_tensors)
-        output_names = [out.name for out in output]
-        compare_tf_with_tvm(
-            data_np_list,
-            data_tensors_name,
-            output_names,
-        )
-
-
-@pytest.mark.parametrize(
-    "data_np_list",
-    [
-        (
-            [
-                np.array([[1, 1], [0, 3], [0, 1], [2, 0], [3, 1]], dtype=np.int64),
-                np.array([1, 2, 3, 4, 5], dtype=np.int64),
-                np.array([5, 6], dtype=np.int64),
-            ]
-        ),
-        (
-            [
-                np.array([[1, 1], [0, 3], [2, 0], [3, 1]], dtype=np.int64),
-                np.array([1, 2, 3, 4], dtype=np.int64),
-                np.array([5, 6], dtype=np.int64),
-                np.array([True, False, True]),
-            ]
-        ),
-        (
-            [
-                np.array([]),
-                np.array([[]]),
-            ]
-        ),
-    ],
-)
-def test_forward_identityn(data_np_list):
-    """Identityn"""
-    _test_identityn(data_np_list)
-
-
-#######################################################################
-# infinity ops
-# ------------
-def _verify_infiniteness_ops(tf_op, name):
-    """test operator infinity ops"""
-
-    # Only float types are allowed in Tensorflow for isfinite and isinf
-    # float16 is failing on cuda
-    tf_dtypes = ["float32", "float64"]  # pylint: disable=redefined-outer-name
-    for tf_dtype in tf_dtypes:
-        shape = (8, 8)
-        data = np.random.uniform(size=shape).astype(tf_dtype)
-        data.ravel()[np.random.choice(data.size, int(data.size * 0.5), replace=False)] = np.inf
-        data.ravel()[np.random.choice(data.size, int(data.size * 0.5), replace=False)] = np.nan
-
-        tf.reset_default_graph()
-        in_data = tf.placeholder(tf_dtype, shape, name="in_data")
-        tf_op(in_data, name=name)
-        compare_tf_with_tvm([data], ["in_data:0"], f"{name}:0")
-
-
-def test_forward_isinf():
-    _verify_infiniteness_ops(tf.is_inf, "isinf")
-
-
-def test_forward_isfinite():
-    _verify_infiniteness_ops(tf.is_finite, "isfinite")
-
-
-def test_forward_isnan():
-    _verify_infiniteness_ops(tf.is_nan, "isnan")
-
-
-def _test_spop_placeholder_without_shape_info():
-    with tf.Graph().as_default():
-
-        @function.Defun(*[tf.int32] * 2)
-        def Forward(x, y):
-            print(x.name)
-            print(y.name)
-            b = tf.add(x, y)
-            return b
-
-        pl1 = tf.placeholder(tf.int32, name="pl1")
-        pl2 = tf.placeholder(tf.int32, name="pl2")
-        pl3 = tf.placeholder(tf.int32, name="pl3")
-        data = np.array([[-1, 1], [2, -2]], dtype=np.int32)
-        data2 = np.array([[-2, 3], [4, -6]], dtype=np.int32)
-        data3 = np.array([[-2, 3], [4, -6]], dtype=np.int32)
-        z1 = gen_functional_ops.StatefulPartitionedCall(args=[pl1, pl2], Tout=[tf.int32], f=Forward)
-        z2 = z1 + pl3
-        compare_tf_with_tvm(
-            [data, data2, data3],
-            ["pl1:0", "pl2:0", "pl3:0"],
-            ["StatefulPartitionedCall:0", z2.name],
-            mode="vm",
-            init_global_variables=True,
-        )
-
-
-def _test_spop_placeholder_with_shape_and_default_value():
-    with tf.Graph().as_default():
-        data = np.ones([1], dtype=int).astype(np.int32)
-        dataVar = tf.Variable(data, shape=data.shape)
-        pl1 = array_ops.placeholder_with_default(dataVar, shape=data.shape, name="pl1")
-        tpl = tf.convert_to_tensor(pl1, dtype=tf.int32)
-
-        @function.Defun(*[tf.int32])
-        def pl_with_default(pl):
-            return tf.expand_dims(tf.multiply(pl, pl), 0)
-
-        _ = gen_functional_ops.StatefulPartitionedCall(
-            args=[tpl], Tout=[tf.int32], f=pl_with_default
-        )
-        compare_tf_with_tvm(
-            data, ["pl1:0"], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True
-        )
-
-
-def _test_spop_placeholder_numpy_arange_feed():
-    with tf.Graph().as_default():
-        t1 = tf.placeholder(tf.int32, (3, 3, 3), "t1")
-        t1_data = np.arange(27, dtype=np.int32).reshape((3, 3, 3))
-        t2 = tf.placeholder(tf.int32, (3, 3, 3), "t2")
-        t2_data = np.arange(27, dtype=np.int32).reshape((3, 3, 3))
-
-        @tf.function
-        def add(x, y):
-            return tf.add(x, y, "add_t1_t2")
-
-        t3 = add(t1, t2)
-        compare_tf_with_tvm(
-            [t1_data, t2_data], ["t1:0", "t2:0"], [t3.name], mode="vm", init_global_variables=True
-        )
-
-
-def _test_spop_placeholder_numpy_array_feed():
-    with tf.Graph().as_default():
-        t1_data = np.array([[-1, 1, 3], [2, -2, 4], [2, -3, 14]], dtype=np.int32)
-        t2_data = np.array([[-2, 1, 2], [12, -2, 14], [12, -3, 4]], dtype=np.int32)
-        t1 = tf.placeholder(tf.int32, name="t1")
-        t2 = tf.placeholder(tf.int32, name="t2")
-
-        @tf.function
-        def add(x, y):
-            return tf.add(x, y, "add_t1_t2")
-
-        t3 = add(t1, t2)
-        compare_tf_with_tvm(
-            [t1_data, t2_data], ["t1:0", "t2:0"], [t3.name], mode="vm", init_global_variables=True
-        )
-
-
-def _test_spop_function_invocation_basic():
-    with tf.Graph().as_default():
-
-        def fun1(a):
-            return tf.multiply(a, a)
-
-        def fun2(b):
-            return tf.multiply(b, 10)
-
-        @tf.function
-        def fun3(x, y):
-            x = fun2(x)
-            y = fun1(y)
-            z = tf.add(x, y)
-            return z
-
-        t3 = fun3(tf.constant(10.5), tf.constant(20.4))
-
-        compare_tf_with_tvm([], [], [t3.name], mode="vm", init_global_variables=True)
-
-
-def _test_spop_function_invocation_nested():
-    with tf.Graph().as_default():
-        t1 = tf.placeholder(tf.int32, (3, 3, 3), name="t1")
-        t1_data = np.arange(27, dtype=np.int32).reshape((3, 3, 3))
-        t2 = tf.placeholder(tf.int32, name="t2")
-        t2_data = np.arange(27, dtype=np.int32).reshape((3, 3, 3))
-
-        @tf.function
-        def myfunc(x, y):
-            return tf.add(x, y, "myfunc")
-
-        @tf.function
-        def myfunc2(x, y):
-            z = myfunc(x, y)
-            l = myfunc(z, y)
-            m = myfunc(l, z)
-            return tf.add(l, m, "myfunc2")
-
-        res1 = myfunc(t1, t2)
-        res2 = myfunc2(res1, t1)
-
-        compare_tf_with_tvm(
-            [t1_data, t2_data], ["t1:0", "t2:0"], [res2.name], mode="vm", init_global_variables=True
-        )
-
-
-def _test_spop_function_invocation_no_autograph():
-    with tf.Graph().as_default():
-
-        @tf.function(autograph=False)
-        def fun1(a):
-            return tf.multiply(a, a)
-
-        @tf.function(autograph=False)
-        def fun2(b):
-            return tf.multiply(b, 10)
-
-        @tf.function
-        def fun3(x, y):
-            x = fun2(x)
-            y = fun1(y)
-            z = tf.add(x, y)
-            return z
-
-        t3 = fun3(tf.constant(10.5), tf.constant(20.4))
-
-        compare_tf_with_tvm([], [], [t3.name], mode="vm", init_global_variables=True)
-
-
-def _test_spop_function_invocation_defun():
-    with tf.Graph().as_default():
-
-        def fun1(a):
-            return tf.multiply(a, a)
-
-        def fun2(b):
-            return tf.multiply(b, b)
-
-        @function.Defun(dtypes.float32, dtypes.float32, func_name="Fun3")
-        def fun3(x, y):
-            x = fun2(x)
-            y = fun1(y)
-            z = tf.add(x, y)
-            return z
-
-        _ = gen_functional_ops.StatefulPartitionedCall(
-            args=[tf.constant(10.5), tf.constant(20.4)],
-            Tout=[dtypes.float32],
-            f=fun3,
-            name="SpopFnInvocation",
-        )
-        compare_tf_with_tvm([], [], "SpopFnInvocation:0", mode="vm", init_global_variables=True)
-
-
-def _test_spop_arithmetic():
-    with tf.Graph().as_default():
-
-        @function.Defun(*[dtypes.int32] * 3)
-        def arithmetic(m, x, c):
-            z = tf.add(tf.multiply(m, x), c)
-            return z
-
-        m = tf.constant(10)
-        x = tf.constant(20)
-        c = tf.constant(2)
-        _ = gen_functional_ops.StatefulPartitionedCall(
-            args=[m, x, c], Tout=[tf.int32], f=arithmetic
-        )
-
-        compare_tf_with_tvm(
-            [], [], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True
-        )
-
-
-def _test_spop_control_flow():
-    with tf.Graph().as_default():
-
-        @function.Defun(*[dtypes.float32] * 2)
-        def Body1(x, y):
-            with ops.device("/job:localhost/replica:0/task:0/device:CPU:0"):
-                z = math_ops.multiply(x, y)
-                i = 0
-                while i < 10:
-                    i += 1
-                    if i == 5:
-                        continue
-                    z = math_ops.multiply(x, y * i)
-            return z
-
-        _ = gen_functional_ops.StatefulPartitionedCall(
-            args=[constant_op.constant(32.0), constant_op.constant(100.0)],
-            Tout=[dtypes.float32],
-            f=Body1,
-        )
-        compare_tf_with_tvm(
-            [], [], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True
-        )
-
-
-def _test_spop_variables():
-    with tf.Graph().as_default():
-        const1 = tf.constant(10)
-        const2 = tf.constant(20)
-        var1 = tf.Variable(const1, dtype=tf.int32)
-        var2 = tf.Variable(const2, dtype=tf.int32)
-
-        @function.Defun(tf.int32, tf.int32)
-        def Forward(x, y):
-            return tf.multiply(x, y)
-
-        _ = gen_functional_ops.StatefulPartitionedCall(
-            args=[var1, var2], Tout=[tf.int32], f=Forward
-        )
-        compare_tf_with_tvm(
-            [], [], "StatefulPartitionedCall:0", init_global_variables=True, mode="vm"
-        )
-
-
-def _test_spop_constants():
-    with tf.Graph().as_default():
-
-        @function.Defun(*[dtypes.int32] * 2)
-        def constantsFn(x, y):
-            vv = tf.constant([2, 3, 4], name="vv")
-            z = tf.add(vv + x, y)
-            return z
-
-        a = tf.constant(20000, name="a")
-        b = tf.constant(40000, name="b")
-        _ = gen_functional_ops.StatefulPartitionedCall(args=[a, b], Tout=[tf.int32], f=constantsFn)
-
-        compare_tf_with_tvm(
-            [], [], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True
-        )
-
-
-def _test_spop_stateful():
-    # This test case is to test that TVM rejects any TF stateful operations
-    # (including Resource Variables) except StatefulPartitionedCall/PartitionedCall
-    # (as these two operators can still be used as container graphs to execute
-    # "stateless" operations internally.
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-
-        @tf.function
-        def FunctionWithStatefulOp_One(i):
-            b = tf.random.uniform(shape=[2, 4], maxval=10, dtype=tf.float32, seed=10)
-            y = tf.multiply(b, i)
-            return y
-
-        @tf.function
-        def FunctionWithStatefulOp(m, n):
-            a = tf.random.uniform(shape=[2, 4], maxval=10, dtype=tf.float32, seed=10)
-            x = tf.multiply(a, m)
-            y = FunctionWithStatefulOp_One(n)
-            z = tf.multiply(x, y)
-            return z
-
-        op = FunctionWithStatefulOp(constant_op.constant(1.0), constant_op.constant(2.0))
-        with pytest.raises(Exception) as execinfo:
-            compare_tf_with_tvm([], [], [op.name], init_global_variables=True, mode="vm")
-        assert execinfo.value.args[0].startswith("The following operators are not implemented")
-
-
-def _test_spop_device_assignment():
-    # This test case is to test that TVM rejects inconsistent device assignment
-    # while using StatefulPartitionedCall/PartitionedCall operators which in case of TVM will
-    # be used as container graphs to internally execute "stateless" operations.
-
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-
-        def fun1(a):
-            with ops.device("/GPU:0"):
-                return tf.multiply(a, a)
-
-        def fun2(b):
-            with ops.device("/job:localhost/replica:0/task:0/device:CPU:1"):
-                return tf.multiply(b, b)
-
-        @function.Defun(dtypes.float32, dtypes.float32, func_name="Fun3")
-        def fun3(x, y):
-            with ops.device("/CPU:0"):
-                x = fun2(x)
-            with ops.device("/job:localhost/replica:0/task:0/device:CPU:2"):
-                y = fun1(y)
-            with ops.device("/job:localhost/replica:0/task:0/device:CPU:3"):
-                z = tf.add(x, y)
-                return z
-
-        _ = gen_functional_ops.StatefulPartitionedCall(
-            args=[tf.constant(10.5), tf.constant(20.4)], Tout=[dtypes.float32], f=fun3
-        )
-        with pytest.raises(Exception) as execinfo:
-            compare_tf_with_tvm(
-                [], [], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True
-            )
-        assert execinfo.value.args[0].startswith("Found inconsistent Device assignment")
-
-
-def _test_spop_resource_variables():
-    # This test case is to test that TVM rejects any graph containing
-    # resource variables with StatefulPartitionedOp.
-
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-
-        const1 = tf.constant(10)
-        const2 = tf.constant(20)
-        var1 = tf.Variable(const1, dtype=tf.int32, use_resource=True)
-        var2 = tf.Variable(const2, dtype=tf.int32, use_resource=True)
-
-        @tf.function
-        def resourceVariablesTest(x, y):
-            return tf.multiply(x, y)
-
-        _ = resourceVariablesTest(var1, var2)
-        with pytest.raises(Exception) as execinfo:
-            compare_tf_with_tvm(
-                [], [], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True
-            )
-        # pylint: disable=implicit-str-concat
-        assert execinfo.value.args[0].startswith("Graph is not frozen." " Provide a frozen graph")
-
-
-def test_forward_spop():
-    """Spop"""
-    _test_spop_stateful()
-    _test_spop_device_assignment()
-    # tensorflow version upgrade support
-    # This test is expected to fail in TF version >= 2.6
-    # as the generated graph will be considered frozen, hence
-    # not passing the criteria for the test below.
-    if package_version.parse(tf.__version__) < package_version.parse("2.6.1"):
-        _test_spop_resource_variables()
-
-    # Placeholder test cases
-    _test_spop_placeholder_without_shape_info()
-    _test_spop_placeholder_with_shape_and_default_value()
-    _test_spop_placeholder_numpy_arange_feed()
-    _test_spop_placeholder_numpy_array_feed()
-
-    # Function Invocation test cases
-    _test_spop_function_invocation_basic()
-    _test_spop_function_invocation_nested()
-    _test_spop_function_invocation_no_autograph()
-    _test_spop_function_invocation_defun()
-
-    # Test cases for various other TF constructs
-    _test_spop_arithmetic()
-    _test_spop_control_flow()
-    _test_spop_variables()
-    _test_spop_constants()
-
-
-#######################################################################
-# Dynamic input shape
-# -------------------
-def test_forward_dynamic_input_shape():
-    """Dynamic input shape"""
-    tf.reset_default_graph()
-
-    with tf.Graph().as_default():
-        data = tf.placeholder(tf.float32, name="data", shape=(None,))
-        _ = data + 1
-        np_data = np.random.uniform(size=(2,)).astype("float32")
-        out_name = "add"
-
-        with tf.Session() as sess:
-            graph_def = tf_testing.AddShapesToGraphDef(sess, out_name)
-            tf_output = run_tf_graph(sess, np_data, "data:0", [f"{out_name}:0"])
-            # TODO(kevinthesun): enable gpu test when VM heterogeneous execution is ready.
-            for device in ["llvm"]:
-                _ = tvm.device(device, 0)
-                if not tvm.testing.device_enabled(device):
-                    print(f"Skip because {device} is not enabled")
-                    continue
-                tvm_output = run_tvm_graph(
-                    graph_def,
-                    np_data,
-                    ["data"],
-                    1,
-                    target=device,
-                    layout="NCHW",
-                    out_names=[out_name],
-                    mode="vm",
-                    ignore_in_shape=True,
-                )
-                tvm.testing.assert_allclose(tvm_output[0], tf_output[0], rtol=1e-5, atol=1e-5)
-
-
-def test_forward_dynmaic_rnn_lstmblockcell():
-    """Dynmaic rnn lstmblockcell"""
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.0.0"):
-        return
-
-    total_series_length = 50000
-    truncated_backprop_length = 15
-    state_size = 4
-    echo_step = 3
-    batch_size = 5
-    num_layers = 5
-
-    def generateData():
-        x = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]))
-        y = np.roll(x, echo_step)
-        y[0:echo_step] = 0
-
-        x = x.reshape((batch_size, -1))  # The first index changing slowest, subseries as rows
-        y = y.reshape((batch_size, -1))
-
-        return (x, y)
-
-    batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
-
-    init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])
-
-    state_per_layer_list = tf.unstack(init_state, axis=0)
-    rnn_tuple_state = tuple(
-        list(
-            tf.nn.rnn_cell.LSTMStateTuple(
-                state_per_layer_list[idx][0], state_per_layer_list[idx][1]
-            )
-            for idx in range(num_layers)
-        )
-    )
-
-    # Forward passes
-    def lstm_cell():
-        return tensorflow.contrib.rnn.LSTMBlockCell(state_size)
-
-    cell = tf.nn.rnn_cell.MultiRNNCell(
-        [lstm_cell() for _ in range(num_layers)], state_is_tuple=True
-    )
-    states_series, current_state = tf.nn.dynamic_rnn(
-        cell, tf.expand_dims(batchX_placeholder, -1), initial_state=rnn_tuple_state
-    )
-
-    with tf.Session() as sess:
-        sess.run(tf.global_variables_initializer())
-        x, _ = generateData()
-        _current_state = np.zeros((num_layers, 2, batch_size, state_size))
-
-        start_idx = 0
-        end_idx = start_idx + truncated_backprop_length
-
-        batchX = x[:, start_idx:end_idx]
-
-        # Save current state for TVM
-        current_state_tvm = _current_state
-
-        _current_state, _states_series = sess.run(
-            [current_state, states_series],
-            feed_dict={batchX_placeholder: batchX, init_state: _current_state},
-        )
-
-        # Organize results and corresponding names
-        tf_output = [_states_series]
-
-        for c in _current_state:
-            tf_output.append(c.c)
-            tf_output.append(c.h)
-
-        name = [states_series.name.split(":")[0]]
-
-        for t in current_state:
-            name.append(t.c.name.split(":")[0])
-            name.append(t.h.name.split(":")[0])
-
-        graph_def = sess.graph.as_graph_def(add_shapes=True)
-
-        final_graph_def = graph_util.convert_variables_to_constants(sess, graph_def, name)
-
-        _ = run_tvm_graph(
-            final_graph_def,
-            [batchX.astype("float32"), current_state_tvm.astype("float32")],
-            ["Placeholder", "Placeholder_1"],
-            out_names=name,
-            num_output=len(name),
-            mode="vm",
-            disabled_pass=["FoldScaleAxis"],
-        )
-
-        # Compare result
-        for _, tf_out in enumerate(tf_output):
-            tvm.testing.assert_allclose(tf_out, tf_out, atol=1e-5, rtol=1e-5)
-
-
-#######################################################################
-# Unique
-# ------------
-
-
-def _test_unique(n, dtype, is_dyn):
-    tf.reset_default_graph()
-    np_data = np.random.randint(100, size=n).astype(dtype)
-    with tf.Graph().as_default():
-        if is_dyn:
-            in_data = tf.placeholder(dtype, [n], name="in_data")
-        else:
-            in_data = tf.constant(np_data, dtype, name="in_data")
-        tf.unique(in_data)
-        if is_dyn:
-            compare_tf_with_tvm(np_data, "in_data:0", ["Unique:0", "Unique:1"], mode="vm")
-        else:
-            compare_tf_with_tvm(np_data, "", ["Unique:0", "Unique:1"], mode="vm")
-
-
-def test_forward_unique():
-    """test Unique"""
-
-    for dtype in ["int32", "int64"]:
-        for is_dyn in [False, True]:
-            _test_unique(50, dtype, is_dyn)
-            _test_unique(100, dtype, is_dyn)
-
-
-#######################################################################
-# Unique with counts
-# ------------
-
-
-def _test_unique_with_counts(n, dtype, is_dyn):
-    tf.reset_default_graph()
-    np_data = np.random.randint(100, size=n).astype(dtype)
-    with tf.Graph().as_default():
-        if is_dyn:
-            in_data = tf.placeholder(dtype, [n], name="in_data")
-        else:
-            in_data = tf.constant(np_data, dtype, name="in_data")
-        tf.unique_with_counts(in_data)
-        if is_dyn:
-            compare_tf_with_tvm(
-                np_data,
-                "in_data:0",
-                ["UniqueWithCounts:0", "UniqueWithCounts:1", "UniqueWithCounts:2"],
-                mode="vm",
-            )
-        else:
-            compare_tf_with_tvm(
-                np_data,
-                "",
-                ["UniqueWithCounts:0", "UniqueWithCounts:1", "UniqueWithCounts:2"],
-                mode="vm",
-            )
-
-
-def test_forward_unique_with_counts():
-    """test UniqueWithCounts"""
-
-    for dtype in ["int32", "int64"]:
-        for is_dyn in [False, True]:
-            _test_unique_with_counts(10, dtype, is_dyn)
-            _test_unique_with_counts(20, dtype, is_dyn)
-
-
-#######################################################################
-# check graph ir for nn.moments
-# ------------
-
-
-def test_moments():
-    """NN.moments"""
-    g = tf.Graph()
-    shape = [4, 176, 8, 8]
-    dtype = "float32"
-    with g.as_default():
-        A = tf.placeholder(shape=shape, dtype=dtype, name="A")
-        _ = tf.placeholder(shape=shape, dtype=dtype, name="B")
-        mean, variance = tf.nn.moments(A, [1], keep_dims=True)
-        _ = (A - mean) / tf.sqrt(variance + 0.0005)
-
-    with tvm.testing.disable_span_filling():
-        mod, _ = from_tensorflow(g.as_graph_def(add_shapes=True))
-    with tvm.testing.enable_span_filling():
-        mod_with_span, _ = from_tensorflow(g.as_graph_def(add_shapes=True))
-    tvm.ir.assert_structural_equal(mod["main"], mod_with_span["main"], map_free_vars=True)
-
-    program = """
-    def @main(%A: Tensor[(4, 176, 8, 8), float32]) {
-        %527 = mean(%A, axis=[1], keepdims=True) /* moments/mean */;
-        %528 = subtract(%A, %527) /* sub */;
-        %529 = subtract(%A, %527);
-        %530 = multiply(%529, %529) /* moments/SquaredDifference */;
-        %531 = mean(%530, axis=[1], keepdims=True) /* moments/variance */;
-        %532 = add(%531, 0.0005f) /* add */;
-        %533 = sqrt(%532) /* Sqrt */;
-        divide(%528, %533) /* truediv */
-    }
-    """
-    mod_golden = tvm.relay.parse('#[version = "0.0.5"]\n' + program)
-    tvm.ir.assert_structural_equal(mod["main"].body, mod_golden["main"].body, map_free_vars=True)
-
-
-#######################################################################
-# invert_permutation
-# --------------------
-
-
-def test_invert_permutation():
-    """test InvertPermutation"""
-    tf.reset_default_graph()
-
-    input_shape = [6]
-    x = np.array([3, 4, 0, 2, 1, 5]).astype("int32")
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(shape=input_shape, dtype="int32")
-        tf.invert_permutation(in_data)
-        out_name = "InvertPermutation:0"
-        compare_tf_with_tvm(x, "Placeholder:0", out_name, no_gpu=False)
-
-
-#######################################################################
-# Bincount
-# ----
-
-
-def _test_bincount(in_shape, size, weights):
-    with tf.Graph().as_default():
-        inputs = []
-        data = []
-        inputs.append(tf.placeholder(shape=in_shape, dtype="int32", name="input0"))
-        data.append(np.random.uniform(0, size, size=in_shape).astype("int32"))
-        inputs.append(tf.placeholder(shape=(), dtype="int32", name="size"))
-        data.append(np.array(size, "int32"))
-        if weights:
-            inputs.append(tf.placeholder(shape=in_shape, dtype="float32", name="weights"))
-            data.append(np.reshape(weights, in_shape).astype("float32"))
-        else:
-            inputs.append(tf.placeholder(shape=(0,), dtype="float32", name="weights"))
-            data.append(np.array([], "float32"))
-        result = tf.raw_ops.Bincount(arr=data[0], size=data[1], weights=data[2])
-        compare_tf_with_tvm(data, [a.name for a in inputs], result.name, mode="vm")
-
-
-def test_forward_bincount():
-    """Test Bincount Op"""
-    # 2D input
-    _test_bincount((3, 10), 20, [1.0] * 30)
-    _test_bincount((3, 10), 20, [1.5] * 30)
-    _test_bincount((3, 10), 20, None)
-    # 1D input
-    _test_bincount((10,), 20, [1.0] * 10)
-    _test_bincount((10,), 20, [1.5] * 10)
-    _test_bincount((10,), 20, None)
-
-
-#######################################################################
-# DenseBincount
-# ----
-
-
-def _test_dense_bincount(in_shape, size, weights, binary_output):
-    with tf.Graph().as_default():
-        inputs = []
-        data = []
-        inputs.append(tf.placeholder(shape=in_shape, dtype="int32", name="input0"))
-        data.append(np.random.uniform(0, size, size=in_shape).astype("int32"))
-        inputs.append(tf.placeholder(shape=(), dtype="int32", name="size"))
-        data.append(np.array(size, "int32"))
-        if weights:
-            inputs.append(tf.placeholder(shape=in_shape, dtype="float32", name="weights"))
-            data.append(np.reshape(weights, in_shape).astype("float32"))
-        else:
-            inputs.append(tf.placeholder(shape=(0,), dtype="float32", name="weights"))
-            data.append(np.array([], "float32"))
-        result = tf.raw_ops.DenseBincount(
-            input=data[0],
-            size=data[1],
-            weights=data[2],
-            binary_output=binary_output,
-        )
-        compare_tf_with_tvm(data, [a.name for a in inputs], result.name, mode="vm")
-
-
-def test_forward_dense_bincount():
-    """Test DenseBincount Op"""
-    for binary_output in [False, True]:
-        # 2D input
-        _test_dense_bincount((3, 10), 20, [1.0] * 30, binary_output)
-        _test_dense_bincount((3, 10), 20, [1.5] * 30, binary_output)
-        _test_dense_bincount((3, 10), 20, None, binary_output)
-        # 1D input
-        _test_dense_bincount((10,), 20, [1.0] * 10, binary_output)
-        _test_dense_bincount((10,), 20, [1.5] * 10, binary_output)
-        _test_dense_bincount((10,), 20, None, binary_output)
-
-
-#######################################################################
-# Test structural_equal and span of a model
-# --------------------------------------
-class TestSetSpan:
-    """Test Structure and span of frequently-used models"""
-
-    def _verify(self, res_fptr, golden_fptr):
-        with tvm.testing.enable_span_filling():
-            with_span = res_fptr()
-        with tvm.testing.disable_span_filling():
-            without_span = res_fptr()
-        tvm.ir.assert_structural_equal(with_span, without_span)
-        _verify_structural_equal_with_span(with_span, golden_fptr())
-
-    def test_conv2d_bias_add_span(self):
-        """Test Structure and span of conv2d and bias add model match to the expected result"""
-
-        def _res():
-            in_shape = (1, 5, 5, 1)
-            kernel_shpae = (2, 2, 1, 2)
-            kernel_in = np.ones(kernel_shpae)
-            bias_val_shape = tuple([2])
-            bias_val_in = np.ones(bias_val_shape)
-
-            with tf.Graph().as_default() as g:
-                x = array_ops.placeholder(shape=in_shape, dtype="float32", name="input")
-                kernel = tf.constant(kernel_in, dtype=tf.float32, name="filter_weight")
-                bias_val_tensor = tf.constant(bias_val_in, dtype=tf.float32, name="conv2d_bias")
-                conv2d = tf.nn.conv2d(
-                    x, kernel, strides=[1, 1, 1, 1], padding="VALID", name="conv2d"
-                )
-                _ = tf.nn.bias_add(conv2d, bias_val_tensor, name="bias_add")
-
-                mod, _ = relay.frontend.from_tensorflow(
-                    g.as_graph_def(), shape={"input": in_shape}, outputs=["bias_add"]
-                )
-                return mod["main"]
-
-        def _golden():
-            model_in = relay.var(
-                "input", relay.TensorType([1, 5, 5, 1]), span=_create_span("input")
-            )
-            weight = relay.var(
-                "filter_weight", relay.TensorType([2, 2, 1, 2]), span=_create_span("filter_weight")
-            )
-            bias = relay.var("conv2d_bias", relay.TensorType([2]), span=_create_span("conv2d_bias"))
-            conv2d = _set_span(
-                relay.nn.conv2d(
-                    model_in,
-                    weight,
-                    channels=2,
-                    kernel_size=[2, 2],
-                    data_layout="NHWC",
-                    kernel_layout="HWIO",
-                ),
-                "conv2d",
-            )
-            add = _set_span(relay.op.add(conv2d, bias), "bias_add")
-            mod = ir.IRModule.from_expr(add)
-            return mod["main"]
-
-        self._verify(_res, _golden)
-
-    def test_fully_connected_bias_add_span(self):
-        """Test Structure and span of fully connected model match to the expected result"""
-
-        def _res():
-            in_shape = (1, 10)
-            kernel_shpae = (10, 10)
-            kernel_in = np.ones(kernel_shpae)
-            bias_val_shape = tuple([10])
-            bias_val_in = np.ones(bias_val_shape)
-
-            with tf.Graph().as_default() as g:
-                x = array_ops.placeholder(shape=in_shape, dtype="float32", name="input")
-                in_filter = tf.constant(kernel_in, dtype=tf.float32, name="filter_weight")
-                bias_val_tensor = tf.constant(bias_val_in, dtype=tf.float32, name="dense_bias")
-                mat_mul = math_ops.mat_mul(x, in_filter, name="dense")
-                _ = tf.nn.bias_add(mat_mul, bias_val_tensor, name="bias_add")
-
-                mod, _ = relay.frontend.from_tensorflow(
-                    g.as_graph_def(),
-                    shape={"input": in_shape},
-                    outputs=["bias_add"],
-                    convert_config={"use_dense": True},
-                )
-                return mod["main"]
-
-        def _golden():
-            model_in = relay.var("input", relay.TensorType([1, 10]), span=_create_span("input"))
-            weight = relay.var(
-                "filter_weight", relay.TensorType([10, 10]), span=_create_span("filter_weight")
-            )
-            bias = relay.var("dense_bias", relay.TensorType([10]), span=_create_span("dense_bias"))
-            transpose = _set_span(relay.transpose(weight, [1, 0]), "dense")
-            dense = _set_span(relay.nn.dense(model_in, transpose, units=10), "dense")
-            add = _set_span(relay.op.add(dense, bias), "bias_add")
-            mod = ir.IRModule.from_expr(add)
-            return mod["main"]
-
-        self._verify(_res, _golden)
-
-    def test_reshape_span(self):
-        """Test Structure and span of reshape model match to the expected result"""
-
-        def _res():
-            in_shape = (1, 10)
-            output_shape = (2, 5)
-
-            with tf.Graph().as_default() as g:
-                x = array_ops.placeholder(shape=in_shape, dtype="float32", name="input")
-                _ = array_ops.reshape(x, output_shape, "reshape")
-
-                mod, _ = relay.frontend.from_tensorflow(
-                    g.as_graph_def(), shape={"input": in_shape}, outputs=["reshape"]
-                )
-                return mod["main"]
-
-        def _golden():
-            model_in = relay.var("input", relay.TensorType([1, 10]), span=_create_span("input"))
-            reshape = _set_span(relay.reshape(model_in, [2, 5]), "reshape")
-            mod = ir.IRModule.from_expr(reshape)
-            return mod["main"]
-
-        self._verify(_res, _golden)
-
-    def test_batch_norm_span(self):
-        """Test Structure and span of batchnorm model match to the expected result"""
-
-        def _res():
-            in_shape = (1, 12, 12, 32)
-            with tf.Graph().as_default() as g:
-                input_tensor = tf.placeholder(tf.float32, shape=in_shape, name="input")
-                alpha = tf.constant(
-                    np.ones(
-                        in_shape[-1],
-                    ),
-                    dtype=tf.float32,
-                    name="alpha",
-                )
-                beta = tf.constant(
-                    np.ones(
-                        in_shape[-1],
-                    ),
-                    dtype=tf.float32,
-                    name="beta",
-                )
-                _ = tf.nn.fused_batch_norm(x=input_tensor, offset=beta, scale=alpha, name="bn")
-                mod, _ = relay.frontend.from_tensorflow(
-                    g.as_graph_def(), shape={"input": in_shape}, outputs=["bn"]
-                )
-                return mod["main"]
-
-        def _golden():
-            model_in = relay.var(
-                "input", relay.TensorType([1, 12, 12, 32]), span=_create_span("input")
-            )
-            alpha = relay.var("alpha", relay.TensorType([32]), span=_create_span("alpha"))
-            beta = relay.var("beta", relay.TensorType([32]), span=_create_span("beta"))
-            mean = _set_span(relay.op.mean(model_in, axis=[3], exclude=True), "bn")
-            variance_mean = _set_span(
-                relay.op.mean(model_in, axis=[3], keepdims=True, exclude=True), "bn"
-            )
-            variance = _set_span(
-                relay.op._make._variance(model_in, variance_mean, [3], False, True, False), "bn"
-            )
-            bn = _set_span(
-                relay.nn.batch_norm(model_in, alpha, beta, mean, variance, axis=3, epsilon=0.001),
-                "bn",
-            )
-            mod = ir.IRModule.from_expr(bn[0])
-            return mod["main"]
-
-        self._verify(_res, _golden)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/tensorflow/test_no_op.py b/tests/python/frontend/tensorflow/test_no_op.py
deleted file mode 100644
index bc6be5c3059c..000000000000
--- a/tests/python/frontend/tensorflow/test_no_op.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Unit tests for converting TensorFlow debugging ops to Relay."""
-try:
-    import tensorflow.compat.v1 as tf
-
-    tf.disable_v2_behavior()
-except ImportError:
-    import tensorflow as tf
-import numpy as np
-from tvm import relay, ir, testing
-from tvm.relay.frontend.tensorflow import from_tensorflow
-
-
-def run_relay(graph):
-    with testing.disable_span_filling():
-        mod, params = from_tensorflow(graph.as_graph_def(add_shapes=True))
-    with testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_tensorflow(graph.as_graph_def(add_shapes=True))
-    assert ir.structural_equal(mod["main"], mod_with_span["main"])
-
-    return relay.create_executor("debug", mod=mod).evaluate()(**params)
-
-
-def test_no_op():
-    g = tf.Graph()
-    with g.as_default():
-        no_op = tf.no_op()
-        with tf.Session() as sess:
-            # In TF, the type of a no-op is None.
-            assert sess.run(no_op) is None
-
-        # In TVM, no-op is currently translated to 0, though it should
-        # probably be none or an empty tuple.
-        np.testing.assert_allclose(0, run_relay(g).numpy())
-
-
-if __name__ == "__main__":
-    test_no_op()
diff --git a/tests/python/frontend/tensorflow2/common.py b/tests/python/frontend/tensorflow2/common.py
deleted file mode 100644
index f9bf00e4239e..000000000000
--- a/tests/python/frontend/tensorflow2/common.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition, broad-except
-# pylint: disable=import-outside-toplevel, redefined-builtin
-"""TF2 to relay converter test utilities"""
-
-import tvm
-from tvm import relay
-
-from tvm.runtime.vm import VirtualMachine
-import tvm.contrib.graph_executor as runtime
-from tvm.relay.frontend.tensorflow2 import from_tensorflow
-import tvm.testing
-from tvm.relay.testing.tf import vmobj_to_list as vmobj_to_list
-
-import tensorflow as tf
-from tensorflow.python.eager.def_function import Function
-
-
-def run_tf_code(func, input_):
-    if type(func) is Function:
-        f_out = func(input_)
-        if isinstance(f_out, (list, tuple)):
-            np_out = [x.numpy() for x in f_out]
-        else:
-            np_out = [f_out.numpy()]
-    else:
-        f_out = func(tf.constant(input_))
-        if type(f_out) is dict:
-            np_out = [f_out[k].numpy() for k in sorted(f_out.keys())]
-        elif type(f_out) is list:
-            np_out = [x.numpy() for x in f_out]
-        else:
-            np_out = f_out.numpy()
-    return np_out
-
-
-def compile_graph_executor(mod, params, target="llvm", target_host="llvm", opt_level=3):
-    with tvm.transform.PassContext(opt_level):
-        lib = relay.build(mod, target=tvm.target.Target(target, host=target_host), params=params)
-    return lib
-
-
-def compile_vm(mod, params, target="llvm", target_host="llvm", opt_level=3, disabled_pass=None):
-    with tvm.transform.PassContext(opt_level, disabled_pass=disabled_pass):
-        vm_exec = relay.vm.compile(
-            mod, target=tvm.target.Target(target, host=target_host), params=params
-        )
-    return vm_exec
-
-
-def run_vm(vm_exec, input_, ctx=tvm.cpu(0)):
-    vm = VirtualMachine(vm_exec, ctx)
-    _out = vm.invoke("main", input_)
-    return vmobj_to_list(_out)
-
-
-def run_graph_executor(lib, input_, ctx=tvm.cpu(0)):
-    mod = runtime.GraphModule(lib["default"](ctx))
-    mod.set_input(0, input_)
-    mod.run()
-    return [mod.get_output(i).numpy() for i in range(mod.get_num_outputs())]
-
-
-def compare_tf_tvm(gdef, input_, output_, runtime="vm", output_tensors=None):
-    """compare tf and tvm execution for the same input.
-
-    Parameters
-    ----------
-    gdef: TF2 graph def extracted to be fed into from_tensorflow parser.
-    (https://www.tensorflow.org/code/tensorflow/core/framework/graph.proto)
-
-    input_: a single numpy array object
-
-    output_: the expected output from TF to match TVM output with
-
-    runtime: choose TVM runtime; either "vm" for VirtualMachine or "graph" for GraphExecutor
-
-    output_tensors : List of output tensor names (Optional)
-            if not specified then the last node is assumed as graph output.
-    """
-    mod, params = from_tensorflow(gdef, outputs=output_tensors)
-    if runtime == "vm":
-        exec_ = compile_vm(mod, params)
-        tvm_out = run_vm(exec_, input_)
-    elif runtime == "graph":
-        lib = compile_graph_executor(mod, params)
-        tvm_out = run_graph_executor(lib, input_)
-    else:
-        raise RuntimeError("Runtime input not supported: %s" % runtime)
-
-    tvm.testing.assert_allclose(output_, tvm_out, atol=1e-5)
diff --git a/tests/python/frontend/tensorflow2/test_functional_models.py b/tests/python/frontend/tensorflow2/test_functional_models.py
deleted file mode 100644
index 53ece82217a1..000000000000
--- a/tests/python/frontend/tensorflow2/test_functional_models.py
+++ /dev/null
@@ -1,649 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition, broad-except
-# pylint: disable=import-outside-toplevel, redefined-builtin
-"""TF2 to relay converter test: tests basic examples"""
-
-import tempfile
-import tensorflow as tf
-import numpy as np
-import pytest
-from common import compare_tf_tvm
-from common import run_tf_code
-
-
-def _function_graph(TestClass):
-    f = TestClass().func
-    gdef = f.get_concrete_function().graph.as_graph_def()
-    gdef_ops = list(set([n.op for n in gdef.node]))
-    input_ = TestClass().get_input()
-    output = run_tf_code(f, input_)
-    return gdef, input_, output
-
-
-def _model_graph(TestClass):
-    model = TestClass()
-    with tempfile.TemporaryDirectory() as model_path:
-        tf.saved_model.save(model, model_path)
-        imported = tf.saved_model.load(model_path)
-
-    f = imported.signatures["serving_default"]
-    gdef = f.graph.as_graph_def(add_shapes=True)
-
-    input_ = model.get_input()
-    output = run_tf_code(f, input_)
-    return gdef, input_, output
-
-
-def run_func_graph(TestClass, runtime="vm", outputs=None):
-    compare_tf_tvm(*_function_graph(TestClass), runtime=runtime, output_tensors=outputs)
-
-
-def run_model_graph(TestClass, outputs=None):
-    compare_tf_tvm(*_model_graph(TestClass), runtime="vm", output_tensors=outputs)
-
-
-def run_all(TestClass):
-    run_model_graph(TestClass)
-    for runtime_ in ["vm", "graph"]:
-        run_func_graph(TestClass, runtime=runtime_)
-
-
-def test_add_one():
-    class AddOne(tf.Module):
-        """simple function to test x=x+1; scalar as input"""
-
-        def get_input(self):
-            return np.array(1.0, dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(), dtype=tf.float32)])
-        def func(self, x):
-            return x + 1
-
-    run_all(AddOne)
-
-
-def test_add_one_2d():
-    class AddOne2D(tf.Module):
-        """2D array as input"""
-
-        def get_input(self):
-            return np.ones((2, 2), dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-        def func(self, x):
-            return x + 1
-
-    run_all(AddOne2D)
-
-
-def test_add_one_2d_constant():
-    class AddOne2DConstant(tf.Module):
-        """2D array as input with 2D constant as well; 2D constant stored in params after convert"""
-
-        def get_input(self):
-            return np.ones((2, 2), dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-        def func(self, x):
-            return x + np.ones((2, 2), dtype="float32")
-
-    run_all(AddOne2DConstant)
-
-
-def test_sub_one_2d_constant():
-    class SubOne2DConstant(tf.Module):
-        """2D array as input with 2D constant as well; 2D constant stored in params after convert"""
-
-        def get_input(self):
-            return np.ones((2, 2), dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-        def func(self, x):
-            return x - np.ones((2, 2), dtype="float32")
-
-    run_all(SubOne2DConstant)
-
-
-def test_mul_one_2d_constant():
-    class MulOne2DConstant(tf.Module):
-        """2D array as input with 2D constant as well; 2D constant stored in params after convert"""
-
-        def get_input(self):
-            return np.ones((2, 2), dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-        def func(self, x):
-            return x * np.ones((2, 2), dtype="float32")
-
-    run_all(MulOne2DConstant)
-
-
-def test_div_one_2d_constant():
-    class DivOne2DConstant(tf.Module):
-        """2D array as input with 2D constant as well; 2D constant stored in params after convert"""
-
-        def get_input(self):
-            return np.ones((2, 2), dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-        def func(self, x):
-            return x / np.ones((2, 2), dtype="float32")
-
-    run_all(DivOne2DConstant)
-
-
-def test_strided_slice():
-    class StridedSlice(tf.Module):
-        def get_input(self):
-            return np.ones((3, 2, 3), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(3, 2, 3), dtype=tf.float32)])
-        def func(self, x):
-            return tf.strided_slice(x, [1, 0, 0], [2, 1, 3], [1, 1, 1])
-
-    run_all(StridedSlice)
-
-
-def test_split():
-    class Split(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            a, b, c = tf.split(x, 3, axis=1)
-            return tf.raw_ops.Pack(values=[a, b, c], axis=1)
-
-    run_all(Split)
-
-
-def test_shape():
-    class Shape(tf.Module):
-        def get_input(self):
-            return np.ones((3, 2, 3), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(3, 2, 3), dtype=tf.float32)])
-        def func(self, x):
-            a = tf.ones_like(tf.raw_ops.Shape(input=x), dtype=tf.float32)
-            return a + x
-
-    run_all(Shape)
-
-
-def test_pack():
-    class Pack(tf.Module):
-        def get_input(self):
-            return np.ones((2, 3), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 3), dtype=tf.float32)])
-        def func(self, x):
-            return tf.raw_ops.Pack(values=[x, x], axis=0)
-
-    run_all(Pack)
-
-
-def test_max():
-    class Maximum(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            a, b = tf.split(x, 2, axis=1)
-            return tf.math.maximum(a, b, name=None)
-
-    run_all(Maximum)
-
-
-def test_less():
-    class Less(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            a, b = tf.split(x, 2, axis=1)
-            return tf.math.less(a, b, name=None)
-
-    run_all(Less)
-
-
-def test_equal():
-    class Equal(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            a, b = tf.split(x, 2, axis=1)
-            return tf.math.equal(a, b, name=None)
-
-    run_all(Equal)
-
-
-def test_cast():
-    class Cast(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            return tf.cast(x, tf.int32)
-
-    run_all(Cast)
-
-
-def test_expand_dims():
-    class ExpandDims(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            return tf.expand_dims(x, axis=2)
-
-    run_all(ExpandDims)
-
-
-def test_transpose():
-    class Transpose(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            x = tf.expand_dims(x, axis=2)
-            return tf.transpose(x, perm=[0, 2, 1])
-
-    run_all(Transpose)
-
-
-def test_reshape():
-    class Reshape(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            return tf.reshape(x, (1, 2, 15))
-
-    run_all(Reshape)
-
-
-def test_tanh():
-    class Tanh(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            return tf.math.tanh(x)
-
-    run_all(Tanh)
-
-
-def test_sigmoid():
-    class Sigmoid(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            return tf.math.sigmoid(x)
-
-    run_all(Sigmoid)
-
-
-def test_relu():
-    class Relu(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            return tf.nn.relu(x)
-
-    run_all(Relu)
-
-
-def test_floor():
-    class Floor(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            return tf.math.floor(x)
-
-    run_all(Floor)
-
-
-def test_floor_mod():
-    class FloorMod(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            a, b = tf.split(x, 2, axis=1)
-            return tf.math.floormod(a, b)
-
-    run_all(FloorMod)
-
-
-def test_concat_v2():
-    class ConcatV2(tf.Module):
-        def get_input(self):
-            return np.ones((1, 30), dtype=np.float32)
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1, 30), dtype=tf.float32)])
-        def func(self, x):
-            a, b, c = tf.split(x, 3, axis=1)
-            axis = tf.add(tf.constant(1, dtype="int32"), tf.constant(0, dtype="int32"))
-            return tf.raw_ops.ConcatV2(values=[a, b, c], axis=axis)
-
-    run_all(ConcatV2)
-
-
-def test_multi_output():
-    class MultiOutput(tf.Module):
-        def get_input(self):
-            return np.ones((2, 2), dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-        def func(self, x):
-            y = 2 * x
-            return x, y
-
-    run_func_graph(MultiOutput, runtime="vm", outputs=["Identity:output:0", "Identity_1:output:0"])
-    run_func_graph(
-        MultiOutput, runtime="graph", outputs=["Identity:output:0", "Identity_1:output:0"]
-    )
-    run_model_graph(MultiOutput, outputs=["Identity:output:0"])
-
-
-def test_if():
-    def create_if_class(_condition=True):
-        class If(tf.Module):
-            def get_input(self):
-                return np.ones((2, 2), dtype="float32")
-
-            @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-            def func(self, x):
-                @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-                def double(x):
-                    return 2 * x
-
-                @tf.function(input_signature=[tf.TensorSpec(shape=(2, 2), dtype=tf.float32)])
-                def triple(x):
-                    return 3 * x
-
-                output = tf.raw_ops.If(
-                    cond=_condition,
-                    input=[x],
-                    Tout=[tf.float32],
-                    output_shapes=[(2, 2)],
-                    then_branch=double.get_concrete_function(),
-                    else_branch=triple.get_concrete_function(),
-                )
-                return output[0]
-
-        return If
-
-    for cond in [True, False]:
-        if_class = create_if_class(_condition=cond)
-        run_func_graph(if_class, runtime="vm")
-        run_model_graph(if_class)
-
-
-def test_stateless_while():
-    class StatelessWhile(tf.Module):
-        def get_input(self):
-            return np.array([6], dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1,), dtype=tf.float32)])
-        def func(self, x):
-            i = tf.constant(3.0)
-            cond = lambda i: tf.less(i, x)
-            body = lambda i: (tf.add(i, 2),)
-            r = tf.while_loop(cond, body, [i])
-            return r[0]
-
-    run_func_graph(StatelessWhile, runtime="vm")
-    run_model_graph(StatelessWhile)
-
-
-def test_stateless_while_2var():
-    class StatelessWhile2Var(tf.Module):
-        def get_input(self):
-            return np.array([20], dtype="float32")
-
-        @tf.function(input_signature=[tf.TensorSpec(shape=(1,), dtype=tf.float32)])
-        def func(self, x):
-            i = tf.constant(3.0)
-            j = tf.constant(5.0)
-            cond = lambda i, j: tf.less(i + j, x)
-            body = lambda i, j: (tf.add(i, 2), tf.add(j, 3))
-            r = tf.while_loop(cond, body, [i, j])
-            return r
-
-    run_func_graph(
-        StatelessWhile2Var, runtime="vm", outputs=["Identity:output:0", "Identity_1:output:0"]
-    )
-    run_model_graph(StatelessWhile2Var, outputs=["Identity:output:0"])
-
-
-def test_tensorlist():
-    def run_test(elem_shape):
-        class TensorList(tf.Module):
-            def get_input(self):
-                in_tens = np.ones((2, 3), dtype="float32")
-                in_tens[1, :] = np.zeros((3,), dtype="float32")
-                return in_tens
-
-            @tf.function(input_signature=[tf.TensorSpec(shape=(2, 3), dtype=tf.float32)])
-            def func(self, x):
-                dtype = tf.float32
-                tl = tf.raw_ops.TensorListReserve(
-                    element_shape=elem_shape, num_elements=2, element_dtype=dtype
-                )
-                tl = tf.raw_ops.TensorListSetItem(input_handle=tl, index=0, item=x[0, :])
-                tl = tf.raw_ops.TensorListSetItem(input_handle=tl, index=1, item=x[1, :])
-                output = tf.raw_ops.TensorListGetItem(
-                    input_handle=tl, index=0, element_shape=elem_shape, element_dtype=dtype
-                )
-                return output
-
-        run_model_graph(TensorList)
-        run_func_graph(TensorList, runtime="vm")
-
-    run_test((3,))
-    run_test((-1,))
-
-
-def test_tensorlist_stack():
-    def run_test(elem_shape):
-        class TensorListStack(tf.Module):
-            def get_input(self):
-                in_tens = np.ones((2, 3), dtype="float32")
-                in_tens[1] = np.zeros((3,), dtype="float32")
-                return in_tens
-
-            @tf.function(input_signature=[tf.TensorSpec(shape=(2, 3), dtype=tf.float32)])
-            def func(self, x):
-                dtype = tf.float32
-                tl = tf.raw_ops.TensorListReserve(
-                    element_shape=elem_shape, num_elements=2, element_dtype=dtype
-                )
-                tl = tf.raw_ops.TensorListFromTensor(tensor=x, element_shape=elem_shape)
-                output = tf.raw_ops.TensorListStack(
-                    input_handle=tl, element_shape=elem_shape, element_dtype=dtype
-                )
-                return output
-
-        run_model_graph(TensorListStack)
-        run_func_graph(TensorListStack, runtime="vm")
-
-    run_test((3,))
-    run_test((-1,))
-
-
-def test_tensorlist_2d():
-    def run_test(elem_shape):
-        class TensorList2D(tf.Module):
-            def get_input(self):
-                in_tens = np.ones((2, 3, 4), dtype="float32")
-                in_tens[1, :, :] = np.zeros((3, 4), dtype="float32")
-                return in_tens
-
-            @tf.function(input_signature=[tf.TensorSpec(shape=(2, 3, 4), dtype=tf.float32)])
-            def func(self, x):
-                dtype = tf.float32
-                tl = tf.raw_ops.TensorListReserve(
-                    element_shape=elem_shape, num_elements=2, element_dtype=dtype
-                )
-                tl = tf.raw_ops.TensorListSetItem(input_handle=tl, index=0, item=x[0, :, :])
-                tl = tf.raw_ops.TensorListSetItem(input_handle=tl, index=1, item=x[1, :, :])
-                output = tf.raw_ops.TensorListGetItem(
-                    input_handle=tl, index=0, element_shape=elem_shape, element_dtype=dtype
-                )
-                return output
-
-        run_model_graph(TensorList2D)
-        run_func_graph(TensorList2D, runtime="vm")
-
-    run_test((3, 4))
-    run_test((-1, -1))
-
-
-def test_tensorlist_stack_2d():
-    def run_test(elem_shape):
-        class TensorListStack2D(tf.Module):
-            def get_input(self):
-                in_tens = np.ones((2, 3, 4), dtype="float32")
-                in_tens[1, :, :] = np.zeros((3, 4), dtype="float32")
-                return in_tens
-
-            @tf.function(input_signature=[tf.TensorSpec(shape=(2, 3, 4), dtype=tf.float32)])
-            def func(self, x):
-                dtype = tf.float32
-                tl = tf.raw_ops.TensorListReserve(
-                    element_shape=elem_shape, num_elements=2, element_dtype=dtype
-                )
-                tl = tf.raw_ops.TensorListFromTensor(tensor=x, element_shape=elem_shape)
-                output = tf.raw_ops.TensorListStack(
-                    input_handle=tl, element_shape=elem_shape, element_dtype=dtype
-                )
-                return output
-
-        run_model_graph(TensorListStack2D)
-        run_func_graph(TensorListStack2D, runtime="vm")
-
-    run_test((3, 4))
-    run_test((-1, -1))
-
-
-def test_tensorlist_stack_unpack():
-    def run_test(elem_shape):
-        class TensorListStack2D(tf.Module):
-            def get_input(self):
-                in_tens = np.ones((1, 3, 4), dtype="float32")
-                return in_tens
-
-            @tf.function(input_signature=[tf.TensorSpec(shape=(1, 3, 4), dtype=tf.float32)])
-            def func(self, x):
-                dtype = tf.float32
-                tl = tf.raw_ops.TensorListReserve(
-                    element_shape=elem_shape, num_elements=1, element_dtype=dtype
-                )
-                tl = tf.raw_ops.TensorListSetItem(input_handle=tl, index=0, item=x[0, :, :])
-                output = tf.raw_ops.TensorListStack(
-                    input_handle=tl, element_shape=elem_shape, element_dtype=dtype, num_elements=1
-                )
-                output = tf.raw_ops.Unpack(value=output, num=1, axis=0)
-                return output
-
-        run_model_graph(TensorListStack2D)
-        run_func_graph(TensorListStack2D, runtime="vm")
-
-    run_test((3, 4))
-    run_test((-1, -1))
-
-
-def test_bincount_1d():
-    def run_test(weights, minlength, maxlength, axis, binary_output):
-        class Bincount1D(tf.Module):
-            def get_input(self):
-                return np.random.uniform(low=0, high=maxlength, size=(100,)).astype("int32")
-
-            @tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.int32)])
-            def func(self, x):
-                return tf.math.bincount(
-                    x,
-                    weights=weights,
-                    minlength=minlength,
-                    maxlength=maxlength,
-                    axis=axis,
-                    binary_output=binary_output,
-                )
-
-        run_model_graph(Bincount1D)
-        run_func_graph(Bincount1D, runtime="vm")
-
-    for axis in [None, 0, -1]:
-        run_test(weights=None, minlength=20, maxlength=20, axis=axis, binary_output=False)
-        run_test(weights=None, minlength=20, maxlength=20, axis=axis, binary_output=True)
-
-    # weights and axis=None need operator UnsortedSegmentSum to be implemented. Skip axis=None
-    weights = np.random.uniform(low=0.2, high=5, size=(100,)).astype("float32")
-    for axis in [0, -1]:
-        run_test(weights=weights, minlength=20, maxlength=20, axis=axis, binary_output=False)
-
-
-def test_bincount_2d():
-    def run_test(weights, minlength, maxlength, axis, binary_output):
-        class Bincount2D(tf.Module):
-            def get_input(self):
-                return np.random.uniform(low=0, high=maxlength, size=(3, 100)).astype("int32")
-
-            @tf.function(input_signature=[tf.TensorSpec([None, None], tf.int32)])
-            def func(self, x):
-                return tf.math.bincount(
-                    x,
-                    weights=weights,
-                    minlength=minlength,
-                    maxlength=maxlength,
-                    axis=axis,
-                    binary_output=binary_output,
-                )
-
-        run_model_graph(Bincount2D)
-        run_func_graph(Bincount2D, runtime="vm")
-
-    for axis in [None, 0, -1]:
-        run_test(weights=None, minlength=20, maxlength=20, axis=axis, binary_output=False)
-        run_test(weights=None, minlength=20, maxlength=20, axis=axis, binary_output=True)
-
-    # weights and axis=None need operator UnsortedSegmentSum to be implemented. Skip axis=None
-    weights = np.random.uniform(low=0.2, high=5, size=(3, 100)).astype("float32")
-    for axis in [0, -1]:
-        run_test(weights=weights, minlength=20, maxlength=20, axis=axis, binary_output=False)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/tensorflow2/test_sequential_models.py b/tests/python/frontend/tensorflow2/test_sequential_models.py
deleted file mode 100644
index 2ad41508630c..000000000000
--- a/tests/python/frontend/tensorflow2/test_sequential_models.py
+++ /dev/null
@@ -1,168 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition, broad-except
-# pylint: disable=import-outside-toplevel, redefined-builtin
-"""TF2 to relay converter test: testing models built with tf.keras.Sequential()"""
-
-import tempfile
-import numpy as np
-import pytest
-import tensorflow as tf
-from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
-
-from common import compare_tf_tvm
-from common import run_tf_code
-
-
-def run_sequential_model(model_fn, input_shape):
-    def get_input(shape):
-        _input = np.random.uniform(0, 1, shape).astype(dtype="float32")
-        return _input
-
-    def save_and_reload(_model):
-        with tempfile.TemporaryDirectory() as model_path:
-            tf.saved_model.save(_model, model_path)
-            loaded = tf.saved_model.load(model_path)
-            func = loaded.signatures["serving_default"]
-            frozen_func = convert_variables_to_constants_v2(func)
-        return frozen_func
-
-    def model_graph(model, input_shape):
-        _input = get_input(input_shape)
-        f = save_and_reload(model(input_shape))
-        _output = run_tf_code(f, _input)
-        gdef = f.graph.as_graph_def(add_shapes=True)
-        return gdef, _input, _output
-
-    compare_tf_tvm(*model_graph(model_fn, input_shape), runtime="vm")
-
-
-def test_dense_model():
-    def dense_model(input_shape, num_units=128):
-        return tf.keras.Sequential(
-            [tf.keras.layers.Flatten(input_shape=input_shape[1:]), tf.keras.layers.Dense(num_units)]
-        )
-
-    run_sequential_model(dense_model, input_shape=(1, 28, 28))
-
-
-def test_mnist_model():
-    def mnist_model(input_shape):
-        return tf.keras.Sequential(
-            [
-                tf.keras.layers.Flatten(input_shape=input_shape[1:]),
-                tf.keras.layers.Dense(128, activation="relu"),
-                tf.keras.layers.Dense(10),
-            ]
-        )
-
-    run_sequential_model(mnist_model, input_shape=(1, 28, 28))
-
-
-def test_conv2d_model():
-    def conv2d_model(input_shape, kernel=(3, 3), filters=16):
-        model = tf.keras.Sequential(
-            [
-                tf.keras.layers.Input(shape=input_shape[1:], batch_size=1),
-                tf.keras.layers.Conv2D(filters, kernel),
-            ]
-        )
-        return model
-
-    run_sequential_model(conv2d_model, input_shape=(1, 32, 32, 3))
-
-
-def test_maxpool_model():
-    def maxpool_model(input_shape, pool_size=(2, 2)):
-        model = tf.keras.Sequential(
-            [tf.keras.layers.MaxPool2D(pool_size=pool_size, input_shape=input_shape[1:])]
-        )
-        return model
-
-    run_sequential_model(maxpool_model, input_shape=(1, 32, 32, 3))
-
-
-def test_maxpool_batchnorm_model():
-    def maxpool_batchnorm_model(input_shape, pool_size=(2, 2)):
-        model = tf.keras.Sequential(
-            [
-                tf.keras.layers.MaxPool2D(pool_size=pool_size, input_shape=input_shape[1:]),
-                tf.keras.layers.BatchNormalization(),
-            ]
-        )
-        return model
-
-    run_sequential_model(maxpool_batchnorm_model, input_shape=(1, 32, 32, 3))
-
-
-def test_tensorlist_stack_model():
-    def tensorlist_stack_model(input_shape):
-        class TensorArrayStackLayer(tf.keras.layers.Layer):
-            def __init__(self):
-                super().__init__()
-
-            def call(self, inputs):
-                inputs = tf.squeeze(inputs)
-                outputs = tf.TensorArray(
-                    tf.float32,
-                    size=inputs.shape[0],
-                    infer_shape=False,
-                    element_shape=inputs.shape[1:],
-                )
-                outputs = outputs.unstack(inputs)
-
-                return outputs.stack()
-
-        input_shape = (3, 32)
-        model = tf.keras.Sequential(
-            [tf.keras.layers.Input(shape=input_shape, batch_size=1), TensorArrayStackLayer()]
-        )
-        return model
-
-    run_sequential_model(tensorlist_stack_model, input_shape=(3, 32))
-
-
-def test_tensorlist_read_model():
-    def tensorlist_read_model(input_shape):
-        class TensorArrayReadLayer(tf.keras.layers.Layer):
-            def __init__(self):
-                super().__init__()
-
-            def call(self, inputs):
-                inputs = tf.squeeze(inputs)
-                outputs = tf.TensorArray(
-                    tf.float32,
-                    size=inputs.shape[0],
-                    infer_shape=False,
-                    element_shape=inputs.shape[1:],
-                )
-                for i in range(inputs.shape[0]):
-                    outputs = outputs.write(i, inputs[i, :])
-
-                return outputs.read(0)
-
-        input_shape = (3, 32)
-        model = tf.keras.Sequential(
-            [tf.keras.layers.Input(shape=input_shape, batch_size=1), TensorArrayReadLayer()]
-        )
-        return model
-
-    run_sequential_model(tensorlist_read_model, input_shape=(3, 32))
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/frontend/test_common.py b/tests/python/frontend/test_common.py
deleted file mode 100644
index 2b35ae71f2d6..000000000000
--- a/tests/python/frontend/test_common.py
+++ /dev/null
@@ -1,220 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import numpy as np
-
-from tvm import relay, testing, transform
-from tvm.relay.frontend.common import StrAttrsDict, set_span
-from relay.utils.tag_span import _set_span, _create_span, _verify_structural_equal_with_span
-
-
-def test_key_is_present():
-    attrs = StrAttrsDict({"a": 1})
-    assert attrs.has_attr("a")
-
-
-def test_key_is_not_present():
-    attrs = StrAttrsDict({"a": 1})
-    assert not attrs.has_attr("b")
-
-
-class TestSetSpan:
-    def test_pass_ctx_switch(self):
-        def _res(should_fill):
-            if should_fill:
-                with testing.enable_span_filling():
-                    return set_span(relay.var("x", shape=(1, 64, 56, 56)), "x_var")
-            else:
-                with testing.disable_span_filling():
-                    return set_span(relay.var("x", shape=(1, 64, 56, 56)), "x_var")
-
-        disable = relay.var("x", shape=(1, 64, 56, 56))
-        enable = relay.var("x", shape=(1, 64, 56, 56), span=_create_span("x_var"))
-
-        _verify_structural_equal_with_span(_res(False), disable)
-        _verify_structural_equal_with_span(_res(True), enable)
-
-    # Should tag all exprs without span, and stop when expr is span-tagged
-    def test_builtin_tuple(self):
-        def _res():
-            a = relay.const(np.ones([1, 1, 1]), dtype="int64", span=_create_span("a"))
-            b = relay.const(np.zeros([1, 1, 1]), dtype="int64")
-            return set_span(tuple([a, b]), "tuple")
-
-        def _golden():
-            a = relay.const(np.ones([1, 1, 1]), dtype="int64", span=_create_span("a"))
-            b = relay.const(np.zeros([1, 1, 1]), dtype="int64", span=_create_span("tuple"))
-            return tuple([a, b])
-
-        res_tuple, golden_tuple = _res(), _golden()
-        assert len(res_tuple) == len(golden_tuple)
-        for i in range(len(res_tuple)):
-            _verify_structural_equal_with_span(res_tuple[i], golden_tuple[i])
-
-    def test_builtin_list(self):
-        def _res():
-            a = relay.const(np.ones([1, 1, 1]), dtype="int64", span=_create_span("a"))
-            b = relay.const(np.zeros([1, 1, 1]), dtype="int64")
-            t = relay.Tuple([a, b])
-            t_a = relay.TupleGetItem(t, 0)
-            t_b = relay.TupleGetItem(t, 1)
-            return set_span([t_a, t_b], "list")
-
-        def _golden():
-            a = relay.const(np.ones([1, 1, 1]), dtype="int64", span=_create_span("a"))
-            b = relay.const(np.zeros([1, 1, 1]), dtype="int64", span=_create_span("list"))
-            t = relay.Tuple([a, b], span=_create_span("list"))
-            t_a = relay.TupleGetItem(t, 0, span=_create_span("list"))
-            t_b = relay.TupleGetItem(t, 1, span=_create_span("list"))
-            return [t_a, t_b]
-
-        res_list, golden_list = _res(), _golden()
-        assert len(res_list) == len(golden_list)
-        for i in range(len(res_list)):
-            _verify_structural_equal_with_span(res_list[i], golden_list[i])
-
-    def test_var(self):
-        x = set_span(relay.var("x", shape=(1, 64, 56, 56)), "x_var")
-        x_expected = relay.var("x", shape=(1, 64, 56, 56), span=_create_span("x_var"))
-        _verify_structural_equal_with_span(x, x_expected)
-
-    def test_constant(self):
-        c = set_span(relay.const(np.ones([64, 64, 3, 3]), dtype="int64"), "const_c")
-        c_expected = relay.const(
-            np.ones([64, 64, 3, 3]), dtype="int64", span=_create_span("const_c")
-        )
-        _verify_structural_equal_with_span(c, c_expected)
-
-    def test_call(self):
-        def _res():
-            x = set_span(relay.var("x", shape=(1, 64, 56, 56)), "x_var")
-            w = relay.const(np.ones([64, 64, 3, 3]), dtype="int64")
-            y = set_span(
-                relay.nn.conv2d(x, w, channels=64, kernel_size=(3, 3), padding=(1, 1)), "conv2d"
-            )
-            return relay.Function([x], y)
-
-        def _golden():
-            x = relay.var("x", shape=(1, 64, 56, 56), span=_create_span("x_var"))
-            w = relay.const(np.ones([64, 64, 3, 3]), dtype="int64", span=_create_span("conv2d"))
-            y = _set_span(
-                relay.nn.conv2d(x, w, channels=64, kernel_size=(3, 3), padding=(1, 1)), "conv2d"
-            )
-            return relay.Function([x], y)
-
-        _verify_structural_equal_with_span(_res(), _golden())
-
-    def test_tuple(self):
-        def _res():
-            a = set_span(relay.const(np.ones([1, 1, 1]), dtype="int64"), "a")
-            b = relay.const(np.ones([1, 1, 1]), dtype="int64")
-            t = set_span(relay.Tuple([a, b]), "t")
-            return relay.Function([], t)
-
-        def _golden():
-            a = relay.const(np.ones([1, 1, 1]), dtype="int64", span=_create_span("a"))
-            b = relay.const(np.ones([1, 1, 1]), dtype="int64", span=_create_span("t"))
-            t = relay.Tuple([a, b], span=_create_span("t"))
-            return relay.Function([], t)
-
-        _verify_structural_equal_with_span(_res(), _golden())
-
-    def test_tuple_getitem(self):
-        def _res():
-            a = set_span(relay.const(np.ones([1, 1, 1]), dtype="int64"), "a")
-            b = relay.const(np.ones([1, 1, 1]), dtype="int64")
-            t = relay.Tuple([a, b])
-            i = set_span(relay.TupleGetItem(t, 0), "i")
-            return relay.Function([], i)
-
-        def _golden():
-            a = relay.const(np.ones([1, 1, 1]), dtype="int64", span=_create_span("a"))
-            b = relay.const(np.ones([1, 1, 1]), dtype="int64", span=_create_span("i"))
-            t = relay.Tuple([a, b], span=_create_span("i"))
-            i = relay.TupleGetItem(t, 0, span=_create_span("i"))
-            return relay.Function([], i)
-
-        _verify_structural_equal_with_span(_res(), _golden())
-
-    def test_let(self):
-        def _res():
-            x = set_span(relay.Var("x"), "x_var")
-            c_1 = relay.const(np.ones(10))
-            add = relay.add(x, x)
-            body = set_span(relay.Let(x, c_1, add), "let")
-
-            c_2 = set_span(relay.const(np.zeros(10)), "zeros")
-            y = set_span(relay.add(body, c_2), "add_2")
-            return relay.Function([x], y)
-
-        def _golden():
-            x = relay.Var("x", span=_create_span("x_var"))
-            c_1 = relay.const(np.ones(10), span=_create_span("let"))
-            add = _set_span(relay.add(x, x), "let")
-            body = relay.Let(x, c_1, add, span=_create_span("let"))
-
-            c_2 = relay.const(np.zeros(10), span=_create_span("zeros"))
-            y = _set_span(relay.add(body, c_2), "add_2")
-            return relay.Function([x], y)
-
-        _verify_structural_equal_with_span(_res(), _golden())
-
-    def test_if(self):
-        def _res():
-            x = set_span(relay.var("x", shape=[], dtype="float32"), "x_var")
-            y = set_span(relay.var("y", shape=[], dtype="float32"), "y_var")
-            eq = relay.equal(x, y)
-
-            true_branch = set_span(relay.add(x, y), "true_branch")
-            false_branch = relay.subtract(x, y)
-            ife = set_span(relay.If(eq, true_branch, false_branch), "if")
-            return relay.Function([x, y], ife)
-
-        def _golden():
-            x = relay.var("x", shape=[], dtype="float32", span=_create_span("x_var"))
-            y = relay.var("y", shape=[], dtype="float32", span=_create_span("y_var"))
-            eq = _set_span(relay.equal(x, y), "if")
-
-            true_branch = _set_span(relay.add(x, y), "true_branch")
-            false_branch = _set_span(relay.subtract(x, y), "if")
-            ife = relay.If(eq, true_branch, false_branch, span=_create_span("if"))
-            return relay.Function([x, y], ife)
-
-        _verify_structural_equal_with_span(_res(), _golden())
-
-    def test_fn(self):
-        def _res():
-            x = set_span(relay.var("x", shape=(1, 64, 56, 56)), "x_var")
-            w = relay.const(np.ones([64, 64, 3, 3]), dtype="int64")
-            y = relay.nn.conv2d(x, w, channels=64, kernel_size=(3, 3), padding=(1, 1))
-            f = set_span(relay.Function([x], y), "func")
-            return f
-
-        def _golden():
-            x = relay.var("x", shape=(1, 64, 56, 56), span=_create_span("x_var"))
-            w = relay.const(np.ones([64, 64, 3, 3]), dtype="int64", span=_create_span("func"))
-            y = _set_span(
-                relay.nn.conv2d(x, w, channels=64, kernel_size=(3, 3), padding=(1, 1)), "func"
-            )
-            f = relay.Function([x], y, span=_create_span("func"))
-            return f
-
-        _verify_structural_equal_with_span(_res(), _golden())
-
-
-if __name__ == "__main__":
-    testing.main()
diff --git a/tests/python/frontend/tflite/test_forward.py b/tests/python/frontend/tflite/test_forward.py
deleted file mode 100644
index cb0b17ea3fcf..000000000000
--- a/tests/python/frontend/tflite/test_forward.py
+++ /dev/null
@@ -1,5722 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=unused-argument, import-outside-toplevel, inconsistent-return-statements
-"""
-TFLite testcases
-================
-This article is a test script to test TFLite operator with Relay.
-"""
-from __future__ import print_function
-from functools import partial
-import platform
-import os
-import tempfile
-import typing
-from packaging import version as package_version
-import pytest
-import numpy as np
-
-from PIL import Image
-
-from tflite.BuiltinOperator import BuiltinOperator
-
-try:
-    import tensorflow.compat.v1 as tf
-
-    # tensorflow.python.framework.ops module itself is not part of
-    # TensorFlow's public API: the precise contents of that module
-    # may vary from one version to the next
-    import tensorflow.compat.v1 as ops
-except ImportError:
-    import tensorflow as tf
-    import tensorflow as ops
-from tensorflow.python.framework import constant_op
-
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import image_ops
-from tensorflow.python.ops import gen_array_ops
-from tensorflow.python.ops import nn_impl
-from tensorflow.python.ops import variables
-from tensorflow import raw_ops
-
-try:
-    from tensorflow import lite as interpreter_wrapper
-except ImportError:
-    from tensorflow.contrib import lite as interpreter_wrapper
-
-import tvm
-import tvm.relay.testing.tf as tf_testing
-from tvm.contrib.download import download_testdata
-from tvm import relay, ir
-from tvm.contrib import graph_executor
-from relay.utils.tag_span import _set_span, _create_span, _verify_structural_equal_with_span
-
-
-#######################################################################
-# Generic run functions for TVM & TFLite
-# --------------------------------------
-def convert_to_list(x):
-    if not isinstance(x, list):
-        x = [x]
-    return x
-
-
-#######################################################################
-# Get a real image for e2e testing
-# --------------------------------
-def get_real_image(im_height, im_width, quantized=True):
-    repo_base = "https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/"
-    img_name = "elephant-299.jpg"
-    image_url = os.path.join(repo_base, img_name)
-    img_path = download_testdata(image_url, img_name, module="data")
-    image = Image.open(img_path).resize((im_height, im_width))
-    x = np.array(image).astype("uint8") if quantized else np.array(image).astype("float32")
-    data = np.reshape(x, (1, im_height, im_width, 3))
-    return data
-
-
-def pre_processed_image(height, width):
-    """Image preprocessed"""
-    repo_base = "https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/"
-    img_name = "elephant-299.jpg"
-    image_url = os.path.join(repo_base, img_name)
-    img_path = download_testdata(image_url, img_name, module="data")
-    image = tf.io.read_file(img_path)
-    image = tf.image.decode_jpeg(image, channels=3)
-    with tf.name_scope("eval_image"):
-        if image.dtype != tf.float32:
-            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
-        image = tf.image.central_crop(image, central_fraction=0.875)
-    # Resize the image to the specified height and width.
-    image = tf.image.resize(image, [height, width], align_corners=False)
-    image = tf.expand_dims(image, axis=0)
-    return image
-
-
-def get_real_image_object_detection(im_height, im_width):
-    repo_base = "https://github.com/dmlc/web-data/raw/main/gluoncv/detection/"
-    img_name = "street_small.jpg"
-    image_url = os.path.join(repo_base, img_name)
-    img_path = download_testdata(image_url, img_name, module="data")
-    image = Image.open(img_path).resize((im_height, im_width))
-    x = np.array(image).astype("uint8")
-    data = np.reshape(x, (1, im_height, im_width, 3))
-    return data
-
-
-def vmobj_to_list(obj):
-    """Converts TVM objects returned by VM execution to Python List."""
-    if isinstance(obj, tvm.nd.NDArray):
-        return [obj.numpy().tolist()]
-    elif isinstance(obj, tvm.runtime.container.ADT):
-        result = []
-        for f in obj:
-            result.extend(vmobj_to_list(f))
-        return result
-    elif isinstance(obj, tvm.relay.backend.interpreter.ConstructorValue):
-        if obj.constructor.name_hint == "Cons":
-            t_l = vmobj_to_list(obj.fields[1])
-            h_d = vmobj_to_list(obj.fields[0])
-            h_d.extend(t_l)
-            return h_d
-        elif obj.constructor.name_hint == "Nil":
-            return []
-        elif "tensor_nil" in obj.constructor.name_hint:
-            return [0]
-        elif "tensor" in obj.constructor.name_hint:
-            return [obj.fields[0].numpy()]
-        else:
-            raise RuntimeError(f"Unknown object type: {obj.constructor.name_hint}")
-    else:
-        raise RuntimeError(f"Unknown object type: {type(obj)}")
-
-
-def _quantize_keras_model(
-    keras_model,
-    representative_data_gen,
-    is_float_input=False,
-    is_float_output=False,
-    int_quant_dtype=tf.int8,
-):
-    """Utility function to quantize a Keras model using TFLite converter."""
-    converter = interpreter_wrapper.TFLiteConverter.from_keras_model(keras_model)
-    if int_quant_dtype == tf.int8:
-        converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
-        converter.representative_dataset = representative_data_gen
-        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
-        inference_dtype = tf.uint8
-    elif int_quant_dtype == tf.int16:
-        converter.optimizations = [tf.lite.Optimize.DEFAULT]
-        converter.representative_dataset = representative_data_gen
-        converter.target_spec.supported_ops = [
-            tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
-        ]
-        inference_dtype = tf.uint16
-    else:
-        raise RuntimeError(
-            f"Invalid quantized dtype {int_quant_dtype}. Supported types: int8, int16."
-        )
-
-    # NOTE: If representative dataset is provided, and inference input type is not set,
-    #       then converter will self add quant & dequant Op accordingly.
-    if not is_float_input:
-        converter.inference_input_type = inference_dtype
-    if not is_float_output:
-        converter.inference_output_type = inference_dtype
-
-    return converter.convert()
-
-
-def run_tvm_graph(
-    tflite_model_buf,
-    input_data,
-    input_node,
-    num_output=1,
-    target="llvm",
-    out_names=None,
-    mode="graph_executor",
-    op_converter=relay.frontend.tflite.OperatorConverter,
-):
-    """Generic function to compile on relay and execute on tvm"""
-    # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1
-    try:
-        import tflite.Model
-
-        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0)
-    except AttributeError:
-        import tflite
-
-        tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0)
-    except ImportError as exc:
-        raise ImportError("The tflite package must be installed") from exc
-
-    input_data = convert_to_list(input_data)
-    input_node = convert_to_list(input_node)
-
-    shape_dict = {}
-    dtype_dict = {}
-    for i, node in enumerate(input_node):
-        shape_dict[node] = input_data[i].shape
-        dtype_dict[node] = input_data[i].dtype.name
-
-    with tvm.testing.disable_span_filling():
-        mod, params = relay.frontend.from_tflite(
-            tflite_model, shape_dict=shape_dict, dtype_dict=dtype_dict, op_converter=op_converter
-        )
-    with tvm.testing.enable_span_filling():
-        mod_with_span, _ = relay.frontend.from_tflite(
-            tflite_model, shape_dict=shape_dict, dtype_dict=dtype_dict, op_converter=op_converter
-        )
-    tvm.ir.assert_structural_equal(mod["main"], mod_with_span["main"])
-
-    if mode in ["debug", "vm"]:
-        inputs = []
-        for param in mod["main"].params:
-            found = False
-            for i, n in enumerate(input_node):
-                if n == param.name_hint:
-                    found = True
-                    inputs.append(tvm.nd.array(input_data[i]))
-                    break
-            # Interpreter doesn't bind constants, so still need to find in params
-            if not found:
-                inputs.append(tvm.nd.array(params[param.name_hint]))
-        result = relay.create_executor(mode, mod=mod, device=tvm.cpu(), target="llvm").evaluate()(
-            *inputs
-        )
-        return vmobj_to_list(result)
-    else:
-        with tvm.transform.PassContext(opt_level=3):
-            lib = relay.build(mod, target, params=params)
-
-        dev = tvm.device(target, 0)
-
-        m = graph_executor.GraphModule(lib["default"](dev))
-        # set inputs
-        for i, node in enumerate(input_node):
-            m.set_input(node, tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
-        # execute
-        m.run()
-        # get outputs
-        assert out_names is None or num_output == len(
-            out_names
-        ), f"out_names: {out_names} num_output: {num_output}"
-        tvm_output_list = []
-        for i in range(0, num_output):
-            tvm_output = m.get_output(i)
-            tvm_output_list.append(tvm_output.numpy())
-        return tvm_output_list
-
-
-def run_tflite_graph(tflite_model_buf, input_data):
-    """Generic function to execute TFLite"""
-    input_data = convert_to_list(input_data)
-
-    interpreter = interpreter_wrapper.Interpreter(model_content=tflite_model_buf)
-    input_details = interpreter.get_input_details()
-    output_details = interpreter.get_output_details()
-
-    for i, input_detail in enumerate(input_details):
-        interpreter.resize_tensor_input(input_detail["index"], input_data[i].shape)
-    interpreter.allocate_tensors()
-
-    # set input
-    assert len(input_data) == len(input_details)
-    for i, input_detail in enumerate(input_details):
-        interpreter.set_tensor(input_detail["index"], input_data[i])
-
-    # Run
-    interpreter.invoke()
-
-    # get output
-    tflite_output = []
-    for _, output_detail in enumerate(output_details):
-        tflite_output.append(interpreter.get_tensor(output_detail["index"]))
-
-    return tflite_output
-
-
-def compare_tflite_with_tvm(
-    in_data: typing.List[np.ndarray],
-    in_name: typing.List[str],
-    input_tensors: typing.List,
-    output_tensors: typing.List,
-    init_global_variables: bool = False,
-    out_names=None,
-    quantized=False,
-    input_range=None,
-    mode="graph_executor",
-    experimental_new_converter=False,
-    fp16_quantized=False,
-    int_quant_dtype=tf.uint8,
-):
-    """Generic function to generate and compare TFLite and TVM output"""
-    in_data = convert_to_list(in_data)
-    in_name = convert_to_list(in_name)
-    out_names = convert_to_list(out_names)
-    in_node = [0] * len(in_name)
-    for i, _ in enumerate(in_name):
-        in_node[i] = in_name[i].split(":")[0] if ":" in in_name[i] else in_name[i]
-
-    with tf.Session() as sess:
-        if init_global_variables:
-            sess.run(variables.global_variables_initializer())
-        # convert to tflite model
-        converter = tf.lite.TFLiteConverter.from_session(sess, input_tensors, output_tensors)
-
-        if len(input_tensors) > 1:
-            if len(input_tensors[0].shape) <= 4 and len(input_tensors[1].shape) <= 4:
-                converter._experimental_disable_batchmatmul_unfold = True
-            else:
-                converter._experimental_disable_batchmatmul_unfold = False
-
-        converter.experimental_new_converter = experimental_new_converter
-        if quantized:
-            if int_quant_dtype == tf.int16:
-                converter.optimizations = [tf.lite.Optimize.DEFAULT]
-                converter.target_spec.supported_ops = [
-                    tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
-                ]
-            elif int_quant_dtype == tf.int8:
-                converter.inference_type = tf.lite.constants.INT8
-            else:
-                # default to int8 quantization
-                converter.inference_type = tf.lite.constants.QUANTIZED_UINT8
-
-            input_arrays = converter.get_input_arrays()
-            input_stats = {}
-            # calculate the mean and quantization scale for every input tensor,
-            # with respect to its fp32 input range, defined in fake_quant.
-            # s = 255/(fmax-fmin);  m = -fmin*s (the zero point)
-            for i in input_arrays:
-                try:
-                    quant_scale = 255 / (input_range[i][1] - input_range[i][0])
-                except ZeroDivisionError:
-                    print("Min and max of the input range for tensor " + i + " can't be equal")
-                mean = -input_range[i][0] * quant_scale
-                input_stats[i] = (mean, quant_scale)
-            converter.quantized_input_stats = input_stats
-        elif fp16_quantized:
-            converter.optimizations = [tf.lite.Optimize.DEFAULT]
-            converter.target_spec.supported_types = [tf.float16]
-
-        tflite_model_buffer = converter.convert()
-        tflite_output = run_tflite_graph(tflite_model_buffer, in_data)
-
-        for device in ["llvm"]:
-            _ = tvm.device(device, 0)
-            if not tvm.testing.device_enabled(device):
-                print(f"Skip because {device} is not enabled")
-                continue
-
-            tvm_output = run_tvm_graph(
-                tflite_model_buffer,
-                in_data,
-                in_node,
-                target=device,
-                num_output=len(out_names),
-                out_names=out_names,
-                mode=mode,
-            )
-            # WARNING: the results could well be random values clipped to 0 or 255 because of badly
-            # tuned output range for the specific operator. While adding test ensure that we aren't
-            # getting only clipped values in output tensors that still pass the assertion.
-            # For reference see _test_elemwise_qnn_out_range()
-            if quantized and not fp16_quantized:
-                for i, _ in enumerate(tflite_output):
-                    # allow absolute tolerance of 1 in the quantized results
-                    tvm.testing.assert_allclose(
-                        tflite_output[i],  # pylint: disable=unnecessary-list-index-lookup
-                        tvm_output[i],
-                        atol=1,
-                        rtol=1e-5,
-                    )
-            else:
-                for i, _ in enumerate(tflite_output):
-                    tvm.testing.assert_allclose(
-                        tflite_output[i],  # pylint: disable=unnecessary-list-index-lookup
-                        tvm_output[i],
-                        atol=1e-5,
-                        rtol=1e-5,
-                    )
-
-
-def with_fused_activation_function(input_tensor, fn_name):
-    """Fused activation function"""
-    if fn_name is None or fn_name == "NONE":
-        return input_tensor
-    if fn_name == "RELU":
-        return nn_ops.relu(input_tensor)
-    if fn_name == "RELU6":
-        return nn_ops.relu6(input_tensor)
-    if fn_name == "RELU_N1_TO_1":
-        return math_ops.maximum(-1, math_ops.minimum(input_tensor, 1))
-    if fn_name == "TANH":
-        return math_ops.tanh(input_tensor)
-    raise AssertionError(f"Unknown fused_activation_function {fn_name}")
-
-
-def _test_split(in_shape, axis, num_splits, dtype):
-    """internal split tester taking as parameters in_shape, number of tensors to split into
-    and dtype (data type)"""
-
-    np_data = np.random.uniform(-5, 5, size=in_shape).astype(dtype)
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=in_shape, dtype=dtype, name="in_data")
-        out = array_ops.split(in_data, num_splits, axis=axis)
-        num_splits = len(num_splits) if isinstance(num_splits, list) else num_splits
-        out_names = ["out_" + str(n) + ":0" for n in range(num_splits)]
-        compare_tflite_with_tvm([np_data], ["in_data"], [in_data], out, out_names=out_names)
-
-
-def test_forward_split():
-    """test split layer"""
-    # rank 1
-    _test_split((3,), 0, 1, "float32")
-    _test_split((3,), 0, 3, "float32")
-    _test_split((6,), 0, 3, "float32")
-    # rank 2
-    _test_split((6, 2), 0, 3, "float32")
-    _test_split((2, 6), 1, 6, "float32")
-    # rank 3
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_split((6, 2, 4), 0, 2, "int32")
-
-    _test_split((2, 6, 4), 1, 3, "float32")
-    _test_split((2, 4, 6), 2, 1, "float32")
-    # rank 4
-    _test_split((6, 1, 3, 5), 0, 3, "float32")
-    _test_split((1, 6, 3, 5), 1, 3, "float32")
-    _test_split((1, 3, 6, 5), 2, 3, "float32")
-    _test_split((1, 3, 5, 6), 3, 3, "float32")
-    # split along negative axis
-    _test_split((6, 1, 3, 5), -4, 3, "float32")
-    _test_split((1, 6, 3, 5), -3, 3, "float32")
-    _test_split((1, 3, 6, 5), -2, 3, "float32")
-    _test_split((1, 3, 5, 6), -1, 3, "float32")
-    # size_splits split
-    _test_split((6,), 0, [1, 2, 3], "float32")
-    _test_split((3, 6, 4), -2, [1, 4, 1], "float32")
-
-
-#######################################################################
-# slice
-# -----
-
-
-def _test_slice(data, begin, size):
-    """One iteration of SLICE"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = array_ops.slice(in_data, begin, size)
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_slice():
-    """SLICE"""
-    _test_slice(np.arange(4, dtype=np.float32).reshape((4,)), begin=[0], size=[2])
-    _test_slice(np.arange(18, dtype=np.int32).reshape((3, 2, 3)), begin=[1, 0, 0], size=[1, 1, 3])
-    # tflite 1.13 outputs nonsense values if size[i] == -1
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_slice(np.arange(8, dtype=np.int32).reshape((2, 4)), begin=[0, 1], size=[-1, -1])
-        _test_slice(np.arange(5, dtype=np.int32).reshape((5,)), begin=[4], size=[-1])
-
-
-#######################################################################
-# Topk
-# ----
-def _test_topk(in_shape, k=1):
-    """One iteration of TOPK"""
-    data = np.random.uniform(size=in_shape).astype("float32")
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = nn_ops.top_k(in_data, k, name="TopK")
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out[0]])
-
-
-def test_forward_topk():
-    """TOPK"""
-    _test_topk((3,), 1)
-    _test_topk((3,), 3)
-    _test_topk((3, 5, 7), 3)
-    _test_topk((3, 5, 7), 3)
-
-
-#######################################################################
-# Gather
-# ------
-
-
-def _test_gather(dshape, indices, axis, dtype, quantized=False, oob=False, wrap_idx=False):
-    """One iteration of Gather"""
-    indices = np.asarray(indices).astype("int32")
-    data = np.random.uniform(1, 10, size=dshape)
-    data = data.astype(np.uint8) if quantized else data.astype(dtype)
-    with tf.Graph().as_default():
-        if wrap_idx:
-            in_name = "in_indices"
-            indices_expr = array_ops.placeholder(
-                shape=indices.shape, dtype=indices.dtype, name=in_name
-            )
-            in_tensor_name = [in_name + ":0"]
-            in_indices = [indices_expr]
-        else:
-            indices_expr = indices
-            indices = []
-            in_tensor_name = []
-            in_indices = []
-
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype, name="in_data")
-        if axis:
-            out = array_ops.gather(in_data, indices_expr, axis=axis)
-        else:
-            out = array_ops.gather(in_data, indices_expr)  # tflite conversion fails for None axis
-        input_range = {"in_data": (-100, 100)} if quantized else None
-        try:
-            compare_tflite_with_tvm(
-                [data] + indices,
-                ["in_data:0"] + in_tensor_name,
-                [in_data] + in_indices,
-                [out],
-                quantized=quantized,
-                input_range=input_range,
-            )
-        except ValueError as exc:
-            if not oob:
-                raise exc
-        except Exception as exc:
-            raise exc
-
-
-def test_forward_gather():
-    """GATHER"""
-    for quantized in [False, True]:
-        for wrap_idx in [False, True]:
-            _test_gather((4,), [1], 0, "float32", quantized, wrap_idx)
-            _test_gather((4,), [1], None, "int32", quantized, wrap_idx)
-            _test_gather((1, 4), [0], 0, "int32", quantized, wrap_idx)
-            _test_gather((4,), [[[1, 0], [0, 1]]], 0, "float32", quantized, wrap_idx)
-            _test_gather((2, 2), [[[1, 0], [0, 1]]], 1, "int32", quantized, wrap_idx)
-            _test_gather((2, 2), [[[1, 0], [0, 1]]], None, "float32", quantized, wrap_idx)
-            _test_gather((3, 3, 3), [[[1, 0]]], 0, "int32", quantized, wrap_idx)
-            _test_gather((3, 3, 3), [[[1, 0]]], 2, "int32", quantized, wrap_idx)
-            _test_gather((4, 3, 5, 6), [[2, 1, 0, 0]], 0, "float32", quantized, wrap_idx)
-            _test_gather((3, 3, 3), [[[2, 1]]], -1, "int32", quantized, wrap_idx)
-        # Out of boundary error cannot be tested with wrapped index
-        _test_gather((4,), [16], 0, "float32", quantized, oob=True)
-        _test_gather((1, 3, 3), [12], 0, "int32", quantized, oob=True)
-        _test_gather((1, 3, 3), [20], 1, "float32", quantized, oob=True)
-        _test_gather((1, 3, 3), [20, 20], 2, "float32", quantized, oob=True)
-
-
-#######################################################################
-# Gather_ND
-# ---------
-
-
-def _test_gather_nd(data, indices):
-    """One iteration of GATHER_ND"""
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(shape=data.shape, dtype=data.dtype, name="data")
-        indices_data = tf.placeholder(shape=indices.shape, dtype=indices.dtype, name="indices")
-        out = tf.gather_nd(in_data, indices_data)
-
-        compare_tflite_with_tvm(
-            [data, indices], ["data:0", "indices:0"], [in_data, indices_data], [out]
-        )
-
-
-def test_forward_gather_nd():
-    """GATHER_ND"""
-    _test_gather_nd(
-        np.array([[[1.2, 2.0], [3.1, 4.1]], [[5.1, 6.1], [7.1, 8.1]]]).astype("float32"),
-        np.asarray([[0, 1], [1, 0]]).astype("int32"),
-    )
-    _test_gather_nd(
-        np.reshape(np.arange(30), [5, 6]).astype("int32"), np.asarray([[1, 2]]).astype("int32")
-    )
-    _test_gather_nd(
-        np.reshape(np.arange(12), [2, 3, 2]).astype("int32"),
-        np.asarray([[[0, 0], [0, 1]], [[1, 0], [1, 1]]]).astype("int32"),
-    )
-    _test_gather_nd(
-        np.reshape(np.arange(4), [4]).astype("float32"), np.asarray([1]).astype("int32")
-    )
-    _test_gather_nd(
-        np.reshape(np.arange(4), [1, 4]).astype("float32"), np.asarray([0]).astype("int32")
-    )
-    _test_gather_nd(
-        np.reshape(np.arange(4), [1, 4]).astype("float32"), np.asarray([0, 3]).astype("int32")
-    )
-
-
-#######################################################################
-# StridedSlice
-# ------------
-
-
-def _test_stridedslice(
-    ip_shape,
-    begin,
-    end,
-    stride,
-    dtype,
-    begin_mask=0,
-    end_mask=0,
-    new_axis_mask=0,
-    shrink_axis_mask=0,
-    ellipsis_mask=0,
-    quantized=False,
-):
-    """One iteration of a Stridedslice"""
-    data = np.random.uniform(size=ip_shape).astype(dtype)
-    data = data.astype(np.uint8) if quantized else data.astype(dtype)
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype, ip_shape, name="in_data")
-        out = array_ops.strided_slice(
-            in_data,
-            begin,
-            end,
-            stride,
-            begin_mask=begin_mask,
-            end_mask=end_mask,
-            new_axis_mask=new_axis_mask,
-            shrink_axis_mask=shrink_axis_mask,
-            ellipsis_mask=ellipsis_mask,
-        )
-        input_range = {"in_data": (-100, 100)} if quantized else None
-        compare_tflite_with_tvm(
-            [data], ["in_data:0"], [in_data], [out], quantized=quantized, input_range=input_range
-        )
-
-
-def test_forward_stridedslice():
-    """test StridedSlice"""
-    for quantized in [False, True]:
-        _test_stridedslice(
-            (1, 3, 3),
-            [0, 0, 0],
-            [3, 3, 3],
-            [1, 1, 1],
-            "float32",
-            shrink_axis_mask=7,
-            quantized=quantized,
-        )
-        _test_stridedslice(
-            (1, 3, 3),
-            [0, 0, 0],
-            [3, 3, 3],
-            [1, 1, 1],
-            "float32",
-            shrink_axis_mask=5,
-            quantized=quantized,
-        )
-        _test_stridedslice((2), [1], [1], [1], "float32", shrink_axis_mask=1, quantized=quantized)
-        _test_stridedslice(
-            (3, 4, 3), [1, -1, 0], [4, -5, 3], [2, -1, 1], "float32", quantized=quantized
-        )
-        _test_stridedslice(
-            (3, 4), [1, 0], [4, 4], [1, 1], "float32", shrink_axis_mask=0, quantized=quantized
-        )
-        _test_stridedslice(
-            (4, 4), [1, 0], [4, 4], [1, 1], "float32", shrink_axis_mask=2, quantized=quantized
-        )
-        _test_stridedslice(
-            (3, 4), [-1, 0], [0, 3], [1, 1], "float32", shrink_axis_mask=1, quantized=quantized
-        )
-
-
-#######################################################################
-# transpose
-# ---------
-
-
-def _test_forward_transpose(ishape, axes=()):
-    data = np.random.uniform(size=ishape).astype(np.float32)
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-
-        if not axes:
-            out = array_ops.transpose(in_data)
-        else:
-            out = array_ops.transpose(in_data, axes)
-
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_transpose():
-    _test_forward_transpose((2, 2))
-    _test_forward_transpose((2, 3, 4))
-    _test_forward_transpose((7, 8, 8, 10))
-    _test_forward_transpose((2, 3, 4), (1, 2, 0))
-    _test_forward_transpose((2, 3, 4), (0, 1, 2))
-    _test_forward_transpose((2, 3, 4, 5), (3, 0, 1, 2))
-    _test_forward_transpose((2, 3, 4, 5), ())
-
-
-#######################################################################
-# Cast
-# ----
-
-
-def _test_cast(data, cast_dtype, use_mlir=False):
-    """One iteration of CAST"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = math_ops.cast(in_data, cast_dtype)
-        compare_tflite_with_tvm(
-            data, "Placeholder:0", [in_data], [out], experimental_new_converter=use_mlir
-        )
-
-
-def test_forward_cast():
-    """CAST"""
-    for use_mlir in [False, True]:
-        _test_cast(
-            np.arange(6.0, dtype=np.float32).reshape((1, 6)), cast_dtype=tf.int32, use_mlir=use_mlir
-        )
-        _test_cast(
-            np.arange(6.0, dtype=np.float32).reshape((1, 6)), cast_dtype=tf.uint8, use_mlir=use_mlir
-        )
-        _test_cast(
-            np.arange(6.0, dtype=np.int32).reshape((1, 6)), cast_dtype=tf.int64, use_mlir=use_mlir
-        )
-
-
-#######################################################################
-# Batch Mat Mul
-# ----
-def _test_batch_matmul(
-    a_shape, b_shape, dtype, out_dtype, adjoint_a=False, adjoint_b=False, quantized=False
-):
-    with tf.Graph().as_default():
-        a = array_ops.placeholder(shape=a_shape, dtype=dtype, name="A")
-        b = array_ops.placeholder(shape=b_shape, dtype=dtype, name="B")
-        print(tf.__version__)
-
-        result = raw_ops.BatchMatMulV3(
-            x=a, y=b, Tout=out_dtype, adj_x=adjoint_a, adj_y=adjoint_b, name="batchmatmul"
-        )
-        input_range = {"A": (-100, 100), "B": (-100, 100)} if quantized else None
-
-        a_np = np.random.uniform(high=5.0, size=a_shape).astype(dtype)
-        b_np = np.random.uniform(high=5.0, size=b_shape).astype(dtype)
-        compare_tflite_with_tvm(
-            [a_np, b_np],
-            [a.name, b.name],
-            [a, b],
-            [result],
-            experimental_new_converter=True,
-            quantized=quantized,
-            input_range=input_range,
-        )
-
-
-@pytest.mark.parametrize("config", [("int8", "int32", True), ("float32", "float32", False)])
-def test_forward_batch_matmul(config):
-    """BATCH_MAT_MUL"""
-    _test_batch_matmul(
-        (3, 5, 4), (3, 4, 5), dtype=config[0], out_dtype=config[1], quantized=config[2]
-    )
-    _test_batch_matmul(
-        (3, 5, 4),
-        (3, 4, 5),
-        dtype=config[0],
-        out_dtype=config[1],
-        adjoint_a=True,
-        adjoint_b=True,
-        quantized=config[2],
-    )
-    _test_batch_matmul(
-        (3, 5, 4),
-        (3, 5, 4),
-        dtype=config[0],
-        out_dtype=config[1],
-        adjoint_a=True,
-        adjoint_b=False,
-        quantized=config[2],
-    )
-    _test_batch_matmul(
-        (2, 3, 5, 4),
-        (1, 3, 5, 4),
-        dtype=config[0],
-        out_dtype=config[1],
-        adjoint_a=True,
-        adjoint_b=False,
-        quantized=config[2],
-    )
-    _test_batch_matmul(
-        (3, 5, 4),
-        (3, 5, 4),
-        dtype=config[0],
-        out_dtype=config[1],
-        adjoint_a=False,
-        adjoint_b=True,
-        quantized=config[2],
-    )
-    _test_batch_matmul(
-        (2, 3, 5, 4),
-        (1, 3, 5, 4),
-        dtype=config[0],
-        out_dtype=config[1],
-        adjoint_a=False,
-        adjoint_b=True,
-        quantized=config[2],
-    )
-    _test_batch_matmul(
-        (3, 4, 5, 6), (3, 4, 6, 5), dtype=config[0], out_dtype=config[1], quantized=config[2]
-    )
-    # BatchMatMul doesn't support larger than 4D tensors
-    # _test_batch_matmul(
-    #    (2, 3, 4, 5, 6), (2, 3, 4, 6, 5), dtype=config[0], out_dtype=config[1], quantized=config[2]
-    # )
-
-
-#######################################################################
-# Tile
-# ----
-
-
-def _test_forward_tile(in_shape, reps, dtype):
-    data = np.random.uniform(-5, 5, size=in_shape).astype(dtype)
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-
-        out = array_ops.tile(in_data, reps)
-
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_tile():
-    _test_forward_tile((2,), (3,), "int32")
-    _test_forward_tile((2, 2), (2, 3), "float32")
-
-
-######################################################################
-# BatchToSpaceND
-# --------------
-
-
-def _test_batch_to_space_nd(input_shape, block_shape, crops, dtype="int32"):
-    data = np.random.uniform(0, 5, size=input_shape).astype(dtype)
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=input_shape, dtype=dtype)
-
-        out = array_ops.batch_to_space_nd(in_data, block_shape, crops)
-
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_batch_to_space_nd():
-    # test cases: https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/batch-to-space-n-d
-    _test_batch_to_space_nd(input_shape=[4, 1, 1, 1], block_shape=[2, 2], crops=[[0, 0], [0, 0]])
-
-    _test_batch_to_space_nd(input_shape=[4, 1, 1, 3], block_shape=[2, 2], crops=[[0, 0], [0, 0]])
-
-    _test_batch_to_space_nd(input_shape=[4, 2, 2, 1], block_shape=[2, 2], crops=[[0, 0], [0, 0]])
-
-    _test_batch_to_space_nd(input_shape=[4, 3, 3, 1], block_shape=[2, 2], crops=[[0, 1], [0, 1]])
-
-
-######################################################################
-# SpaceToBatchND
-# --------------
-
-
-def _test_space_to_batch_nd(input_shape, block_shape, paddings, dtype="int32"):
-    data = np.random.uniform(0, 5, size=input_shape).astype(dtype)
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=input_shape, dtype=dtype)
-
-        out = array_ops.space_to_batch_nd(in_data, block_shape, paddings)
-
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_space_to_batch_nd():
-    # test cases: https://www.tensorflow.org/api_docs/python/tf/space_to_batch_nd
-    _test_space_to_batch_nd(input_shape=[1, 2, 2, 1], block_shape=[2, 2], paddings=[[0, 0], [0, 0]])
-
-    _test_space_to_batch_nd(input_shape=[1, 2, 2, 3], block_shape=[2, 2], paddings=[[0, 0], [0, 0]])
-
-    _test_space_to_batch_nd(input_shape=[1, 4, 4, 1], block_shape=[2, 2], paddings=[[0, 0], [0, 0]])
-
-    _test_space_to_batch_nd(input_shape=[2, 2, 4, 1], block_shape=[2, 2], paddings=[[0, 0], [2, 0]])
-
-
-#######################################################################
-# Pooling
-# -------
-def _test_pooling_iteration(input_shape, **kwargs):
-    """One iteration of pool operation with given shapes and attributes"""
-
-    x = -np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) - 1
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=input_shape, dtype="float32")
-        out = nn_ops.pool(in_data, **kwargs)
-
-        compare_tflite_with_tvm(x, "Placeholder:0", [in_data], [out])
-
-
-def _test_pooling(input_shape, **kwargs):
-    _test_pooling_iteration(input_shape, **kwargs)
-
-
-def test_forward_pooling():
-    """Pooling"""
-
-    for pool_type in ["AVG", "MAX"]:
-        _test_pooling(
-            input_shape=[2, 9, 10, 2],
-            window_shape=[1, 1],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[1, 1],
-        )
-
-        _test_pooling(
-            input_shape=[2, 10, 9, 2],
-            window_shape=[1, 1],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[1, 1],
-        )
-
-        _test_pooling(
-            input_shape=[2, 9, 10, 2],
-            window_shape=[2, 1],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[1, 1],
-        )
-
-        _test_pooling(
-            input_shape=[2, 10, 9, 2],
-            window_shape=[2, 3],
-            padding="SAME",
-            pooling_type=pool_type,
-            dilation_rate=[1, 1],
-            strides=[2, 1],
-        )
-
-
-def _test_l2_pool2d(input_shape, ksize, strides, padding, data_format, fused_func_name=None):
-    x = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) - 1
-
-    with tf.Graph().as_default():
-        in_data = tf.placeholder(dtype=tf.float32, name="input", shape=input_shape)
-        out = tf.sqrt(
-            tf.nn.avg_pool(
-                tf.square(in_data),
-                ksize=ksize,
-                strides=strides,
-                padding=padding,
-                data_format=data_format,
-            )
-        )
-        out = with_fused_activation_function(out, fused_func_name)
-
-        compare_tflite_with_tvm(x, "input", [in_data], [out])
-
-
-def test_forward_l2_pool2d():
-    _test_l2_pool2d([1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], "SAME", "NHWC", "RELU6")
-    _test_l2_pool2d([2, 9, 10, 2], [1, 1, 1, 1], [1, 1, 1, 1], "SAME", "NHWC", "RELU6")
-    _test_l2_pool2d([2, 9, 10, 2], [1, 2, 1, 1], [1, 1, 1, 1], "SAME", "NHWC")
-    _test_l2_pool2d([2, 9, 10, 2], [1, 2, 1, 1], [1, 1, 2, 1], "SAME", "NHWC")
-    _test_l2_pool2d([1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], "VALID", "NHWC", "RELU")
-    _test_l2_pool2d([2, 9, 10, 2], [1, 1, 1, 1], [1, 1, 1, 1], "VALID", "NHWC")
-    _test_l2_pool2d([2, 9, 10, 2], [1, 2, 1, 1], [1, 1, 1, 1], "VALID", "NHWC")
-    _test_l2_pool2d([2, 9, 10, 2], [1, 2, 1, 1], [1, 1, 2, 1], "VALID", "NHWC", "RELU6")
-
-
-#######################################################################
-# Convolution
-# -----------
-
-
-def _test_tflite2_quantized_convolution(
-    input_shape,
-    kernel_shape,
-    filters,
-    padding="valid",
-    data_format=None,
-    int_quant_dtype=tf.int8,
-    groups=1,
-):
-    """One iteration of TFLite2 quantized convolution with given shapes and attributes"""
-    data_format = "channels_last" if data_format == "NHWC" else "channels_first"
-    data = np.random.uniform(0, 1, input_shape).astype("float32")
-    _ = np.random.uniform(0, 1, kernel_shape).astype("float32")
-
-    data_in = tf.keras.layers.Input(shape=data.shape[1:])
-    conv = tf.keras.layers.Conv2D(
-        filters=filters,
-        kernel_size=(kernel_shape[0], kernel_shape[1]),
-        activation=tf.nn.relu,
-        padding=padding,
-        data_format=data_format,
-        groups=groups,
-    )(data_in)
-    keras_model = tf.keras.models.Model(data_in, conv)
-
-    # To create quantized values with dynamic range of activations, needs representative dataset
-    def representative_data_gen():
-        for _ in range(1):
-            yield [data]
-
-    tflite_model_quant = _quantize_keras_model(
-        keras_model,
-        representative_data_gen,
-        is_float_input=True,
-        is_float_output=True,
-        int_quant_dtype=int_quant_dtype,
-    )
-    # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1
-    try:
-        import tflite.Model
-
-        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_quant, 0)
-    except AttributeError:
-        import tflite
-
-        tflite_model = tflite.Model.GetRootAsModel(tflite_model_quant, 0)
-    except ImportError as exc:
-        raise ImportError("The tflite package must be installed") from exc
-
-    subgraph = tflite_model.Subgraphs(0)
-    model_input = subgraph.InputsAsNumpy()
-    input_node = subgraph.Tensors(model_input).Name().decode("utf-8")
-
-    tflite_output = run_tflite_graph(tflite_model_quant, data)
-    if package_version.parse(tf.__version__) < package_version.parse("2.9"):
-        input_node = data_in.name.replace(":0", "")
-    else:
-        input_node = "serving_default_" + data_in.name + ":0"
-    tvm_output = run_tvm_graph(tflite_model_quant, data, input_node)
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-2, atol=1e-2
-    )
-
-
-def test_forward_quantized_convolution():
-    """Quantized convolution"""
-    for int_quant_dtype in [tf.int8, tf.int16]:
-        _test_tflite2_quantized_convolution(
-            (1, 28, 28, 1),
-            (1, 1),
-            12,
-            data_format="NHWC",
-            int_quant_dtype=int_quant_dtype,
-        )
-
-        _test_tflite2_quantized_convolution(
-            (1, 1, 28, 28),
-            (1, 1),
-            12,
-            data_format="NCWH",
-            int_quant_dtype=int_quant_dtype,
-        )
-
-        _test_tflite2_quantized_convolution(
-            (64, 2, 28, 28),
-            (1, 1),
-            12,
-            data_format="NCWH",
-            int_quant_dtype=int_quant_dtype,
-        )
-
-        _test_tflite2_quantized_convolution(
-            (2, 32, 28, 28),
-            (1, 1),
-            16,
-            data_format="NCWH",
-            int_quant_dtype=int_quant_dtype,
-            groups=8,
-        )
-
-        if platform.machine() == "aarch64":
-            pytest.skip(
-                reason=(
-                    "Grouped convolution type inference error for `arm_cpu`. "
-                    "See https://github.com/apache/tvm/issues/16532"
-                )
-            )
-
-        _test_tflite2_quantized_convolution(
-            (1, 16, 10, 10),
-            (3, 3),
-            2,
-            data_format="NCWH",
-            int_quant_dtype=int_quant_dtype,
-            groups=2,
-        )
-
-
-def test_forward_quantized_depthwise_convolution():
-    """Test qnn.conv2d depthwise compiled with TVM against TFLite reference."""
-    for int_quant_dtype in [tf.int8, tf.int16]:
-        _test_tflite2_quantized_depthwise_convolution(
-            [1, 17, 17, 12], [3, 3, 12, 1], [1, 1], [2, 2], "VALID", "NHWC", 1, int_quant_dtype
-        )
-        _test_tflite2_quantized_depthwise_convolution(
-            [1, 24, 24, 3], [7, 7, 3, 8], [1, 1], [2, 2], "SAME", "NHWC", 8, int_quant_dtype
-        )
-    _test_tflite2_quantized_depthwise_convolution(
-        [1, 8, 8, 128], [1, 1, 128, 1], [1, 1], [1, 1], "SAME", "NHWC", 1, tf.int8
-    )
-
-    if platform.machine() == "aarch64":
-        pytest.skip(
-            reason=(
-                "Tensor intrinsic data type mismatch error. "
-                "See https://github.com/apache/tvm/issues/16533"
-            )
-        )
-
-    _test_tflite2_quantized_depthwise_convolution(
-        [1, 8, 8, 128], [1, 1, 128, 1], [1, 1], [1, 1], "SAME", "NHWC", 1, tf.int16
-    )
-
-
-def _test_tflite2_quantized_depthwise_convolution(
-    input_shape,
-    kernel_shape,
-    dilations,
-    strides,
-    padding,
-    data_format,
-    depth_multiplier,
-    int_quant_dtype=tf.int8,
-):
-    """One iteration of TFLite2 quantized depthwise convolution with given shapes and attributes"""
-
-    data_format = "channels_last" if data_format == "NHWC" else "channels_first"
-    data = np.random.uniform(0, 1, input_shape).astype("float32")
-    kernel = np.random.uniform(0, 1, kernel_shape).astype("float32")
-
-    data_in = tf.keras.layers.Input(shape=data.shape[1:])
-    conv = tf.keras.layers.DepthwiseConv2D(
-        kernel_size=(kernel_shape[0], kernel_shape[1]),
-        strides=strides,
-        padding=padding,
-        data_format=data_format,
-        activation="relu",
-        use_bias=False,
-        depth_multiplier=depth_multiplier,
-    )(data_in)
-    keras_model = tf.keras.models.Model(data_in, conv)
-    keras_model.layers[1].set_weights([kernel])
-
-    # To create quantized values with dynamic range of activations, needs representative dataset
-    def representative_data_gen():
-        for _ in range(1):
-            yield [data]
-
-    tflite_model_quant = _quantize_keras_model(
-        keras_model,
-        representative_data_gen,
-        is_float_input=True,
-        is_float_output=True,
-        int_quant_dtype=int_quant_dtype,
-    )
-
-    # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1
-    try:
-        import tflite.Model
-
-        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_quant, 0)
-    except AttributeError:
-        import tflite
-
-        tflite_model = tflite.Model.GetRootAsModel(tflite_model_quant, 0)
-    except ImportError as exc:
-        raise ImportError("The tflite package must be installed") from exc
-
-    subgraph = tflite_model.Subgraphs(0)
-    model_input = subgraph.InputsAsNumpy()
-    input_node = subgraph.Tensors(model_input).Name().decode("utf-8")
-
-    tflite_output = run_tflite_graph(tflite_model_quant, data)
-    tvm_output = run_tvm_graph(tflite_model_quant, data, input_node)
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-2, atol=1e-2
-    )
-
-
-def _test_convolution(
-    tensor_in_sizes,
-    filter_in_sizes,
-    dilations,
-    strides,
-    padding,
-    data_format,
-    is_depthwise=False,
-    quantized=False,
-    fp16_quantized=False,
-):
-    """One iteration of convolution with given shapes and attributes"""
-
-    total_size_1 = 1
-    total_size_2 = 1
-    for s in tensor_in_sizes:
-        total_size_1 *= s
-    for s in filter_in_sizes:
-        total_size_2 *= s
-    # Initializes the input tensor with array containing incrementing
-    # numbers from 1.
-    if quantized:
-        data_array = np.random.uniform(0, 255, tensor_in_sizes).astype("uint8")
-        filter_array = np.random.uniform(0, 255, filter_in_sizes).astype("uint8")
-    else:
-        data_array = [f * 1.0 for f in range(1, total_size_1 + 1)]
-        filter_array = [f * 1.0 for f in range(1, total_size_2 + 1)]
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32", name="in_data")
-        in_filter = constant_op.constant(
-            filter_array, shape=filter_in_sizes, dtype="float32", name="in_filter"
-        )
-        strides = [1] + strides + [1]
-        dilations = [1] + dilations + [1]
-
-        if is_depthwise:
-            out = nn_ops.depthwise_conv2d_native(
-                in_data, in_filter, strides=strides, padding=padding, data_format=data_format
-            )
-        else:
-            out = nn_ops.conv2d(
-                in_data, in_filter, strides=strides, padding=padding, data_format=data_format
-            )
-
-        if quantized and not fp16_quantized:
-            if is_depthwise:
-                # Quantized the inputs and feed them to the convolution
-                inq_data = tf.quantization.fake_quant_with_min_max_args(
-                    in_data, min=-100, max=100, name="inq_data"
-                )
-                inq_filter = tf.quantization.fake_quant_with_min_max_args(
-                    in_filter, min=-100, max=100, name="inq_filter"
-                )
-                out = nn_ops.depthwise_conv2d_native(
-                    inq_data, inq_filter, strides=strides, padding=padding, data_format=data_format
-                )
-                out = tf.quantization.fake_quant_with_min_max_args(
-                    out, min=-200, max=200, name="out"
-                )
-
-                # Set the input quantization range
-                input_range = {"in_data": (-100, 100)} if quantized else None
-
-                # Compare
-                compare_tflite_with_tvm(
-                    data_array,
-                    "in_data",
-                    [in_data],
-                    [out],
-                    quantized=quantized,
-                    input_range=input_range,
-                    experimental_new_converter=True,
-                )
-            else:
-                # Quantized the inputs and feed them to the convolution
-                inq_data = tf.quantization.fake_quant_with_min_max_args(
-                    in_data, min=-100, max=100, name="inq_data"
-                )
-                inq_filter = tf.quantization.fake_quant_with_min_max_args(
-                    in_filter, min=-100, max=100, name="inq_filter"
-                )
-                out = nn_ops.conv2d(
-                    inq_data, inq_filter, strides=strides, padding=padding, data_format=data_format
-                )
-                out = tf.quantization.fake_quant_with_min_max_args(
-                    out, min=-200, max=200, name="out"
-                )
-
-                # Set the input quantization range
-                input_range = {"in_data": (-100, 100)} if quantized else None
-
-                # Compare
-                compare_tflite_with_tvm(
-                    data_array,
-                    "in_data",
-                    [in_data],
-                    [out],
-                    quantized=quantized,
-                    input_range=input_range,
-                    experimental_new_converter=True,
-                )
-        else:
-            data_array = np.reshape(data_array, tensor_in_sizes).astype("float32")
-            compare_tflite_with_tvm(data_array, "in_data", [in_data], [out])
-
-
-def test_forward_convolution():
-    """Convolution"""
-    for quantized in [False, True]:
-        for fp16_quantized in [False, True]:
-            _test_convolution(
-                [4, 8, 8, 176],
-                [1, 1, 176, 32],
-                [1, 1],
-                [1, 1],
-                "SAME",
-                "NHWC",
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-            _test_convolution(
-                [4, 17, 17, 19],
-                [3, 3, 19, 19],
-                [1, 1],
-                [2, 2],
-                "VALID",
-                "NHWC",
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-            _test_convolution(
-                [4, 17, 17, 124],
-                [1, 1, 124, 19],
-                [1, 1],
-                [1, 1],
-                "SAME",
-                "NHWC",
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-            _test_convolution(
-                [4, 17, 17, 12],
-                [3, 3, 12, 32],
-                [1, 1],
-                [2, 2],
-                "VALID",
-                "NHWC",
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-
-            # depthwise convolution
-            _test_convolution(
-                [4, 8, 8, 176],
-                [1, 1, 176, 1],
-                [1, 1],
-                [1, 1],
-                "SAME",
-                "NHWC",
-                True,
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-            _test_convolution(
-                [4, 17, 17, 19],
-                [3, 3, 19, 1],
-                [1, 1],
-                [2, 2],
-                "VALID",
-                "NHWC",
-                True,
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-            _test_convolution(
-                [4, 17, 17, 124],
-                [1, 1, 124, 1],
-                [1, 1],
-                [1, 1],
-                "SAME",
-                "NHWC",
-                True,
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-            _test_convolution(
-                [4, 17, 17, 12],
-                [3, 3, 12, 1],
-                [1, 1],
-                [2, 2],
-                "VALID",
-                "NHWC",
-                True,
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-            _test_convolution(
-                [4, 17, 17, 12],
-                [3, 3, 12, 2],
-                [1, 1],
-                [2, 2],
-                "VALID",
-                "NHWC",
-                True,
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-            # depthwise convolution with single input channel
-            _test_convolution(
-                [1, 76, 64, 1],
-                [9, 5, 1, 96],
-                [1, 1],
-                [1, 1],
-                "SAME",
-                "NHWC",
-                True,
-                quantized=quantized,
-                fp16_quantized=fp16_quantized,
-            )
-
-    # TFLite2 quantized convolution testing
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.3.0"):
-        _test_convolution(
-            [1, 8, 8, 176], [1, 1, 176, 32], [1, 1], [1, 1], "SAME", "NHWC", quantized=True
-        )
-        _test_convolution(
-            [1, 17, 17, 12], [3, 3, 12, 32], [1, 1], [2, 2], "VALID", "NHWC", quantized=True
-        )
-        _test_convolution(
-            [1, 17, 17, 19], [3, 3, 19, 19], [1, 1], [2, 2], "VALID", "NHWC", quantized=True
-        )
-        _test_convolution(
-            [1, 17, 17, 124], [1, 1, 124, 19], [1, 1], [1, 1], "SAME", "NHWC", quantized=True
-        )
-
-
-#######################################################################
-# Transpose Convolution
-# ---------------------
-
-
-def _test_transpose_conv(
-    tensor_in_sizes,
-    filter_in_sizes,
-    output_shape,
-    strides,
-    padding,
-    quantized=False,
-    fp16_quantized=False,
-):
-    """One iteration of transpose convolution with given shapes and attributes"""
-
-    total_size_1 = 1
-    total_size_2 = 1
-    for s in tensor_in_sizes:
-        total_size_1 *= s
-    for s in filter_in_sizes:
-        total_size_2 *= s
-
-    with tf.Graph().as_default():
-        if quantized and not fp16_quantized:
-            # Initializes the input tensor with array containing incrementing
-            # numbers from 1.
-            data_array = [max(f, 255) for f in range(1, total_size_1 + 1)]
-            filter_array = [max(f, 255) for f in range(1, total_size_2 + 1)]
-            data_array = np.reshape(data_array, tensor_in_sizes).astype("uint8")
-            filter_array = np.reshape(filter_array, filter_in_sizes).astype("uint8")
-
-            in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32", name="in_data")
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-100, max=100, name="q_data"
-            )
-            input_range = {"q_data": (-100, 100)}
-
-            in_filter = constant_op.constant(
-                filter_array, shape=filter_in_sizes, dtype="float32", name="in_filter"
-            )
-            inq_filter = tf.quantization.fake_quant_with_min_max_args(
-                in_filter, min=-100, max=100, name="q_filter"
-            )
-
-            strides = [1] + strides + [1]
-
-            out = nn_ops.conv2d_transpose(
-                inq_data, inq_filter, output_shape=output_shape, strides=strides, padding=padding
-            )
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-100, max=100, name="out")
-            compare_tflite_with_tvm(
-                [data_array], ["q_data"], [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            # Initializes the input tensor with array containing incrementing
-            # numbers from 1.
-            data_array = [f * 1.0 for f in range(1, total_size_1 + 1)]
-            filter_array = [f * 1.0 for f in range(1, total_size_2 + 1)]
-
-            in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32", name="in_data")
-            in_filter = constant_op.constant(
-                filter_array, shape=filter_in_sizes, dtype="float32", name="in_filter"
-            )
-            strides = [1] + strides + [1]
-            # in_filter layout is HWOI
-            out = nn_ops.conv2d_transpose(
-                in_data, in_filter, output_shape=output_shape, strides=strides, padding=padding
-            )
-            data_array = np.reshape(data_array, tensor_in_sizes).astype("float32")
-            compare_tflite_with_tvm(
-                [data_array], ["in_data"], [in_data], [out], fp16_quantized=fp16_quantized
-            )
-
-
-def test_forward_transpose_conv():
-    """Transpose convolution"""
-    for quantized in [True, False]:
-        for fp16_quantized in [True, False]:
-            # odd size input, padding VALID
-            _test_transpose_conv(
-                [1, 5, 6, 16],
-                [2, 2, 16, 16],
-                [1, 10, 12, 16],
-                [2, 2],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-            # odd size input, padding SAME
-            _test_transpose_conv(
-                [1, 5, 6, 16],
-                [2, 2, 16, 16],
-                [1, 10, 12, 16],
-                [2, 2],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-            # kernel 3x3, padding VALID
-            _test_transpose_conv(
-                [4, 32, 32, 16],
-                [3, 3, 5, 16],
-                [4, 34, 34, 5],
-                [1, 1],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [3, 3, 5, 16],
-                [1, 65, 65, 5],
-                [2, 2],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [3, 3, 5, 16],
-                [1, 65, 34, 5],
-                [2, 1],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-
-            # kernel 3x3, padding SAME
-            _test_transpose_conv(
-                [4, 32, 32, 16],
-                [3, 3, 5, 16],
-                [4, 32, 32, 5],
-                [1, 1],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [3, 3, 5, 16],
-                [1, 64, 64, 5],
-                [2, 2],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [3, 3, 5, 16],
-                [1, 64, 32, 5],
-                [2, 1],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-
-            # kernel 2x2, padding VALID
-            _test_transpose_conv(
-                [4, 32, 32, 16],
-                [2, 2, 5, 16],
-                [4, 33, 33, 5],
-                [1, 1],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [2, 2, 5, 16],
-                [1, 64, 64, 5],
-                [2, 2],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [2, 2, 5, 16],
-                [1, 64, 33, 5],
-                [2, 1],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-
-            # kernel 2x2, padding SAME
-            _test_transpose_conv(
-                [4, 32, 32, 16],
-                [2, 2, 5, 16],
-                [4, 32, 32, 5],
-                [1, 1],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [2, 2, 5, 16],
-                [1, 64, 64, 5],
-                [2, 2],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [2, 2, 5, 16],
-                [1, 64, 32, 5],
-                [2, 1],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-
-            # kernel 1x1, padding VALID
-            _test_transpose_conv(
-                [4, 32, 32, 16],
-                [1, 1, 5, 16],
-                [4, 32, 32, 5],
-                [1, 1],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [1, 1, 5, 16],
-                [1, 63, 63, 5],
-                [2, 2],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [1, 1, 5, 16],
-                [1, 63, 32, 5],
-                [2, 1],
-                "VALID",
-                quantized,
-                fp16_quantized,
-            )
-
-            # kernel 1x1, padding SAME
-            _test_transpose_conv(
-                [4, 32, 32, 16],
-                [1, 1, 5, 16],
-                [4, 32, 32, 5],
-                [1, 1],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [1, 1, 5, 16],
-                [1, 63, 63, 5],
-                [2, 2],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-            _test_transpose_conv(
-                [1, 32, 32, 16],
-                [1, 1, 5, 16],
-                [1, 63, 32, 5],
-                [2, 1],
-                "SAME",
-                quantized,
-                fp16_quantized,
-            )
-
-
-def _test_tflite2_quantized_transpose_conv(
-    input_shape,
-    kernel_shape,
-    filters,
-    padding="valid",
-    strides=(1, 1),
-    data_format=None,
-    int_quant_dtype=tf.int8,
-):
-    """One iteration of TFLite2 quantized tranpose conv with given shapes and attributes"""
-    data_format = "channels_last" if data_format == "NHWC" else "channels_first"
-    data = np.random.uniform(0, 1, input_shape).astype("float32")
-    _ = np.random.uniform(0, 1, kernel_shape).astype("float32")
-
-    data_in = tf.keras.layers.Input(shape=data.shape[1:], batch_size=1)
-    transpose_conv = tf.keras.layers.Conv2DTranspose(
-        filters=filters,
-        kernel_size=(kernel_shape[0], kernel_shape[1]),
-        padding=padding,
-        strides=strides,
-        use_bias=True,
-    )(data_in)
-    keras_model = tf.keras.models.Model(data_in, transpose_conv)
-
-    # To create quantized values with dynamic range of activations, needs representative dataset
-    def representative_data_gen():
-        for _ in range(1):
-            yield [data]
-
-    tflite_model_quant = _quantize_keras_model(
-        keras_model,
-        representative_data_gen,
-        is_float_input=True,
-        is_float_output=True,
-        int_quant_dtype=int_quant_dtype,
-    )
-
-    # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1
-    try:
-        import tflite.Model
-
-        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_quant, 0)
-    except AttributeError:
-        import tflite
-
-        tflite_model = tflite.Model.GetRootAsModel(tflite_model_quant, 0)
-    except ImportError as exc:
-        raise ImportError("The tflite package must be installed") from exc
-
-    subgraph = tflite_model.Subgraphs(0)
-    model_input = subgraph.InputsAsNumpy()
-    input_node = subgraph.Tensors(model_input).Name().decode("utf-8")
-
-    tflite_output = run_tflite_graph(tflite_model_quant, data)
-
-    if package_version.parse(tf.__version__) < package_version.parse("2.9"):
-        input_node = data_in.name.replace(":0", "")
-    else:
-        input_node = "serving_default_" + data_in.name + ":0"
-
-    tvm_output = run_tvm_graph(tflite_model_quant, data, input_node)
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-2, atol=1e-2
-    )
-
-
-def test_forward_quantized_transpose_conv():
-    """Quantized convolution"""
-    for int_quant_dtype in [tf.int8, tf.int16]:
-        _test_tflite2_quantized_transpose_conv(
-            (1, 1, 5, 64),
-            (3, 3),
-            64,
-            padding="same",
-            strides=(1, 2),
-            data_format="NHWC",
-            int_quant_dtype=int_quant_dtype,
-        )
-
-
-#######################################################################
-# Reshape
-# -------
-
-
-def _test_reshape(data, out_shape, wrap_shape, quantized=False):
-    """One iteration of reshape operation with given data and out shape"""
-    if quantized:
-        with tf.Graph().as_default():
-            in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in")
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-100, max=100, name="inq_0"
-            )
-
-            input_range = {"inq_0": (-100, 100)}
-            out_shape = out_shape if not wrap_shape else np.array(out_shape, dtype=np.int32)
-
-            in_shape = (
-                out_shape
-                if not wrap_shape
-                else array_ops.placeholder(
-                    shape=out_shape.shape, dtype=out_shape.dtype, name="Newshape"
-                )
-            )
-
-            out = array_ops.reshape(inq_data, in_shape)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-200, max=200, name="out")
-            compare_tflite_with_tvm(
-                [data, out_shape] if wrap_shape else [data],
-                ["inq_0:0", "Newshape:0"] if wrap_shape else ["inq_0:0"],
-                [inq_data, in_shape] if wrap_shape else [inq_data],
-                [out],
-                quantized=True,
-                input_range=input_range,
-                mode="vm",
-            )
-    else:
-        # Test with tensor and constant
-        with tf.Graph().as_default():
-            in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-
-            out_shape = out_shape if not wrap_shape else np.array(out_shape, dtype=np.int32)
-
-            in_shape = (
-                out_shape
-                if not wrap_shape
-                else array_ops.placeholder(
-                    shape=out_shape.shape, dtype=out_shape.dtype, name="Newshape"
-                )
-            )
-
-            out = array_ops.reshape(in_data, in_shape)
-
-            compare_tflite_with_tvm(
-                [data, out_shape] if wrap_shape else [data],
-                ["Placeholder:0", "Newshape:0"] if wrap_shape else ["Placeholder:0"],
-                [in_data, in_shape] if wrap_shape else [in_data],
-                [out],
-                mode="vm",
-            )
-
-
-def test_forward_reshape():
-    for wrap in [True, False]:
-        _test_reshape(np.arange(6.0, dtype=np.float32), [2, 3], wrap)
-        _test_reshape(np.arange(6), [-1, 2], wrap)
-        _test_reshape(np.arange(6), [3, -1], wrap)
-        _test_reshape(np.arange(6), [-1], wrap)
-
-    _test_reshape(np.arange(6, dtype=np.uint8), [2, 3], False, True)
-    _test_reshape(np.arange(6, dtype=np.uint8), [-1, 2], False, True)
-
-
-#######################################################################
-# Resize
-# ------
-
-
-def _test_resize(
-    tf_resize_op, images_data, size_data, align_corners, half_pixel_centers, quantized=False
-):
-    """One iteration of Resize"""
-    # Test with tensor and constant
-    with tf.Graph().as_default():
-        images_tensor = array_ops.placeholder(shape=images_data.shape, dtype="float32", name="in")
-        size = ops.convert_to_tensor(size_data, dtype=size_data.dtype)
-
-        if quantized:
-            images_tensor_q = tf.quantization.fake_quant_with_min_max_args(
-                images_tensor, min=-3, max=2, name="in"
-            )
-            input_range = {"in": (-3, 2)}
-            out_tensor = tf_resize_op(
-                images=images_tensor_q,
-                size=size,
-                align_corners=align_corners,
-                half_pixel_centers=half_pixel_centers,
-            )
-            out_tensor = tf.quantization.fake_quant_with_min_max_args(
-                out_tensor, min=-3, max=2, name="out_tensor"
-            )
-
-            compare_tflite_with_tvm(
-                [images_data],
-                ["in:0"],
-                [images_tensor],
-                [out_tensor],
-                quantized=True,
-                input_range=input_range,
-            )
-        else:
-            out_tensor = tf_resize_op(
-                images=images_tensor,
-                size=size,
-                align_corners=align_corners,
-                half_pixel_centers=half_pixel_centers,
-            )
-            compare_tflite_with_tvm([images_data], ["in:0"], [images_tensor], [out_tensor])
-
-
-def test_all_resize():
-    """Resize"""
-    images_data = np.random.uniform(0, 255, (1, 16, 16, 3))
-    images_data_float32 = images_data.astype(np.float32)
-    images_data_uint8 = images_data.astype(np.uint8)
-    size_data = np.array([8, 8]).astype("int32")
-    ### RESIZE_BILINEAR
-    _test_resize(
-        tf.image.resize_bilinear,
-        images_data_float32,
-        size_data,
-        align_corners=False,
-        half_pixel_centers=False,
-        quantized=False,
-    )
-    _test_resize(
-        tf.image.resize_bilinear,
-        images_data_float32,
-        size_data,
-        align_corners=True,
-        half_pixel_centers=False,
-        quantized=False,
-    )
-    _test_resize(
-        tf.image.resize_bilinear,
-        images_data_uint8,
-        size_data,
-        align_corners=False,
-        half_pixel_centers=False,
-        quantized=True,
-    )
-    _test_resize(
-        tf.image.resize_bilinear,
-        images_data_uint8,
-        size_data,
-        align_corners=True,
-        half_pixel_centers=False,
-        quantized=True,
-    )
-    _test_resize(
-        tf.image.resize_bilinear,
-        images_data_uint8,
-        size_data,
-        align_corners=False,
-        half_pixel_centers=True,
-        quantized=True,
-    )
-    ### RESIZE_NEAREST_NEIGHBOR (was added in v1.13)
-    # According to topi resize.h
-    # Align corners not supported for nearest neighbour
-
-    if "RESIZE_NEAREST_NEIGHBOR" in dir(BuiltinOperator()):
-        _test_resize(
-            tf.image.resize_nearest_neighbor,
-            images_data_float32,
-            size_data,
-            align_corners=False,
-            half_pixel_centers=False,
-        )
-        _test_resize(
-            tf.image.resize_nearest_neighbor,
-            images_data_float32,
-            size_data,
-            align_corners=True,
-            half_pixel_centers=False,
-        )
-        _test_resize(
-            tf.image.resize_nearest_neighbor,
-            images_data_float32,
-            size_data,
-            align_corners=False,
-            half_pixel_centers=True,
-        )
-
-
-#######################################################################
-# Range
-# -----
-def _test_range(start, limit, delta):
-    # tflite 1.13 convert method does not accept empty shapes
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        tf.reset_default_graph()
-        with tf.Graph().as_default():
-            start_scalar, limit_scalar, delta_scalar = (
-                tf.placeholder(dtype=start.dtype, shape=(), name="start"),
-                tf.placeholder(dtype=limit.dtype, shape=(), name="limit"),
-                tf.placeholder(dtype=delta.dtype, shape=(), name="delta"),
-            )
-
-            out = tf.range(start_scalar, limit_scalar, delta_scalar, name="range")
-
-            compare_tflite_with_tvm(
-                [start, limit, delta],
-                ["start", "limit", "delta"],
-                [start_scalar, limit_scalar, delta_scalar],
-                [out],
-                mode="vm",
-                quantized=False,
-            )
-
-
-def _test_range_default():
-    # tflite 1.13 convert method does not accept empty shapes
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        tf.reset_default_graph()
-        with tf.Graph().as_default():
-            inputs = [
-                tf.placeholder(dtype=tf.int32, shape=(), name="p1"),
-                tf.placeholder(dtype=tf.int32, shape=(), name="p2"),
-            ]
-            outputs = [
-                tf.range(start=inputs[0], limit=inputs[1]),  # use default delta
-                tf.range(
-                    start=inputs[1]
-                ),  # use start as limit with 0 as the first item in the range
-            ]
-
-            compare_tflite_with_tvm(
-                [np.int32(1), np.int32(18)], ["p1", "p2"], inputs, outputs, mode="vm"
-            )
-
-
-def test_forward_range():
-    _test_range(np.int32(1), np.int32(18), np.int32(3))
-    _test_range(np.int32(1), np.int32(18), np.float32(3.1))  # increment is of type float
-    _test_range(np.float32(1.0), np.int32(18), np.int32(3.1))  # start is of type float
-    _test_range_default()
-
-
-#######################################################################
-# Shape
-# -----
-
-
-def _test_shape(dtype):
-    # tflite 1.13 convert method does not accept empty shapes
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        tf.reset_default_graph()
-        with tf.Graph().as_default():
-            data = np.array([1, 18, 3], dtype=np.int32)
-            start = tf.placeholder(dtype=tf.int32, shape=[], name="start")
-            limit = tf.placeholder(dtype=tf.int32, shape=[], name="limit")
-            delta = tf.placeholder(dtype=tf.int32, shape=[], name="delta")
-            tf_range = tf.range(start, limit, delta, tf.int32, name="range")
-            out = tf.shape(tf_range, out_type=dtype)
-            out = tf.add(out, tf.constant([1], dtype=dtype))
-            compare_tflite_with_tvm(
-                list(np.nditer(data)),
-                ["start", "limit", "delta"],
-                [start, limit, delta],
-                [out],
-                mode="vm",
-            )
-
-
-def test_forward_shape():
-    _test_shape(tf.int32)
-    _test_shape(tf.int64)
-
-
-#######################################################################
-# Concatenation
-# -------------
-
-
-def _test_concatenation(data, axis):
-    """One iteration of concatenation"""
-
-    assert len(data) >= 1
-
-    with tf.Graph().as_default():
-        in_data = [
-            array_ops.placeholder(shape=tensor.shape, dtype=tensor.dtype, name=f"in_{idx}")
-            for idx, tensor in enumerate(data)
-        ]
-        out = array_ops.concat(in_data, axis)
-        name = [f"in_{idx}:0" for idx in range(len(data))]
-
-        compare_tflite_with_tvm(data, name, in_data, [out])
-
-
-def test_forward_concatenation():
-
-    _test_concatenation([np.arange(6).reshape((1, 2, 1, 3)), np.arange(6).reshape((1, 2, 1, 3))], 1)
-
-    _test_concatenation([np.arange(6).reshape((3, 2)), np.arange(6).reshape((3, 2))], 1)
-
-    _test_concatenation(
-        [
-            np.arange(6).reshape((2, 1, 1, 3)),
-            np.arange(6).reshape((2, 1, 1, 3)),
-            np.arange(6).reshape((2, 1, 1, 3)),
-        ],
-        1,
-    )
-
-
-#######################################################################
-# Unary elemwise
-# --------------
-
-
-def _test_unary_elemwise(math_op, data, quantized, quant_range=(-6, 6), int_quant_dtype=tf.int8):
-    """One iteration of unary elemwise"""
-    if quantized:
-        with tf.Graph().as_default():
-            quant_min, quant_max = quant_range
-            in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=quant_min, max=quant_max, name="inq_0"
-            )
-            input_range = {"inq_0": (quant_min, quant_max)}
-            out = math_op(inq_data)
-            out = tf.quantization.fake_quant_with_min_max_args(
-                out, min=quant_min, max=quant_max, name="out"
-            )
-            compare_tflite_with_tvm(
-                data,
-                "inq_0:0",
-                [inq_data],
-                [out],
-                quantized=True,
-                input_range=input_range,
-                experimental_new_converter=True,
-                int_quant_dtype=int_quant_dtype,
-            )
-    else:
-        with tf.Graph().as_default():
-            in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype, name="in")
-            out = math_op(in_data)
-            compare_tflite_with_tvm(
-                data, ["in:0"], [in_data], [out], experimental_new_converter=True
-            )
-
-
-def _unary_elewise_create_model(math_op, data, offset=0, int_quant_dtype=tf.int8):
-    class Model(tf.Module):
-        @tf.function
-        def tf_function(self, x):
-            op = math_op(x)
-            return op
-
-    if int_quant_dtype in (tf.int8, tf.uint8):
-        _ = "int8"
-    elif int_quant_dtype in (tf.int16, tf.uint16):
-        _ = "int16"
-    else:
-        raise Exception(f"Unsupported dtype '{int_quant_dtype}' for unary elementwise test.")
-
-    model = Model()
-
-    # Save the model
-    export_dir = tempfile.gettempdir() + "/tf_model"
-    tf.saved_model.save(
-        model,
-        export_dir,
-        signatures=model.tf_function.get_concrete_function(
-            tf.TensorSpec(data.shape, tf.float32, name="input")
-        ),
-    )
-
-    # Convert the model
-    def representative_dataset():
-        for _ in range(100):
-            tmp_data = np.random.rand(*tuple(data.shape))
-            yield [tmp_data.astype(np.float32) * 2 - offset]
-
-    converter = tf.lite.TFLiteConverter.from_saved_model(export_dir)
-    converter.optimizations = [tf.lite.Optimize.DEFAULT]
-    converter.representative_dataset = representative_dataset
-
-    if int_quant_dtype in (tf.int16, tf.uint16):
-        converter.target_spec.supported_ops = [
-            tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
-        ]
-    else:
-        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
-
-    converter.inference_input_type = int_quant_dtype
-    converter.inference_output_type = int_quant_dtype
-
-    tflite_model = converter.convert()
-    return tflite_model
-
-
-#######################################################################
-# Abs
-# ----
-
-
-def _test_abs(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of abs"""
-    if quantized:
-        tflite_model_quant = _unary_elewise_create_model(
-            tf.math.abs, data, offset=1, int_quant_dtype=int_quant_dtype
-        )
-        tflite_output = run_tflite_graph(tflite_model_quant, data)
-
-        # TFLite 2.6.x upgrade support
-        if package_version.parse(tf.__version__) < package_version.parse("2.6.1"):
-            in_node = ["serving_default_input_int8"]
-        elif package_version.parse(tf.__version__) < package_version.parse("2.9"):
-            in_node = (
-                ["serving_default_input_int16"] if int_quant_dtype == tf.int16 else ["tfl.quantize"]
-            )
-        else:
-            in_node = "serving_default_input"
-
-        tvm_output = run_tvm_graph(tflite_model_quant, data, in_node)
-        tvm.testing.assert_allclose(
-            np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-2
-        )
-    else:
-        return _test_unary_elemwise(math_ops.abs, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Rsqrt
-# ----
-
-
-def _test_rsqrt(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of rsqrt"""
-
-    # tensorflow version upgrade support
-    if package_version.parse(tf.__version__) < package_version.parse("2.6.1") or not quantized:
-        return _test_unary_elemwise(
-            math_ops.rsqrt, data, quantized, quant_range=[1, 6], int_quant_dtype=int_quant_dtype
-        )
-    else:
-        tflite_model_quant = _unary_elewise_create_model(
-            tf.math.rsqrt, data, int_quant_dtype=int_quant_dtype
-        )
-        tflite_output = run_tflite_graph(tflite_model_quant, data)
-        if package_version.parse(tf.__version__) < package_version.parse("2.9"):
-            in_node = ["tfl.quantize"]
-        else:
-            in_node = "serving_default_input"
-        tvm_output = run_tvm_graph(tflite_model_quant, data, in_node)
-        tvm.testing.assert_allclose(
-            np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-2
-        )
-
-
-#######################################################################
-# Ceil
-# ----
-
-
-def _test_ceil(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of ceil"""
-    return _test_unary_elemwise(math_ops.ceil, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Floor
-# -----
-
-
-def _test_floor(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of floor"""
-    return _test_unary_elemwise(math_ops.floor, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Round
-# -----
-
-
-def _test_round(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of round"""
-    return _test_unary_elemwise(math_ops.round, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Exp
-# ---
-
-
-def _test_exp(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of exp"""
-    return _test_unary_elemwise(math_ops.exp, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Log
-# ---
-
-
-def _test_log(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of log"""
-    return _test_unary_elemwise(
-        math_ops.log, data, quantized, quant_range=[1, 6], int_quant_dtype=int_quant_dtype
-    )
-
-
-#######################################################################
-# Sin
-# ---
-
-
-def _test_sin(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of sin"""
-    return _test_unary_elemwise(math_ops.sin, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Cos
-# ---
-
-
-def _test_cos(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of cos"""
-    if quantized:
-        tflite_model_quant = _unary_elewise_create_model(
-            tf.math.cos, data, int_quant_dtype=int_quant_dtype
-        )
-        tflite_output = run_tflite_graph(tflite_model_quant, data)
-        if package_version.parse(tf.__version__) < package_version.parse("2.9"):
-            in_node = ["tfl.quantize"]
-        else:
-            in_node = "serving_default_input"
-        tvm_output = run_tvm_graph(tflite_model_quant, data, in_node)
-        tvm.testing.assert_allclose(
-            np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-2
-        )
-    else:
-        return _test_unary_elemwise(math_ops.cos, data, quantized)
-
-
-#######################################################################
-# Tan
-# ---
-
-
-def _test_tan(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of tan"""
-    return _test_unary_elemwise(math_ops.tan, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Square
-# ------
-
-
-def _test_square(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of square"""
-    return _test_unary_elemwise(math_ops.square, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Neg
-# ------
-
-
-def _test_neg(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of neg"""
-    return _test_unary_elemwise(math_ops.neg, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Sqrt
-# ------
-
-
-def _test_sqrt(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of sqrt"""
-    return _test_unary_elemwise(
-        math_ops.sqrt, data, quantized, quant_range=[1, 6], int_quant_dtype=int_quant_dtype
-    )
-
-
-#######################################################################
-# Elu
-# ---
-
-
-def _test_elu(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of elu"""
-    return _test_unary_elemwise(nn_ops.elu, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-#######################################################################
-# Gelu
-# ---
-
-
-def _test_gelu(data, quantized, int_quant_dtype=tf.int8):
-    """One iteration of elu"""
-    return _test_unary_elemwise(nn_ops.gelu, data, quantized, int_quant_dtype=int_quant_dtype)
-
-
-def _test_forward_unary_elemwise(test_op, int_quant_dtype=None, quantized=True, negative=True):
-    # input data
-    in_data, inq_data = [], []
-
-    np_dtype = int_quant_dtype.as_numpy_dtype if int_quant_dtype else np.uint8
-
-    # quantized input data
-    if quantized:
-        inq_data.append(np.arange(1, 240, 40, dtype=np_dtype))
-        inq_data.append(np.arange(1, 240, 40, dtype=np_dtype).reshape((2, 1, 3)))
-        if int_quant_dtype == np.int8:
-            inq_data.append(np.arange(-128, 127, 45, dtype=np.int8))
-
-    for data in inq_data:
-        test_op(data, quantized=True, int_quant_dtype=int_quant_dtype)
-
-    # normal input data
-    if negative:
-        in_data.append(np.arange(-2.0, 4.0, dtype=np.float32))
-        in_data.append(np.arange(-2.0, 4.0, dtype=np.float32).reshape((2, 1, 3)))
-    else:
-        in_data.append(np.arange(1.0, 7.0, dtype=np.float32))
-        in_data.append(np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 3)))
-
-    for data in in_data:
-        test_op(data, quantized=False, int_quant_dtype=int_quant_dtype)
-
-
-def test_all_unary_elemwise():
-    """All unary elemwise"""
-    _test_forward_unary_elemwise(_test_abs, int_quant_dtype=tf.int8)
-    _test_forward_unary_elemwise(_test_abs, int_quant_dtype=tf.int16)
-    _test_forward_unary_elemwise(_test_floor)
-    _test_forward_unary_elemwise(_test_exp)
-    _test_forward_unary_elemwise(_test_log, negative=False)
-    _test_forward_unary_elemwise(_test_square, int_quant_dtype=tf.int8)
-    _test_forward_unary_elemwise(_test_sin)
-    _test_forward_unary_elemwise(_test_neg)
-    _test_forward_unary_elemwise(_test_sqrt, negative=False)
-    _test_forward_unary_elemwise(_test_gelu, quantized=False)
-    # tensorflow version upgrade support
-    if package_version.parse(tf.VERSION) < package_version.parse("2.6.1"):
-        _test_forward_unary_elemwise(_test_rsqrt, negative=False, int_quant_dtype=tf.uint8)
-    else:
-        _test_forward_unary_elemwise(_test_rsqrt, negative=False, int_quant_dtype=tf.int8)
-    # ceil and cos come with TFLite 1.14.0.post1 fbs schema
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_forward_unary_elemwise(_test_ceil)
-        if package_version.parse(tf.VERSION) < package_version.parse("2.6.1"):
-            _test_forward_unary_elemwise(_test_cos, quantized=False)
-        else:
-            _test_forward_unary_elemwise(_test_cos, int_quant_dtype=tf.int8)
-        _test_forward_unary_elemwise(_test_round)
-        # This fails with TF and Tflite 1.15.2, this could not have been tested
-        # in CI or anywhere else. The failure mode is that we see a backtrace
-        # from the converter that we need to provide a custom Tan operator
-        # implementation.
-        # _test_forward_unary_elemwise(_test_tan)
-        _test_forward_unary_elemwise(_test_elu)
-
-
-#######################################################################
-# Element-wise
-# ------------
-
-
-def _test_elemwise(
-    math_op,
-    data,
-    fused_activation_function=None,
-    quantized=False,
-    qnn_op=None,
-    same_qnn_params=False,
-    comparison_op=False,
-    exclude_zero_point=False,
-):
-    """One iteration of elemwise"""
-
-    assert len(data) == 2
-
-    def __test_elemwise(in_data):
-        assert len(in_data) == 2
-        if quantized:
-            int_quant_dtype = None
-            if data[0].dtype == "int8":
-                int_quant_dtype = tf.int8
-            elif data[0].dtype == "uint8":
-                int_quant_dtype = tf.uint8
-            elif data[0].dtype == "int16":
-                int_quant_dtype = tf.int16
-            else:
-                assert False, "Unsupported conversion from numpy to tflite dtype!"
-
-            # set the fp32 output range with respect to the operation
-            out_min, out_max = _test_elemwise_qnn_out_range(qnn_op)
-            inq0_min, inq0_max = (-100, 100)
-            inq1_min, inq1_max = (-50, 50)
-
-            # if requested use same quantization parameters provided by _test_elemwise_qnn_out_range
-            if same_qnn_params:
-                inq0_min, inq0_max = (out_min, out_max)
-                inq1_min, inq1_max = (out_min, out_max)
-
-            if exclude_zero_point:
-                if inq1_max == inq1_min:
-                    raise ZeroDivisionError("Input range is 0.")
-
-                # only compute for rhs.
-                quant_scale = 255 / (inq1_max - inq1_min)
-                zero_point = int(round(-inq1_min * quant_scale))
-                data[1][data[1] == zero_point] += 1
-                data[1][data[1] == 0] += 1
-
-            # fake_quant will keep the tensors in float32 until the conversion in the session
-            inq_data = [
-                tf.quantization.fake_quant_with_min_max_args(
-                    in_data[0], min=out_min, max=out_max, name="inq_0"
-                )
-                if in_data[0] is not None
-                else tf.quantization.fake_quant_with_min_max_args(
-                    data[0], min=out_min, max=out_max, name="const_tensor0"
-                ),
-                tf.quantization.fake_quant_with_min_max_args(
-                    in_data[1], min=out_min, max=out_max, name="inq_1"
-                )
-                if in_data[1] is not None
-                else tf.quantization.fake_quant_with_min_max_args(
-                    data[1], min=out_min, max=out_max, name="const_tensor1"
-                ),
-            ]
-
-            input_range = {
-                x[1][0]: x[1][1]
-                for x in zip(
-                    in_data, (("inq_0", (inq0_min, inq0_max)), ("inq_1", (inq1_min, inq1_max)))
-                )
-                if x[0] is not None
-            }
-
-            if comparison_op:
-                out = math_op(inq_data[0], inq_data[1])
-                out = with_fused_activation_function(out, fused_activation_function)
-
-                compare_tflite_with_tvm(
-                    [x[1] for x in zip(in_data, data) if x[0] is not None],
-                    [x + ":0" for x in input_range.keys()],
-                    [x[1] for x in zip(in_data, inq_data) if x[0] is not None],
-                    [out],
-                    quantized=True,
-                    input_range=input_range,
-                    experimental_new_converter=same_qnn_params,
-                    int_quant_dtype=int_quant_dtype,
-                )
-            else:
-                out = math_op(inq_data[0], inq_data[1])
-                out = with_fused_activation_function(out, fused_activation_function)
-                out = tf.quantization.fake_quant_with_min_max_args(
-                    out, min=out_min, max=out_max, name="out"
-                )
-
-                # Note same_qnn_params uses experimental_new_converter as toco failed
-                compare_tflite_with_tvm(
-                    [x[1] for x in zip(in_data, data) if x[0] is not None],
-                    [x + ":0" for x in input_range.keys()],
-                    [x[1] for x in zip(in_data, inq_data) if x[0] is not None],
-                    [out],
-                    quantized=True,
-                    input_range=input_range,
-                    experimental_new_converter=same_qnn_params,
-                    int_quant_dtype=int_quant_dtype,
-                )
-        else:
-            out = math_op(
-                in_data[0]
-                if in_data[0] is not None
-                else ops.convert_to_tensor(data[0], dtype=data[0].dtype),
-                in_data[1]
-                if in_data[1] is not None
-                else ops.convert_to_tensor(data[1], dtype=data[1].dtype),
-            )
-            out = with_fused_activation_function(out, fused_activation_function)
-            compare_tflite_with_tvm(
-                [x[1] for x in zip(in_data, data) if x[0] is not None],
-                [x[1] for x in zip(in_data, ("in_0:0", "in_1:0")) if x[0] is not None],
-                [x for x in in_data if x is not None],
-                [out],
-            )
-
-    # Test with two tensors
-    with tf.Graph().as_default():
-        __test_elemwise(
-            in_data=[
-                array_ops.placeholder(shape=data[0].shape, dtype="float32", name="in_0"),
-                array_ops.placeholder(shape=data[1].shape, dtype="float32", name="in_1"),
-            ]
-        )
-    # Test with tensor and constant
-    with tf.Graph().as_default():
-        __test_elemwise(
-            in_data=[array_ops.placeholder(shape=data[0].shape, dtype="float32", name="in_0"), None]
-        )
-    # Test with constant and tensor
-    with tf.Graph().as_default():
-        __test_elemwise(
-            in_data=[None, array_ops.placeholder(shape=data[1].shape, dtype="float32", name="in_1")]
-        )
-
-
-#######################################################################
-# Add
-# ---
-
-
-def _test_add(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of add"""
-    return _test_elemwise(math_ops.add, data, fused_activation_function, quantized, qnn_op)
-
-
-#######################################################################
-# Subtract
-# --------
-
-
-def _test_sub(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of subtract"""
-    return _test_elemwise(math_ops.subtract, data, fused_activation_function, quantized, qnn_op)
-
-
-#######################################################################
-# Mul
-# ---
-
-
-def _test_mul(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of mul"""
-    return _test_elemwise(math_ops.multiply, data, fused_activation_function, quantized, qnn_op)
-
-
-#######################################################################
-# Divide
-# ------
-
-
-def _test_div(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of divide"""
-    return _test_elemwise(
-        math_ops.divide,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-        exclude_zero_point=True,
-    )
-
-
-#######################################################################
-# Power
-# -----
-
-
-def _test_pow(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of power"""
-    return _test_elemwise(
-        math_ops.pow,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-    )
-
-
-#######################################################################
-# Maximum
-# -------
-
-
-def _test_maximum(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of maximum"""
-    return _test_elemwise(
-        math_ops.maximum, data, fused_activation_function, quantized, qnn_op, same_qnn_params=True
-    )
-
-
-#######################################################################
-# Minimum
-# -------
-
-
-def _test_minimum(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of minimum"""
-    return _test_elemwise(
-        math_ops.minimum, data, fused_activation_function, quantized, qnn_op, same_qnn_params=True
-    )
-
-
-#######################################################################
-# Greater
-# -------
-
-
-def _test_greater(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of greater"""
-    return _test_elemwise(
-        math_ops.greater,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-        comparison_op=True,
-    )
-
-
-#######################################################################
-# Greater_equal
-# -------------
-
-
-def _test_greater_equal(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of greater_equal"""
-    return _test_elemwise(
-        math_ops.greater_equal,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-        comparison_op=True,
-    )
-
-
-#######################################################################
-# Less
-# ----
-
-
-def _test_less(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of less"""
-    return _test_elemwise(
-        math_ops.less,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-        comparison_op=True,
-    )
-
-
-#######################################################################
-# Less_equal
-# ----------
-
-
-def _test_less_equal(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of less_equal"""
-    return _test_elemwise(
-        math_ops.less_equal,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-        comparison_op=True,
-    )
-
-
-#######################################################################
-# Equal
-# -----
-
-
-def _test_equal(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of equal"""
-    return _test_elemwise(
-        math_ops.equal,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-        comparison_op=True,
-    )
-
-
-#######################################################################
-# Not_equal
-# ---------
-
-
-def _test_not_equal(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of not_equal"""
-    return _test_elemwise(
-        math_ops.not_equal,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-        comparison_op=True,
-    )
-
-
-#######################################################################
-# Squared_difference
-# ------------------
-
-
-def _test_squared_difference(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of squared difference"""
-    return _test_elemwise(
-        math_ops.squared_difference,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-    )
-
-
-#######################################################################
-# Floor_divide
-# ------------
-
-
-def _test_floor_divide(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of floor_div"""
-    return _test_elemwise(
-        math_ops.floordiv,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-        exclude_zero_point=True,
-    )
-
-
-#######################################################################
-# Floor_mod
-# ---------
-
-
-def _test_floor_mod(data, fused_activation_function=None, quantized=False, qnn_op=None):
-    """One iteration of floor_mod"""
-    return _test_elemwise(
-        math_ops.floormod,
-        data,
-        fused_activation_function,
-        quantized,
-        qnn_op,
-        same_qnn_params=True,
-    )
-
-
-def _test_forward_elemwise(testop):
-    """Elewise"""
-    testop(
-        [
-            np.arange(6.0, dtype=np.float32).reshape((2, 1, 1, 3)),
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 1, 3)),
-        ]
-    )
-    testop(
-        [
-            np.arange(6.0, dtype=np.float32).reshape((2, 1, 3)),
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 3)),
-        ]
-    )
-    testop(
-        [
-            np.arange(3.0, dtype=np.float32).reshape((1, 3)),
-            np.arange(1.0, 4.0, dtype=np.float32).reshape((1, 3)),
-        ]
-    )
-
-
-def _test_forward_elemwise_quantized(testop, dtype=np.uint8):
-    type_info = np.iinfo(dtype)
-    _min, _max = type_info.min, type_info.max
-    testop(
-        [
-            np.array(np.random.uniform(_min, _max, (3, 6)), dtype=dtype),
-            np.array(np.random.uniform(_min, _max, (3, 6)), dtype=dtype),
-        ],
-        quantized=True,
-        qnn_op=testop,
-    )
-
-
-def _test_elemwise_qnn_out_range(qnn_op):
-    # set the fake_quant output range with respect to the input tensors float32 range
-    qnn_out_range = {
-        _test_add: (-150, 150),
-        _test_sub: (-150, 150),
-        _test_mul: (-5e3, 5e3),
-        _test_div: (-150, 150),
-        _test_maximum: (-112, 111),
-        _test_minimum: (-128, 127),
-        _test_equal: (-150, 150),
-        _test_greater: (-150, 150),
-        _test_squared_difference: (0, 65025),
-        _test_floor_divide: (-150, 150),
-        _test_less: (-150, 150),
-        _test_floor_mod: (-150, 150),
-        _test_not_equal: (-150, 150),
-        _test_pow: (0, 3),
-        _test_less_equal: (-150, 150),
-        _test_greater_equal: (-150, 150),
-    }
-
-    return qnn_out_range[qnn_op]
-
-
-def test_all_elemwise():
-    """All_elemwise"""
-    _test_forward_elemwise(_test_add)
-    _test_forward_elemwise_quantized(_test_add)
-    _test_forward_elemwise(partial(_test_add, fused_activation_function="RELU"))
-    # this is broken with tf upgrade 1.15.2 and hits a segfault that needs
-    # further investigation.
-    # _test_forward_elemwise(partial(_test_add, fused_activation_function="RELU6"))
-    _test_forward_elemwise(_test_sub)
-    _test_forward_elemwise_quantized(_test_sub)
-    _test_forward_elemwise(partial(_test_sub, fused_activation_function="RELU"))
-    _test_forward_elemwise(partial(_test_sub, fused_activation_function="RELU6"))
-    _test_forward_elemwise(_test_mul)
-    _test_forward_elemwise_quantized(_test_mul)
-    _test_forward_elemwise(partial(_test_mul, fused_activation_function="RELU"))
-    _test_forward_elemwise(partial(_test_mul, fused_activation_function="RELU6"))
-    _test_forward_elemwise(_test_div)
-    _test_forward_elemwise(partial(_test_div, fused_activation_function="RELU"))
-    _test_forward_elemwise(partial(_test_div, fused_activation_function="RELU6"))
-    _test_forward_elemwise_quantized(_test_div)
-    _test_forward_elemwise(_test_pow)
-    _test_forward_elemwise_quantized(_test_pow)
-    _test_forward_elemwise(_test_maximum)
-    _test_forward_elemwise_quantized(_test_maximum)
-    _test_forward_elemwise(_test_minimum)
-    _test_forward_elemwise_quantized(_test_minimum)
-    _test_forward_elemwise(_test_greater)
-    _test_forward_elemwise_quantized(_test_greater)
-    _test_forward_elemwise(_test_squared_difference)
-    _test_forward_elemwise_quantized(_test_squared_difference, np.int8)
-    _test_forward_elemwise(_test_greater_equal)
-    _test_forward_elemwise_quantized(_test_greater_equal)
-    _test_forward_elemwise(_test_less)
-    _test_forward_elemwise_quantized(_test_less)
-    _test_forward_elemwise(_test_less_equal)
-    _test_forward_elemwise_quantized(_test_less_equal)
-    _test_forward_elemwise(_test_equal)
-    _test_forward_elemwise_quantized(_test_equal)
-    _test_forward_elemwise(_test_not_equal)
-    _test_forward_elemwise_quantized(_test_not_equal)
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_forward_elemwise(_test_floor_divide)
-        _test_forward_elemwise_quantized(_test_floor_divide)
-        _test_forward_elemwise(_test_floor_mod)
-        # This test of quantized floor mod is currently disabled due
-        # to flaky CI failures in main, failing approximately 45% of
-        # the time.
-        #
-        # _test_forward_elemwise_quantized(_test_floor_mod)
-
-
-#######################################################################
-# AddN
-# ----
-
-
-def _test_forward_add_n(inputs):
-    tf.reset_default_graph()
-    with tf.Graph().as_default():
-        temp = []
-        for each in inputs:
-            temp.append(tf.placeholder(shape=each.shape, dtype=each.dtype))
-        output = tf.add_n(temp)
-        compare_tflite_with_tvm(
-            list(inputs),
-            [each.name for each in temp],
-            list(temp),
-            [output],
-        )
-
-
-def test_forward_add_n():
-    """Add n"""
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        x = np.random.randint(1, 100, size=(3, 3, 3), dtype=np.int32)
-        y = np.random.randint(1, 100, size=(3, 3, 3), dtype=np.int32)
-        z_1 = np.random.randint(1, 100, size=(3, 3, 3), dtype=np.int32)
-        x_1, x_2, z_2 = x.astype(np.float32), y.astype(np.float32), z_1.astype(np.float32)
-        in0 = x
-        in1 = [x, y]
-        in2 = (x, y, z_1)
-        in3 = x_1
-        in4 = [x_1, x_2]
-        in5 = (x_1, x_2, z_2)
-        _test_forward_add_n(in0)
-        _test_forward_add_n(in1)
-        _test_forward_add_n(in2)
-        _test_forward_add_n(in3)
-        _test_forward_add_n(in4)
-        _test_forward_add_n(in5)
-
-
-#######################################################################
-# Logical operators
-# -----------------
-
-
-def _test_logical_binary(logical_bin_op, data):
-
-    with tf.Graph().as_default():
-        in_data = [
-            array_ops.placeholder(shape=data[0].shape, dtype="bool", name="in_0"),
-            array_ops.placeholder(shape=data[1].shape, dtype="bool", name="in_1"),
-        ]
-        if logical_bin_op is math_ops.logical_not:
-            out = math_ops.logical_or(in_data[0], in_data[1], name="out1")
-            out = logical_bin_op(out, name="out")
-        else:
-            out = logical_bin_op(in_data[0], in_data[1], name="out")
-
-        compare_tflite_with_tvm(data, ["in_0:0", "in_1:0"], in_data, [out])
-
-
-def _test_forward_logical_and(data):
-    """One iteration of logical and"""
-    return _test_logical_binary(math_ops.logical_and, data)
-
-
-def _test_forward_logical_or(data):
-    """One iteration of logical or"""
-    return _test_logical_binary(math_ops.logical_or, data)
-
-
-def _test_forward_logical_not(data):
-    """One iteration of logical not"""
-    return _test_logical_binary(math_ops.logical_not, data)
-
-
-def test_all_logical():
-    data = [
-        np.random.choice(a=[False, True], size=(2, 3, 4)).astype("bool"),
-        np.random.choice(a=[False, True], size=(2, 3, 4)).astype("bool"),
-    ]
-    # boolean dtype is not supported by older versions than TFLite 1.15.0
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"):
-        _test_forward_logical_and(data)
-        _test_forward_logical_or(data)
-        _test_forward_logical_not(data)
-
-
-#######################################################################
-# Zeros like
-# ----------
-
-
-def _test_zeros_like(data):
-    """One iteration of ZEROS LIKE"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = gen_array_ops.zeros_like(in_data)
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_zeros_like():
-    """ZEROS LIKE"""
-    _test_zeros_like(np.arange(6.0, dtype=np.float32).reshape((1, 6)))
-
-
-#######################################################################
-# Fill
-# ----
-
-
-def _test_fill(dims, value_data, value_dtype):
-    """Use the fill op to create a tensor of value_data with constant dims."""
-
-    value_data = np.array(value_data, dtype=value_dtype)
-    # TF 1.13 TFLite convert method does not accept empty shapes
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        with tf.Graph().as_default():
-            value = array_ops.placeholder(dtype=value_dtype, name="value", shape=[])
-            out = tf.fill(dims, value)
-            compare_tflite_with_tvm([value_data], ["value"], [value], [out])
-
-    with tf.Graph().as_default():
-        input1 = array_ops.placeholder(dtype=value_dtype, name="input1", shape=dims)
-        # Fill op gets converted to static tensor during conversion
-        out = tf.fill(dims, value_data)
-        out1 = tf.add(out, input1)
-        input1_data = np.random.uniform(0, 5, size=dims).astype(value_dtype)
-        compare_tflite_with_tvm([input1_data], ["input1"], [input1], [out1])
-
-
-def test_forward_fill():
-    """Test FILL op"""
-
-    _test_fill((1, 2, 2, 4), 5, "int32")
-    _test_fill((1, 2, 2, 4), 5, "float32")
-    _test_fill((5,), 5, "int32")
-
-
-#######################################################################
-# Reduce
-# ------
-
-
-def _test_reduce(math_op, data, keep_dims=None):
-    """One iteration of reduce"""
-
-    assert len(data) == 2
-
-    # Test with tensor and constant
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data[0].shape, dtype=data[0].dtype, name="in")
-        out = math_op(in_data, data[1], keep_dims)
-        compare_tflite_with_tvm([data[0]], ["in:0"], [in_data], [out])
-
-
-def _test_reduce_quantize(math_op, data, keep_dims=None):
-    """One iteration of reduce"""
-
-    assert len(data) == 2
-
-    # Test with tensor and constant
-    with tf.Graph().as_default():
-        in_data = [array_ops.placeholder(shape=data[0].shape, dtype="float32", name="in")]
-        inq_data = [
-            tf.quantization.fake_quant_with_min_max_args(
-                in_data[0], min=-100, max=100, name="inq_0"
-            )
-        ]
-        input_range = {"inq_0": (-100, 100)}
-        out = math_op(inq_data, data[1], keep_dims)
-        out = tf.quantization.fake_quant_with_min_max_args(out, min=-200, max=200, name="out")
-        compare_tflite_with_tvm(
-            [data[0]], ["inq_0:0"], [inq_data[0]], [out], quantized=True, input_range=input_range
-        )
-
-
-#######################################################################
-# Reduce_min
-# ----------
-
-
-def _test_reduce_min(data, keep_dims=None):
-    """One iteration of reduce_min"""
-    return _test_reduce(math_ops.reduce_min, data, keep_dims)
-
-
-#######################################################################
-# Reduce_max
-# ----------
-
-
-def _test_reduce_max(data, keep_dims=None):
-    """One iteration of reduce_max"""
-    return _test_reduce(math_ops.reduce_max, data, keep_dims)
-
-
-#######################################################################
-# Reduce_mean
-# -----------
-
-
-def _test_reduce_mean(data, keep_dims=None, quantized=False):
-    """One iteration of reduce_mean"""
-    if quantized:
-        return _test_reduce_quantize(math_ops.reduce_mean, data, keep_dims)
-    else:
-        return _test_reduce(math_ops.reduce_mean, data, keep_dims)
-
-
-#######################################################################
-# Reduce_prod
-# -----------
-
-
-def _test_reduce_prod(data, keep_dims=None):
-    """One iteration of reduce_prod"""
-    return _test_reduce(math_ops.reduce_prod, data, keep_dims)
-
-
-#######################################################################
-# Reduce_sum
-# -----------
-
-
-def _test_reduce_sum(data, keep_dims=None):
-    """One iteration of reduce_sum"""
-    return _test_reduce(math_ops.reduce_sum, data, keep_dims)
-
-
-#######################################################################
-# Reduce_any
-# ----------
-
-
-def _test_reduce_any(data, keep_dims=None):
-    """One iteration of reduce_any"""
-    return _test_reduce(math_ops.reduce_any, data, keep_dims)
-
-
-def _test_forward_reduce(testop, dtype="float32"):
-    """Reduce"""
-    if dtype == "bool":
-        data0 = [np.random.choice(a=[False, True], size=(16, 16, 16, 16)).astype(dtype), None]
-        data1 = [
-            np.random.choice(a=[False, True], size=(16, 16, 16, 16)).astype(dtype),
-            np.array(1, dtype=np.int32),
-        ]
-        data2 = [
-            np.random.choice(a=[False, True], size=(16, 16, 16, 16)).astype(dtype),
-            np.array([1, 2], dtype=np.int32),
-        ]
-    else:
-        data0 = [np.random.rand(16, 16, 16, 16).astype(dtype), None]
-        data1 = [np.random.rand(16, 16, 16, 16).astype(dtype), np.array(1, dtype=np.int32)]
-        data2 = [np.random.rand(16, 16, 16, 16).astype(dtype), np.array([1, 2], dtype=np.int32)]
-
-    for data in [data0, data1, data2]:
-        testop(data)
-        testop(data, keep_dims=False)
-        testop(data, keep_dims=True)
-
-
-def _test_forward_reduce_quantized(testop):
-    data0 = [
-        np.array(np.random.uniform(0, 255, (3, 6)), dtype=np.uint8),
-        np.array([1, 2], dtype=np.int32),
-    ]
-    testop(data0, quantized=True)
-    testop(data0, keep_dims=False, quantized=True)
-    testop(data0, keep_dims=True, quantized=True)
-
-
-def test_all_reduce():
-    _test_forward_reduce(_test_reduce_min)
-    _test_forward_reduce(_test_reduce_max)
-    _test_forward_reduce(_test_reduce_mean)
-    _test_forward_reduce_quantized(_test_reduce_mean)
-    _test_forward_reduce(_test_reduce_prod)
-    _test_forward_reduce(_test_reduce_sum)
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"):
-        _test_forward_reduce(_test_reduce_any, dtype="bool")
-
-
-#######################################################################
-# Arg_min_max
-# -----------
-
-
-def _test_arg_min_max(math_op, data, axis, quantized=False):
-    """One iteration of arg_min_max"""
-
-    with tf.Graph().as_default():
-        t_name = "in"
-        in_data = array_ops.placeholder(shape=data.shape, dtype=np.float32, name=t_name)
-        input_range = None
-        qmin, qmax = -100, 102
-        if quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=qmin, max=qmax, name="q" + t_name
-            )
-            input_range = {inq_data.name.split(":")[0]: (qmin, qmax)}
-            out = math_op(input=inq_data, axis=axis)
-            compare_tflite_with_tvm(
-                [data], [inq_data.name], [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            out = math_op(input=in_data, axis=axis)
-            compare_tflite_with_tvm([data], [in_data.name], [in_data], [out])
-
-
-def test_forward_arg_min_max():
-    """Arg min max"""
-    # test quantized
-    for data in [np.array(np.random.uniform(-100, 100, (3, 4)), dtype=np.uint8)]:
-        # There is no quantized version of ArgMin
-        for axis in [None, 0, 1, -1]:
-            _test_arg_min_max(math_ops.argmax, data, axis, True)
-
-    for data in [np.array(np.random.uniform(-100, 100, (3, 4)), dtype=np.float32)]:
-        for axis in [None, 0, 1, -1]:
-            _test_arg_min_max(math_ops.argmax, data, axis)
-            _test_arg_min_max(math_ops.argmin, data, axis)
-
-
-#######################################################################
-# Select, Where
-# -------------
-
-
-def test_forward_select():
-    """Select"""
-    with tf.Graph().as_default():
-        with tf.Session() as _:
-            input1 = tf.placeholder(tf.int32, shape=[1, 4, 4, 3], name="input1")
-            input2 = tf.placeholder(tf.int32, shape=[1, 4, 4, 3], name="input2")
-            mask = input1 > input2
-            out = tf.where(mask, input1 + 1, input2 * 2)
-            in_data1 = np.random.uniform(0, 10, size=(1, 4, 4, 3)).astype("int32")
-            in_data2 = np.random.uniform(0, 10, size=(1, 4, 4, 3)).astype("int32")
-
-            compare_tflite_with_tvm(
-                [in_data1, in_data2], ["input1:0", "input2:0"], [input1, input2], [out]
-            )
-
-
-@pytest.mark.parametrize("quant_bits", [2, 4, 8, 16])
-@pytest.mark.parametrize(
-    "value, min_value, max_value",
-    [[-10.11, -6, 6], [-3.55, -6, 6], [0, -6, 6], [3.55, -6, 6], [10.11, -6, 6]],
-)
-def test_forward_fake_quant(value, min_value, max_value, quant_bits):
-    """Fake quant"""
-    with tf.Graph().as_default():
-        with tf.Session() as _:
-            input_placeholder = tf.placeholder(tf.float32, shape=[1], name="input")
-            out = tf.quantization.fake_quant_with_min_max_args(
-                input_placeholder, min=min_value, max=max_value, num_bits=quant_bits, name=None
-            )
-
-            in_data = np.float32(value)
-            compare_tflite_with_tvm([in_data], ["input:0"], [input_placeholder], [out])
-
-
-# Squeeze
-# -------
-
-
-def _test_squeeze(data, squeeze_dims=None):
-    """One iteration of squeeze"""
-
-    if squeeze_dims is None:
-        squeeze_dims = []
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-
-        if squeeze_dims:
-            out = array_ops.squeeze(in_data, squeeze_dims)
-        else:
-            out = array_ops.squeeze(in_data)
-
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_squeeze():
-    """Squeeze"""
-    _test_squeeze(np.arange(6).reshape((1, 2, 1, 3)), [0, 2])
-    _test_squeeze(np.arange(6).reshape((2, 1, 3, 1)), [1, 3])
-
-
-#######################################################################
-# Quantize/DeQuantize
-# -------------------
-
-
-def _test_quantize_dequantize(data):
-    """One iteration of quantize and dequantize"""
-
-    # Keras model to force TFLite converter to insert 2 TFLite quantize ops.
-    # First TFLite quantize op converts float32 tensor to int8 tensor - Qnn quantize.
-    # Second TFLite quantize op converts int8 tensor to int8 tensor - Qnn requantize.
-    data_in = tf.keras.layers.Input(shape=data.shape[1:])
-    relu = tf.keras.layers.ReLU()(data_in)
-    add = tf.keras.layers.Add()([data_in, relu])
-    concat = tf.keras.layers.Concatenate(axis=0)([relu, add])
-    keras_model = tf.keras.models.Model(inputs=data_in, outputs=concat)
-
-    # To create quantized values with dynamic range of activations, needs representative dataset
-    def representative_data_gen():
-        for _ in range(1):
-            yield [data]
-
-    tflite_model_quant = _quantize_keras_model(keras_model, representative_data_gen, True, True)
-
-    tflite_output = run_tflite_graph(tflite_model_quant, data)
-    if package_version.parse(tf.__version__) < package_version.parse("2.9"):
-        in_node = data_in.name.split(":")[0]
-    else:
-        in_node = "serving_default_" + data_in.name + ":0"
-    tvm_output = run_tvm_graph(tflite_model_quant, data, in_node)
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-2
-    )
-
-
-def _test_quantize_dequantize_const(data):
-    """One iteration of quantize and dequantize"""
-
-    # Keras model to force TFLite converter to insert 2 TFLite quantize ops.
-    # First TFLite quantize op converts float32 tensor to int8 tensor - Qnn quantize.
-    # Second TFLite quantize op converts int8 tensor to int8 tensor - Qnn requantize.
-    data_in = tf.keras.layers.Input(shape=data.shape[1:])
-    relu = tf.keras.layers.ReLU()(data_in)
-    add = tf.keras.layers.Add()([data, relu])
-    concat = tf.keras.layers.Concatenate(axis=0)([relu, add])
-    keras_model = tf.keras.models.Model(inputs=data_in, outputs=concat)
-
-    # To create quantized values with dynamic range of activations, needs representative dataset
-    def representative_data_gen():
-        for _ in range(1):
-            yield [data]
-
-    tflite_model_quant = _quantize_keras_model(keras_model, representative_data_gen, True, True)
-
-    tflite_output = run_tflite_graph(tflite_model_quant, data)
-    if package_version.parse(tf.__version__) < package_version.parse("2.9"):
-        in_node = data_in.name.split(":")[0]
-    else:
-        in_node = "serving_default_" + data_in.name + ":0"
-    tvm_output = run_tvm_graph(tflite_model_quant, data, in_node)
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-2
-    )
-
-
-def test_forward_quantize_dequantize():
-    """Quantize Dequantize"""
-    data = np.random.uniform(0, 1, (1, 4, 4, 3)).astype("float32")
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.1.0"):
-        _test_quantize_dequantize(data)
-        _test_quantize_dequantize_const(data)
-
-
-#######################################################################
-# Pad
-# ---
-
-
-def _test_pad(data, mode="CONSTANT", quantized=False):
-    """One iteration of PAD"""
-
-    assert len(data) == 2
-
-    # Test with tensor and constant
-    with tf.Graph().as_default():
-        in_data = [array_ops.placeholder(shape=data[0].shape, dtype="float32", name="in")]
-
-        if quantized:
-            # fake_quant will keep the tensors in float32 until the conversion in the session
-            input_range = {"inq_0": (-100, 100)}
-            inq_data = [
-                tf.quantization.fake_quant_with_min_max_args(
-                    in_data[0], min=-100, max=100, name="inq_0"
-                )
-            ]
-            out = array_ops.pad(
-                inq_data[0], ops.convert_to_tensor(data[1], dtype=data[1].dtype), mode=mode
-            )
-            compare_tflite_with_tvm(
-                [data[0]],
-                ["inq_0:0"],
-                inq_data,
-                [out],
-                quantized=True,
-                input_range=input_range,
-                experimental_new_converter=True,
-            )
-        else:
-            out = array_ops.pad(
-                in_data[0], ops.convert_to_tensor(data[1], dtype=data[1].dtype), mode=mode
-            )
-            compare_tflite_with_tvm([data[0]], ["in:0"], in_data, [out])
-
-
-def test_forward_pad():
-    """Pad"""
-    _test_pad(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 1, 3)),
-            np.array([[1, 1], [2, 2], [1, 1], [2, 2]], dtype=np.int32),
-        ]
-    )
-    _test_pad(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 3)),
-            np.array([[2, 2], [1, 1], [1, 1]], dtype=np.int32),
-        ]
-    )
-    _test_pad(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ]
-    )
-    _test_pad(
-        [
-            np.arange(1.0, 4.0, dtype=np.float32).reshape((1, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ]
-    )
-    _test_pad(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ],
-        mode="REFLECT",
-    )
-    _test_pad(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ],
-        mode="SYMMETRIC",
-    )
-    _test_pad(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int64),
-        ],
-        mode="REFLECT",
-    )
-    _test_pad(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int64),
-        ],
-        mode="SYMMETRIC",
-    )
-    _test_pad(
-        [
-            np.arange(0, 256, dtype=np.uint8).reshape((1, 256)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ],
-        quantized=True,
-    )
-    _test_pad(
-        [
-            np.arange(0, 256, dtype=np.uint8).reshape((1, 256)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ],
-        quantized=True,
-        mode="SYMMETRIC",
-    )
-    _test_pad(
-        [
-            np.arange(0, 256, dtype=np.uint8).reshape((1, 256)),
-            np.array([[0, 0], [2, 2]], dtype=np.int32),
-        ],
-        quantized=True,
-        mode="REFLECT",
-    )
-
-
-#######################################################################
-# PADV2
-# -----
-
-
-def _test_padv2(data, mode="CONSTANT", quantized=False):
-    """One iteration of PADV2"""
-
-    assert len(data) == 2 or len(data) == 3
-
-    with_constant_values = len(data) == 3
-
-    # Test with tensor and constant
-    with tf.Graph().as_default():
-        in_data = [array_ops.placeholder(shape=data[0].shape, dtype="float32", name="in")]
-
-        if quantized:
-            # fake_quant will keep the tensors in float32 until the conversion in the session
-            input_range = {"inq_0": (-100, 100)}
-            inq_data = [
-                tf.quantization.fake_quant_with_min_max_args(
-                    in_data[0], min=-100, max=100, name="inq_0"
-                )
-            ]
-            if with_constant_values:
-                in_constant_values = constant_op.constant(
-                    data[2], shape=data[2].shape, dtype="float32", name="in_constant_values"
-                )
-                inq_constant_values = tf.quantization.fake_quant_with_min_max_args(
-                    in_constant_values, min=-100, max=100, name="inq_constant_values"
-                )
-                out = array_ops.pad_v2(
-                    inq_data[0],
-                    ops.convert_to_tensor(data[1], dtype=data[1].dtype),
-                    constant_values=inq_constant_values,
-                    mode=mode,
-                )
-                out = tf.quantization.fake_quant_with_min_max_args(
-                    out, min=-100, max=100, name="out"
-                )
-            else:
-                out = array_ops.pad_v2(
-                    inq_data[0], ops.convert_to_tensor(data[1], dtype=data[1].dtype), mode=mode
-                )
-            compare_tflite_with_tvm(
-                [data[0]], ["inq_0:0"], inq_data, [out], quantized=True, input_range=input_range
-            )
-        else:
-            if with_constant_values:
-                out = array_ops.pad_v2(
-                    in_data[0],
-                    ops.convert_to_tensor(data[1], dtype=data[1].dtype),
-                    constant_values=ops.convert_to_tensor(data[2], dtype=data[2].dtype),
-                    mode=mode,
-                )
-            else:
-                out = array_ops.pad_v2(
-                    in_data[0], ops.convert_to_tensor(data[1], dtype=data[1].dtype), mode=mode
-                )
-            compare_tflite_with_tvm([data[0]], ["in:0"], in_data, [out])
-
-
-def test_forward_padv2():
-    """PADV2"""
-    # Tests without Constant_values
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 1, 3)),
-            np.array([[1, 1], [2, 2], [1, 1], [2, 2]], dtype=np.int32),
-        ]
-    )
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 3)),
-            np.array([[2, 2], [1, 1], [1, 1]], dtype=np.int32),
-        ]
-    )
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ]
-    )
-    _test_padv2(
-        [
-            np.arange(1.0, 4.0, dtype=np.float32).reshape((1, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ]
-    )
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ],
-        mode="REFLECT",
-    )
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ],
-        mode="SYMMETRIC",
-    )
-    _test_padv2(
-        [
-            np.arange(0, 256, dtype=np.uint8).reshape((1, 256)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-        ],
-        quantized=True,
-    )
-
-    # Tests with Constant_values
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 1, 3)),
-            np.array([[1, 1], [2, 2], [1, 1], [2, 2]], dtype=np.int32),
-            np.array([2], dtype=np.float32),
-        ]
-    )
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 3)),
-            np.array([[2, 2], [1, 1], [1, 1]], dtype=np.int32),
-            np.array([1], dtype=np.float32),
-        ]
-    )
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-            np.array([-1], dtype=np.float32),
-        ]
-    )
-    _test_padv2(
-        [
-            np.arange(1.0, 4.0, dtype=np.float32).reshape((1, 3)),
-            np.array([[1, 1], [2, 2]], dtype=np.int32),
-            np.array([2], dtype=np.float32),
-        ]
-    )
-    # NOTE: In versions > 2.1.0, there is a bug in Tensorflow package for this scenario.
-    #       Hence, it is disabled temporarily for TF version > 2.1.0 .
-    if package_version.parse(tf.VERSION) <= package_version.parse("2.1.0"):
-        _test_padv2(
-            [
-                np.arange(0, 256, dtype=np.uint8).reshape((1, 256)),
-                np.array([[1, 1], [2, 2]], dtype=np.int32),
-                np.array([2], dtype=np.float32),
-            ],
-            quantized=True,
-        )
-
-    # Constant Values input can be scalar
-    _test_padv2(
-        [
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 1, 3)),
-            np.array([[1, 1], [2, 2], [1, 1], [2, 2]], dtype=np.int32),
-            np.float32(2),
-        ]
-    )
-    # NOTE: In versions > 2.1.0, there is a bug in Tensorflow package for this scenario.
-    #       Hence, it is disabled temporarily for TF versions > 2.1.0.
-    if package_version.parse(tf.VERSION) <= package_version.parse("2.1.0"):
-        _test_padv2(
-            [
-                np.arange(0, 256, dtype=np.uint8).reshape((1, 256)),
-                np.array([[1, 1], [2, 2]], dtype=np.int32),
-                np.uint8(10),
-            ],
-            quantized=True,
-        )
-
-
-#######################################################################
-# EXPAND_DIMS
-# -----------
-
-
-def _test_expand_dims(input_shape, input_type, axis, quantized=False):
-    """One iteration of EXPAND_DIMS"""
-    with tf.Graph().as_default():
-        axis = ops.convert_to_tensor(axis, dtype=axis.dtype)
-
-        if quantized:
-            # ignoring input_type as quantized requires uint8
-            input_array = np.random.uniform(0, 256, input_shape).astype("uint8")
-            in_input = tf.placeholder(dtype="float32", shape=input_array.shape, name="input")
-
-            input_range = {"q_input": (-100, 100)}
-            inq_input = tf.quantization.fake_quant_with_min_max_args(
-                in_input, min=-100, max=100, name="q_input"
-            )
-
-            out = array_ops.expand_dims(inq_input, axis=axis)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-100, max=100, name="out")
-
-            compare_tflite_with_tvm(
-                [input_array],
-                ["q_input"],
-                [inq_input],
-                [out],
-                quantized=True,
-                input_range=input_range,
-            )
-        else:
-            input_array = np.random.uniform(-100, 100, input_shape).astype(input_type)
-            in_input = tf.placeholder(
-                dtype=input_array.dtype, shape=input_array.shape, name="input"
-            )
-
-            out = array_ops.expand_dims(in_input, axis=axis)
-
-            compare_tflite_with_tvm([input_array], ["input"], [in_input], [out])
-
-
-def test_forward_expand_dims():
-    """EXPAND_DIMS"""
-    for quantized in [False, True]:
-        _test_expand_dims((6, 2, 7, 5), "float32", np.int32(0), quantized=quantized)
-        _test_expand_dims((1, 2, 3), "int32", np.int32(-2), quantized=quantized)
-        _test_expand_dims((2, 4, 5), "float32", np.array([1], dtype=np.int32), quantized=quantized)
-
-
-#######################################################################
-# ONE_HOT
-# -------
-
-
-def _test_one_hot(indices, depth, on_value, off_value, axis=None):
-    """One iteration of One_Hot"""
-    with tf.Graph().as_default():
-        in_indices = tf.placeholder(dtype=indices.dtype, shape=indices.shape, name="indices")
-        in_depth = ops.convert_to_tensor(depth, dtype=depth.dtype)
-        in_on_value = tf.placeholder(dtype=on_value.dtype, shape=on_value.shape, name="on_value")
-        in_off_value = tf.placeholder(
-            dtype=off_value.dtype, shape=off_value.shape, name="off_value"
-        )
-        if axis is not None:
-            out = array_ops.one_hot(in_indices, in_depth, in_on_value, in_off_value, axis=axis)
-        else:
-            out = array_ops.one_hot(in_indices, in_depth, in_on_value, in_off_value)
-        compare_tflite_with_tvm(
-            [indices, on_value, off_value],
-            ["indices", "on_value", "off_value"],
-            [in_indices, in_on_value, in_off_value],
-            [out],
-        )
-
-
-def test_forward_one_hot():
-    """One_Hot"""
-    _test_one_hot(np.int32(2), np.int32(8), np.int32(1), np.int32(0))
-    _test_one_hot(np.int32(4), np.int32(8), np.float32(1), np.float32(0))
-    _test_one_hot(np.array([1, 2, 3], dtype=np.int32), np.int32(8), np.int32(3), np.int32(-1))
-    _test_one_hot(
-        np.array([1, 2, 3], dtype=np.int32), np.int32(8), np.int32(3), np.int32(-1), axis=0
-    )
-
-
-#######################################################################
-# Pack
-# ----
-
-
-def _test_pack(data, is_var, axis, quantized=False):
-    """One iteration of pack"""
-
-    assert len(data) >= 1
-    assert len(data) == len(is_var)
-    if quantized:
-        with tf.Graph().as_default():
-            in_data = [
-                array_ops.placeholder(shape=d.shape, dtype="float32", name="in_" + str(idx))
-                if is_var[idx]
-                else constant_op.constant(
-                    d, shape=d.shape, dtype="float32", name="in_constant_" + str(idx)
-                )
-                for idx, d in enumerate(data)
-            ]
-            inq_data = [
-                tf.quantization.fake_quant_with_min_max_args(
-                    i_data, min=-100, max=100, name=f"inq_{idx}"
-                )
-                for idx, i_data in enumerate(in_data)
-            ]
-            input_range = {}
-            for i in range(len(data)):
-                input_range[f"inq_{i}"] = (-100, 100)
-
-            out = array_ops.pack(inq_data, axis=axis)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-100, max=100, name="out")
-            name = [f"inq_{idx}:0" for idx in range(len(data))]
-            compare_tflite_with_tvm(
-                data, name, inq_data, [out], quantized=True, input_range=input_range
-            )
-    else:
-        with tf.Graph().as_default():
-            in_data = [
-                array_ops.placeholder(shape=d.shape, dtype=d.dtype, name="in_" + str(idx))
-                if is_var[idx]
-                else constant_op.constant(
-                    d, shape=d.shape, dtype=d.dtype, name="in_constant_" + str(idx)
-                )
-                for idx, d in enumerate(data)
-            ]
-
-            out = array_ops.pack(in_data, axis=axis)
-            name = [_.name for _ in in_data]
-            compare_tflite_with_tvm(data, name, in_data, [out], experimental_new_converter=True)
-
-
-def test_forward_pack():
-    """Pack"""
-    _test_pack([np.int32(1), np.int32(5)], [False, False], 0)
-    _test_pack([np.array([1, 4]), np.array([2, 5]), np.array([3, 6])], [True, False, False], 0)
-    _test_pack(
-        [np.arange(6).reshape((1, 2, 1, 3)), np.arange(6).reshape((1, 2, 1, 3))], [True, True], 1
-    )
-
-    _test_pack([np.arange(6).reshape((3, 2)), np.arange(6).reshape((3, 2))], [True, True], 1)
-
-    _test_pack(
-        [
-            np.arange(6).reshape((2, 1, 1, 3)),
-            np.arange(6).reshape((2, 1, 1, 3)),
-            np.arange(6).reshape((2, 1, 1, 3)),
-        ],
-        [True, True, True],
-        1,
-    )
-
-    _test_pack(
-        [
-            np.arange(6, dtype=np.uint8).reshape((2, 1, 1, 3)),
-            np.arange(6, dtype=np.uint8).reshape((2, 1, 1, 3)),
-            np.arange(6, dtype=np.uint8).reshape((2, 1, 1, 3)),
-        ],
-        [True, True, True],
-        1,
-        quantized=True,
-    )
-
-
-#######################################################################
-# Unpack
-# ------
-
-
-def _test_unpack(data, axis, num_unpacks):
-    """One iteration of UNPACK"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = gen_array_ops.unpack(in_data, num=num_unpacks, axis=axis, name="unpack")
-        out_names = ["out_" + str(n) + ":0" for n in range(num_unpacks)]
-        compare_tflite_with_tvm([data], "Placeholder:0", [in_data], out, out_names=out_names)
-
-
-def test_forward_unpack():
-    """UNPACK"""
-    _test_unpack(np.array(np.random.uniform(0, 5, (3, 1)), dtype=np.int32), axis=1, num_unpacks=1)
-    _test_unpack(np.array(np.random.uniform(0, 5, (3, 4)), dtype=np.float32), axis=0, num_unpacks=3)
-    _test_unpack(
-        np.array(np.random.uniform(0, 5, (3, 1, 2)), dtype=np.float32), axis=0, num_unpacks=3
-    )
-    # tflite 1.13 doesn't accept negative axis
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_unpack(
-            np.array(np.random.uniform(0, 5, (3, 6)), dtype=np.int32), axis=-2, num_unpacks=3
-        )
-        _test_unpack(
-            np.array(np.random.uniform(0, 5, (2, 3, 4)), dtype=np.int32), axis=-3, num_unpacks=2
-        )
-
-
-#######################################################################
-# Local response normalization
-# ----------------------------
-
-
-def _test_local_response_normalization(data, depth_radius, bias, alpha, beta):
-    """One iteration of LOCAL_RESPONSE_NORMALIZATION"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-        out = nn_ops.local_response_normalization(
-            in_data, depth_radius=depth_radius, bias=bias, alpha=alpha, beta=beta
-        )
-        compare_tflite_with_tvm(data, "in_0:0", [in_data], [out])
-
-
-def test_forward_local_response_normalization():
-    """LOCAL_RESPONSE_NORMALIZATION"""
-    data = np.random.uniform(size=(1, 6, 4, 3)).astype("float32")
-    # LOCAL_RESPONSE_NORMALIZATION come with TFLite >= 1.14.0 fbs schema
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_local_response_normalization(data, depth_radius=5, bias=1, alpha=1, beta=0.5)
-
-
-#######################################################################
-# L2 normalization
-# ----------------
-
-
-def _test_l2_normalization(data, axis, fused_activation_function=None):
-    """One iteration of L2_NORMALIZATION"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = nn_impl.l2_normalize(in_data, axis)
-        out = with_fused_activation_function(out, fused_activation_function)
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_l2_normalization():
-    """L2_NORMALIZATION"""
-    data = np.random.uniform(size=(3, 6, 4)).astype("float32")
-    _test_l2_normalization(data, axis=2)
-    _test_l2_normalization(data, axis=2, fused_activation_function="RELU")
-
-
-#######################################################################
-# Logistic
-# --------
-
-
-def _test_logistic(data, quantized=False):
-    """One iteration of LOGISTIC"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-
-        if quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-5, max=5, name="inq_0"
-            )
-            input_range = {"inq_0": (-5, 5)}
-            out = math_ops.sigmoid(inq_data)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=0, max=1, name="out")
-            compare_tflite_with_tvm(
-                data, "inq_0:0", [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            out = math_ops.sigmoid(in_data)
-            compare_tflite_with_tvm(data, "in_0:0", [in_data], [out])
-
-
-def test_forward_logistic():
-    """LOGISTIC"""
-    _test_logistic(np.arange(6.0, dtype=np.float32).reshape((1, 6)))
-    _test_logistic(np.random.uniform(0, 255, (3, 6)).astype(np.uint8), quantized=True)
-
-
-#######################################################################
-# Softmax
-# -------
-
-
-def _test_softmax(data):
-    """One iteration of softmax"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = nn_ops.softmax(in_data)
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_softmax():
-    """Softmax"""
-    _test_softmax(np.arange(6.0, dtype=np.float32).reshape((1, 6)))
-    _test_softmax(np.arange(6.0, dtype=np.float32).reshape((1, 2, 3)))
-
-
-######################################################################
-# Log_softmax
-# -----------
-
-
-def _test_log_softmax(data, quantized=False):
-    """One iteration of log_softmax"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-
-        if quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-10, max=10, name="inq_0"
-            )
-            input_range = {"inq_0": (-10, 10)}
-            # tflite log_softmax supports only the case when axis is not specified
-            out = nn_ops.log_softmax(inq_data)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-20, max=0, name="out")
-            compare_tflite_with_tvm(
-                data, "inq_0:0", [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            out = nn_ops.log_softmax(in_data)
-            compare_tflite_with_tvm(data, "in_0:0", [in_data], [out])
-
-
-def test_forward_log_softmax():
-    """Log_softmax"""
-    _test_log_softmax(np.random.uniform(-10, 10, size=(3, 6)).astype(np.float32))
-    _test_log_softmax(np.random.uniform(0, 255, (3, 6)).astype(np.uint8), quantized=True)
-
-
-#######################################################################
-# Tanh
-# ----
-
-
-def _test_tanh(data, quantized=False):
-    """One iteration of TANH"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-
-        if quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-3, max=3, name="inq_0"
-            )
-            input_range = {"inq_0": (-3, 3)}
-            out = math_ops.tanh(inq_data)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-1, max=1, name="out")
-            compare_tflite_with_tvm(
-                data, "inq_0:0", [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            out = math_ops.tanh(in_data)
-            compare_tflite_with_tvm(data, "in_0:0", [in_data], [out])
-
-
-def test_forward_tanh():
-    """TANH"""
-    _test_tanh(np.arange(6.0, dtype=np.float32).reshape((1, 6)), quantized=False)
-    _test_tanh(np.arange(0, 256, 30, dtype=np.uint8), quantized=True)
-
-
-#######################################################################
-# ReLu
-# ----
-
-
-def _test_relu(data, quantized=False):
-    """One iteration of ReLU"""
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-
-        if quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-10, max=10, name="inq_0"
-            )
-            input_range = {"inq_0": (-10, 10)}
-            out = nn_ops.relu(inq_data)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=0, max=6, name="out")
-            compare_tflite_with_tvm(
-                data, "inq_0:0", [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            out = nn_ops.relu(in_data)
-            compare_tflite_with_tvm(data, "in_0:0", [in_data], [out])
-
-
-def test_forward_relu():
-    """ReLU"""
-    _test_relu(np.arange(6.0, dtype=np.float32).reshape((1, 6)))
-    _test_relu(np.random.uniform(0, 255, (3, 6)).astype(np.uint8), quantized=True)
-
-
-#######################################################################
-# ReLU6
-# -----
-
-
-def _test_relu6(data, quantized=False):
-    """One iteration of ReLU6"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-
-        if quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-10, max=10, name="inq_0"
-            )
-            input_range = {"inq_0": (-10, 10)}
-            out = nn_ops.relu6(inq_data)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=0, max=6, name="out")
-            compare_tflite_with_tvm(
-                data, "inq_0:0", [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            out = nn_ops.relu6(in_data)
-            compare_tflite_with_tvm(data, "in_0:0", [in_data], [out])
-
-
-def test_forward_relu6():
-    """ReLU6"""
-    _test_relu6(np.random.uniform(-10, 10, size=(3, 6)).astype(np.float32))
-    _test_relu6(np.random.uniform(0, 255, (3, 6)).astype(np.uint8), quantized=True)
-
-
-#######################################################################
-# Leaky_ReLU
-# ----------
-
-
-def _test_leaky_relu(data, alpha, quantized=False):
-    """One iteration of Leaky_ReLU"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-
-        if quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-3, max=2, name="inq_0"
-            )
-            input_range = {"inq_0": (-3, 2)}
-            out = nn_ops.leaky_relu(inq_data, alpha)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-3, max=2, name="out")
-            compare_tflite_with_tvm(
-                data, "inq_0:0", [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            out = nn_ops.leaky_relu(in_data, alpha)
-            compare_tflite_with_tvm(data, "in_0:0", [in_data], [out])
-
-
-def test_forward_leaky_relu():
-    """Leaky_ReLU"""
-    _test_leaky_relu(np.random.uniform(-5, 5, (1, 6)).astype(np.float32), alpha=0.2)
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_leaky_relu(
-            np.random.uniform(0, 255, (2, 3)).astype(np.uint8), alpha=0.3, quantized=True
-        )
-
-
-#######################################################################
-# ReLU_n1_to_1
-# ------------
-
-
-def _test_relu_n1_to_1(data, quantized=False):
-    """One iteration of ReLU_n1_to_1"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype="float32", name="in_0")
-
-        if quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-3, max=3, name="inq_0"
-            )
-            input_range = {"inq_0": (-3, 3)}
-            # There is no such tf operation.
-            # The specific pattern will be replaced into RELU_N1_TO_1 by tflite
-            out = math_ops.maximum(-1.0, math_ops.minimum(inq_data, 1.0))
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-1, max=1, name="out")
-            compare_tflite_with_tvm(
-                data, "inq_0:0", [inq_data], [out], quantized=True, input_range=input_range
-            )
-        else:
-            out = math_ops.maximum(-1.0, math_ops.minimum(in_data, 1.0))
-            compare_tflite_with_tvm(data, "in_0:0", [in_data], [out])
-
-
-def test_forward_relu_n1_to_1():
-    """ReLU_n1_to_1"""
-    _test_relu_n1_to_1(np.random.uniform(-3, 3, (1, 6)).astype(np.float32))
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_relu_n1_to_1(np.random.uniform(0, 255, (3, 6)).astype(np.uint8), quantized=True)
-
-
-#######################################################################
-# PReLU
-# -----
-
-
-def _test_prelu(data, alpha):
-    """One iteration of PReLU"""
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        # This specific pattern will be replaced into PRelu by tflite
-        out = nn_ops.relu(in_data) + (-alpha * nn_ops.relu(-in_data))
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_prelu():
-    """PReLU"""
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 32, 32, 3)).astype("float32"),
-        np.full((3,), 0.2, dtype="float32"),
-    )
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 32, 32, 3)).astype("float32"),
-        np.full((1, 3), 0.2, dtype="float32"),
-    )
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 32, 32, 3)).astype("float32"),
-        np.full((1, 1, 3), 0.2, dtype="float32"),
-    )
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 32, 32, 3)).astype("float32"),
-        np.full((1, 1, 1, 3), 0.2, dtype="float32"),
-    )
-    #
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 32, 32, 3)).astype("float32"),
-        np.full((32, 3), 0.2, dtype="float32"),
-    )
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 32, 32, 3)).astype("float32"),
-        np.full((32, 32, 3), 0.2, dtype="float32"),
-    )
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 32, 32, 3)).astype("float32"),
-        np.full((1, 32, 1, 3), 0.2, dtype="float32"),
-    )
-    #
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 1, 3)).astype("float32"),
-        np.full((3,), 0.2, dtype="float32"),
-    )
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(1, 32, 3)).astype("float32"),
-        np.full((32, 3), 0.2, dtype="float32"),
-    )
-    _test_prelu(
-        np.random.uniform(-5, 5, size=(32, 3)).astype("float32"), np.full((3), 0.2, dtype="float32")
-    )
-
-
-#######################################################################
-# DepthToSpace
-# ------------
-
-
-def _test_depthtospace(data, block_size):
-    """One iteration of depth_to_space operation with given data and block size"""
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = array_ops.depth_to_space(in_data, block_size)
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_depthtospace():
-    # DEPTH_TO_SPACE comes with TFLite >= 1.15.0 fbs schema
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"):
-        _test_depthtospace(np.random.normal(size=[1, 32, 32, 4]).astype("float32"), 2)
-        _test_depthtospace(np.random.normal(size=[1, 16, 8, 32]).astype("float32"), 4)
-
-
-#######################################################################
-# SpaceToDepth
-# ------------
-
-
-def _test_spacetodepth(data, block_size):
-    """One iteration of space_to_depth operation with given data and block size"""
-
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-        out = array_ops.space_to_depth(in_data, block_size)
-        compare_tflite_with_tvm(data, "Placeholder:0", [in_data], [out])
-
-
-def test_forward_spacetodepth():
-    _test_spacetodepth(np.random.normal(size=[1, 32, 32, 4]).astype("float32"), 2)
-    _test_spacetodepth(np.random.normal(size=[1, 16, 8, 32]).astype("float32"), 4)
-
-
-#######################################################################
-# ReverseSequence
-# ---------------
-
-
-def _test_reverse_sequence(shape, dtype, seq_lengths, batch_axis, seq_axis):
-    """One iteration of reverse_sequence operation with given data and attributes"""
-
-    data = np.random.uniform(0, 100, size=shape).astype(dtype)
-    with tf.Graph().as_default():
-        in_data = array_ops.placeholder(dtype=dtype, name="input", shape=shape)
-        out = tf.reverse_sequence(
-            in_data, seq_lengths=seq_lengths, batch_axis=batch_axis, seq_axis=seq_axis
-        )
-
-        compare_tflite_with_tvm(data, "input", [in_data], [out])
-
-
-def test_forward_reverse_sequence():
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        _test_reverse_sequence([4, 3], "float32", [3, 2, 1], 1, 0)
-        _test_reverse_sequence([4, 3], "float32", [3, 2, 1, 3], 0, 1)
-        _test_reverse_sequence([2, 3, 3, 3], "float32", [2, 3, 2], 2, 1)
-        _test_reverse_sequence([2, 4, 6, 4, 5], "float32", [5, 3], 0, 2)
-        _test_reverse_sequence([2, 4, 6, 4, 5], "float32", [5, 3, 1, 4], 3, 2)
-
-
-#######################################################################
-# Sparse To Dense
-# ---------------
-def _test_sparse_to_dense(sparse_indices, sparse_values, default_value, output_shape):
-    # tflite 1.13 convert method does not accept empty shapes
-    if package_version.parse(tf.VERSION) >= package_version.parse("1.14.0"):
-        with tf.Graph().as_default():
-            indices = tf.placeholder(
-                shape=sparse_indices.shape, dtype=str(sparse_indices.dtype), name="indices"
-            )
-            values = tf.placeholder(
-                shape=sparse_values.shape, dtype=str(sparse_values.dtype), name="values"
-            )
-            oshape = tf.constant(
-                output_shape, shape=output_shape.shape, dtype=str(output_shape.dtype)
-            )
-
-            if default_value is None:
-                output = tf.sparse_to_dense(indices, oshape, values)
-                compare_tflite_with_tvm(
-                    [sparse_indices, sparse_values],
-                    ["indices", "values"],
-                    [indices, values],
-                    [output],
-                )
-            else:
-                dv_placeholder = tf.placeholder(
-                    shape=(), dtype=str(default_value.dtype), name="default_value"
-                )
-                output = tf.sparse_to_dense(indices, oshape, values, dv_placeholder)
-                compare_tflite_with_tvm(
-                    [sparse_indices, sparse_values, default_value],
-                    ["indices", "values", "default_value"],
-                    [indices, values, dv_placeholder],
-                    [output],
-                )
-
-
-def test_forward_sparse_to_dense():
-    """
-    Works in tvm/topi/tensorflow. But tflite converter breaks this test case
-    _test_sparse_to_dense(
-        np.int32(1),
-        np.int32(3),
-        np.int32(0),
-        np.array([5]).astype("int32")
-    )
-    """
-    # vector
-    _test_sparse_to_dense(
-        np.array([0, 1, 4]).astype("int32"),
-        np.array([3, 3, 3]).astype("int32"),
-        np.int32(0),
-        np.array([5]).astype("int32"),
-    )
-    # vector nXd
-    _test_sparse_to_dense(
-        np.array([[0, 0], [1, 2]]).astype("int32"),
-        np.array([1, 2]).astype("int32"),
-        np.int32(0),
-        np.array([3, 4]).astype("int32"),
-    )
-    _test_sparse_to_dense(
-        np.array([[0, 0, 0], [1, 2, 3]]).astype("int32"),
-        np.array([1, 2]).astype("int32"),
-        np.int32(4),
-        np.array([2, 3, 4]).astype("int32"),
-    )
-    # floats
-    _test_sparse_to_dense(
-        np.array([0, 1, 4]).astype("int32"),
-        np.array([3.1, 3.1, 3.1]).astype("float32"),
-        np.float32(3.5),
-        np.array([5]).astype("int32"),
-    )
-    # default value not specified
-    _test_sparse_to_dense(
-        np.array([0, 1, 4]).astype("int32"),
-        np.array([3.1, 3.1, 3.1]).astype("float32"),
-        None,
-        np.array([5]).astype("int32"),
-    )
-
-
-#######################################################################
-# Fully Connected
-# ---------------
-def _test_fully_connected(
-    tensor_in_sizes,
-    const_input,
-    filter_in_sizes,
-    bias_in_size=None,
-    quantized=False,
-    fp16_quantized=False,
-):
-    """One iteration of fully connected"""
-
-    total_size_1 = np.prod(tensor_in_sizes)
-    total_size_2 = np.prod(filter_in_sizes)
-
-    assert (
-        int(total_size_1 / tensor_in_sizes[0]) == filter_in_sizes[0]
-    ), "input size and filter size are mismatched"
-
-    # Initializes the input tensor with array containing incrementing
-    # numbers from 1.
-    data_array = np.arange(
-        1, total_size_1 + 1, dtype=np.uint8 if quantized and not fp16_quantized else np.float32
-    )
-    filter_array = np.arange(
-        1, total_size_2 + 1, dtype=np.uint8 if quantized and not fp16_quantized else np.float32
-    )
-    in_name = "input"
-
-    with tf.Graph().as_default():
-        in_data = (
-            constant_op.constant(data_array, shape=tensor_in_sizes, dtype=np.float32, name=in_name)
-            if const_input
-            else array_ops.placeholder(shape=tensor_in_sizes, dtype=np.float32, name=in_name)
-        )
-
-        in_filter = constant_op.constant(filter_array, shape=filter_in_sizes, dtype=np.float32)
-        data_array = np.reshape(data_array, tensor_in_sizes)
-
-        # if we have bias
-        if bias_in_size:
-            assert bias_in_size[0] == filter_in_sizes[1], "bias and filter size are mismatched"
-            bias_array = np.arange(
-                1, bias_in_size[0] + 1, dtype=np.uint8 if quantized else np.float32
-            )
-            in_bias = constant_op.constant(bias_array, shape=bias_in_size, dtype=np.float32)
-
-        if quantized and not fp16_quantized:
-            inq_data = tf.quantization.fake_quant_with_min_max_args(
-                in_data, min=-100, max=100, name="inq_0"
-            )
-            input_range = {"inq_0": (-100, 100)}
-            inq_filter = tf.quantization.fake_quant_with_min_max_args(
-                in_filter, min=-100, max=100, name="inq_1"
-            )
-            input_range = {"inq_0": (-100, 100), "inq_1": (-100, 100)}
-            # reshape N H W C into N H*W*C
-            inq_data_reshape = array_ops.reshape(inq_data, [tensor_in_sizes[0], -1])
-            out = math_ops.mat_mul(inq_data_reshape, inq_filter)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-100, max=100, name="out")
-
-            # if we have bias
-            if bias_in_size:
-                out = nn_ops.bias_add(out, in_bias)
-
-            compare_tflite_with_tvm(
-                data_array,
-                inq_data.name,
-                [inq_data],
-                [out],
-                quantized=True,
-                input_range=input_range,
-                experimental_new_converter=True,
-            )
-        else:
-            # reshape N H W C into N H*W*C
-            in_data_reshape = array_ops.reshape(in_data, [tensor_in_sizes[0], -1])
-            out = math_ops.mat_mul(in_data_reshape, in_filter)
-            # TODO : Need to construct a fc op with (keep_num_dims == True)
-
-            # if we have bias
-            if bias_in_size:
-                out = nn_ops.bias_add(out, in_bias)
-
-            compare_tflite_with_tvm(
-                data_array,
-                in_data.name,
-                [in_data],
-                [out],
-                experimental_new_converter=True,
-                fp16_quantized=fp16_quantized,
-            )
-
-
-def test_forward_fully_connected():
-    """Fully Connected"""
-    for input_shape, weight_shape, bias_shape in [
-        ([1, 4], [4, 4], None),
-        ([1, 4], [4, 4], [4]),
-        ([1, 1, 1, 5], [5, 5], None),
-        ([1, 1, 10], [10, 103], None),
-        ([1, 1, 1, 150], [150, 100], None),
-        ([1, 1, 1, 150], [150, 100], None),
-        ([1, 1, 1, 150], [150, 100], [100]),
-        ([5, 1, 1, 150], [150, 100], None),
-        ([5, 1, 1, 150], [150, 100], [100]),
-    ]:
-        for const_input in [False, True]:
-            for quantized in [False, True]:
-                for fp16_quantized in [False, True]:
-                    _test_fully_connected(
-                        input_shape,
-                        const_input,
-                        weight_shape,
-                        bias_shape,
-                        quantized,
-                        fp16_quantized,
-                    )
-
-
-#######################################################################
-# REVERSE_V2
-# ----------
-
-
-def _test_reverse_v2(input_shape, axis, dtype):
-    """One iteration of REVERSE_V2"""
-    with tf.Graph().as_default():
-        input_array = np.random.randint(0, 100, size=input_shape).astype(dtype)
-        in_input = tf.placeholder(dtype=input_array.dtype, shape=input_array.shape, name="input")
-        in_axis = ops.convert_to_tensor(axis, dtype=axis.dtype)
-
-        out = array_ops.reverse(in_input, in_axis)
-
-        compare_tflite_with_tvm([input_array], ["input"], [in_input], [out])
-
-
-def test_forward_reverse_v2():
-    """REVERSE_V2"""
-    for dtype in ["float32", "int32"]:
-        _test_reverse_v2((5), np.array([0], dtype="int32"), dtype)
-        _test_reverse_v2((5, 6, 4, 2), np.array([2], dtype="int32"), dtype)
-
-
-#######################################################################
-# MATRIX_SET_DIAG
-# ---------------
-
-
-def _test_matrix_set_diag(input_shape, input_type, quantized=False):
-    """One iteration of MATRIX_SET_DIAG"""
-    with tf.Graph().as_default():
-        diagonal_shape = list(input_shape[:-2])
-        diagonal_shape.append(min(input_shape[-2], input_shape[-1]))
-
-        if quantized:
-            # ignoring input_type as quantized requires uint8
-            input_array = np.random.uniform(0, 256, input_shape).astype("uint8")
-            in_input = tf.placeholder(dtype="float32", shape=input_array.shape, name="input")
-            inq_input = tf.quantization.fake_quant_with_min_max_args(
-                in_input, min=-100, max=100, name="q_input"
-            )
-
-            diagonal = np.random.uniform(0, 256, diagonal_shape).astype("uint8")
-            in_diagonal = tf.placeholder(dtype="float32", shape=diagonal.shape, name="diagonal")
-            inq_diagonal = tf.quantization.fake_quant_with_min_max_args(
-                in_diagonal, min=-100, max=100, name="q_diagonal"
-            )
-
-            input_range = {"q_input": (-100, 100), "q_diagonal": (-100, 100)}
-
-            out = array_ops.matrix_set_diag(inq_input, inq_diagonal)
-            out = tf.quantization.fake_quant_with_min_max_args(out, min=-100, max=100, name="out")
-
-            compare_tflite_with_tvm(
-                [input_array, diagonal],
-                ["q_input", "q_diagonal"],
-                [inq_input, inq_diagonal],
-                [out],
-                quantized=True,
-                input_range=input_range,
-            )
-        else:
-            input_array = np.random.uniform(0, 100, input_shape).astype(input_type)
-            diagonal = np.random.uniform(0, 100, diagonal_shape).astype(input_type)
-
-            in_input = tf.placeholder(
-                dtype=input_array.dtype, shape=input_array.shape, name="input"
-            )
-            in_diagonal = tf.placeholder(
-                dtype=diagonal.dtype, shape=diagonal.shape, name="diagonal"
-            )
-
-            out = array_ops.matrix_set_diag(in_input, in_diagonal)
-
-            compare_tflite_with_tvm(
-                [input_array, diagonal], ["input", "diagonal"], [in_input, in_diagonal], [out]
-            )
-
-
-def test_forward_matrix_set_diag():
-    """MATRIX_SET_DIAG"""
-    for dtype in [np.float32, np.int32]:
-        _test_matrix_set_diag((4, 4), dtype)
-        _test_matrix_set_diag((5, 4, 3, 4), dtype)
-        _test_matrix_set_diag((4, 4, 2), dtype)
-
-    _test_matrix_set_diag((4, 4), np.uint8, quantized=True)
-    _test_matrix_set_diag((5, 4, 3, 4), np.uint8, quantized=True)
-    _test_matrix_set_diag((4, 4, 2), np.uint8, quantized=True)
-
-
-#######################################################################
-# MATRIX_DIAG
-# -----------
-
-
-def _test_matrix_diag(diagonal_shape, dtype):
-    """One iteration of MATRIX_DIAG"""
-    with tf.Graph().as_default():
-        diagonal = np.random.uniform(0, 100, diagonal_shape).astype(dtype)
-        in_diagonal = tf.placeholder(dtype=diagonal.dtype, shape=diagonal.shape, name="diagonal")
-
-        out = array_ops.matrix_diag(in_diagonal)
-
-        compare_tflite_with_tvm(
-            [diagonal], ["diagonal"], [in_diagonal], [out], experimental_new_converter=True
-        )
-
-
-def test_forward_matrix_diag():
-    """MATRIX_DIAG"""
-    for dtype in [np.float32, np.int32]:
-        _test_matrix_diag((4), dtype)
-        _test_matrix_diag((5, 4, 3), dtype)
-        _test_matrix_diag((2, 3), dtype)
-
-
-#######################################################################
-# Custom Operators
-# ----------------
-
-
-def _test_detection_postprocess(tf_model_file, box_encodings_size, class_predictions_size):
-    """One iteration of detection postProcess with given model and shapes"""
-    converter = tf.lite.TFLiteConverter.from_frozen_graph(
-        tf_model_file,
-        input_arrays=["raw_outputs/box_encodings", "raw_outputs/class_predictions"],
-        output_arrays=[
-            "TFLite_Detection_PostProcess",
-            "TFLite_Detection_PostProcess:1",
-            "TFLite_Detection_PostProcess:2",
-            "TFLite_Detection_PostProcess:3",
-        ],
-        input_shapes={
-            "raw_outputs/box_encodings": box_encodings_size,
-            "raw_outputs/class_predictions": class_predictions_size,
-        },
-    )
-    converter.allow_custom_ops = True
-    converter.inference_type = tf.lite.constants.FLOAT
-    tflite_model = converter.convert()
-    np.random.seed(0)
-    box_encodings = np.random.uniform(size=box_encodings_size).astype("float32")
-    class_predictions = np.random.uniform(size=class_predictions_size).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model, [box_encodings, class_predictions])
-    tvm_output = run_tvm_graph(
-        tflite_model,
-        [box_encodings, class_predictions],
-        ["raw_outputs/box_encodings", "raw_outputs/class_predictions"],
-        num_output=4,
-    )
-
-    # Check all output shapes are equal
-    assert all(
-        list(
-            tvm_tensor.shape == tflite_tensor.shape
-            for (tvm_tensor, tflite_tensor) in zip(tvm_output, tflite_output)
-        )
-    )
-
-    # Check valid count is the same
-    assert tvm_output[3] == tflite_output[3]
-    valid_count = tvm_output[3][0]
-
-    # For boxes that do not have any detections, TFLite puts random values. Therefore, we compare
-    # tflite and tvm tensors for only valid boxes.
-    for i in range(0, valid_count):
-        # Check bounding box co-ords
-        tvm.testing.assert_allclose(
-            np.squeeze(tvm_output[0][0][i]),
-            np.squeeze(tflite_output[0][0][i]),
-            rtol=1e-5,
-            atol=1e-5,
-        )
-
-        # Check the class
-        # Stricter check to ensure class remains same
-        np.testing.assert_equal(np.squeeze(tvm_output[1][0][i]), np.squeeze(tflite_output[1][0][i]))
-
-        # Check the score
-        tvm.testing.assert_allclose(
-            np.squeeze(tvm_output[2][0][i]),
-            np.squeeze(tflite_output[2][0][i]),
-            rtol=1e-5,
-            atol=1e-5,
-        )
-
-
-def test_detection_postprocess():
-    """Detection PostProcess"""
-
-    # Fast-NMS
-    box_encodings_size = (1, 1917, 4)
-    class_predictions_size = (1, 1917, 91)
-    tf_model_file = tf_testing.get_workload_official(
-        "http://download.tensorflow.org/models/object_detection/"
-        "ssd_mobilenet_v2_quantized_300x300_coco_2019_01_03.tar.gz",
-        "ssd_mobilenet_v2_quantized_300x300_coco_2019_01_03/tflite_graph.pb",
-    )
-    _test_detection_postprocess(tf_model_file, box_encodings_size, class_predictions_size)
-
-    # Fast-NMS
-    box_encodings_size = (1, 2034, 4)
-    class_predictions_size = (1, 2034, 91)
-    tf_model_file = download_testdata(
-        "https://github.com/czh978/models_for_tvm_test/raw/main/tflite_graph_with_postprocess.pb",
-        "tflite_graph_with_postprocess.pb",
-    )
-    _test_detection_postprocess(tf_model_file, box_encodings_size, class_predictions_size)
-
-    # Regular NMS
-    box_encodings_size = (1, 1917, 4)
-    class_predictions_size = (1, 1917, 91)
-    tf_model_file = download_testdata(
-        (
-            "https://github.com/Grovety/ModelZoo/raw/52fb82156ae8c8e3f62c7d7caf6867b25261dda4/"
-            "models/object_detection/ssd_mobilenet_v1/tflite_int8/tflite_graph_with_regular_nms.pb"
-        ),
-        "tflite_graph_with_regular_nms.pb",
-    )
-    _test_detection_postprocess(tf_model_file, box_encodings_size, class_predictions_size)
-
-
-#######################################################################
-# Custom Converter
-# ----------------
-
-
-def test_custom_op_converter():
-    """Test case for user-defined operator converter in TFLite frontend"""
-
-    class DummyOperatorConverter(relay.frontend.tflite.OperatorConverter):
-        """Operator Converter for converting TFLite ops to relay ops"""
-
-        def __init__(self, model, subgraph, exp_tab):
-            super().__init__(model, subgraph, exp_tab)
-            self.allow_custom_ops = True
-
-            convert_map_overwrite = {"SUB": self.convert_sub_dummy}
-
-            self.convert_map.update(convert_map_overwrite)
-
-        def convert_sub_dummy(self, op):
-            """Convert TFLite SUB"""
-            input_tensors = self.get_input_tensors(op)
-            assert len(input_tensors) == 2, "input tensors length should be 2"
-
-            lhs_tensor = input_tensors[0]
-            rhs_tensor = input_tensors[1]
-
-            lhs_expr = self.get_expr(lhs_tensor.tensor_idx)
-            rhs_expr = self.get_expr(rhs_tensor.tensor_idx)
-
-            temp_expr = relay.op.negative(rhs_expr)
-            out = relay.op.add(lhs_expr, temp_expr)
-
-            return out
-
-    with tf.Graph().as_default():
-        # Generate TFLite model for single addition
-        data = [
-            np.arange(6.0, dtype=np.float32).reshape((2, 1, 1, 3)),
-            np.arange(1.0, 7.0, dtype=np.float32).reshape((2, 1, 1, 3)),
-        ]
-        in_data = [
-            array_ops.placeholder(shape=data[0].shape, dtype="float32", name="in_0"),
-            array_ops.placeholder(shape=data[1].shape, dtype="float32", name="in_1"),
-        ]
-        out = math_ops.subtract(in_data[0], in_data[1])
-        in_name = [x[1] for x in zip(in_data, ("in_0:0", "in_1:0"))]
-        input_tensors = in_data
-        output_tensors = [out]
-        in_node = [0] * len(in_name)
-        for i, _ in enumerate(in_name):
-            in_node[i] = in_name[i].split(":")[0]
-
-        with tf.Session() as sess:
-            converter = tf.lite.TFLiteConverter.from_session(sess, input_tensors, output_tensors)
-            tflite_model_buf = converter.convert()
-    in_data = [x[1] for x in zip(in_data, data)]
-    tvm_output_orig = run_tvm_graph(tflite_model_buf, in_data, in_node)
-    tvm_output_dummy = run_tvm_graph(
-        tflite_model_buf, in_data, in_node, op_converter=DummyOperatorConverter
-    )
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output_orig[0]), np.squeeze(tvm_output_dummy[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-#######################################################################
-# Mobilenet
-# ---------
-
-
-def test_forward_mobilenet_v1():
-    """Test the Mobilenet V1 TF Lite model."""
-    # MobilenetV1
-    tflite_model_file = tf_testing.get_workload_official(
-        "http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz",
-        "mobilenet_v1_1.0_224.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(1, 224, 224, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-def test_forward_mobilenet_v2():
-    """Test the Mobilenet V2 TF Lite model."""
-    # MobilenetV2
-    tflite_model_file = tf_testing.get_workload_official(
-        "http://download.tensorflow.org/models/tflite_11_05_08/mobilenet_v2_1.0_224.tgz",
-        "mobilenet_v2_1.0_224.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(1, 224, 224, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-#######################################################################
-# Mobilenet V3
-# ------------
-
-
-def test_forward_mobilenet_v3():
-    """Test the Mobilenet V3 TF Lite model."""
-    # In MobilenetV3, some ops are not supported before tf 1.15 fbs schema
-    if package_version.parse(tf.VERSION) < package_version.parse("1.15.0"):
-        return
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_1.0_float.tgz",
-        "v3-large_224_1.0_float/v3-large_224_1.0_float.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(1, 224, 224, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-#######################################################################
-# Mobilenet V1 Sparse
-# -----------------
-
-
-def test_forward_sparse_mobilenet_v1():
-    """Test the Sparse version of Mobilenet V1 TF Lite model."""
-    # MobilenetV1
-    tflite_model_file = download_testdata(
-        "https://storage.googleapis.com/fast-convnets/tflite-models/mbv1_140_90_12b4_720.tflite",
-        "mbv1_140_90_12b4_720.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(1, 224, 224, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "float_image_input")
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-#######################################################################
-# Mobilenet V2 Sparse
-# -----------------
-
-
-def test_forward_sparse_mobilenet_v2():
-    """Test the Sparse version of Mobilenet V2 TF Lite model."""
-    # MobilenetV1
-    tflite_model_file = download_testdata(
-        "https://storage.googleapis.com/fast-convnets/tflite-models/mbv2_200_85_11-16b2_744.tflite",
-        "mbv2_200_85_11-16b2_744.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(1, 224, 224, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "float_image_input")
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-#######################################################################
-# Inception
-# ---------
-
-
-def test_forward_inception_v3_net():
-    """Test the Inception V3 TF Lite model."""
-    # InceptionV3
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/"
-        "upload_20180427/inception_v3_2018_04_27.tgz",
-        "inception_v3.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(1, 299, 299, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-def test_forward_inception_v4_net():
-    """Test the Inception V4 TF Lite model."""
-    # InceptionV4
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/"
-        "tflite/model_zoo/upload_20180427/"
-        "inception_v4_2018_04_27.tgz",
-        "inception_v4.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(1, 299, 299, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-def test_forward_inception_v4_net_batched():
-    """Test the Inception V4 TF Lite model."""
-    # InceptionV4
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/"
-        "tflite/model_zoo/upload_20180427/"
-        "inception_v4_2018_04_27.tgz",
-        "inception_v4.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(4, 299, 299, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm.testing.assert_allclose(
-        np.squeeze(tvm_output[0]), np.squeeze(tflite_output[0]), rtol=1e-5, atol=1e-5
-    )
-
-
-def test_forward_qnn_inception_v1_net():
-    """Test the Quantized TFLite Inception model."""
-    # InceptionV1
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/"
-        "inception_v1_224_quant_20181026.tgz",
-        "inception_v1_224_quant.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-
-    # Test image. Checking the labels because the requantize implementation is different between
-    # TFLite and Relay. This cause final output numbers to mismatch. So, testing accuracy via
-    # labels. Also, giving a real image, instead of random inputs.
-    data = get_real_image(224, 224)
-
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tflite_predictions = np.squeeze(tflite_output)
-    tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm_predictions = np.squeeze(tvm_output)
-    tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-    tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-def test_forward_qnn_mobilenet_v1_net():
-    """Test the Quantized TFLite Mobilenet V1 model."""
-    # MobilenetV1
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_08_02/"
-        "mobilenet_v1_1.0_224_quant.tgz",
-        "mobilenet_v1_1.0_224_quant.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-
-    # Test image. Checking the labels because the requantize implementation is different between
-    # TFLite and Relay. This cause final output numbers to mismatch. So, testing accuracy via
-    # labels. Also, giving a real image, instead of random inputs.
-    data = get_real_image(224, 224)
-
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tflite_predictions = np.squeeze(tflite_output)
-    tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm_predictions = np.squeeze(tvm_output)
-    tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-    tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-def test_forward_qnn_mobilenet_v2_net():
-    """Test the Quantized TFLite Mobilenet V2 model."""
-    # MobilenetV2
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/"
-        "mobilenet_v2_1.0_224_quant.tgz",
-        "mobilenet_v2_1.0_224_quant.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-
-    # Test image. Checking the labels because the requantize implementation is different between
-    # TFLite and Relay. This cause final output numbers to mismatch. So, testing accuracy via
-    # labels. Also, giving a real image, instead of random inputs.
-    data = get_real_image(224, 224)
-
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tflite_predictions = np.squeeze(tflite_output)
-    tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm_predictions = np.squeeze(tvm_output)
-    tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-    tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-#######################################################################
-# Mobilenet V3 Quantized
-# ----------------------
-
-
-def test_forward_qnn_mobilenet_v3_net():
-    """Test the Quantized TFLite Mobilenet V3 model."""
-    # In MobilenetV3, some ops are not supported before tf 1.15 fbs schema
-    if package_version.parse(tf.VERSION) < package_version.parse("1.15.0"):
-        pytest.skip("Unsupported in tflite < 1.15.0")
-    else:
-        pytest.skip("This segfaults with tensorflow 1.15.2 and above")
-
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_1.0_uint8.tgz",
-        "v3-large_224_1.0_uint8/v3-large_224_1.0_uint8.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-
-    # Test image. Checking the labels because the requantize implementation is different between
-    # TFLite and Relay. This cause final output numbers to mismatch. So, testing accuracy via
-    # labels. Also, giving a real image, instead of random inputs.
-    data = get_real_image(224, 224)
-
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tflite_predictions = np.squeeze(tflite_output)
-    tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm_predictions = np.squeeze(tvm_output)
-    tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-    tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-@pytest.mark.skipif(
-    platform.machine() == "aarch64",
-    reason="Fails with an output mismatch. See https://github.com/apache/tvm/issues/16534",
-)
-def test_forward_tflite2_qnn_resnet50():
-    """Test the Quantized TFLite version 2.1.0 Resnet50 model."""
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.1.0"):
-        tflite_model_file = download_testdata(
-            "https://raw.githubusercontent.com/dmlc/web-data/main/tensorflow/models/Quantized/"
-            "resnet_50_quantized.tflite",
-            "resnet_50_quantized.tflite",
-        )
-        with open(tflite_model_file, "rb") as f:
-            tflite_model_buf = f.read()
-
-        data = pre_processed_image(224, 224)
-
-        tflite_output = run_tflite_graph(tflite_model_buf, data)
-        tflite_predictions = np.squeeze(tflite_output)
-        tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-        tvm_output = run_tvm_graph(tflite_model_buf, np.array(data), "input_1")
-        tvm_predictions = np.squeeze(tvm_output)
-        tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-        tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-def test_forward_tflite2_qnn_inception_v1():
-    """Test the Quantized TFLite version 2.1.0 Inception V1 model."""
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.1.0"):
-        tflite_model_file = download_testdata(
-            "https://raw.githubusercontent.com/dmlc/web-data/main/tensorflow/models/Quantized/"
-            "inception_v1_quantized.tflite",
-            "inception_v1_quantized.tflite",
-        )
-        with open(tflite_model_file, "rb") as f:
-            tflite_model_buf = f.read()
-
-        data = pre_processed_image(224, 224)
-
-        tflite_output = run_tflite_graph(tflite_model_buf, data)
-        tflite_predictions = np.squeeze(tflite_output)
-        tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-        tvm_output = run_tvm_graph(tflite_model_buf, np.array(data), "input_1")
-        tvm_predictions = np.squeeze(tvm_output)
-        tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-        tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-def test_forward_tflite2_qnn_mobilenet_v2():
-    """Test the Quantized TFLite version 2.1.0 Mobilenet V2 model."""
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.1.0"):
-        tflite_model_file = download_testdata(
-            "https://raw.githubusercontent.com/dmlc/web-data/main/tensorflow/models/Quantized/"
-            "mobilenet_v2_quantized.tflite",
-            "mobilenet_v2_quantized.tflite",
-        )
-        with open(tflite_model_file, "rb") as f:
-            tflite_model_buf = f.read()
-
-        data = pre_processed_image(224, 224)
-
-        tflite_output = run_tflite_graph(tflite_model_buf, data)
-        tflite_predictions = np.squeeze(tflite_output)
-        tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-        tvm_output = run_tvm_graph(tflite_model_buf, np.array(data), "input_1")
-        tvm_predictions = np.squeeze(tvm_output)
-        tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-        tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-def test_forward_tflite_float16():
-    """Test float16 quantized model"""
-    # MobilenetV2
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_02_22/"
-        "mobilenet_v1_0.25_128.tgz",
-        "mobilenet_v1_0.25_128_frozen.pb",
-    )
-
-    converter = tf.lite.TFLiteConverter.from_frozen_graph(
-        tflite_model_file, ["input"], ["MobilenetV1/Predictions/Reshape_1"]
-    )
-    converter.optimizations = [tf.lite.Optimize.DEFAULT]
-    converter.target_spec.supported_types = [tf.float16]
-    tflite_model_buf = converter.convert()
-
-    # Test image. Checking the labels because the requantize implementation is different between
-    # TFLite and Relay. This cause final output numbers to mismatch. So, testing accuracy via
-    # labels. Also, giving a real image, instead of random inputs.
-    data = get_real_image(128, 128, quantized=False)
-
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tflite_predictions = np.squeeze(tflite_output)
-    tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm_predictions = np.squeeze(tvm_output)
-    tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-    tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-@pytest.mark.skipif(
-    platform.machine() == "aarch64",
-    reason="Fails during leagalization due to int16 datatype. "
-    "See https://github.com/apache/tvm/issues/16535",
-)
-def test_forward_mobilenet_int16():
-    """Test int16 quantized model"""
-    # MobilenetV2
-    model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_02_22/"
-        "mobilenet_v1_0.25_128.tgz",
-        "mobilenet_v1_0.25_128_frozen.pb",
-    )
-
-    # Test image. Checking the labels because the requantize implementation is different between
-    # TFLite and Relay. This cause final output numbers to mismatch. So, testing accuracy via
-    # labels. Also, giving a real image, instead of random inputs.
-    #
-    # According to TFLite documentation, despite the quantization being done to make this model
-    # use int16 types, inputs and outputs are kept float32 by default.
-    # https://www.tensorflow.org/lite/performance/post_training_integer_quant_16x8
-    data = get_real_image(128, 128, quantized=False)
-
-    converter = tf.lite.TFLiteConverter.from_frozen_graph(
-        model_file, ["input"], ["MobilenetV1/Predictions/Reshape_1"]
-    )
-
-    def representative_dataset():
-        for _ in range(1):
-            yield [data]
-
-    converter.optimizations = [tf.lite.Optimize.DEFAULT]
-    converter.target_spec.supported_ops = [
-        tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
-    ]
-    converter.representative_dataset = representative_dataset
-    tflite_model_buf = converter.convert()
-
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tflite_predictions = np.squeeze(tflite_output)
-    tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input")
-    tvm_predictions = np.squeeze(tvm_output)
-    tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-    tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-@pytest.mark.skipif(
-    platform.machine() == "aarch64",
-    reason="Fails during leagalization due to int16 datatype. "
-    "See https://github.com/apache/tvm/issues/16535",
-)
-def test_forward_ds_cnn_int16():
-    """Test DS_CNN int16 quantized model"""
-    tflite_model_file = download_testdata(
-        "https://github.com/ARM-software/ML-zoo/blob/48f458af1e9065d9aad2ad94d24b58d6e7c00817/"
-        "models/keyword_spotting/ds_cnn_small/tflite_int16/ds_cnn_quantized.tflite?raw=true",
-        "ds_cnn_quantized_int16.tflite",
-    )
-
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-
-    data = np.random.uniform(size=(1, 490)).astype("int16")
-
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tflite_predictions = np.squeeze(tflite_output)
-    tflite_sorted_labels = tflite_predictions.argsort()[-3:][::-1]
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "serving_default_input:0")
-    tvm_predictions = np.squeeze(tvm_output)
-    tvm_sorted_labels = tvm_predictions.argsort()[-3:][::-1]
-    tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)
-
-
-#######################################################################
-# Unidirectional Sequence LSTM
-# ---------------------
-def test_forward_unidirectional_sequence_lstm():
-    """Test the UnidirectionalSequenceLSTM TFLite"""
-    if package_version.parse(tf.VERSION) >= package_version.parse("2.1.0"):
-        tflite_model_file = download_testdata(
-            "https://github.com/SebastianBoblestETAS/nn_models/blob/"
-            "ce49c5de64889493161ca4194a20e0fd5eb707e6/lstm_1_in_3_out_2_ts_4.tflite?raw=true",
-            "lstm_1_in_3_out_2_ts_4.tflite",
-        )
-        with open(tflite_model_file, "rb") as f:
-            tflite_model_buf = f.read()
-
-        data = np.array(
-            [
-                [
-                    [0.5488135, 0.71518934, 0.60276335],
-                    [0.5448832, 0.4236548, 0.6458941],
-                    [0.4375872, 0.891773, 0.96366274],
-                    [0.3834415, 0.79172504, 0.5288949],
-                ]
-            ],
-            dtype="float32",
-        )
-
-        tflite_output = run_tflite_graph(tflite_model_buf, data)
-        tvm_output = run_tvm_graph(tflite_model_buf, data, "serving_default_input_1:0")
-        tvm.testing.assert_allclose(tflite_output, tvm_output)
-
-
-#######################################################################
-# Quantized SSD Mobilenet
-# -----------------------
-
-
-def test_forward_qnn_coco_ssd_mobilenet_v1():
-    """Test the quantized Coco SSD Mobilenet V1 TF Lite model."""
-    pytest.skip(
-        "LLVM bug - getExtendedVectorNumElements - "
-        + "https://discuss.tvm.apache.org/t/segfault-in-llvm/3567. The workaround is to use a "
-        + "specific target, for example, llvm -mpcu=core-avx2"
-    )
-
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://storage.googleapis.com/download.tensorflow.org/models/tflite/"
-        "coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip",
-        "detect.tflite",
-    )
-
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-
-    data = get_real_image_object_detection(300, 300)
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(
-        tflite_model_buf, data, "normalized_input_image_tensor", num_output=4
-    )
-
-    # Check all output shapes are equal
-    assert all(
-        list(
-            tvm_tensor.shape == tflite_tensor.shape
-            for (tvm_tensor, tflite_tensor) in zip(tvm_output, tflite_output)
-        )
-    )
-
-    # Check valid count is the same
-    assert tvm_output[3] == tflite_output[3]
-    valid_count = tvm_output[3][0]
-
-    # For boxes that do not have any detections, TFLite puts random values. Therefore, we compare
-    # tflite and tvm tensors for only valid boxes.
-    for i in range(0, valid_count):
-        # We compare the bounding boxes whose prediction score is above 60%. This is typical in end
-        # to end application where a low prediction score is discarded. This is also needed because
-        # multiple low score bounding boxes can have same score and TFlite and TVM can have
-        # different orderings for same score bounding boxes. Another reason for minor differences in
-        # low score bounding boxes is the difference between TVM and TFLite for requantize operator.
-        if tvm_output[2][0][i] > 0.6:
-            # Check bounding box co-ords. The tolerances have to be adjusted, from 1e-5 to 1e-2,
-            # because of differences between for requantiize operator in TFLite and TVM.
-            tvm.testing.assert_allclose(
-                np.squeeze(tvm_output[0][0][i]),
-                np.squeeze(tflite_output[0][0][i]),
-                rtol=1e-2,
-                atol=1e-2,
-            )
-
-            # Check the class
-            # Stricter check to ensure class remains same
-            np.testing.assert_equal(
-                np.squeeze(tvm_output[1][0][i]), np.squeeze(tflite_output[1][0][i])
-            )
-
-            # Check the score
-            tvm.testing.assert_allclose(
-                np.squeeze(tvm_output[2][0][i]),
-                np.squeeze(tflite_output[2][0][i]),
-                rtol=1e-5,
-                atol=1e-5,
-            )
-
-
-#######################################################################
-# SSD Mobilenet
-# -------------
-
-
-def test_forward_coco_ssd_mobilenet_v1():
-    """Test the FP32 Coco SSD Mobilenet V1 TF Lite model."""
-    tflite_model_file = tf_testing.get_workload_official(
-        "https://raw.githubusercontent.com/dmlc/web-data/main/tensorflow/models/object_detection/"
-        "ssd_mobilenet_v1_coco_2018_01_28.tgz",
-        "ssd_mobilenet_v1_coco_2018_01_28.tflite",
-    )
-
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-
-    np.random.seed(0)
-    data = np.random.uniform(size=(1, 300, 300, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(
-        tflite_model_buf, data, "normalized_input_image_tensor", num_output=4
-    )
-
-    # Check all output shapes are equal
-    assert all(
-        list(
-            tvm_tensor.shape == tflite_tensor.shape
-            for (tvm_tensor, tflite_tensor) in zip(tvm_output, tflite_output)
-        )
-    )
-
-    # Check valid count is the same
-    assert tvm_output[3] == tflite_output[3]
-    valid_count = tvm_output[3][0]
-
-    # For boxes that do not have any detections, TFLite puts random values. Therefore, we compare
-    # tflite and tvm tensors for only valid boxes.
-    for i in range(0, valid_count):
-        # Check bounding box co-ords
-        tvm.testing.assert_allclose(
-            np.squeeze(tvm_output[0][0][i]),
-            np.squeeze(tflite_output[0][0][i]),
-            rtol=1e-5,
-            atol=1e-5,
-        )
-        # Check the class
-        np.testing.assert_equal(np.squeeze(tvm_output[1][0][i]), np.squeeze(tflite_output[1][0][i]))
-
-        # Check the score
-        tvm.testing.assert_allclose(
-            np.squeeze(tvm_output[2][0][i]),
-            np.squeeze(tflite_output[2][0][i]),
-            rtol=1e-5,
-            atol=1e-5,
-        )
-
-
-#######################################################################
-# MediaPipe
-# -------------
-def test_forward_mediapipe_hand_landmark():
-    """Test MediaPipe 2D hand landmark TF Lite model."""
-    # MediaPipe 2D hand landmark TF
-    tflite_model_file = download_testdata(
-        "https://github.com/google/mediapipe/raw/v0.7.4/mediapipe/models/hand_landmark.tflite",
-        "hand_landmark.tflite",
-    )
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    data = np.random.uniform(size=(1, 256, 256, 3)).astype("float32")
-    tflite_output = run_tflite_graph(tflite_model_buf, data)
-    tvm_output = run_tvm_graph(tflite_model_buf, data, "input_1", num_output=2)
-    for i in range(2):
-        tvm.testing.assert_allclose(
-            np.squeeze(tvm_output[i]), np.squeeze(tflite_output[i]), rtol=1e-5, atol=1e-5
-        )
-
-
-#######################################################################
-# Test check for Tensorflow "dynamic range quantization" optimization
-# --------------
-def test_prevent_tensorflow_dynamic_range():
-    """
-    Should prevent running "dynamic range quantization" optimized TFLite graph
-    """
-    data_array = np.random.randint(0, 2, (1, 1024, 1024)).astype(dtype=np.float32)
-    filter_array = np.random.randint(0, 2, (1024, 1024)).astype(dtype=np.float32)
-    data_in = tf.keras.layers.Input(shape=data_array.shape[1:])
-    dense = tf.keras.layers.Dense(units=filter_array.shape[-1], use_bias=False)(data_in)
-    keras_model = tf.keras.models.Model(data_in, dense)
-    keras_model.layers[1].set_weights([filter_array])
-
-    converter = interpreter_wrapper.TFLiteConverter.from_keras_model(keras_model)
-    converter.optimizations = [tf.lite.Optimize.DEFAULT]
-    tflite_model = converter.convert()
-    with pytest.raises(tvm.error.OpNotImplemented):
-        _ = run_tvm_graph(tflite_model, data_array, data_in.name.replace(":0", ""))
-
-
-def _test_nms_v5(
-    bx_shape, score_shape, iou_threshold, score_threshold, max_output_size, dtype="float32"
-):
-    """One iteration of nms_v5 with given attributes"""
-    boxes = np.random.uniform(0, 10, size=bx_shape).astype(dtype)
-    scores = np.random.uniform(size=score_shape).astype(dtype)
-
-    tf.reset_default_graph()
-    tf.compat.v1.disable_eager_execution()
-    in_data_1 = array_ops.placeholder(dtype, boxes.shape, name="in_data_1")
-    in_data_2 = array_ops.placeholder(dtype, scores.shape, name="in_data_2")
-    out = image_ops.non_max_suppression_with_scores(
-        boxes=in_data_1,
-        scores=in_data_2,
-        max_output_size=max_output_size,
-        iou_threshold=iou_threshold,
-        score_threshold=score_threshold,
-        name="nms",
-    )
-
-    compare_tflite_with_tvm(
-        [boxes, scores],
-        ["in_data_1:0", "in_data_2:0"],
-        [in_data_1, in_data_2],
-        [out[0], out[1]],
-        out_names=[out[0].name, out[1].name],
-        experimental_new_converter=True,
-    )
-
-
-def test_forward_nms_v5():
-    """test nms_v5"""
-    _test_nms_v5((10000, 4), (10000,), 0.5, 0.4, 100)
-    _test_nms_v5((1000, 4), (1000,), 0.7, 0.3, 50)
-
-
-#######################################################################
-# Test structural_equal and span of a model
-# --------------------------------------
-def test_structure_and_span():
-    """Test Structure and span of frequently-used models"""
-
-    def _verify(res_fptr, golden_fptr):
-        with tvm.testing.enable_span_filling():
-            with_span = res_fptr()
-        with tvm.testing.disable_span_filling():
-            without_span = res_fptr()
-        tvm.ir.assert_structural_equal(with_span, without_span)
-        _verify_structural_equal_with_span(with_span, golden_fptr())
-
-    def _tf_to_tflite(
-        input_tensors, output_tensors, init_global_variables=False, experimental_new_converter=False
-    ):
-        with tf.Session() as sess:
-            if init_global_variables:
-                sess.run(variables.global_variables_initializer())
-            converter = tf.lite.TFLiteConverter.from_session(sess, input_tensors, output_tensors)
-            converter.experimental_new_converter = experimental_new_converter
-
-            tflite_model_buffer = converter.convert()
-
-        try:
-            import tflite.Model
-
-            tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buffer, 0)
-        except AttributeError:
-            import tflite
-
-            tflite_model = tflite.Model.GetRootAsModel(tflite_model_buffer, 0)
-        except ImportError:
-            raise ImportError("The tflite package must be installed")
-        return tflite_model
-
-    def _test_conv2d_bias_add_span():
-        def _res():
-            in_shape = (1, 5, 5, 1)
-            kernel_shpae = (2, 2, 1, 2)
-            kernel_in = np.ones(kernel_shpae)
-
-            with tf.Graph().as_default():
-                x = array_ops.placeholder(shape=in_shape, dtype="float32", name="input")
-                kernel = tf.constant(kernel_in, dtype=tf.float32, name="filter_weight")
-                tf_model = tf.nn.conv2d(
-                    x, kernel, strides=[1, 1, 1, 1], padding="VALID", name="conv2d"
-                )
-                tflite_model = _tf_to_tflite([x], [tf_model])
-
-            mod, _ = relay.frontend.from_tflite(
-                tflite_model,
-                shape_dict={"input": in_shape},
-                dtype_dict={"input": "float32"},
-                op_converter=relay.frontend.tflite.OperatorConverter,
-            )
-            return mod["main"]
-
-        def _golden():
-            in_input = relay.var(
-                "input", relay.TensorType([1, 5, 5, 1]), span=_create_span("input")
-            )
-            weight = relay.var(
-                "_param_1", relay.TensorType([2, 2, 1, 2]), span=_create_span("filter_weight")
-            )
-            bias = relay.var("_param_2", relay.TensorType([2]), span=_create_span("conv2d_bias"))
-            conv2d = _set_span(
-                relay.nn.conv2d(
-                    in_input,
-                    weight,
-                    channels=2,
-                    kernel_size=[2, 2],
-                    data_layout="NHWC",
-                    kernel_layout="HWIO",
-                ),
-                "conv2d",
-            )
-            bias_add = _set_span(relay.nn.bias_add(conv2d, bias, axis=3), "conv2d")
-            attrs = ir.make_node("DictAttrs", **{"output_tensor_names": ["conv2d"]})
-            func = relay.Function([in_input, weight, bias], bias_add, attrs=attrs)
-            mod = ir.IRModule.from_expr(func)
-            return mod["main"]
-
-        _verify(_res, _golden)
-
-    def _test_fully_connected_bias_add_span():
-        def _res():
-            in_shape = (1, 10)
-            kernel_shpae = (10, 10)
-            kernel_in = np.ones(kernel_shpae)
-
-            with tf.Graph().as_default():
-                x = array_ops.placeholder(shape=in_shape, dtype="float32", name="input")
-                weight = tf.constant(kernel_in, dtype=tf.float32, name="filter_weight")
-                tf_model = math_ops.mat_mul(x, weight, name="dense")
-                tflite_model = _tf_to_tflite([x], [tf_model])
-
-            mod, _ = relay.frontend.from_tflite(
-                tflite_model,
-                shape_dict={"input": in_shape},
-                dtype_dict={"input": "float32"},
-                op_converter=relay.frontend.tflite.OperatorConverter,
-            )
-            return mod["main"]
-
-        def _golden():
-            in_input = relay.var("input", relay.TensorType([1, 10]), span=_create_span("input"))
-            weight = relay.var(
-                "_param_1", relay.TensorType([10, 10]), span=_create_span("filter_weight/transpose")
-            )
-            bias = relay.var("_param_2", relay.TensorType([10]), span=_create_span("dense_bias"))
-            reshape = _set_span(relay.reshape(in_input, [-1, 10]), "dense")
-            dense = _set_span(relay.nn.dense(reshape, weight, units=10), "dense")
-            bias_add = _set_span(relay.nn.bias_add(dense, bias), "dense")
-            attrs = ir.make_node("DictAttrs", **{"output_tensor_names": ["dense"]})
-            func = relay.Function([in_input, weight, bias], bias_add, attrs=attrs)
-            mod = ir.IRModule.from_expr(func)
-            return mod["main"]
-
-        _verify(_res, _golden)
-
-    def _test_reshape_span():
-        def _res():
-            in_shape = (1, 10)
-            output_shape = (2, 5)
-
-            with tf.Graph().as_default():
-                x = array_ops.placeholder(shape=in_shape, dtype="float32", name="input")
-                tf_model = array_ops.reshape(x, output_shape, "reshape")
-                tflite_model = _tf_to_tflite([x], [tf_model])
-
-            mod, _ = relay.frontend.from_tflite(
-                tflite_model,
-                shape_dict={"input": in_shape},
-                dtype_dict={"input": "float32"},
-                op_converter=relay.frontend.tflite.OperatorConverter,
-            )
-            return mod["main"]
-
-        def _golden():
-            in_input = relay.var("input", relay.TensorType([1, 10]), span=_create_span("input"))
-            reshape = _set_span(relay.reshape(in_input, [2, 5]), "reshape")
-            attrs = ir.make_node("DictAttrs", **{"output_tensor_names": ["reshape"]})
-            func = relay.Function([in_input], reshape, attrs=attrs)
-            mod = ir.IRModule.from_expr(func)
-            return mod["main"]
-
-        _verify(_res, _golden)
-
-    _test_conv2d_bias_add_span()
-    _test_fully_connected_bias_add_span()
-    _test_reshape_span()
-
-
-class TestConv2d:
-    """Import Conv2d operator from TFLite, build with Relay and test."""
-
-    input_shape, kernel_shape, padding = tvm.testing.parameters(
-        ((1, 128, 256, 6), (5, 5, 6, 10), "SAME"),
-        ((1, 128, 256, 6), (5, 5, 6, 10), "VALID"),
-        # conv2d_group cases
-        ((1, 30, 40, 6), (5, 5, 1, 6), "SAME"),
-        ((1, 30, 40, 6), (5, 5, 1, 6), "VALID"),
-    )
-
-    def test_conv2d(self, input_shape: tuple, kernel_shape: tuple, padding: str):
-        dtype = tf.float32
-        kernel_in = np.ones(kernel_shape)
-        with tf.Graph().as_default():
-            x = array_ops.placeholder(shape=input_shape, dtype=dtype.name, name="input")
-            kernel = tf.constant(kernel_in, dtype=dtype, name="filter_weight")
-            out = tf.nn.conv2d(x, kernel, strides=[1, 1, 1, 1], padding=padding, name="conv2d")
-            input_data = np.random.randn(*input_shape).astype(dtype.name)
-            compare_tflite_with_tvm(
-                [input_data],
-                ["input"],
-                [x],
-                [out],
-            )
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/scripts/task_python_frontend.sh b/tests/scripts/task_python_frontend.sh
index 593e8f50c1d0..22d9d9ee22db 100755
--- a/tests/scripts/task_python_frontend.sh
+++ b/tests/scripts/task_python_frontend.sh
@@ -25,33 +25,4 @@ export OMP_NUM_THREADS=1
 
 export TVM_TEST_TARGETS="llvm;cuda"
 
-find . -type f -path "*.pyc" | xargs rm -f
-
-# Rebuild cython
-make cython3
-
-
-echo "Running relay ONNX frontend test..."
-run_pytest cython python-frontend-onnx tests/python/frontend/onnx
-
-echo "Running relay PyTorch frontend test..."
-run_pytest cython python-frontend-pytorch tests/python/frontend/pytorch
-
-echo "Running relay Tensorflow frontend test..."
-# Note: Tensorflow tests often have memory issues, so invoke each one separately
-TENSORFLOW_TESTS=$(./ci/scripts/jenkins/pytest_ids.py --folder tests/python/frontend/tensorflow)
-i=0
-for node_id in $TENSORFLOW_TESTS; do
-    echo "$node_id"
-    run_pytest cython "python-frontend-tensorflow-$i" "$node_id"
-    i=$((i+1))
-done
-
-echo "Running relay DarkNet frontend test..."
-run_pytest cython python-frontend-darknet tests/python/frontend/darknet
-
-echo "Running relay PaddlePaddle frontend test..."
-run_pytest cython python-frontend-paddlepaddle tests/python/frontend/paddlepaddle
-
-echo "Running relay CoreML frontend test..."
-run_pytest cython python-frontend-coreml tests/python/frontend/coreml
+# TODO(Siyuan): Keep this file for passing CI
diff --git a/tests/scripts/task_python_frontend_cpu.sh b/tests/scripts/task_python_frontend_cpu.sh
index aac554bea53a..73f21c3c924b 100755
--- a/tests/scripts/task_python_frontend_cpu.sh
+++ b/tests/scripts/task_python_frontend_cpu.sh
@@ -26,13 +26,4 @@ export OMP_NUM_THREADS=1
 
 export TVM_TEST_TARGETS="llvm"
 
-find . -type f -path "*.pyc" | xargs rm -f
-
-# Rebuild cython
-make cython3
-
-echo "Running relay TFLite frontend test..."
-run_pytest cython python-frontend-tflite tests/python/frontend/tflite
-
-echo "Running relay Keras frontend test..."
-run_pytest cython python-frontend-keras tests/python/frontend/keras
+# TODO(Siyuan): Keep this file for passing CI
diff --git a/tests/scripts/task_python_unittest.sh b/tests/scripts/task_python_unittest.sh
index 5b07b5256ea5..4a13c6ce1ed2 100755
--- a/tests/scripts/task_python_unittest.sh
+++ b/tests/scripts/task_python_unittest.sh
@@ -55,7 +55,6 @@ TEST_FILES=(
 )
 
 for TEST_FILE in ${TEST_FILES[@]}; do
-    run_pytest ctypes ${TEST_FILE}-0, tests/python/${TEST_FILE}
     run_pytest cython ${TEST_FILE}-1, tests/python/${TEST_FILE}
 done
 
diff --git a/tests/scripts/task_python_unittest_gpuonly.sh b/tests/scripts/task_python_unittest_gpuonly.sh
index b478bbdc773d..e68fcba25c91 100755
--- a/tests/scripts/task_python_unittest_gpuonly.sh
+++ b/tests/scripts/task_python_unittest_gpuonly.sh
@@ -33,5 +33,4 @@ export TVM_UNITTEST_TESTSUITE_NAME=python-codegen-vulkan
 
 source tests/scripts/setup-pytest-env.sh
 
-run_pytest ctypes ${TVM_UNITTEST_TESTSUITE_NAME}-0 tests/python/codegen/test_target_codegen_vulkan.py
 run_pytest cython ${TVM_UNITTEST_TESTSUITE_NAME}-1 tests/python/codegen/test_target_codegen_vulkan.py