From 6963dc967103999d624259571302d5c2f6820a3f Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 25 Aug 2021 14:38:20 -0700
Subject: [PATCH 01/14] nll loss v1

---
 python/tvm/relay/frontend/onnx.py | 61 +++++++++++++++++++++++--------
 1 file changed, 45 insertions(+), 16 deletions(-)

diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py
index 5471f67ea106..03ff82ce296d 100644
--- a/python/tvm/relay/frontend/onnx.py
+++ b/python/tvm/relay/frontend/onnx.py
@@ -22,6 +22,7 @@
 
 import numpy as np
 import tvm
+from tvm import relay
 from tvm.ir import IRModule
 from tvm.topi.utils import get_const_tuple
 
@@ -32,24 +33,12 @@
 from .. import loops as _loops
 from .. import op as _op
 from .. import qnn as _qnn
+from .. import random as _random
 from .. import ty as _ty
 from .. import vision as _vision
-from .. import random as _random
-from .common import (
-    AttrCvt,
-    Renamer,
-    fold_constant,
-    get_name,
-    get_relay_op,
-    infer_channels,
-    infer_shape,
-    infer_type,
-    infer_value,
-    new_var,
-    unbind,
-    gru_cell,
-    lstm_cell,
-)
+from .common import (AttrCvt, Renamer, fold_constant, get_name, get_relay_op,
+                     gru_cell, infer_channels, infer_shape, infer_type,
+                     infer_value, lstm_cell, new_var, unbind)
 
 __all__ = ["from_onnx"]
@@ -3481,6 +3470,46 @@ def _impl_v1(cls, inputs, attr, params):
 
         return vals
 
 
+class NegativeLogLikelihoodLoss(OnnxOpConverter):
+    """Operator converter for NegativeLogLikelihoodLoss"""
+
+    VALID_REDUCTIONS = {"mean", "sum", "none"}
+
+    @classmethod
+    def _impl_v13(cls, inputs, attr, params):
+        ignore_index = attr.get("ignore_index", None)
+        reduction = attr.get("reduction", "mean")
+
+        if reduction not in cls.VALID_REDUCTIONS:
+            raise ValueError(
+                f"Unknown reduction type {reduction}, choices are {cls.VALID_REDUCTIONS}"
+            )
+
+        input_tensor, target_tensor, weight_tensor = inputs
+        loss = -input_tensor
+        if weight_tensor is not None:
+            loss *= weight_tensor
+
+        if target_tensor is not None and ignore_index is not None:
+            mask_tensor = target_tensor == ignore_index
+
+            # Turn all "True" entries to 0 and all "False" entries to 1
+            mask_tensor = 1 - mask_tensor
+
+            loss *= mask_tensor
+
+        if reduction == "mean":
+            if weight_tensor is not None:
+                return relay.sum(loss) / relay.sum(weight_tensor)
+            else:
+                return relay.mean(loss)
+        elif reduction == "sum":
+            return relay.sum(loss)
+        else:
+            # Case reduction == 'none'
+            return loss
+
+
 # compatible operators that do NOT require any conversion.
 _identity_list = []

From d6f420f8ef40ffe7cecc61b78b2a66900136eb80 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 25 Aug 2021 14:46:23 -0700
Subject: [PATCH 02/14] add converter

---
 python/tvm/relay/frontend/onnx.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py
index 03ff82ce296d..f723581cfa76 100644
--- a/python/tvm/relay/frontend/onnx.py
+++ b/python/tvm/relay/frontend/onnx.py
@@ -36,9 +36,21 @@
 from .. import random as _random
 from .. import ty as _ty
 from ..
import vision as _vision -from .common import (AttrCvt, Renamer, fold_constant, get_name, get_relay_op, - gru_cell, infer_channels, infer_shape, infer_type, - infer_value, lstm_cell, new_var, unbind) +from .common import ( + AttrCvt, + Renamer, + fold_constant, + get_name, + get_relay_op, + gru_cell, + infer_channels, + infer_shape, + infer_type, + infer_value, + lstm_cell, + new_var, + unbind, +) __all__ = ["from_onnx"] @@ -3692,6 +3704,8 @@ def _get_convert_map(opset): "ConvInteger": ConvInteger.get_converter(opset), # Random number generation. "RandomUniform": RandomUniform.get_converter(opset), + # Loss functions + "NegativeLogLikelihoodLoss": NegativeLogLikelihoodLoss.get_converter(opset), } From 83998d0bb7f49231ecad4dc0de28bd26ade63884 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 25 Aug 2021 14:50:23 -0700 Subject: [PATCH 03/14] decode strings in byte form --- python/tvm/relay/frontend/onnx.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index f723581cfa76..3fd354677657 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -36,21 +36,9 @@ from .. import random as _random from .. import ty as _ty from .. import vision as _vision -from .common import ( - AttrCvt, - Renamer, - fold_constant, - get_name, - get_relay_op, - gru_cell, - infer_channels, - infer_shape, - infer_type, - infer_value, - lstm_cell, - new_var, - unbind, -) +from .common import (AttrCvt, Renamer, fold_constant, get_name, get_relay_op, + gru_cell, infer_channels, infer_shape, infer_type, + infer_value, lstm_cell, new_var, unbind) __all__ = ["from_onnx"] @@ -3490,7 +3478,7 @@ class NegativeLogLikelihoodLoss(OnnxOpConverter): @classmethod def _impl_v13(cls, inputs, attr, params): ignore_index = attr.get("ignore_index", None) - reduction = attr.get("reduction", "mean") + reduction = attr.get("reduction", b"mean").decode("utf-8") if reduction not in cls.VALID_REDUCTIONS: raise ValueError( From 6c7ec712d70c9ba9d8c5204011b5a3d4770a642e Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 25 Aug 2021 14:54:47 -0700 Subject: [PATCH 04/14] decode variable length inputs --- python/tvm/relay/frontend/onnx.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 3fd354677657..49a1c18703ff 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -3485,7 +3485,12 @@ def _impl_v13(cls, inputs, attr, params): f"Unknown reduction type {reduction}, choices are {cls.VALID_REDUCTIONS}" ) - input_tensor, target_tensor, weight_tensor = inputs + input_tensor, target_tensor = inputs + if len(inputs) == 3: + weight_tensor = inputs[2] + else: + weight_tensor = None + loss = -input_tensor if weight_tensor is not None: loss *= weight_tensor From 1fbc3b7978dffd8e73000489cce6eebb285a31a0 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Wed, 25 Aug 2021 15:59:17 -0700 Subject: [PATCH 05/14] make shapes correct --- python/tvm/relay/frontend/onnx.py | 6 +- python/tvm/testing/#utils.py# | 1436 +++++++++++++++++++++++++++++ 2 files changed, 1440 insertions(+), 2 deletions(-) create mode 100644 python/tvm/testing/#utils.py# diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 49a1c18703ff..9a81dc93168b 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -3490,8 +3490,10 @@ def _impl_v13(cls, 
inputs, attr, params): weight_tensor = inputs[2] else: weight_tensor = None - - loss = -input_tensor + + target_tensor = relay.expand_dims(target_tensor, 1) + loss = -relay.gather(input_tensor, axis=1, indices=target_tensor) + loss = relay.squeeze(loss, axis=[1]) if weight_tensor is not None: loss *= weight_tensor diff --git a/python/tvm/testing/#utils.py# b/python/tvm/testing/#utils.py# new file mode 100644 index 000000000000..04a235b64fdf --- /dev/null +++ b/python/tvm/testing/#utils.py# @@ -0,0 +1,1436 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# pylint: disable=invalid-name,unnecessary-comprehension +""" TVM testing utilities + +Testing Markers +*************** + +We use pytest markers to specify the requirements of test functions. Currently +there is a single distinction that matters for our testing environment: does +the test require a gpu. For tests that require just a gpu or just a cpu, we +have the decorator :py:func:`requires_gpu` that enables the test when a gpu is +available. To avoid running tests that don't require a gpu on gpu nodes, this +decorator also sets the pytest marker `gpu` so we can use select the gpu subset +of tests (using `pytest -m gpu`). + +Unfortunately, many tests are written like this: + +.. python:: + + def test_something(): + for target in all_targets(): + do_something() + +The test uses both gpu and cpu targets, so the test needs to be run on both cpu +and gpu nodes. But we still want to only run the cpu targets on the cpu testing +node. The solution is to mark these tests with the gpu marker so they will be +run on the gpu nodes. But we also modify all_targets (renamed to +enabled_targets) so that it only returns gpu targets on gpu nodes and cpu +targets on cpu nodes (using an environment variable). + +Instead of using the all_targets function, future tests that would like to +test against a variety of targets should use the +:py:func:`tvm.testing.parametrize_targets` functionality. This allows us +greater control over which targets are run on which testing nodes. + +If in the future we want to add a new type of testing node (for example +fpgas), we need to add a new marker in `tests/python/pytest.ini` and a new +function in this module. Then targets using this node should be added to the +`TVM_TEST_TARGETS` environment variable in the CI. 
+""" +import collections +import copy +import copyreg +import ctypes +import functools +import logging +import os +import sys +import time +import pickle +import pytest +import _pytest +import numpy as np +import tvm +import tvm.arith +import tvm.tir +import tvm.te +import tvm._ffi + +from tvm.contrib import nvcc, cudnn +from tvm.error import TVMError + + +def assert_allclose(actual, desired, rtol=1e-7, atol=1e-7): + """Version of np.testing.assert_allclose with `atol` and `rtol` fields set + in reasonable defaults. + + Arguments `actual` and `desired` are not interchangeable, since the function + compares the `abs(actual-desired)` with `atol+rtol*abs(desired)`. Since we + often allow `desired` to be close to zero, we generally want non-zero `atol`. + """ + actual = np.asanyarray(actual) + desired = np.asanyarray(desired) + np.testing.assert_allclose(actual.shape, desired.shape) + np.testing.assert_allclose(actual, desired, rtol=rtol, atol=atol, verbose=True) + + +def check_numerical_grads( + function, input_values, grad_values, function_value=None, delta=1e-3, atol=1e-2, rtol=0.1 +): + """A helper function that checks that numerical gradients of a function are + equal to gradients computed in some different way (analytical gradients). + + Numerical gradients are computed using finite difference approximation. To + reduce the number of function evaluations, the number of points used is + gradually increased if the error value is too high (up to 5 points). + + Parameters + ---------- + function + A function that takes inputs either as positional or as keyword + arguments (either `function(*input_values)` or `function(**input_values)` + should be correct) and returns a scalar result. Should accept numpy + ndarrays. + + input_values : Dict[str, numpy.ndarray] or List[numpy.ndarray] + A list of values or a dict assigning values to variables. Represents the + point at which gradients should be computed. + + grad_values : Dict[str, numpy.ndarray] or List[numpy.ndarray] + Gradients computed using a different method. + + function_value : float, optional + Should be equal to `function(**input_values)`. + + delta : float, optional + A small number used for numerical computation of partial derivatives. + The default 1e-3 is a good choice for float32. + + atol : float, optional + Absolute tolerance. Gets multiplied by `sqrt(n)` where n is the size of a + gradient. + + rtol : float, optional + Relative tolerance. 
+ """ + # If input_values is a list then function accepts positional arguments + # In this case transform it to a function taking kwargs of the form {"0": ..., "1": ...} + if not isinstance(input_values, dict): + input_len = len(input_values) + input_values = {str(idx): val for idx, val in enumerate(input_values)} + + def _function(_input_len=input_len, _orig_function=function, **kwargs): + return _orig_function(*(kwargs[str(i)] for i in range(input_len))) + + function = _function + + grad_values = {str(idx): val for idx, val in enumerate(grad_values)} + + if function_value is None: + function_value = function(**input_values) + + # a helper to modify j-th element of val by a_delta + def modify(val, j, a_delta): + val = val.copy() + val.reshape(-1)[j] = val.reshape(-1)[j] + a_delta + return val + + # numerically compute a partial derivative with respect to j-th element of the var `name` + def derivative(x_name, j, a_delta): + modified_values = { + n: modify(val, j, a_delta) if n == x_name else val for n, val in input_values.items() + } + return (function(**modified_values) - function_value) / a_delta + + def compare_derivative(j, n_der, grad): + der = grad.reshape(-1)[j] + return np.abs(n_der - der) < atol + rtol * np.abs(n_der) + + for x_name, grad in grad_values.items(): + if grad.shape != input_values[x_name].shape: + raise AssertionError( + "Gradient wrt '{}' has unexpected shape {}, expected {} ".format( + x_name, grad.shape, input_values[x_name].shape + ) + ) + + ngrad = np.zeros_like(grad) + + wrong_positions = [] + + # compute partial derivatives for each position in this variable + for j in range(np.prod(grad.shape)): + # forward difference approximation + nder = derivative(x_name, j, delta) + + # if the derivative is not equal to the analytical one, try to use more + # precise and expensive methods + if not compare_derivative(j, nder, grad): + # central difference approximation + nder = (derivative(x_name, j, -delta) + nder) / 2 + + if not compare_derivative(j, nder, grad): + # central difference approximation using h = delta/2 + cnder2 = ( + derivative(x_name, j, delta / 2) + derivative(x_name, j, -delta / 2) + ) / 2 + # five-point derivative + nder = (4 * cnder2 - nder) / 3 + + # if the derivatives still don't match, add this position to the + # list of wrong positions + if not compare_derivative(j, nder, grad): + wrong_positions.append(np.unravel_index(j, grad.shape)) + + ngrad.reshape(-1)[j] = nder + + wrong_percentage = int(100 * len(wrong_positions) / np.prod(grad.shape)) + + dist = np.sqrt(np.sum((ngrad - grad) ** 2)) + grad_norm = np.sqrt(np.sum(ngrad ** 2)) + + if not (np.isfinite(dist) and np.isfinite(grad_norm)): + raise ValueError( + "NaN or infinity detected during numerical gradient checking wrt '{}'\n" + "analytical grad = {}\n numerical grad = {}\n".format(x_name, grad, ngrad) + ) + + # we multiply atol by this number to make it more universal for different sizes + sqrt_n = np.sqrt(float(np.prod(grad.shape))) + + if dist > atol * sqrt_n + rtol * grad_norm: + raise AssertionError( + "Analytical and numerical grads wrt '{}' differ too much\n" + "analytical grad = {}\n numerical grad = {}\n" + "{}% of elements differ, first 10 of wrong positions: {}\n" + "distance > atol*sqrt(n) + rtol*grad_norm\n" + "distance {} > {}*{} + {}*{}".format( + x_name, + grad, + ngrad, + wrong_percentage, + wrong_positions[:10], + dist, + atol, + sqrt_n, + rtol, + grad_norm, + ) + ) + + max_diff = np.max(np.abs(ngrad - grad)) + avg_diff = np.mean(np.abs(ngrad - grad)) + logging.info( + 
"Numerical grad test wrt '%s' of shape %s passes, " + "dist = %f, max_diff = %f, avg_diff = %f", + x_name, + grad.shape, + dist, + max_diff, + avg_diff, + ) + + +def assert_prim_expr_equal(lhs, rhs): + """Assert lhs and rhs equals to each iother. + + Parameters + ---------- + lhs : tvm.tir.PrimExpr + The left operand. + + rhs : tvm.tir.PrimExpr + The left operand. + """ + ana = tvm.arith.Analyzer() + res = ana.simplify(lhs - rhs) + equal = isinstance(res, tvm.tir.IntImm) and res.value == 0 + if not equal: + raise ValueError("{} and {} are not equal".format(lhs, rhs)) + + +def check_bool_expr_is_true(bool_expr, vranges, cond=None): + """Check that bool_expr holds given the condition cond + for every value of free variables from vranges. + + for example, 2x > 4y solves to x > 2y given x in (0, 10) and y in (0, 10) + here bool_expr is x > 2y, vranges is {x: (0, 10), y: (0, 10)}, cond is 2x > 4y + We creates iterations to check, + for x in range(10): + for y in range(10): + assert !(2x > 4y) || (x > 2y) + + Parameters + ---------- + bool_expr : tvm.ir.PrimExpr + Boolean expression to check + vranges: Dict[tvm.tir.expr.Var, tvm.ir.Range] + Free variables and their ranges + cond: tvm.ir.PrimExpr + extra conditions needs to be satisfied. + """ + if cond is not None: + bool_expr = tvm.te.any(tvm.tir.Not(cond), bool_expr) + + def _run_expr(expr, vranges): + """Evaluate expr for every value of free variables + given by vranges and return the tensor of results. + """ + + def _compute_body(*us): + vmap = {v: u + r.min for (v, r), u in zip(vranges.items(), us)} + return tvm.tir.stmt_functor.substitute(expr, vmap) + + A = tvm.te.compute([r.extent.value for v, r in vranges.items()], _compute_body) + args = [tvm.nd.empty(A.shape, A.dtype)] + sch = tvm.te.create_schedule(A.op) + mod = tvm.build(sch, [A]) + mod(*args) + return args[0].numpy() + + res = _run_expr(bool_expr, vranges) + if not np.all(res): + indices = list(np.argwhere(res == 0)[0]) + counterex = [(str(v), i + r.min) for (v, r), i in zip(vranges.items(), indices)] + counterex = sorted(counterex, key=lambda x: x[0]) + counterex = ", ".join([v + " = " + str(i) for v, i in counterex]) + ana = tvm.arith.Analyzer() + raise AssertionError( + "Expression {}\nis not true on {}\n" + "Counterexample: {}".format(ana.simplify(bool_expr), vranges, counterex) + ) + + +def check_int_constraints_trans_consistency(constraints_trans, vranges=None): + """Check IntConstraintsTransform is a bijective transformation. 
+ + Parameters + ---------- + constraints_trans : arith.IntConstraintsTransform + Integer constraints transformation + vranges: Dict[tvm.tir.Var, tvm.ir.Range] + Free variables and their ranges + """ + if vranges is None: + vranges = {} + + def _check_forward(constraints1, constraints2, varmap, backvarmap): + ana = tvm.arith.Analyzer() + all_vranges = vranges.copy() + all_vranges.update({v: r for v, r in constraints1.ranges.items()}) + + # Check that the transformation is injective + cond_on_vars = tvm.tir.const(1, "bool") + for v in constraints1.variables: + if v in varmap: + # variable mapping is consistent + v_back = ana.simplify(tvm.tir.stmt_functor.substitute(varmap[v], backvarmap)) + cond_on_vars = tvm.te.all(cond_on_vars, v == v_back) + # Also we have to check that the new relations are true when old relations are true + cond_subst = tvm.tir.stmt_functor.substitute( + tvm.te.all(tvm.tir.const(1, "bool"), *constraints2.relations), backvarmap + ) + # We have to include relations from vranges too + for v in constraints2.variables: + if v in constraints2.ranges: + r = constraints2.ranges[v] + range_cond = tvm.te.all(v >= r.min, v < r.min + r.extent) + range_cond = tvm.tir.stmt_functor.substitute(range_cond, backvarmap) + cond_subst = tvm.te.all(cond_subst, range_cond) + cond_subst = ana.simplify(cond_subst) + check_bool_expr_is_true( + tvm.te.all(cond_subst, cond_on_vars), + all_vranges, + cond=tvm.te.all(tvm.tir.const(1, "bool"), *constraints1.relations), + ) + + _check_forward( + constraints_trans.src, + constraints_trans.dst, + constraints_trans.src_to_dst, + constraints_trans.dst_to_src, + ) + _check_forward( + constraints_trans.dst, + constraints_trans.src, + constraints_trans.dst_to_src, + constraints_trans.src_to_dst, + ) + + +def _get_targets(target_str=None): + if target_str is None: + target_str = os.environ.get("TVM_TEST_TARGETS", "") + # Use dict instead of set for de-duplication so that the + # targets stay in the order specified. + target_names = list({t.strip(): None for t in target_str.split(";") if t.strip()}) + + if not target_names: + target_names = DEFAULT_TEST_TARGETS + + targets = [] + for target in target_names: + target_kind = target.split()[0] + + if target_kind == "cuda" and "cudnn" in tvm.target.Target(target).attrs.get("libs", []): + is_enabled = tvm.support.libinfo()["USE_CUDNN"].lower() in ["on", "true", "1"] + is_runnable = is_enabled and cudnn.exists() + else: + is_enabled = tvm.runtime.enabled(target_kind) + is_runnable = is_enabled and tvm.device(target_kind).exist + + targets.append( + { + "target": target, + "target_kind": target_kind, + "is_enabled": is_enabled, + "is_runnable": is_runnable, + } + ) + + if all(not t["is_runnable"] for t in targets): + if tvm.runtime.enabled("llvm"): + logging.warning( + "None of the following targets are supported by this build of TVM: %s." + " Try setting TVM_TEST_TARGETS to a supported target. Defaulting to llvm.", + target_str, + ) + return _get_targets("llvm") + + raise TVMError( + "None of the following targets are supported by this build of TVM: %s." + " Try setting TVM_TEST_TARGETS to a supported target." + " Cannot default to llvm, as it is not enabled." 
% target_str + ) + + return targets + + +DEFAULT_TEST_TARGETS = [ + "llvm", + "llvm -device=arm_cpu", + "cuda", + "cuda -model=unknown -libs=cudnn", + "nvptx", + "vulkan -from_device=0", + "opencl", + "opencl -device=mali,aocl_sw_emu", + "opencl -device=intel_graphics", + "metal", + "rocm", +] + + +def device_enabled(target): + """Check if a target should be used when testing. + + It is recommended that you use :py:func:`tvm.testing.parametrize_targets` + instead of manually checking if a target is enabled. + + This allows the user to control which devices they are testing against. In + tests, this should be used to check if a device should be used when said + device is an optional part of the test. + + Parameters + ---------- + target : str + Target string to check against + + Returns + ------- + bool + Whether or not the device associated with this target is enabled. + + Example + ------- + >>> @tvm.testing.uses_gpu + >>> def test_mytest(): + >>> for target in ["cuda", "llvm"]: + >>> if device_enabled(target): + >>> test_body... + + Here, `test_body` will only be reached by with `target="cuda"` on gpu test + nodes and `target="llvm"` on cpu test nodes. + """ + assert isinstance(target, str), "device_enabled requires a target as a string" + # only check if device name is found, sometime there are extra flags + target_kind = target.split(" ")[0] + return any(target_kind == t["target_kind"] for t in _get_targets() if t["is_runnable"]) + + +def enabled_targets(): + """Get all enabled targets with associated devices. + + In most cases, you should use :py:func:`tvm.testing.parametrize_targets` instead of + this function. + + In this context, enabled means that TVM was built with support for + this target, the target name appears in the TVM_TEST_TARGETS + environment variable, and a suitable device for running this + target exists. If TVM_TEST_TARGETS is not set, it defaults to + variable DEFAULT_TEST_TARGETS in this module. + + If you use this function in a test, you **must** decorate the test with + :py:func:`tvm.testing.uses_gpu` (otherwise it will never be run on the gpu). + + Returns + ------- + targets: list + A list of pairs of all enabled devices and the associated context + + """ + return [(t["target"], tvm.device(t["target"])) for t in _get_targets() if t["is_runnable"]] + + +def _compose(args, decs): + """Helper to apply multiple markers""" + if len(args) > 0: + f = args[0] + for d in reversed(decs): + f = d(f) + return f + return decs + + +def uses_gpu(*args): + """Mark to differentiate tests that use the GPU in some capacity. + + These tests will be run on CPU-only test nodes and on test nodes with GPUs. + To mark a test that must have a GPU present to run, use + :py:func:`tvm.testing.requires_gpu`. + + Parameters + ---------- + f : function + Function to mark + """ + _uses_gpu = [pytest.mark.gpu] + return _compose(args, _uses_gpu) + + +def requires_gpu(*args): + """Mark a test as requiring a GPU to run. + + Tests with this mark will not be run unless a gpu is present. + + Parameters + ---------- + f : function + Function to mark + """ + _requires_gpu = [ + pytest.mark.skipif( + not tvm.cuda().exist + and not tvm.rocm().exist + and not tvm.opencl().exist + and not tvm.metal().exist + and not tvm.vulkan().exist, + reason="No GPU present", + ), + *uses_gpu(), + ] + return _compose(args, _requires_gpu) + + +def requires_cuda(*args): + """Mark a test as requiring the CUDA runtime. + + This also marks the test as requiring a cuda gpu. 
+ + Parameters + ---------- + f : function + Function to mark + """ + _requires_cuda = [ + pytest.mark.cuda, + pytest.mark.skipif(not device_enabled("cuda"), reason="CUDA support not enabled"), + *requires_gpu(), + ] + return _compose(args, _requires_cuda) + + +def requires_cudnn(*args): + """Mark a test as requiring the cuDNN library. + + This also marks the test as requiring a cuda gpu. + + Parameters + ---------- + f : function + Function to mark + """ + + requirements = [ + pytest.mark.skipif( + not cudnn.exists(), reason="cuDNN library not enabled, or not installed" + ), + *requires_cuda(), + ] + return _compose(args, requirements) + + +def requires_nvptx(*args): + """Mark a test as requiring the NVPTX compilation on the CUDA runtime + + This also marks the test as requiring a cuda gpu, and requiring + LLVM support. + + Parameters + ---------- + f : function + Function to mark + + """ + _requires_nvptx = [ + pytest.mark.skipif(not device_enabled("nvptx"), reason="NVPTX support not enabled"), + *requires_llvm(), + *requires_gpu(), + ] + return _compose(args, _requires_nvptx) + + +def requires_cudagraph(*args): + """Mark a test as requiring the CUDA Graph Feature + + This also marks the test as requiring cuda + + Parameters + ---------- + f : function + Function to mark + """ + _requires_cudagraph = [ + pytest.mark.skipif( + not nvcc.have_cudagraph(), reason="CUDA Graph is not supported in this environment" + ), + *requires_cuda(), + ] + return _compose(args, _requires_cudagraph) + + +def requires_opencl(*args): + """Mark a test as requiring the OpenCL runtime. + + This also marks the test as requiring a gpu. + + Parameters + ---------- + f : function + Function to mark + """ + _requires_opencl = [ + pytest.mark.opencl, + pytest.mark.skipif(not device_enabled("opencl"), reason="OpenCL support not enabled"), + *requires_gpu(), + ] + return _compose(args, _requires_opencl) + + +def requires_rocm(*args): + """Mark a test as requiring the rocm runtime. + + This also marks the test as requiring a gpu. + + Parameters + ---------- + f : function + Function to mark + """ + _requires_rocm = [ + pytest.mark.rocm, + pytest.mark.skipif(not device_enabled("rocm"), reason="rocm support not enabled"), + *requires_gpu(), + ] + return _compose(args, _requires_rocm) + + +def requires_metal(*args): + """Mark a test as requiring the metal runtime. + + This also marks the test as requiring a gpu. + + Parameters + ---------- + f : function + Function to mark + """ + _requires_metal = [ + pytest.mark.metal, + pytest.mark.skipif(not device_enabled("metal"), reason="metal support not enabled"), + *requires_gpu(), + ] + return _compose(args, _requires_metal) + + +def requires_vulkan(*args): + """Mark a test as requiring the vulkan runtime. + + This also marks the test as requiring a gpu. + + Parameters + ---------- + f : function + Function to mark + """ + _requires_vulkan = [ + pytest.mark.vulkan, + pytest.mark.skipif(not device_enabled("vulkan"), reason="vulkan support not enabled"), + *requires_gpu(), + ] + return _compose(args, _requires_vulkan) + + +def requires_tensorcore(*args): + """Mark a test as requiring a tensorcore to run. + + Tests with this mark will not be run unless a tensorcore is present. 
+ + Parameters + ---------- + f : function + Function to mark + """ + _requires_tensorcore = [ + pytest.mark.tensorcore, + pytest.mark.skipif( + not tvm.cuda().exist or not nvcc.have_tensorcore(tvm.cuda(0).compute_version), + reason="No tensorcore present", + ), + *requires_gpu(), + ] + return _compose(args, _requires_tensorcore) + + +def requires_llvm(*args): + """Mark a test as requiring llvm to run. + + Parameters + ---------- + f : function + Function to mark + """ + _requires_llvm = [ + pytest.mark.llvm, + pytest.mark.skipif(not device_enabled("llvm"), reason="LLVM support not enabled"), + ] + return _compose(args, _requires_llvm) + + +def requires_micro(*args): + """Mark a test as requiring microTVM to run. + + Parameters + ---------- + f : function + Function to mark + """ + _requires_micro = [ + pytest.mark.skipif( + tvm.support.libinfo().get("USE_MICRO", "OFF") != "ON", + reason="MicroTVM support not enabled. Set USE_MICRO=ON in config.cmake to enable.", + ) + ] + return _compose(args, _requires_micro) + + +def requires_rpc(*args): + """Mark a test as requiring rpc to run. + + Parameters + ---------- + f : function + Function to mark + """ + _requires_rpc = [ + pytest.mark.skipif( + tvm.support.libinfo().get("USE_RPC", "OFF") != "ON", + reason="RPC support not enabled. Set USE_RPC=ON in config.cmake to enable.", + ) + ] + return _compose(args, _requires_rpc) + + +def _target_to_requirement(target): + if isinstance(target, str): + target = tvm.target.Target(target) + + # mapping from target to decorator + if target.kind.name == "cuda" and "cudnn" in target.attrs.get("libs", []): + return requires_cudnn() + if target.kind.name == "cuda": + return requires_cuda() + if target.kind.name == "rocm": + return requires_rocm() + if target.kind.name == "vulkan": + return requires_vulkan() + if target.kind.name == "nvptx": + return requires_nvptx() + if target.kind.name == "metal": + return requires_metal() + if target.kind.name == "opencl": + return requires_opencl() + if target.kind.name == "llvm": + return requires_llvm() + return [] + + +def _pytest_target_params(targets, excluded_targets=None, xfail_targets=None): + # Include unrunnable targets here. They get skipped by the + # pytest.mark.skipif in _target_to_requirement(), showing up as + # skipped tests instead of being hidden entirely. + if targets is None: + if excluded_targets is None: + excluded_targets = set() + + if xfail_targets is None: + xfail_targets = set() + + target_marks = [] + for t in _get_targets(): + # Excluded targets aren't included in the params at all. + if t["target_kind"] not in excluded_targets: + + # Known failing targets are included, but are marked + # as expected to fail. + extra_marks = [] + if t["target_kind"] in xfail_targets: + extra_marks.append( + pytest.mark.xfail( + reason='Known failing test for target "{}"'.format(t["target_kind"]) + ) + ) + + target_marks.append((t["target"], extra_marks)) + + else: + target_marks = [(target, []) for target in targets] + + return [ + pytest.param(target, marks=_target_to_requirement(target) + extra_marks) + for target, extra_marks in target_marks + ] + + +def _auto_parametrize_target(metafunc): + """Automatically applies parametrize_targets + + Used if a test function uses the "target" fixture, but isn't + already marked with @tvm.testing.parametrize_targets. Intended + for use in the pytest_generate_tests() handler of a conftest.py + file. 
+ + """ + + def update_parametrize_target_arg( + argnames, + argvalues, + *args, + **kwargs, + ): + args = [arg.strip() for arg in argnames.split(",") if arg.strip()] + if "target" in args: + target_i = args.index("target") + + new_argvalues = [] + for argvalue in argvalues: + + if isinstance(argvalue, _pytest.mark.structures.ParameterSet): + # The parametrized value is already a + # pytest.param, so track any marks already + # defined. + param_set = argvalue.values + target = param_set[target_i] + additional_marks = argvalue.marks + elif len(args) == 1: + # Single value parametrization, argvalue is a list of values. + target = argvalue + param_set = (target,) + additional_marks = [] + else: + # Multiple correlated parameters, argvalue is a list of tuple of values. + param_set = argvalue + target = param_set[target_i] + additional_marks = [] + + new_argvalues.append( + pytest.param( + *param_set, marks=_target_to_requirement(target) + additional_marks + ) + ) + + try: + argvalues[:] = new_argvalues + except TypeError as e: + pyfunc = metafunc.definition.function + filename = pyfunc.__code__.co_filename + line_number = pyfunc.__code__.co_firstlineno + msg = ( + f"Unit test {metafunc.function.__name__} ({filename}:{line_number}) " + "is parametrized using a tuple of parameters instead of a list " + "of parameters." + ) + raise TypeError(msg) from e + + if "target" in metafunc.fixturenames: + # Update any explicit use of @pytest.mark.parmaetrize to + # parametrize over targets. This adds the appropriate + # @tvm.testing.requires_* markers for each target. + for mark in metafunc.definition.iter_markers("parametrize"): + update_parametrize_target_arg(*mark.args, **mark.kwargs) + + # Check if any explicit parametrizations exist, and apply one + # if they do not. If the function is marked with either + # excluded or known failing targets, use these to determine + # the targets to be used. + parametrized_args = [ + arg.strip() + for mark in metafunc.definition.iter_markers("parametrize") + for arg in mark.args[0].split(",") + ] + if "target" not in parametrized_args: + excluded_targets = getattr(metafunc.function, "tvm_excluded_targets", []) + xfail_targets = getattr(metafunc.function, "tvm_known_failing_targets", []) + metafunc.parametrize( + "target", + _pytest_target_params(None, excluded_targets, xfail_targets), + scope="session", + ) + + +def parametrize_targets(*args): + """Parametrize a test over a specific set of targets. + + Use this decorator when you want your test to be run over a + specific set of targets and devices. It is intended for use where + a test is applicable only to a specific target, and is + inapplicable to any others (e.g. verifying target-specific + assembly code matches known assembly code). In most + circumstances, :py:func:`tvm.testing.exclude_targets` or + :py:func:`tvm.testing.known_failing_targets` should be used + instead. + + If used as a decorator without arguments, the test will be + parametrized over all targets in + :py:func:`tvm.testing.enabled_targets`. This behavior is + automatically enabled for any target that accepts arguments of + ``target`` or ``dev``, so the explicit use of the bare decorator + is no longer needed, and is maintained for backwards + compatibility. + + Parameters + ---------- + f : function + Function to parametrize. Must be of the form `def test_xxxxxxxxx(target, dev)`:, + where `xxxxxxxxx` is any name. + targets : list[str], optional + Set of targets to run against. 
If not supplied, + :py:func:`tvm.testing.enabled_targets` will be used. + + Example + ------- + >>> @tvm.testing.parametrize_targets("llvm", "cuda") + >>> def test_mytest(target, dev): + >>> ... # do something + """ + + # Backwards compatibility, when used as a decorator with no + # arguments implicitly parametrizes over "target". The + # parametrization is now handled by _auto_parametrize_target, so + # this use case can just return the decorated function. + if len(args) == 1 and callable(args[0]): + return args[0] + + return pytest.mark.parametrize("target", list(args), scope="session") + + +def exclude_targets(*args): + """Exclude a test from running on a particular target. + + Use this decorator when you want your test to be run over a + variety of targets and devices (including cpu and gpu devices), + but want to exclude some particular target or targets. For + example, a test may wish to be run against all targets in + tvm.testing.enabled_targets(), except for a particular target that + does not support the capabilities. + + Applies pytest.mark.skipif to the targets given. + + Parameters + ---------- + f : function + Function to parametrize. Must be of the form `def test_xxxxxxxxx(target, dev)`:, + where `xxxxxxxxx` is any name. + targets : list[str] + Set of targets to exclude. + + Example + ------- + >>> @tvm.testing.exclude_targets("cuda") + >>> def test_mytest(target, dev): + >>> ... # do something + + Or + + >>> @tvm.testing.exclude_targets("llvm", "cuda") + >>> def test_mytest(target, dev): + >>> ... # do something + + """ + + def wraps(func): + func.tvm_excluded_targets = args + return func + + return wraps + + +def known_failing_targets(*args): + """Skip a test that is known to fail on a particular target. + + Use this decorator when you want your test to be run over a + variety of targets and devices (including cpu and gpu devices), + but know that it fails for some targets. For example, a newly + implemented runtime may not support all features being tested, and + should be excluded. + + Applies pytest.mark.xfail to the targets given. + + Parameters + ---------- + f : function + Function to parametrize. Must be of the form `def test_xxxxxxxxx(target, dev)`:, + where `xxxxxxxxx` is any name. + targets : list[str] + Set of targets to skip. + + Example + ------- + >>> @tvm.testing.known_failing_targets("cuda") + >>> def test_mytest(target, dev): + >>> ... # do something + + Or + + >>> @tvm.testing.known_failing_targets("llvm", "cuda") + >>> def test_mytest(target, dev): + >>> ... # do something + + """ + + def wraps(func): + func.tvm_known_failing_targets = args + return func + + return wraps + + +def parameter(*values, ids=None): + """Convenience function to define pytest parametrized fixtures. + + Declaring a variable using ``tvm.testing.parameter`` will define a + parametrized pytest fixture that can be used by test + functions. This is intended for cases that have no setup cost, + such as strings, integers, tuples, etc. For cases that have a + significant setup cost, please use :py:func:`tvm.testing.fixture` + instead. + + If a test function accepts multiple parameters defined using + ``tvm.testing.parameter``, then the test will be run using every + combination of those parameters. + + The parameter definition applies to all tests in a module. If a + specific test should have different values for the parameter, that + test should be marked with ``@pytest.mark.parametrize``. + + Parameters + ---------- + values + A list of parameter values. 
A unit test that accepts this + parameter as an argument will be run once for each parameter + given. + + ids : List[str], optional + A list of names for the parameters. If None, pytest will + generate a name from the value. These generated names may not + be readable/useful for composite types such as tuples. + + Returns + ------- + function + A function output from pytest.fixture. + + Example + ------- + >>> size = tvm.testing.parameter(1, 10, 100) + >>> def test_using_size(size): + >>> ... # Test code here + + Or + + >>> shape = tvm.testing.parameter((5,10), (512,1024), ids=['small','large']) + >>> def test_using_size(shape): + >>> ... # Test code here + + """ + + # Optional cls parameter in case a parameter is defined inside a + # class scope. + @pytest.fixture(params=values, ids=ids) + def as_fixture(*_cls, request): + return request.param + + return as_fixture + + +_parametrize_group = 0 + + +def parameters(*value_sets): + """Convenience function to define pytest parametrized fixtures. + + Declaring a variable using tvm.testing.parameters will define a + parametrized pytest fixture that can be used by test + functions. Like :py:func:`tvm.testing.parameter`, this is intended + for cases that have no setup cost, such as strings, integers, + tuples, etc. For cases that have a significant setup cost, please + use :py:func:`tvm.testing.fixture` instead. + + Unlike :py:func:`tvm.testing.parameter`, if a test function + accepts multiple parameters defined using a single call to + ``tvm.testing.parameters``, then the test will only be run once + for each set of parameters, not for all combinations of + parameters. + + These parameter definitions apply to all tests in a module. If a + specific test should have different values for some parameters, + that test should be marked with ``@pytest.mark.parametrize``. + + Parameters + ---------- + values : List[tuple] + A list of parameter value sets. Each set of values represents + a single combination of values to be tested. A unit test that + accepts parameters defined will be run once for every set of + parameters in the list. + + Returns + ------- + List[function] + Function outputs from pytest.fixture. These should be unpacked + into individual named parameters. + + Example + ------- + >>> size, dtype = tvm.testing.parameters( (16,'float32'), (512,'float16') ) + >>> def test_feature_x(size, dtype): + >>> # Test code here + >>> assert( (size,dtype) in [(16,'float32'), (512,'float16')]) + + """ + global _parametrize_group + parametrize_group = _parametrize_group + _parametrize_group += 1 + + outputs = [] + for param_values in zip(*value_sets): + + # Optional cls parameter in case a parameter is defined inside a + # class scope. 
+ def fixture_func(*_cls, request): + return request.param + + fixture_func.parametrize_group = parametrize_group + fixture_func.parametrize_values = param_values + outputs.append(pytest.fixture(fixture_func)) + + return outputs + + +def _parametrize_correlated_parameters(metafunc): + parametrize_needed = collections.defaultdict(list) + + for name, fixturedefs in metafunc.definition._fixtureinfo.name2fixturedefs.items(): + fixturedef = fixturedefs[-1] + if hasattr(fixturedef.func, "parametrize_group") and hasattr( + fixturedef.func, "parametrize_values" + ): + group = fixturedef.func.parametrize_group + values = fixturedef.func.parametrize_values + parametrize_needed[group].append((name, values)) + + for parametrize_group in parametrize_needed.values(): + if len(parametrize_group) == 1: + name, values = parametrize_group[0] + metafunc.parametrize(name, values, indirect=True) + else: + names = ",".join(name for name, values in parametrize_group) + value_sets = zip(*[values for name, values in parametrize_group]) + metafunc.parametrize(names, value_sets, indirect=True) + + +def fixture(func=None, *, cache_return_value=False): + """Convenience function to define pytest fixtures. + + This should be used as a decorator to mark functions that set up + state before a function. The return value of that fixture + function is then accessible by test functions as that accept it as + a parameter. + + Fixture functions can accept parameters defined with + :py:func:`tvm.testing.parameter`. + + By default, the setup will be performed once for each unit test + that uses a fixture, to ensure that unit tests are independent. + If the setup is expensive to perform, then the + cache_return_value=True argument can be passed to cache the setup. + The fixture function will be run only once (or once per parameter, + if used with tvm.testing.parameter), and the same return value + will be passed to all tests that use it. If the environment + variable TVM_TEST_DISABLE_CACHE is set to a non-zero value, it + will disable this feature and no caching will be performed. + + Example + ------- + >>> @tvm.testing.fixture + >>> def cheap_setup(): + >>> return 5 # Setup code here. + >>> + >>> def test_feature_x(target, dev, cheap_setup) + >>> assert(cheap_setup == 5) # Run test here + + Or + + >>> size = tvm.testing.parameter(1, 10, 100) + >>> + >>> @tvm.testing.fixture + >>> def cheap_setup(size): + >>> return 5*size # Setup code here, based on size. + >>> + >>> def test_feature_x(cheap_setup): + >>> assert(cheap_setup in [5, 50, 500]) + + Or + + >>> @tvm.testing.fixture(cache_return_value=True) + >>> def expensive_setup(): + >>> time.sleep(10) # Setup code here + >>> return 5 + >>> + >>> def test_feature_x(target, dev, expensive_setup): + >>> assert(expensive_setup == 5) + + """ + + force_disable_cache = bool(int(os.environ.get("TVM_TEST_DISABLE_CACHE", "0"))) + cache_return_value = cache_return_value and not force_disable_cache + + # Deliberately at function scope, so that caching can track how + # many times the fixture has been used. If used, the cache gets + # cleared after the fixture is no longer needed. 
+ scope = "function" + + def wraps(func): + if cache_return_value: + func = _fixture_cache(func) + func = pytest.fixture(func, scope=scope) + return func + + if func is None: + return wraps + + return wraps(func) + + +class _DeepCopyAllowedClasses(dict): + def __init__(self, allowed_class_list): + self.allowed_class_list = allowed_class_list + super().__init__() + + def get(self, key, *args, **kwargs): + """Overrides behavior of copy.deepcopy to avoid implicit copy. + + By default, copy.deepcopy uses a dict of id->object to track + all objects that it has seen, which is passed as the second + argument to all recursive calls. This class is intended to be + passed in instead, and inspects the type of all objects being + copied. + + Where copy.deepcopy does a best-effort attempt at copying an + object, for unit tests we would rather have all objects either + be copied correctly, or to throw an error. Classes that + define an explicit method to perform a copy are allowed, as + are any explicitly listed classes. Classes that would fall + back to using object.__reduce__, and are not explicitly listed + as safe, will throw an exception. + + """ + obj = ctypes.cast(key, ctypes.py_object).value + cls = type(obj) + if ( + cls in copy._deepcopy_dispatch + or issubclass(cls, type) + or getattr(obj, "__deepcopy__", None) + or copyreg.dispatch_table.get(cls) + or cls.__reduce__ is not object.__reduce__ + or cls.__reduce_ex__ is not object.__reduce_ex__ + or cls in self.allowed_class_list + ): + return super().get(key, *args, **kwargs) + + rfc_url = ( + "https://github.com/apache/tvm-rfcs/blob/main/rfcs/0007-parametrized-unit-tests.md" + ) + raise TypeError( + ( + f"Cannot copy fixture of type {cls.__name__}. TVM fixture caching " + "is limited to objects that explicitly provide the ability " + "to be copied (e.g. through __deepcopy__, __getstate__, or __setstate__)," + "and forbids the use of the default `object.__reduce__` and " + "`object.__reduce_ex__`. For third-party classes that are " + "safe to use with copy.deepcopy, please add the class to " + "the arguments of _DeepCopyAllowedClasses in tvm.testing._fixture_cache.\n" + "\n" + f"For discussion on this restriction, please see {rfc_url}." + ) + ) + + +def _fixture_cache(func): + cache = {} + + # Can't use += on a bound method's property. Therefore, this is a + # list rather than a variable so that it can be accessed from the + # pytest_collection_modifyitems(). + num_uses_remaining = [0] + + # Using functools.lru_cache would require the function arguments + # to be hashable, which wouldn't allow caching fixtures that + # depend on numpy arrays. For example, a fixture that takes a + # numpy array as input, then calculates uses a slow method to + # compute a known correct output for that input. Therefore, + # including a fallback for serializable types. 
+ def get_cache_key(*args, **kwargs): + try: + hash((args, kwargs)) + return (args, kwargs) + except TypeError as e: + pass + + try: + return pickle.dumps((args, kwargs)) + except TypeError as e: + raise TypeError( + "TVM caching of fixtures requires arguments to the fixture " + "to be either hashable or serializable" + ) from e + + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + cache_key = get_cache_key(*args, **kwargs) + + try: + cached_value = cache[cache_key] + except KeyError: + cached_value = cache[cache_key] = func(*args, **kwargs) + + yield copy.deepcopy( + cached_value, + # allowed_class_list should be a list of classes that + # are safe to copy using copy.deepcopy, but do not + # implement __deepcopy__, __reduce__, or + # __reduce_ex__. + _DeepCopyAllowedClasses(allowed_class_list=[]), + ) + + finally: + # Clear the cache once all tests that use a particular fixture + # have completed. + num_uses_remaining[0] -= 1 + if not num_uses_remaining[0]: + cache.clear() + + # Set in the pytest_collection_modifyitems() + wrapper.num_uses_remaining = num_uses_remaining + + return wrapper + + +def _count_num_fixture_uses(items): + # Helper function, counts the number of tests that use each cached + # fixture. Should be called from pytest_collection_modifyitems(). + for item in items: + is_skipped = item.get_closest_marker("skip") or any( + mark.args[0] for mark in item.iter_markers("skipif") + ) + if is_skipped: + continue + + for fixturedefs in item._fixtureinfo.name2fixturedefs.values(): + # Only increment the active fixturedef, in a name has been overridden. + fixturedef = fixturedefs[-1] + if hasattr(fixturedef.func, "num_uses_remaining"): + fixturedef.func.num_uses_remaining[0] += 1 + + +def _remove_global_fixture_definitions(items): + # Helper function, removes fixture definitions from the global + # variables of the modules they were defined in. This is intended + # to improve readability of error messages by giving a NameError + # if a test function accesses a pytest fixture but doesn't include + # it as an argument. Should be called from + # pytest_collection_modifyitems(). + + modules = set(item.module for item in items) + + for module in modules: + for name in dir(module): + obj = getattr(module, name) + if hasattr(obj, "_pytestfixturefunction") and isinstance( + obj._pytestfixturefunction, _pytest.fixtures.FixtureFunctionMarker + ): + delattr(module, name) + + +def identity_after(x, sleep): + """Testing function to return identity after sleep + + Parameters + ---------- + x : int + The input value. + + sleep : float + The amount of time to sleep + + Returns + ------- + x : object + The original value + """ + if sleep: + time.sleep(sleep) + return x + + +def terminate_self(): + """Testing function to terminate the process.""" + sys.exit(-1) From 0cec344d2656395dc4c6697a6fd8d9588ffa1194 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 25 Aug 2021 16:06:08 -0700 Subject: [PATCH 06/14] unsqueeze --- python/tvm/relay/frontend/onnx.py | 32 +++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 49a1c18703ff..aa8e6b6cd792 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -19,6 +19,7 @@ """ONNX: Open Neural Network Exchange frontend for Relay.""" import copy import warnings +from os import read import numpy as np import tvm @@ -36,9 +37,21 @@ from .. import random as _random from .. import ty as _ty from .. 
import vision as _vision -from .common import (AttrCvt, Renamer, fold_constant, get_name, get_relay_op, - gru_cell, infer_channels, infer_shape, infer_type, - infer_value, lstm_cell, new_var, unbind) +from .common import ( + AttrCvt, + Renamer, + fold_constant, + get_name, + get_relay_op, + gru_cell, + infer_channels, + infer_shape, + infer_type, + infer_value, + lstm_cell, + new_var, + unbind, +) __all__ = ["from_onnx"] @@ -1378,6 +1391,14 @@ def _impl_v1(cls, inputs, attr, params): inputs[0] = _op.expand_dims(inputs[0], axis=axis, num_newaxis=1) return inputs[0] + @classmethod + def _impl_v13(cls, inputs, attr, params): + input_tensor, axes = inputs + axes = sorted(axes) + for axis in axes: + input_tensor = _op.expand_dims(input_tensor, axis=axis, num_newaxis=1) + return input_tensor + class Split(OnnxOpConverter): """Operator converter for Split.""" @@ -3489,12 +3510,11 @@ def _impl_v13(cls, inputs, attr, params): if len(inputs) == 3: weight_tensor = inputs[2] else: - weight_tensor = None - + weight_tensor = None loss = -input_tensor if weight_tensor is not None: loss *= weight_tensor - + relay.squeeze() if target_tensor is not None and ignore_index is not None: mask_tensor = target_tensor == ignore_index From 173054dc16085abd21400a69ad023917b24a7148 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Wed, 25 Aug 2021 18:01:31 -0700 Subject: [PATCH 07/14] proper weight handling --- python/tvm/relay/frontend/onnx.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 1856be4a834d..0217357c29e7 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -3508,8 +3508,16 @@ def _impl_v13(cls, inputs, attr, params): target_tensor = relay.expand_dims(target_tensor, 1) loss = -relay.gather(input_tensor, axis=1, indices=target_tensor) loss = relay.squeeze(loss, axis=[1]) + + weight_total = None if weight_tensor is not None: - loss *= weight_tensor + expanded_target_tensor = relay.expand_dims(target_tensor, 0) + expanded_target_tensor = relay.nn.batch_flatten(expanded_target_tensor) + flattened_weights = relay.gather_nd(weight_tensor, expanded_target_tensor) + select_weights = relay.reshape_like(flattened_weights, loss) + loss *= select_weights + weight_total = relay.sum(select_weights) + if target_tensor is not None and ignore_index is not None: mask_tensor = relay.equal(target_tensor, relay.const(ignore_index, dtype=target_tensor_type)) @@ -3517,9 +3525,10 @@ def _impl_v13(cls, inputs, attr, params): mask_tensor = relay.const(1, dtype='int8') - relay.cast(mask_tensor, 'int8') loss *= relay.cast_like(mask_tensor, loss) + if reduction == "mean": - if weight_tensor is not None: - return relay.sum(loss) / relay.sum(weight_tensor) + if weight_total is not None: + return relay.sum(loss) / weight_total else: return relay.mean(loss) elif reduction == "sum": From e69997f1b91d6b1fbe23e571b13307e1a5a62c6c Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Fri, 27 Aug 2021 13:39:18 -0700 Subject: [PATCH 08/14] simplify if statement --- python/tvm/relay/frontend/onnx.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 0217357c29e7..ffbb52014f94 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -37,21 +37,9 @@ from .. import random as _random from .. import ty as _ty from .. 
import vision as _vision -from .common import ( - AttrCvt, - Renamer, - fold_constant, - get_name, - get_relay_op, - gru_cell, - infer_channels, - infer_shape, - infer_type, - infer_value, - lstm_cell, - new_var, - unbind, -) +from .common import (AttrCvt, Renamer, fold_constant, get_name, get_relay_op, + gru_cell, infer_channels, infer_shape, infer_type, + infer_value, lstm_cell, new_var, unbind) __all__ = ["from_onnx"] @@ -3518,7 +3506,7 @@ def _impl_v13(cls, inputs, attr, params): loss *= select_weights weight_total = relay.sum(select_weights) - if target_tensor is not None and ignore_index is not None: + if ignore_index is not None: mask_tensor = relay.equal(target_tensor, relay.const(ignore_index, dtype=target_tensor_type)) # Turn all "True" entries to 0 and all "False" entries to 1 From 8949e5fc042977f10ec65687660f92308d172b19 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Tue, 31 Aug 2021 14:12:02 -0700 Subject: [PATCH 09/14] fix tests --- python/tvm/relay/frontend/onnx.py | 64 +++++++++++++--------- tests/python/frontend/onnx/test_forward.py | 18 ------ 2 files changed, 39 insertions(+), 43 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index ffbb52014f94..557169b44bcc 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -37,9 +37,21 @@ from .. import random as _random from .. import ty as _ty from .. import vision as _vision -from .common import (AttrCvt, Renamer, fold_constant, get_name, get_relay_op, - gru_cell, infer_channels, infer_shape, infer_type, - infer_value, lstm_cell, new_var, unbind) +from .common import ( + AttrCvt, + Renamer, + fold_constant, + get_name, + get_relay_op, + gru_cell, + infer_channels, + infer_shape, + infer_type, + infer_value, + lstm_cell, + new_var, + unbind, +) __all__ = ["from_onnx"] @@ -3487,38 +3499,40 @@ def _impl_v13(cls, inputs, attr, params): ) input_tensor, target_tensor = inputs[0], inputs[1] - target_tensor_type = target_tensor.type_annotation.dtype if len(inputs) == 3: weight_tensor = inputs[2] else: - weight_tensor = None + channels = infer_shape(input_tensor)[1] + weight_tensor = relay.ones( + [channels], + dtype=input_tensor.type_annotation.dtype, + ) - target_tensor = relay.expand_dims(target_tensor, 1) - loss = -relay.gather(input_tensor, axis=1, indices=target_tensor) + loss = -relay.gather(input_tensor, axis=1, indices=relay.expand_dims(target_tensor, 1)) loss = relay.squeeze(loss, axis=[1]) - weight_total = None - if weight_tensor is not None: - expanded_target_tensor = relay.expand_dims(target_tensor, 0) - expanded_target_tensor = relay.nn.batch_flatten(expanded_target_tensor) - flattened_weights = relay.gather_nd(weight_tensor, expanded_target_tensor) - select_weights = relay.reshape_like(flattened_weights, loss) - loss *= select_weights - weight_total = relay.sum(select_weights) - + expanded_target_tensor = relay.expand_dims(target_tensor, 0) + expanded_target_tensor = relay.nn.batch_flatten(expanded_target_tensor) + flattened_weights = relay.gather_nd(weight_tensor, expanded_target_tensor) + select_weights = relay.reshape_like(flattened_weights, loss) + loss *= select_weights + if ignore_index is not None: - mask_tensor = relay.equal(target_tensor, relay.const(ignore_index, dtype=target_tensor_type)) + # "Ignore" values whose target is the ignore_index + mask_tensor = relay.equal( + target_tensor, relay.const(ignore_index, dtype=target_tensor.type_annotation.dtype) + ) + mask_tensor = relay.const(1, dtype="int8") - 
relay.cast(mask_tensor, "int8") + loss *= relay.cast_like(mask_tensor, loss) - # Turn all "True" entries to 0 and all "False" entries to 1 - mask_tensor = relay.const(1, dtype='int8') - relay.cast(mask_tensor, 'int8') + # This is not explained super clearly in the onnx spec, but masked values don't + # contribute toward the final value in reduction + select_weights *= relay.cast_like(mask_tensor, select_weights) + + weight_total = relay.sum(select_weights) - loss *= relay.cast_like(mask_tensor, loss) - if reduction == "mean": - if weight_total is not None: - return relay.sum(loss) / weight_total - else: - return relay.mean(loss) + return relay.sum(loss) / weight_total elif reduction == "sum": return relay.sum(loss) else: diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index 9e0eb1f75217..d8f4fd2c9e03 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -4752,41 +4752,23 @@ def verify_eyelike(indata): "test_momentum_multiple", "test_mvn", "test_nesterov_momentum", - "test_nllloss_NC", "test_nllloss_NC_expanded", - "test_nllloss_NCd1", "test_nllloss_NCd1_expanded", - "test_nllloss_NCd1_ii", "test_nllloss_NCd1_ii_expanded", - "test_nllloss_NCd1_mean_weight_negative_ii", "test_nllloss_NCd1_mean_weight_negative_ii_expanded", - "test_nllloss_NCd1_weight", "test_nllloss_NCd1_weight_expanded", - "test_nllloss_NCd1_weight_ii", "test_nllloss_NCd1_weight_ii_expanded", - "test_nllloss_NCd1d2", "test_nllloss_NCd1d2_expanded", - "test_nllloss_NCd1d2_no_weight_reduction_mean_ii", "test_nllloss_NCd1d2_no_weight_reduction_mean_ii_expanded", - "test_nllloss_NCd1d2_reduction_mean", "test_nllloss_NCd1d2_reduction_mean_expanded", - "test_nllloss_NCd1d2_reduction_sum", "test_nllloss_NCd1d2_reduction_sum_expanded", - "test_nllloss_NCd1d2_with_weight", "test_nllloss_NCd1d2_with_weight_expanded", - "test_nllloss_NCd1d2_with_weight_reduction_mean", "test_nllloss_NCd1d2_with_weight_reduction_mean_expanded", - "test_nllloss_NCd1d2_with_weight_reduction_sum", "test_nllloss_NCd1d2_with_weight_reduction_sum_expanded", - "test_nllloss_NCd1d2_with_weight_reduction_sum_ii", "test_nllloss_NCd1d2_with_weight_reduction_sum_ii_expanded", - "test_nllloss_NCd1d2d3_none_no_weight_negative_ii", "test_nllloss_NCd1d2d3_none_no_weight_negative_ii_expanded", - "test_nllloss_NCd1d2d3_sum_weight_high_ii", "test_nllloss_NCd1d2d3_sum_weight_high_ii_expanded", - "test_nllloss_NCd1d2d3d4d5_mean_weight", "test_nllloss_NCd1d2d3d4d5_mean_weight_expanded", - "test_nllloss_NCd1d2d3d4d5_none_no_weight", "test_nllloss_NCd1d2d3d4d5_none_no_weight_expanded", "test_pow_types_float", "test_pow_types_float32_int32", From ed36b7591c1d175fb7de6f4866c6a03a1ea0398b Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Tue, 31 Aug 2021 14:13:36 -0700 Subject: [PATCH 10/14] add comment about tests --- tests/python/frontend/onnx/test_forward.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index d8f4fd2c9e03..7693b636e373 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -4752,6 +4752,7 @@ def verify_eyelike(indata): "test_momentum_multiple", "test_mvn", "test_nesterov_momentum", + # When unsqueeze is fully supported, remaining nllloss tests should work: "test_nllloss_NC_expanded", "test_nllloss_NCd1_expanded", "test_nllloss_NCd1_ii_expanded", From 409b8a3ba20d97ba5cbd948a26bebf5793f52385 Mon Sep 17 
00:00:00 2001 From: Andrew Zhao Luo Date: Tue, 31 Aug 2021 14:14:38 -0700 Subject: [PATCH 11/14] delete extra file --- python/tvm/testing/#utils.py# | 1436 --------------------------------- 1 file changed, 1436 deletions(-) delete mode 100644 python/tvm/testing/#utils.py# diff --git a/python/tvm/testing/#utils.py# b/python/tvm/testing/#utils.py# deleted file mode 100644 index 04a235b64fdf..000000000000 --- a/python/tvm/testing/#utils.py# +++ /dev/null @@ -1,1436 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# pylint: disable=invalid-name,unnecessary-comprehension -""" TVM testing utilities - -Testing Markers -*************** - -We use pytest markers to specify the requirements of test functions. Currently -there is a single distinction that matters for our testing environment: does -the test require a gpu. For tests that require just a gpu or just a cpu, we -have the decorator :py:func:`requires_gpu` that enables the test when a gpu is -available. To avoid running tests that don't require a gpu on gpu nodes, this -decorator also sets the pytest marker `gpu` so we can use select the gpu subset -of tests (using `pytest -m gpu`). - -Unfortunately, many tests are written like this: - -.. python:: - - def test_something(): - for target in all_targets(): - do_something() - -The test uses both gpu and cpu targets, so the test needs to be run on both cpu -and gpu nodes. But we still want to only run the cpu targets on the cpu testing -node. The solution is to mark these tests with the gpu marker so they will be -run on the gpu nodes. But we also modify all_targets (renamed to -enabled_targets) so that it only returns gpu targets on gpu nodes and cpu -targets on cpu nodes (using an environment variable). - -Instead of using the all_targets function, future tests that would like to -test against a variety of targets should use the -:py:func:`tvm.testing.parametrize_targets` functionality. This allows us -greater control over which targets are run on which testing nodes. - -If in the future we want to add a new type of testing node (for example -fpgas), we need to add a new marker in `tests/python/pytest.ini` and a new -function in this module. Then targets using this node should be added to the -`TVM_TEST_TARGETS` environment variable in the CI. 
-""" -import collections -import copy -import copyreg -import ctypes -import functools -import logging -import os -import sys -import time -import pickle -import pytest -import _pytest -import numpy as np -import tvm -import tvm.arith -import tvm.tir -import tvm.te -import tvm._ffi - -from tvm.contrib import nvcc, cudnn -from tvm.error import TVMError - - -def assert_allclose(actual, desired, rtol=1e-7, atol=1e-7): - """Version of np.testing.assert_allclose with `atol` and `rtol` fields set - in reasonable defaults. - - Arguments `actual` and `desired` are not interchangeable, since the function - compares the `abs(actual-desired)` with `atol+rtol*abs(desired)`. Since we - often allow `desired` to be close to zero, we generally want non-zero `atol`. - """ - actual = np.asanyarray(actual) - desired = np.asanyarray(desired) - np.testing.assert_allclose(actual.shape, desired.shape) - np.testing.assert_allclose(actual, desired, rtol=rtol, atol=atol, verbose=True) - - -def check_numerical_grads( - function, input_values, grad_values, function_value=None, delta=1e-3, atol=1e-2, rtol=0.1 -): - """A helper function that checks that numerical gradients of a function are - equal to gradients computed in some different way (analytical gradients). - - Numerical gradients are computed using finite difference approximation. To - reduce the number of function evaluations, the number of points used is - gradually increased if the error value is too high (up to 5 points). - - Parameters - ---------- - function - A function that takes inputs either as positional or as keyword - arguments (either `function(*input_values)` or `function(**input_values)` - should be correct) and returns a scalar result. Should accept numpy - ndarrays. - - input_values : Dict[str, numpy.ndarray] or List[numpy.ndarray] - A list of values or a dict assigning values to variables. Represents the - point at which gradients should be computed. - - grad_values : Dict[str, numpy.ndarray] or List[numpy.ndarray] - Gradients computed using a different method. - - function_value : float, optional - Should be equal to `function(**input_values)`. - - delta : float, optional - A small number used for numerical computation of partial derivatives. - The default 1e-3 is a good choice for float32. - - atol : float, optional - Absolute tolerance. Gets multiplied by `sqrt(n)` where n is the size of a - gradient. - - rtol : float, optional - Relative tolerance. 
- """ - # If input_values is a list then function accepts positional arguments - # In this case transform it to a function taking kwargs of the form {"0": ..., "1": ...} - if not isinstance(input_values, dict): - input_len = len(input_values) - input_values = {str(idx): val for idx, val in enumerate(input_values)} - - def _function(_input_len=input_len, _orig_function=function, **kwargs): - return _orig_function(*(kwargs[str(i)] for i in range(input_len))) - - function = _function - - grad_values = {str(idx): val for idx, val in enumerate(grad_values)} - - if function_value is None: - function_value = function(**input_values) - - # a helper to modify j-th element of val by a_delta - def modify(val, j, a_delta): - val = val.copy() - val.reshape(-1)[j] = val.reshape(-1)[j] + a_delta - return val - - # numerically compute a partial derivative with respect to j-th element of the var `name` - def derivative(x_name, j, a_delta): - modified_values = { - n: modify(val, j, a_delta) if n == x_name else val for n, val in input_values.items() - } - return (function(**modified_values) - function_value) / a_delta - - def compare_derivative(j, n_der, grad): - der = grad.reshape(-1)[j] - return np.abs(n_der - der) < atol + rtol * np.abs(n_der) - - for x_name, grad in grad_values.items(): - if grad.shape != input_values[x_name].shape: - raise AssertionError( - "Gradient wrt '{}' has unexpected shape {}, expected {} ".format( - x_name, grad.shape, input_values[x_name].shape - ) - ) - - ngrad = np.zeros_like(grad) - - wrong_positions = [] - - # compute partial derivatives for each position in this variable - for j in range(np.prod(grad.shape)): - # forward difference approximation - nder = derivative(x_name, j, delta) - - # if the derivative is not equal to the analytical one, try to use more - # precise and expensive methods - if not compare_derivative(j, nder, grad): - # central difference approximation - nder = (derivative(x_name, j, -delta) + nder) / 2 - - if not compare_derivative(j, nder, grad): - # central difference approximation using h = delta/2 - cnder2 = ( - derivative(x_name, j, delta / 2) + derivative(x_name, j, -delta / 2) - ) / 2 - # five-point derivative - nder = (4 * cnder2 - nder) / 3 - - # if the derivatives still don't match, add this position to the - # list of wrong positions - if not compare_derivative(j, nder, grad): - wrong_positions.append(np.unravel_index(j, grad.shape)) - - ngrad.reshape(-1)[j] = nder - - wrong_percentage = int(100 * len(wrong_positions) / np.prod(grad.shape)) - - dist = np.sqrt(np.sum((ngrad - grad) ** 2)) - grad_norm = np.sqrt(np.sum(ngrad ** 2)) - - if not (np.isfinite(dist) and np.isfinite(grad_norm)): - raise ValueError( - "NaN or infinity detected during numerical gradient checking wrt '{}'\n" - "analytical grad = {}\n numerical grad = {}\n".format(x_name, grad, ngrad) - ) - - # we multiply atol by this number to make it more universal for different sizes - sqrt_n = np.sqrt(float(np.prod(grad.shape))) - - if dist > atol * sqrt_n + rtol * grad_norm: - raise AssertionError( - "Analytical and numerical grads wrt '{}' differ too much\n" - "analytical grad = {}\n numerical grad = {}\n" - "{}% of elements differ, first 10 of wrong positions: {}\n" - "distance > atol*sqrt(n) + rtol*grad_norm\n" - "distance {} > {}*{} + {}*{}".format( - x_name, - grad, - ngrad, - wrong_percentage, - wrong_positions[:10], - dist, - atol, - sqrt_n, - rtol, - grad_norm, - ) - ) - - max_diff = np.max(np.abs(ngrad - grad)) - avg_diff = np.mean(np.abs(ngrad - grad)) - logging.info( - 
"Numerical grad test wrt '%s' of shape %s passes, " - "dist = %f, max_diff = %f, avg_diff = %f", - x_name, - grad.shape, - dist, - max_diff, - avg_diff, - ) - - -def assert_prim_expr_equal(lhs, rhs): - """Assert lhs and rhs equals to each iother. - - Parameters - ---------- - lhs : tvm.tir.PrimExpr - The left operand. - - rhs : tvm.tir.PrimExpr - The left operand. - """ - ana = tvm.arith.Analyzer() - res = ana.simplify(lhs - rhs) - equal = isinstance(res, tvm.tir.IntImm) and res.value == 0 - if not equal: - raise ValueError("{} and {} are not equal".format(lhs, rhs)) - - -def check_bool_expr_is_true(bool_expr, vranges, cond=None): - """Check that bool_expr holds given the condition cond - for every value of free variables from vranges. - - for example, 2x > 4y solves to x > 2y given x in (0, 10) and y in (0, 10) - here bool_expr is x > 2y, vranges is {x: (0, 10), y: (0, 10)}, cond is 2x > 4y - We creates iterations to check, - for x in range(10): - for y in range(10): - assert !(2x > 4y) || (x > 2y) - - Parameters - ---------- - bool_expr : tvm.ir.PrimExpr - Boolean expression to check - vranges: Dict[tvm.tir.expr.Var, tvm.ir.Range] - Free variables and their ranges - cond: tvm.ir.PrimExpr - extra conditions needs to be satisfied. - """ - if cond is not None: - bool_expr = tvm.te.any(tvm.tir.Not(cond), bool_expr) - - def _run_expr(expr, vranges): - """Evaluate expr for every value of free variables - given by vranges and return the tensor of results. - """ - - def _compute_body(*us): - vmap = {v: u + r.min for (v, r), u in zip(vranges.items(), us)} - return tvm.tir.stmt_functor.substitute(expr, vmap) - - A = tvm.te.compute([r.extent.value for v, r in vranges.items()], _compute_body) - args = [tvm.nd.empty(A.shape, A.dtype)] - sch = tvm.te.create_schedule(A.op) - mod = tvm.build(sch, [A]) - mod(*args) - return args[0].numpy() - - res = _run_expr(bool_expr, vranges) - if not np.all(res): - indices = list(np.argwhere(res == 0)[0]) - counterex = [(str(v), i + r.min) for (v, r), i in zip(vranges.items(), indices)] - counterex = sorted(counterex, key=lambda x: x[0]) - counterex = ", ".join([v + " = " + str(i) for v, i in counterex]) - ana = tvm.arith.Analyzer() - raise AssertionError( - "Expression {}\nis not true on {}\n" - "Counterexample: {}".format(ana.simplify(bool_expr), vranges, counterex) - ) - - -def check_int_constraints_trans_consistency(constraints_trans, vranges=None): - """Check IntConstraintsTransform is a bijective transformation. 
- - Parameters - ---------- - constraints_trans : arith.IntConstraintsTransform - Integer constraints transformation - vranges: Dict[tvm.tir.Var, tvm.ir.Range] - Free variables and their ranges - """ - if vranges is None: - vranges = {} - - def _check_forward(constraints1, constraints2, varmap, backvarmap): - ana = tvm.arith.Analyzer() - all_vranges = vranges.copy() - all_vranges.update({v: r for v, r in constraints1.ranges.items()}) - - # Check that the transformation is injective - cond_on_vars = tvm.tir.const(1, "bool") - for v in constraints1.variables: - if v in varmap: - # variable mapping is consistent - v_back = ana.simplify(tvm.tir.stmt_functor.substitute(varmap[v], backvarmap)) - cond_on_vars = tvm.te.all(cond_on_vars, v == v_back) - # Also we have to check that the new relations are true when old relations are true - cond_subst = tvm.tir.stmt_functor.substitute( - tvm.te.all(tvm.tir.const(1, "bool"), *constraints2.relations), backvarmap - ) - # We have to include relations from vranges too - for v in constraints2.variables: - if v in constraints2.ranges: - r = constraints2.ranges[v] - range_cond = tvm.te.all(v >= r.min, v < r.min + r.extent) - range_cond = tvm.tir.stmt_functor.substitute(range_cond, backvarmap) - cond_subst = tvm.te.all(cond_subst, range_cond) - cond_subst = ana.simplify(cond_subst) - check_bool_expr_is_true( - tvm.te.all(cond_subst, cond_on_vars), - all_vranges, - cond=tvm.te.all(tvm.tir.const(1, "bool"), *constraints1.relations), - ) - - _check_forward( - constraints_trans.src, - constraints_trans.dst, - constraints_trans.src_to_dst, - constraints_trans.dst_to_src, - ) - _check_forward( - constraints_trans.dst, - constraints_trans.src, - constraints_trans.dst_to_src, - constraints_trans.src_to_dst, - ) - - -def _get_targets(target_str=None): - if target_str is None: - target_str = os.environ.get("TVM_TEST_TARGETS", "") - # Use dict instead of set for de-duplication so that the - # targets stay in the order specified. - target_names = list({t.strip(): None for t in target_str.split(";") if t.strip()}) - - if not target_names: - target_names = DEFAULT_TEST_TARGETS - - targets = [] - for target in target_names: - target_kind = target.split()[0] - - if target_kind == "cuda" and "cudnn" in tvm.target.Target(target).attrs.get("libs", []): - is_enabled = tvm.support.libinfo()["USE_CUDNN"].lower() in ["on", "true", "1"] - is_runnable = is_enabled and cudnn.exists() - else: - is_enabled = tvm.runtime.enabled(target_kind) - is_runnable = is_enabled and tvm.device(target_kind).exist - - targets.append( - { - "target": target, - "target_kind": target_kind, - "is_enabled": is_enabled, - "is_runnable": is_runnable, - } - ) - - if all(not t["is_runnable"] for t in targets): - if tvm.runtime.enabled("llvm"): - logging.warning( - "None of the following targets are supported by this build of TVM: %s." - " Try setting TVM_TEST_TARGETS to a supported target. Defaulting to llvm.", - target_str, - ) - return _get_targets("llvm") - - raise TVMError( - "None of the following targets are supported by this build of TVM: %s." - " Try setting TVM_TEST_TARGETS to a supported target." - " Cannot default to llvm, as it is not enabled." 
% target_str - ) - - return targets - - -DEFAULT_TEST_TARGETS = [ - "llvm", - "llvm -device=arm_cpu", - "cuda", - "cuda -model=unknown -libs=cudnn", - "nvptx", - "vulkan -from_device=0", - "opencl", - "opencl -device=mali,aocl_sw_emu", - "opencl -device=intel_graphics", - "metal", - "rocm", -] - - -def device_enabled(target): - """Check if a target should be used when testing. - - It is recommended that you use :py:func:`tvm.testing.parametrize_targets` - instead of manually checking if a target is enabled. - - This allows the user to control which devices they are testing against. In - tests, this should be used to check if a device should be used when said - device is an optional part of the test. - - Parameters - ---------- - target : str - Target string to check against - - Returns - ------- - bool - Whether or not the device associated with this target is enabled. - - Example - ------- - >>> @tvm.testing.uses_gpu - >>> def test_mytest(): - >>> for target in ["cuda", "llvm"]: - >>> if device_enabled(target): - >>> test_body... - - Here, `test_body` will only be reached by with `target="cuda"` on gpu test - nodes and `target="llvm"` on cpu test nodes. - """ - assert isinstance(target, str), "device_enabled requires a target as a string" - # only check if device name is found, sometime there are extra flags - target_kind = target.split(" ")[0] - return any(target_kind == t["target_kind"] for t in _get_targets() if t["is_runnable"]) - - -def enabled_targets(): - """Get all enabled targets with associated devices. - - In most cases, you should use :py:func:`tvm.testing.parametrize_targets` instead of - this function. - - In this context, enabled means that TVM was built with support for - this target, the target name appears in the TVM_TEST_TARGETS - environment variable, and a suitable device for running this - target exists. If TVM_TEST_TARGETS is not set, it defaults to - variable DEFAULT_TEST_TARGETS in this module. - - If you use this function in a test, you **must** decorate the test with - :py:func:`tvm.testing.uses_gpu` (otherwise it will never be run on the gpu). - - Returns - ------- - targets: list - A list of pairs of all enabled devices and the associated context - - """ - return [(t["target"], tvm.device(t["target"])) for t in _get_targets() if t["is_runnable"]] - - -def _compose(args, decs): - """Helper to apply multiple markers""" - if len(args) > 0: - f = args[0] - for d in reversed(decs): - f = d(f) - return f - return decs - - -def uses_gpu(*args): - """Mark to differentiate tests that use the GPU in some capacity. - - These tests will be run on CPU-only test nodes and on test nodes with GPUs. - To mark a test that must have a GPU present to run, use - :py:func:`tvm.testing.requires_gpu`. - - Parameters - ---------- - f : function - Function to mark - """ - _uses_gpu = [pytest.mark.gpu] - return _compose(args, _uses_gpu) - - -def requires_gpu(*args): - """Mark a test as requiring a GPU to run. - - Tests with this mark will not be run unless a gpu is present. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_gpu = [ - pytest.mark.skipif( - not tvm.cuda().exist - and not tvm.rocm().exist - and not tvm.opencl().exist - and not tvm.metal().exist - and not tvm.vulkan().exist, - reason="No GPU present", - ), - *uses_gpu(), - ] - return _compose(args, _requires_gpu) - - -def requires_cuda(*args): - """Mark a test as requiring the CUDA runtime. - - This also marks the test as requiring a cuda gpu. 
- - Parameters - ---------- - f : function - Function to mark - """ - _requires_cuda = [ - pytest.mark.cuda, - pytest.mark.skipif(not device_enabled("cuda"), reason="CUDA support not enabled"), - *requires_gpu(), - ] - return _compose(args, _requires_cuda) - - -def requires_cudnn(*args): - """Mark a test as requiring the cuDNN library. - - This also marks the test as requiring a cuda gpu. - - Parameters - ---------- - f : function - Function to mark - """ - - requirements = [ - pytest.mark.skipif( - not cudnn.exists(), reason="cuDNN library not enabled, or not installed" - ), - *requires_cuda(), - ] - return _compose(args, requirements) - - -def requires_nvptx(*args): - """Mark a test as requiring the NVPTX compilation on the CUDA runtime - - This also marks the test as requiring a cuda gpu, and requiring - LLVM support. - - Parameters - ---------- - f : function - Function to mark - - """ - _requires_nvptx = [ - pytest.mark.skipif(not device_enabled("nvptx"), reason="NVPTX support not enabled"), - *requires_llvm(), - *requires_gpu(), - ] - return _compose(args, _requires_nvptx) - - -def requires_cudagraph(*args): - """Mark a test as requiring the CUDA Graph Feature - - This also marks the test as requiring cuda - - Parameters - ---------- - f : function - Function to mark - """ - _requires_cudagraph = [ - pytest.mark.skipif( - not nvcc.have_cudagraph(), reason="CUDA Graph is not supported in this environment" - ), - *requires_cuda(), - ] - return _compose(args, _requires_cudagraph) - - -def requires_opencl(*args): - """Mark a test as requiring the OpenCL runtime. - - This also marks the test as requiring a gpu. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_opencl = [ - pytest.mark.opencl, - pytest.mark.skipif(not device_enabled("opencl"), reason="OpenCL support not enabled"), - *requires_gpu(), - ] - return _compose(args, _requires_opencl) - - -def requires_rocm(*args): - """Mark a test as requiring the rocm runtime. - - This also marks the test as requiring a gpu. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_rocm = [ - pytest.mark.rocm, - pytest.mark.skipif(not device_enabled("rocm"), reason="rocm support not enabled"), - *requires_gpu(), - ] - return _compose(args, _requires_rocm) - - -def requires_metal(*args): - """Mark a test as requiring the metal runtime. - - This also marks the test as requiring a gpu. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_metal = [ - pytest.mark.metal, - pytest.mark.skipif(not device_enabled("metal"), reason="metal support not enabled"), - *requires_gpu(), - ] - return _compose(args, _requires_metal) - - -def requires_vulkan(*args): - """Mark a test as requiring the vulkan runtime. - - This also marks the test as requiring a gpu. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_vulkan = [ - pytest.mark.vulkan, - pytest.mark.skipif(not device_enabled("vulkan"), reason="vulkan support not enabled"), - *requires_gpu(), - ] - return _compose(args, _requires_vulkan) - - -def requires_tensorcore(*args): - """Mark a test as requiring a tensorcore to run. - - Tests with this mark will not be run unless a tensorcore is present. 
- - Parameters - ---------- - f : function - Function to mark - """ - _requires_tensorcore = [ - pytest.mark.tensorcore, - pytest.mark.skipif( - not tvm.cuda().exist or not nvcc.have_tensorcore(tvm.cuda(0).compute_version), - reason="No tensorcore present", - ), - *requires_gpu(), - ] - return _compose(args, _requires_tensorcore) - - -def requires_llvm(*args): - """Mark a test as requiring llvm to run. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_llvm = [ - pytest.mark.llvm, - pytest.mark.skipif(not device_enabled("llvm"), reason="LLVM support not enabled"), - ] - return _compose(args, _requires_llvm) - - -def requires_micro(*args): - """Mark a test as requiring microTVM to run. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_micro = [ - pytest.mark.skipif( - tvm.support.libinfo().get("USE_MICRO", "OFF") != "ON", - reason="MicroTVM support not enabled. Set USE_MICRO=ON in config.cmake to enable.", - ) - ] - return _compose(args, _requires_micro) - - -def requires_rpc(*args): - """Mark a test as requiring rpc to run. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_rpc = [ - pytest.mark.skipif( - tvm.support.libinfo().get("USE_RPC", "OFF") != "ON", - reason="RPC support not enabled. Set USE_RPC=ON in config.cmake to enable.", - ) - ] - return _compose(args, _requires_rpc) - - -def _target_to_requirement(target): - if isinstance(target, str): - target = tvm.target.Target(target) - - # mapping from target to decorator - if target.kind.name == "cuda" and "cudnn" in target.attrs.get("libs", []): - return requires_cudnn() - if target.kind.name == "cuda": - return requires_cuda() - if target.kind.name == "rocm": - return requires_rocm() - if target.kind.name == "vulkan": - return requires_vulkan() - if target.kind.name == "nvptx": - return requires_nvptx() - if target.kind.name == "metal": - return requires_metal() - if target.kind.name == "opencl": - return requires_opencl() - if target.kind.name == "llvm": - return requires_llvm() - return [] - - -def _pytest_target_params(targets, excluded_targets=None, xfail_targets=None): - # Include unrunnable targets here. They get skipped by the - # pytest.mark.skipif in _target_to_requirement(), showing up as - # skipped tests instead of being hidden entirely. - if targets is None: - if excluded_targets is None: - excluded_targets = set() - - if xfail_targets is None: - xfail_targets = set() - - target_marks = [] - for t in _get_targets(): - # Excluded targets aren't included in the params at all. - if t["target_kind"] not in excluded_targets: - - # Known failing targets are included, but are marked - # as expected to fail. - extra_marks = [] - if t["target_kind"] in xfail_targets: - extra_marks.append( - pytest.mark.xfail( - reason='Known failing test for target "{}"'.format(t["target_kind"]) - ) - ) - - target_marks.append((t["target"], extra_marks)) - - else: - target_marks = [(target, []) for target in targets] - - return [ - pytest.param(target, marks=_target_to_requirement(target) + extra_marks) - for target, extra_marks in target_marks - ] - - -def _auto_parametrize_target(metafunc): - """Automatically applies parametrize_targets - - Used if a test function uses the "target" fixture, but isn't - already marked with @tvm.testing.parametrize_targets. Intended - for use in the pytest_generate_tests() handler of a conftest.py - file. 
- - """ - - def update_parametrize_target_arg( - argnames, - argvalues, - *args, - **kwargs, - ): - args = [arg.strip() for arg in argnames.split(",") if arg.strip()] - if "target" in args: - target_i = args.index("target") - - new_argvalues = [] - for argvalue in argvalues: - - if isinstance(argvalue, _pytest.mark.structures.ParameterSet): - # The parametrized value is already a - # pytest.param, so track any marks already - # defined. - param_set = argvalue.values - target = param_set[target_i] - additional_marks = argvalue.marks - elif len(args) == 1: - # Single value parametrization, argvalue is a list of values. - target = argvalue - param_set = (target,) - additional_marks = [] - else: - # Multiple correlated parameters, argvalue is a list of tuple of values. - param_set = argvalue - target = param_set[target_i] - additional_marks = [] - - new_argvalues.append( - pytest.param( - *param_set, marks=_target_to_requirement(target) + additional_marks - ) - ) - - try: - argvalues[:] = new_argvalues - except TypeError as e: - pyfunc = metafunc.definition.function - filename = pyfunc.__code__.co_filename - line_number = pyfunc.__code__.co_firstlineno - msg = ( - f"Unit test {metafunc.function.__name__} ({filename}:{line_number}) " - "is parametrized using a tuple of parameters instead of a list " - "of parameters." - ) - raise TypeError(msg) from e - - if "target" in metafunc.fixturenames: - # Update any explicit use of @pytest.mark.parmaetrize to - # parametrize over targets. This adds the appropriate - # @tvm.testing.requires_* markers for each target. - for mark in metafunc.definition.iter_markers("parametrize"): - update_parametrize_target_arg(*mark.args, **mark.kwargs) - - # Check if any explicit parametrizations exist, and apply one - # if they do not. If the function is marked with either - # excluded or known failing targets, use these to determine - # the targets to be used. - parametrized_args = [ - arg.strip() - for mark in metafunc.definition.iter_markers("parametrize") - for arg in mark.args[0].split(",") - ] - if "target" not in parametrized_args: - excluded_targets = getattr(metafunc.function, "tvm_excluded_targets", []) - xfail_targets = getattr(metafunc.function, "tvm_known_failing_targets", []) - metafunc.parametrize( - "target", - _pytest_target_params(None, excluded_targets, xfail_targets), - scope="session", - ) - - -def parametrize_targets(*args): - """Parametrize a test over a specific set of targets. - - Use this decorator when you want your test to be run over a - specific set of targets and devices. It is intended for use where - a test is applicable only to a specific target, and is - inapplicable to any others (e.g. verifying target-specific - assembly code matches known assembly code). In most - circumstances, :py:func:`tvm.testing.exclude_targets` or - :py:func:`tvm.testing.known_failing_targets` should be used - instead. - - If used as a decorator without arguments, the test will be - parametrized over all targets in - :py:func:`tvm.testing.enabled_targets`. This behavior is - automatically enabled for any target that accepts arguments of - ``target`` or ``dev``, so the explicit use of the bare decorator - is no longer needed, and is maintained for backwards - compatibility. - - Parameters - ---------- - f : function - Function to parametrize. Must be of the form `def test_xxxxxxxxx(target, dev)`:, - where `xxxxxxxxx` is any name. - targets : list[str], optional - Set of targets to run against. 
If not supplied, - :py:func:`tvm.testing.enabled_targets` will be used. - - Example - ------- - >>> @tvm.testing.parametrize_targets("llvm", "cuda") - >>> def test_mytest(target, dev): - >>> ... # do something - """ - - # Backwards compatibility, when used as a decorator with no - # arguments implicitly parametrizes over "target". The - # parametrization is now handled by _auto_parametrize_target, so - # this use case can just return the decorated function. - if len(args) == 1 and callable(args[0]): - return args[0] - - return pytest.mark.parametrize("target", list(args), scope="session") - - -def exclude_targets(*args): - """Exclude a test from running on a particular target. - - Use this decorator when you want your test to be run over a - variety of targets and devices (including cpu and gpu devices), - but want to exclude some particular target or targets. For - example, a test may wish to be run against all targets in - tvm.testing.enabled_targets(), except for a particular target that - does not support the capabilities. - - Applies pytest.mark.skipif to the targets given. - - Parameters - ---------- - f : function - Function to parametrize. Must be of the form `def test_xxxxxxxxx(target, dev)`:, - where `xxxxxxxxx` is any name. - targets : list[str] - Set of targets to exclude. - - Example - ------- - >>> @tvm.testing.exclude_targets("cuda") - >>> def test_mytest(target, dev): - >>> ... # do something - - Or - - >>> @tvm.testing.exclude_targets("llvm", "cuda") - >>> def test_mytest(target, dev): - >>> ... # do something - - """ - - def wraps(func): - func.tvm_excluded_targets = args - return func - - return wraps - - -def known_failing_targets(*args): - """Skip a test that is known to fail on a particular target. - - Use this decorator when you want your test to be run over a - variety of targets and devices (including cpu and gpu devices), - but know that it fails for some targets. For example, a newly - implemented runtime may not support all features being tested, and - should be excluded. - - Applies pytest.mark.xfail to the targets given. - - Parameters - ---------- - f : function - Function to parametrize. Must be of the form `def test_xxxxxxxxx(target, dev)`:, - where `xxxxxxxxx` is any name. - targets : list[str] - Set of targets to skip. - - Example - ------- - >>> @tvm.testing.known_failing_targets("cuda") - >>> def test_mytest(target, dev): - >>> ... # do something - - Or - - >>> @tvm.testing.known_failing_targets("llvm", "cuda") - >>> def test_mytest(target, dev): - >>> ... # do something - - """ - - def wraps(func): - func.tvm_known_failing_targets = args - return func - - return wraps - - -def parameter(*values, ids=None): - """Convenience function to define pytest parametrized fixtures. - - Declaring a variable using ``tvm.testing.parameter`` will define a - parametrized pytest fixture that can be used by test - functions. This is intended for cases that have no setup cost, - such as strings, integers, tuples, etc. For cases that have a - significant setup cost, please use :py:func:`tvm.testing.fixture` - instead. - - If a test function accepts multiple parameters defined using - ``tvm.testing.parameter``, then the test will be run using every - combination of those parameters. - - The parameter definition applies to all tests in a module. If a - specific test should have different values for the parameter, that - test should be marked with ``@pytest.mark.parametrize``. - - Parameters - ---------- - values - A list of parameter values. 
A unit test that accepts this - parameter as an argument will be run once for each parameter - given. - - ids : List[str], optional - A list of names for the parameters. If None, pytest will - generate a name from the value. These generated names may not - be readable/useful for composite types such as tuples. - - Returns - ------- - function - A function output from pytest.fixture. - - Example - ------- - >>> size = tvm.testing.parameter(1, 10, 100) - >>> def test_using_size(size): - >>> ... # Test code here - - Or - - >>> shape = tvm.testing.parameter((5,10), (512,1024), ids=['small','large']) - >>> def test_using_size(shape): - >>> ... # Test code here - - """ - - # Optional cls parameter in case a parameter is defined inside a - # class scope. - @pytest.fixture(params=values, ids=ids) - def as_fixture(*_cls, request): - return request.param - - return as_fixture - - -_parametrize_group = 0 - - -def parameters(*value_sets): - """Convenience function to define pytest parametrized fixtures. - - Declaring a variable using tvm.testing.parameters will define a - parametrized pytest fixture that can be used by test - functions. Like :py:func:`tvm.testing.parameter`, this is intended - for cases that have no setup cost, such as strings, integers, - tuples, etc. For cases that have a significant setup cost, please - use :py:func:`tvm.testing.fixture` instead. - - Unlike :py:func:`tvm.testing.parameter`, if a test function - accepts multiple parameters defined using a single call to - ``tvm.testing.parameters``, then the test will only be run once - for each set of parameters, not for all combinations of - parameters. - - These parameter definitions apply to all tests in a module. If a - specific test should have different values for some parameters, - that test should be marked with ``@pytest.mark.parametrize``. - - Parameters - ---------- - values : List[tuple] - A list of parameter value sets. Each set of values represents - a single combination of values to be tested. A unit test that - accepts parameters defined will be run once for every set of - parameters in the list. - - Returns - ------- - List[function] - Function outputs from pytest.fixture. These should be unpacked - into individual named parameters. - - Example - ------- - >>> size, dtype = tvm.testing.parameters( (16,'float32'), (512,'float16') ) - >>> def test_feature_x(size, dtype): - >>> # Test code here - >>> assert( (size,dtype) in [(16,'float32'), (512,'float16')]) - - """ - global _parametrize_group - parametrize_group = _parametrize_group - _parametrize_group += 1 - - outputs = [] - for param_values in zip(*value_sets): - - # Optional cls parameter in case a parameter is defined inside a - # class scope. 
- def fixture_func(*_cls, request): - return request.param - - fixture_func.parametrize_group = parametrize_group - fixture_func.parametrize_values = param_values - outputs.append(pytest.fixture(fixture_func)) - - return outputs - - -def _parametrize_correlated_parameters(metafunc): - parametrize_needed = collections.defaultdict(list) - - for name, fixturedefs in metafunc.definition._fixtureinfo.name2fixturedefs.items(): - fixturedef = fixturedefs[-1] - if hasattr(fixturedef.func, "parametrize_group") and hasattr( - fixturedef.func, "parametrize_values" - ): - group = fixturedef.func.parametrize_group - values = fixturedef.func.parametrize_values - parametrize_needed[group].append((name, values)) - - for parametrize_group in parametrize_needed.values(): - if len(parametrize_group) == 1: - name, values = parametrize_group[0] - metafunc.parametrize(name, values, indirect=True) - else: - names = ",".join(name for name, values in parametrize_group) - value_sets = zip(*[values for name, values in parametrize_group]) - metafunc.parametrize(names, value_sets, indirect=True) - - -def fixture(func=None, *, cache_return_value=False): - """Convenience function to define pytest fixtures. - - This should be used as a decorator to mark functions that set up - state before a function. The return value of that fixture - function is then accessible by test functions as that accept it as - a parameter. - - Fixture functions can accept parameters defined with - :py:func:`tvm.testing.parameter`. - - By default, the setup will be performed once for each unit test - that uses a fixture, to ensure that unit tests are independent. - If the setup is expensive to perform, then the - cache_return_value=True argument can be passed to cache the setup. - The fixture function will be run only once (or once per parameter, - if used with tvm.testing.parameter), and the same return value - will be passed to all tests that use it. If the environment - variable TVM_TEST_DISABLE_CACHE is set to a non-zero value, it - will disable this feature and no caching will be performed. - - Example - ------- - >>> @tvm.testing.fixture - >>> def cheap_setup(): - >>> return 5 # Setup code here. - >>> - >>> def test_feature_x(target, dev, cheap_setup) - >>> assert(cheap_setup == 5) # Run test here - - Or - - >>> size = tvm.testing.parameter(1, 10, 100) - >>> - >>> @tvm.testing.fixture - >>> def cheap_setup(size): - >>> return 5*size # Setup code here, based on size. - >>> - >>> def test_feature_x(cheap_setup): - >>> assert(cheap_setup in [5, 50, 500]) - - Or - - >>> @tvm.testing.fixture(cache_return_value=True) - >>> def expensive_setup(): - >>> time.sleep(10) # Setup code here - >>> return 5 - >>> - >>> def test_feature_x(target, dev, expensive_setup): - >>> assert(expensive_setup == 5) - - """ - - force_disable_cache = bool(int(os.environ.get("TVM_TEST_DISABLE_CACHE", "0"))) - cache_return_value = cache_return_value and not force_disable_cache - - # Deliberately at function scope, so that caching can track how - # many times the fixture has been used. If used, the cache gets - # cleared after the fixture is no longer needed. 
- scope = "function" - - def wraps(func): - if cache_return_value: - func = _fixture_cache(func) - func = pytest.fixture(func, scope=scope) - return func - - if func is None: - return wraps - - return wraps(func) - - -class _DeepCopyAllowedClasses(dict): - def __init__(self, allowed_class_list): - self.allowed_class_list = allowed_class_list - super().__init__() - - def get(self, key, *args, **kwargs): - """Overrides behavior of copy.deepcopy to avoid implicit copy. - - By default, copy.deepcopy uses a dict of id->object to track - all objects that it has seen, which is passed as the second - argument to all recursive calls. This class is intended to be - passed in instead, and inspects the type of all objects being - copied. - - Where copy.deepcopy does a best-effort attempt at copying an - object, for unit tests we would rather have all objects either - be copied correctly, or to throw an error. Classes that - define an explicit method to perform a copy are allowed, as - are any explicitly listed classes. Classes that would fall - back to using object.__reduce__, and are not explicitly listed - as safe, will throw an exception. - - """ - obj = ctypes.cast(key, ctypes.py_object).value - cls = type(obj) - if ( - cls in copy._deepcopy_dispatch - or issubclass(cls, type) - or getattr(obj, "__deepcopy__", None) - or copyreg.dispatch_table.get(cls) - or cls.__reduce__ is not object.__reduce__ - or cls.__reduce_ex__ is not object.__reduce_ex__ - or cls in self.allowed_class_list - ): - return super().get(key, *args, **kwargs) - - rfc_url = ( - "https://github.com/apache/tvm-rfcs/blob/main/rfcs/0007-parametrized-unit-tests.md" - ) - raise TypeError( - ( - f"Cannot copy fixture of type {cls.__name__}. TVM fixture caching " - "is limited to objects that explicitly provide the ability " - "to be copied (e.g. through __deepcopy__, __getstate__, or __setstate__)," - "and forbids the use of the default `object.__reduce__` and " - "`object.__reduce_ex__`. For third-party classes that are " - "safe to use with copy.deepcopy, please add the class to " - "the arguments of _DeepCopyAllowedClasses in tvm.testing._fixture_cache.\n" - "\n" - f"For discussion on this restriction, please see {rfc_url}." - ) - ) - - -def _fixture_cache(func): - cache = {} - - # Can't use += on a bound method's property. Therefore, this is a - # list rather than a variable so that it can be accessed from the - # pytest_collection_modifyitems(). - num_uses_remaining = [0] - - # Using functools.lru_cache would require the function arguments - # to be hashable, which wouldn't allow caching fixtures that - # depend on numpy arrays. For example, a fixture that takes a - # numpy array as input, then calculates uses a slow method to - # compute a known correct output for that input. Therefore, - # including a fallback for serializable types. 
- def get_cache_key(*args, **kwargs): - try: - hash((args, kwargs)) - return (args, kwargs) - except TypeError as e: - pass - - try: - return pickle.dumps((args, kwargs)) - except TypeError as e: - raise TypeError( - "TVM caching of fixtures requires arguments to the fixture " - "to be either hashable or serializable" - ) from e - - @functools.wraps(func) - def wrapper(*args, **kwargs): - try: - cache_key = get_cache_key(*args, **kwargs) - - try: - cached_value = cache[cache_key] - except KeyError: - cached_value = cache[cache_key] = func(*args, **kwargs) - - yield copy.deepcopy( - cached_value, - # allowed_class_list should be a list of classes that - # are safe to copy using copy.deepcopy, but do not - # implement __deepcopy__, __reduce__, or - # __reduce_ex__. - _DeepCopyAllowedClasses(allowed_class_list=[]), - ) - - finally: - # Clear the cache once all tests that use a particular fixture - # have completed. - num_uses_remaining[0] -= 1 - if not num_uses_remaining[0]: - cache.clear() - - # Set in the pytest_collection_modifyitems() - wrapper.num_uses_remaining = num_uses_remaining - - return wrapper - - -def _count_num_fixture_uses(items): - # Helper function, counts the number of tests that use each cached - # fixture. Should be called from pytest_collection_modifyitems(). - for item in items: - is_skipped = item.get_closest_marker("skip") or any( - mark.args[0] for mark in item.iter_markers("skipif") - ) - if is_skipped: - continue - - for fixturedefs in item._fixtureinfo.name2fixturedefs.values(): - # Only increment the active fixturedef, in a name has been overridden. - fixturedef = fixturedefs[-1] - if hasattr(fixturedef.func, "num_uses_remaining"): - fixturedef.func.num_uses_remaining[0] += 1 - - -def _remove_global_fixture_definitions(items): - # Helper function, removes fixture definitions from the global - # variables of the modules they were defined in. This is intended - # to improve readability of error messages by giving a NameError - # if a test function accesses a pytest fixture but doesn't include - # it as an argument. Should be called from - # pytest_collection_modifyitems(). - - modules = set(item.module for item in items) - - for module in modules: - for name in dir(module): - obj = getattr(module, name) - if hasattr(obj, "_pytestfixturefunction") and isinstance( - obj._pytestfixturefunction, _pytest.fixtures.FixtureFunctionMarker - ): - delattr(module, name) - - -def identity_after(x, sleep): - """Testing function to return identity after sleep - - Parameters - ---------- - x : int - The input value. 
- - sleep : float - The amount of time to sleep - - Returns - ------- - x : object - The original value - """ - if sleep: - time.sleep(sleep) - return x - - -def terminate_self(): - """Testing function to terminate the process.""" - sys.exit(-1) From 86229c22cb3d7b5babba0db061e4c02eb9cd3a8c Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Tue, 31 Aug 2021 15:47:40 -0700 Subject: [PATCH 12/14] lint --- python/tvm/relay/frontend/onnx.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 557169b44bcc..90fbb15acb33 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -19,7 +19,6 @@ """ONNX: Open Neural Network Exchange frontend for Relay.""" import copy import warnings -from os import read import numpy as np import tvm @@ -3535,9 +3534,8 @@ def _impl_v13(cls, inputs, attr, params): return relay.sum(loss) / weight_total elif reduction == "sum": return relay.sum(loss) - else: - # Case reduction == 'none' - return loss + # Case reduction == 'none' + return loss # compatible operators that do NOT require any conversion. From 2a7b2b72d8576a774371a1e41cf464546c5fe21a Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Tue, 31 Aug 2021 15:56:33 -0700 Subject: [PATCH 13/14] so cool --- python/tvm/relay/frontend/onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 90fbb15acb33..a5ae959901ba 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -3532,7 +3532,7 @@ def _impl_v13(cls, inputs, attr, params): if reduction == "mean": return relay.sum(loss) / weight_total - elif reduction == "sum": + if reduction == "sum": return relay.sum(loss) # Case reduction == 'none' return loss From c6b41f347e2e9780d69d78e24421047d6bc35fd9 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 31 Aug 2021 21:43:50 -0700 Subject: [PATCH 14/14] jostle ci
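
Note (not part of the patch series): as a cross-check on the converter logic the series converges on — gather the target-class log-probability, scale by the per-class weight, zero out ignore_index positions, and divide by the surviving weight total for "mean" — the following NumPy sketch reproduces the same computation. The function name and the use of NumPy are illustrative assumptions, not anything from the patches.

    import numpy as np

    def nll_loss_reference(x, target, weight=None, reduction="mean", ignore_index=None):
        # x:      (N, C, d1, ..., dk) log-probabilities
        # target: (N, d1, ..., dk) integer class indices
        # weight: optional (C,) per-class weights, defaults to ones
        if weight is None:
            weight = np.ones(x.shape[1], dtype=x.dtype)

        # Pick the log-probability of each position's target class.
        picked = np.take_along_axis(x, np.expand_dims(target, 1), axis=1)
        loss = -np.squeeze(picked, axis=1)

        # Scale every position by the weight of its target class.
        select_weights = weight[target]
        loss = loss * select_weights

        if ignore_index is not None:
            # Ignored positions contribute neither to the loss nor to the
            # denominator used by the "mean" reduction.
            keep = (target != ignore_index).astype(x.dtype)
            loss = loss * keep
            select_weights = select_weights * keep

        if reduction == "mean":
            return loss.sum() / select_weights.sum()
        if reduction == "sum":
            return loss.sum()
        return loss  # reduction == "none"

With weight=None this collapses to an ordinary masked mean, which is why the later patches materialize a ones weight tensor instead of keeping a separate two-input code path.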
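
On the opset-13 Unsqueeze handling introduced alongside the loss converter (axes arrive as an input rather than an attribute and are applied in sorted order): sorting matters because each expand_dims call operates on the progressively larger tensor, so inserting the smallest axes first keeps the later axis numbers valid. A minimal NumPy illustration, assuming nothing beyond the ONNX semantics:

    import numpy as np

    x = np.zeros((2, 3, 4))
    for axis in sorted([4, 0]):      # ONNX axes refer to the *output* rank
        x = np.expand_dims(x, axis)  # axis 0 first: (1,2,3,4); then axis 4: (1,2,3,4,1)
    print(x.shape)                   # (1, 2, 3, 4, 1)
    # Applying axis 4 before axis 0 would fail, since axis 4 is out of range
    # for the rank-3 input.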
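
For readers who want to exercise the new converter outside of the ONNX node-test suite, a model containing a single NegativeLogLikelihoodLoss node can be imported roughly as follows. This is a sketch rather than a test from the patch: the shapes, the "mean" reduction, and the choice of the graph executor on llvm are arbitrary.

    import numpy as np
    from onnx import TensorProto, helper
    from tvm import relay

    node = helper.make_node(
        "NegativeLogLikelihoodLoss",
        inputs=["input", "target"],
        outputs=["loss"],
        reduction="mean",
    )
    graph = helper.make_graph(
        [node],
        "nllloss_example",
        inputs=[
            helper.make_tensor_value_info("input", TensorProto.FLOAT, [3, 5]),
            helper.make_tensor_value_info("target", TensorProto.INT64, [3]),
        ],
        outputs=[helper.make_tensor_value_info("loss", TensorProto.FLOAT, [])],
    )
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])

    mod, params = relay.frontend.from_onnx(
        model, shape={"input": (3, 5), "target": (3,)}, freeze_params=True
    )
    run = relay.create_executor("graph", mod=mod, target="llvm").evaluate()
    loss = run(
        np.random.randn(3, 5).astype("float32"),
        np.random.randint(0, 5, size=(3,)).astype("int64"),
    )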