From 17a65ab17ed1ffee471aad944fb1f3cf59390a21 Mon Sep 17 00:00:00 2001 From: shingjan Date: Mon, 2 Aug 2021 17:03:49 -0700 Subject: [PATCH 01/38] reapply changes --- python/tvm/auto_scheduler/utils.py | 44 ++++--------------- python/tvm/autotvm/record.py | 4 +- .../tvm/autotvm/tuner/xgboost_cost_model.py | 3 +- python/tvm/autotvm/utils.py | 4 +- python/tvm/contrib/popen_pool.py | 7 ++- 5 files changed, 21 insertions(+), 41 deletions(-) diff --git a/python/tvm/auto_scheduler/utils.py b/python/tvm/auto_scheduler/utils.py index 1c03491c5614..dcac6eaa7deb 100644 --- a/python/tvm/auto_scheduler/utils.py +++ b/python/tvm/auto_scheduler/utils.py @@ -20,9 +20,6 @@ from typing import Hashable import json -import multiprocessing -import multiprocessing.pool -import queue import signal import threading import traceback @@ -40,6 +37,7 @@ from tvm.tir import expr from tvm.tir.transform import Simplify from tvm.ir.transform import Sequential +from tvm.contrib.popen_pool import PopenWorker from ..te import Tensor, placeholder @@ -289,41 +287,17 @@ def wrapper(): return res[0] -def _func_wrapper(que, func, args, kwargs, add_thread_wrapper): - """Call function and return the result over the queue.""" - try: - if add_thread_wrapper: - # Add a new layer of threadinng to avoid the conflict between - # python's multiprocessing and tvm's thread pool. - res = call_func_with_thread(func, args, kwargs) - else: - res = func(*args, **kwargs) - que.put(res) - except Exception: # pylint: disable=broad-except - que.put(Exception(make_traceback_info())) - - -def call_func_with_timeout(timeout, func, args=(), kwargs=None, add_thread_wrapper=False): +def call_func_with_timeout( + timeout, func, args=(), kwargs=None, add_thread_wrapper=False +): # pylint: disable=unused-argument """Call a function with timeout""" - que = multiprocessing.Queue(2) - process = multiprocessing.Process( - target=_func_wrapper, args=(que, func, args, kwargs or {}, add_thread_wrapper) - ) - process.start() + process = PopenWorker() + process.send(func, args, kwargs, timeout) try: - res = que.get(timeout=timeout) - except queue.Empty: - res = TimeoutError() - - # clean queue and process - kill_child_processes(process.pid) - process.terminate() - process.join() - que.close() - que.join_thread() - del process - del que + res = process.recv() + except Exception: # pylint: disable=broad-except + res = Exception(make_traceback_info()) return res diff --git a/python/tvm/autotvm/record.py b/python/tvm/autotvm/record.py index 4f11aea2911f..8145563f5075 100644 --- a/python/tvm/autotvm/record.py +++ b/python/tvm/autotvm/record.py @@ -21,7 +21,6 @@ import argparse import base64 import logging -import multiprocessing import pickle import json import time @@ -32,6 +31,7 @@ from .. import build, lower from ..target import Target +from ..contrib import popen_pool from .. import __version__ from . 
import task from .task import ConfigEntity, ApplyHistoryBest @@ -230,7 +230,7 @@ def split_workload(in_file, clean=True): lines = list(open(in_file).readlines()) logger.info("start converting...") - pool = multiprocessing.Pool() + pool = popen_pool.PopenPoolExecutor() lines = [rec for rec in pool.map(decode, lines) if rec is not None] logger.info("map done %.2f", time.time() - tic) diff --git a/python/tvm/autotvm/tuner/xgboost_cost_model.py b/python/tvm/autotvm/tuner/xgboost_cost_model.py index 81904354c5fd..8670210db1ee 100644 --- a/python/tvm/autotvm/tuner/xgboost_cost_model.py +++ b/python/tvm/autotvm/tuner/xgboost_cost_model.py @@ -23,6 +23,7 @@ import numpy as np +from ...contrib import popen_pool from .. import feature from ..utils import get_rank from .metric import max_curve, recall_curve, cover_curve @@ -161,7 +162,7 @@ def _reset_pool(self, space, target, task): _extract_space = space _extract_target = target _extract_task = task - self.pool = multiprocessing.Pool(self.num_threads) + self.pool = popen_pool.PopenPoolExecutor(self.num_threads) def _close_pool(self): if self.pool: diff --git a/python/tvm/autotvm/utils.py b/python/tvm/autotvm/utils.py index fa1dcfd1241b..ec3f18daa6c9 100644 --- a/python/tvm/autotvm/utils.py +++ b/python/tvm/autotvm/utils.py @@ -17,7 +17,6 @@ # pylint: disable=invalid-name """Utilities""" import logging -import multiprocessing import time from random import randrange @@ -25,6 +24,7 @@ import numpy as np import tvm.arith from tvm.tir import expr +from tvm.contrib.popen_pool import PopenPoolExecutor logger = logging.getLogger("autotvm") @@ -111,7 +111,7 @@ def pool_map(func, args, batch_size, verbose=False, pool=None): ret = None tic = time.time() - local_pool = pool or multiprocessing.Pool() + local_pool = pool or PopenPoolExecutor() if verbose: logger.info("mapping begin") for i in range(0, len(args), batch_size): diff --git a/python/tvm/contrib/popen_pool.py b/python/tvm/contrib/popen_pool.py index 2f552034e9f8..c091da637e2b 100644 --- a/python/tvm/contrib/popen_pool.py +++ b/python/tvm/contrib/popen_pool.py @@ -269,9 +269,14 @@ class PopenPoolExecutor: timeout : float Timeout value for each function submit. + Note + ---- + By default, max_workers is set to the number returned by + os.cpu_count(). This matches the + behavior of multiprocessing.Pool(). 
""" - def __init__(self, max_workers, timeout=None): + def __init__(self, max_workers=os.cpu_count(), timeout=None): # Use an internal thread pool to send to popen workers self._threadpool = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) self._timeout = timeout From 3ecfbb71c5aaa8b4a3ce9cdd40cd5b5d729d7624 Mon Sep 17 00:00:00 2001 From: shingjan Date: Mon, 2 Aug 2021 17:20:04 -0700 Subject: [PATCH 02/38] create tvm.testing --- .../tvm/testing/auto_scheduler_common.py | 0 tests/python/unittest/test_auto_scheduler_compute_dag.py | 2 +- tests/python/unittest/test_auto_scheduler_cost_model.py | 2 +- .../python/unittest/test_auto_scheduler_evolutionary_search.py | 2 +- tests/python/unittest/test_auto_scheduler_feature.py | 2 +- tests/python/unittest/test_auto_scheduler_layout_rewrite.py | 2 +- tests/python/unittest/test_auto_scheduler_loop_state.py | 2 +- tests/python/unittest/test_auto_scheduler_measure.py | 2 +- tests/python/unittest/test_auto_scheduler_search_policy.py | 2 +- tests/python/unittest/test_auto_scheduler_search_task.py | 2 +- tests/python/unittest/test_auto_scheduler_sketch_generation.py | 2 +- tests/python/unittest/test_auto_scheduler_task_scheduler.py | 2 +- 12 files changed, 11 insertions(+), 11 deletions(-) rename tests/python/unittest/test_auto_scheduler_common.py => python/tvm/testing/auto_scheduler_common.py (100%) diff --git a/tests/python/unittest/test_auto_scheduler_common.py b/python/tvm/testing/auto_scheduler_common.py similarity index 100% rename from tests/python/unittest/test_auto_scheduler_common.py rename to python/tvm/testing/auto_scheduler_common.py diff --git a/tests/python/unittest/test_auto_scheduler_compute_dag.py b/tests/python/unittest/test_auto_scheduler_compute_dag.py index e394115619a4..ea1326de0def 100644 --- a/tests/python/unittest/test_auto_scheduler_compute_dag.py +++ b/tests/python/unittest/test_auto_scheduler_compute_dag.py @@ -23,7 +23,7 @@ from tvm import topi from tvm import auto_scheduler, te -from test_auto_scheduler_common import ( +from tvm.testing.auto_scheduler_common import ( get_tiled_matmul, invalid_compute_definition, matmul_auto_scheduler_test, diff --git a/tests/python/unittest/test_auto_scheduler_cost_model.py b/tests/python/unittest/test_auto_scheduler_cost_model.py index 0b34615583db..d8fc61cefe57 100644 --- a/tests/python/unittest/test_auto_scheduler_cost_model.py +++ b/tests/python/unittest/test_auto_scheduler_cost_model.py @@ -24,7 +24,7 @@ import tvm from tvm import auto_scheduler -from test_auto_scheduler_common import matmul_auto_scheduler_test +from tvm.testing.auto_scheduler_common import matmul_auto_scheduler_test def get_sample_records(number): diff --git a/tests/python/unittest/test_auto_scheduler_evolutionary_search.py b/tests/python/unittest/test_auto_scheduler_evolutionary_search.py index e28219d0979f..37b802d013a5 100644 --- a/tests/python/unittest/test_auto_scheduler_evolutionary_search.py +++ b/tests/python/unittest/test_auto_scheduler_evolutionary_search.py @@ -18,7 +18,7 @@ import tvm import pytest -from test_auto_scheduler_common import matmul_auto_scheduler_test +from tvm.testing.auto_scheduler_common import matmul_auto_scheduler_test from tvm import auto_scheduler, te from tvm.auto_scheduler.cost_model.cost_model import PythonBasedModel diff --git a/tests/python/unittest/test_auto_scheduler_feature.py b/tests/python/unittest/test_auto_scheduler_feature.py index 82cfb1d6508b..8b42d79417a4 100644 --- a/tests/python/unittest/test_auto_scheduler_feature.py +++ 
b/tests/python/unittest/test_auto_scheduler_feature.py @@ -23,7 +23,7 @@ import tvm from tvm import te, auto_scheduler -from test_auto_scheduler_common import matmul_auto_scheduler_test +from tvm.testing.auto_scheduler_common import matmul_auto_scheduler_test def fequal(a, b): diff --git a/tests/python/unittest/test_auto_scheduler_layout_rewrite.py b/tests/python/unittest/test_auto_scheduler_layout_rewrite.py index c9291965613b..83f779955f29 100644 --- a/tests/python/unittest/test_auto_scheduler_layout_rewrite.py +++ b/tests/python/unittest/test_auto_scheduler_layout_rewrite.py @@ -26,7 +26,7 @@ from tvm import topi from tvm import auto_scheduler, te -from test_auto_scheduler_common import get_tiled_matmul, matmul_auto_scheduler_test +from tvm.testing.auto_scheduler_common import get_tiled_matmul, matmul_auto_scheduler_test def test_apply_steps_with_layout_rewrite(): diff --git a/tests/python/unittest/test_auto_scheduler_loop_state.py b/tests/python/unittest/test_auto_scheduler_loop_state.py index 44ed1fc42562..f528387a163c 100644 --- a/tests/python/unittest/test_auto_scheduler_loop_state.py +++ b/tests/python/unittest/test_auto_scheduler_loop_state.py @@ -23,7 +23,7 @@ from tvm import auto_scheduler, te from tvm import topi -from test_auto_scheduler_common import ( +from tvm.testing.auto_scheduler_common import ( matmul_auto_scheduler_test, conv2d_nchw_bn_relu_auto_scheduler_test, ) diff --git a/tests/python/unittest/test_auto_scheduler_measure.py b/tests/python/unittest/test_auto_scheduler_measure.py index 375f8167ff08..63142feb3086 100644 --- a/tests/python/unittest/test_auto_scheduler_measure.py +++ b/tests/python/unittest/test_auto_scheduler_measure.py @@ -26,7 +26,7 @@ import tempfile import tvm.testing import pickle -from test_auto_scheduler_common import matmul_auto_scheduler_test +from tvm.testing.auto_scheduler_common import matmul_auto_scheduler_test from tvm.auto_scheduler import workload_registry diff --git a/tests/python/unittest/test_auto_scheduler_search_policy.py b/tests/python/unittest/test_auto_scheduler_search_policy.py index d114ce4f9d16..009efe728e7e 100644 --- a/tests/python/unittest/test_auto_scheduler_search_policy.py +++ b/tests/python/unittest/test_auto_scheduler_search_policy.py @@ -27,7 +27,7 @@ from tvm import auto_scheduler from tvm.auto_scheduler.utils import get_const_tuple -from test_auto_scheduler_common import ( +from tvm.testing.auto_scheduler_common import ( matmul_auto_scheduler_test, zero_rank_compute_auto_scheduler_test, zero_rank_reduce_auto_scheduler_test, diff --git a/tests/python/unittest/test_auto_scheduler_search_task.py b/tests/python/unittest/test_auto_scheduler_search_task.py index cd47f1e468ff..a5cd44f14cb8 100644 --- a/tests/python/unittest/test_auto_scheduler_search_task.py +++ b/tests/python/unittest/test_auto_scheduler_search_task.py @@ -24,7 +24,7 @@ import tvm.testing from tvm import auto_scheduler from tvm.auto_scheduler.utils import get_const_tuple -from test_auto_scheduler_common import ( +from tvm.testing.auto_scheduler_common import ( matmul_auto_scheduler_test, zero_rank_compute_auto_scheduler_test, zero_rank_reduce_auto_scheduler_test, diff --git a/tests/python/unittest/test_auto_scheduler_sketch_generation.py b/tests/python/unittest/test_auto_scheduler_sketch_generation.py index 4092ae0b0500..5e32ddf9b3bf 100644 --- a/tests/python/unittest/test_auto_scheduler_sketch_generation.py +++ b/tests/python/unittest/test_auto_scheduler_sketch_generation.py @@ -27,7 +27,7 @@ from tvm.auto_scheduler import _ffi_api from 
tvm.auto_scheduler.loop_state import Stage -from test_auto_scheduler_common import ( +from tvm.testing.auto_scheduler_common import ( matmul_auto_scheduler_test, double_matmul_auto_scheduler_test, conv2d_nchw_bn_relu_auto_scheduler_test, diff --git a/tests/python/unittest/test_auto_scheduler_task_scheduler.py b/tests/python/unittest/test_auto_scheduler_task_scheduler.py index bbe29b1ba4f9..a8dc4f3f899f 100644 --- a/tests/python/unittest/test_auto_scheduler_task_scheduler.py +++ b/tests/python/unittest/test_auto_scheduler_task_scheduler.py @@ -25,7 +25,7 @@ import tvm.testing from tvm import auto_scheduler -from test_auto_scheduler_common import matmul_auto_scheduler_test +from tvm.testing.auto_scheduler_common import matmul_auto_scheduler_test @tvm.testing.requires_llvm From 02bdfb9597ae400a7bef5eaebc7ce47c428e4d8f Mon Sep 17 00:00:00 2001 From: shingjan Date: Tue, 3 Aug 2021 11:19:13 -0700 Subject: [PATCH 03/38] reorganize testing utils --- python/tvm/testing/__init__.py | 19 + ..._scheduler_common.py => auto_scheduler.py} | 0 python/tvm/testing/utils.py | 1290 +++++++++++++++++ .../test_auto_scheduler_compute_dag.py | 2 +- .../test_auto_scheduler_cost_model.py | 2 +- ...test_auto_scheduler_evolutionary_search.py | 2 +- .../unittest/test_auto_scheduler_feature.py | 2 +- .../test_auto_scheduler_layout_rewrite.py | 2 +- .../test_auto_scheduler_loop_state.py | 2 +- .../unittest/test_auto_scheduler_measure.py | 2 +- .../test_auto_scheduler_search_policy.py | 2 +- .../test_auto_scheduler_search_task.py | 2 +- .../test_auto_scheduler_sketch_generation.py | 2 +- .../test_auto_scheduler_task_scheduler.py | 2 +- 14 files changed, 1320 insertions(+), 11 deletions(-) create mode 100644 python/tvm/testing/__init__.py rename python/tvm/testing/{auto_scheduler_common.py => auto_scheduler.py} (100%) create mode 100644 python/tvm/testing/utils.py diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py new file mode 100644 index 000000000000..942ed64b2250 --- /dev/null +++ b/python/tvm/testing/__init__.py @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=redefined-builtin, wildcard-import +from .utils import * +from .auto_scheduler import * diff --git a/python/tvm/testing/auto_scheduler_common.py b/python/tvm/testing/auto_scheduler.py similarity index 100% rename from python/tvm/testing/auto_scheduler_common.py rename to python/tvm/testing/auto_scheduler.py diff --git a/python/tvm/testing/utils.py b/python/tvm/testing/utils.py new file mode 100644 index 000000000000..79518ac24984 --- /dev/null +++ b/python/tvm/testing/utils.py @@ -0,0 +1,1290 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# pylint: disable=invalid-name,unnecessary-comprehension +""" TVM testing utilities + +Testing Markers +*************** + +We use pytest markers to specify the requirements of test functions. Currently +there is a single distinction that matters for our testing environment: does +the test require a gpu. For tests that require just a gpu or just a cpu, we +have the decorator :py:func:`requires_gpu` that enables the test when a gpu is +available. To avoid running tests that don't require a gpu on gpu nodes, this +decorator also sets the pytest marker `gpu` so we can use select the gpu subset +of tests (using `pytest -m gpu`). + +Unfortunately, many tests are written like this: + +.. python:: + + def test_something(): + for target in all_targets(): + do_something() + +The test uses both gpu and cpu targets, so the test needs to be run on both cpu +and gpu nodes. But we still want to only run the cpu targets on the cpu testing +node. The solution is to mark these tests with the gpu marker so they will be +run on the gpu nodes. But we also modify all_targets (renamed to +enabled_targets) so that it only returns gpu targets on gpu nodes and cpu +targets on cpu nodes (using an environment variable). + +Instead of using the all_targets function, future tests that would like to +test against a variety of targets should use the +:py:func:`tvm.testing.parametrize_targets` functionality. This allows us +greater control over which targets are run on which testing nodes. + +If in the future we want to add a new type of testing node (for example +fpgas), we need to add a new marker in `tests/python/pytest.ini` and a new +function in this module. Then targets using this node should be added to the +`TVM_TEST_TARGETS` environment variable in the CI. +""" +import collections +import copy +import functools +import logging +import os +import sys +import time +import pickle +import pytest +import _pytest +import numpy as np +import tvm +import tvm.arith +import tvm.tir +import tvm.te +import tvm._ffi +from tvm.contrib import nvcc +from tvm.error import TVMError + + +def assert_allclose(actual, desired, rtol=1e-7, atol=1e-7): + """Version of np.testing.assert_allclose with `atol` and `rtol` fields set + in reasonable defaults. + + Arguments `actual` and `desired` are not interchangeable, since the function + compares the `abs(actual-desired)` with `atol+rtol*abs(desired)`. Since we + often allow `desired` to be close to zero, we generally want non-zero `atol`. 
+ """ + actual = np.asanyarray(actual) + desired = np.asanyarray(desired) + np.testing.assert_allclose(actual.shape, desired.shape) + np.testing.assert_allclose(actual, desired, rtol=rtol, atol=atol, verbose=True) + + +def check_numerical_grads( + function, input_values, grad_values, function_value=None, delta=1e-3, atol=1e-2, rtol=0.1 +): + """A helper function that checks that numerical gradients of a function are + equal to gradients computed in some different way (analytical gradients). + + Numerical gradients are computed using finite difference approximation. To + reduce the number of function evaluations, the number of points used is + gradually increased if the error value is too high (up to 5 points). + + Parameters + ---------- + function + A function that takes inputs either as positional or as keyword + arguments (either `function(*input_values)` or `function(**input_values)` + should be correct) and returns a scalar result. Should accept numpy + ndarrays. + + input_values : Dict[str, numpy.ndarray] or List[numpy.ndarray] + A list of values or a dict assigning values to variables. Represents the + point at which gradients should be computed. + + grad_values : Dict[str, numpy.ndarray] or List[numpy.ndarray] + Gradients computed using a different method. + + function_value : float, optional + Should be equal to `function(**input_values)`. + + delta : float, optional + A small number used for numerical computation of partial derivatives. + The default 1e-3 is a good choice for float32. + + atol : float, optional + Absolute tolerance. Gets multiplied by `sqrt(n)` where n is the size of a + gradient. + + rtol : float, optional + Relative tolerance. + """ + # If input_values is a list then function accepts positional arguments + # In this case transform it to a function taking kwargs of the form {"0": ..., "1": ...} + if not isinstance(input_values, dict): + input_len = len(input_values) + input_values = {str(idx): val for idx, val in enumerate(input_values)} + + def _function(_input_len=input_len, _orig_function=function, **kwargs): + return _orig_function(*(kwargs[str(i)] for i in range(input_len))) + + function = _function + + grad_values = {str(idx): val for idx, val in enumerate(grad_values)} + + if function_value is None: + function_value = function(**input_values) + + # a helper to modify j-th element of val by a_delta + def modify(val, j, a_delta): + val = val.copy() + val.reshape(-1)[j] = val.reshape(-1)[j] + a_delta + return val + + # numerically compute a partial derivative with respect to j-th element of the var `name` + def derivative(x_name, j, a_delta): + modified_values = { + n: modify(val, j, a_delta) if n == x_name else val for n, val in input_values.items() + } + return (function(**modified_values) - function_value) / a_delta + + def compare_derivative(j, n_der, grad): + der = grad.reshape(-1)[j] + return np.abs(n_der - der) < atol + rtol * np.abs(n_der) + + for x_name, grad in grad_values.items(): + if grad.shape != input_values[x_name].shape: + raise AssertionError( + "Gradient wrt '{}' has unexpected shape {}, expected {} ".format( + x_name, grad.shape, input_values[x_name].shape + ) + ) + + ngrad = np.zeros_like(grad) + + wrong_positions = [] + + # compute partial derivatives for each position in this variable + for j in range(np.prod(grad.shape)): + # forward difference approximation + nder = derivative(x_name, j, delta) + + # if the derivative is not equal to the analytical one, try to use more + # precise and expensive methods + if not compare_derivative(j, 
nder, grad): + # central difference approximation + nder = (derivative(x_name, j, -delta) + nder) / 2 + + if not compare_derivative(j, nder, grad): + # central difference approximation using h = delta/2 + cnder2 = ( + derivative(x_name, j, delta / 2) + derivative(x_name, j, -delta / 2) + ) / 2 + # five-point derivative + nder = (4 * cnder2 - nder) / 3 + + # if the derivatives still don't match, add this position to the + # list of wrong positions + if not compare_derivative(j, nder, grad): + wrong_positions.append(np.unravel_index(j, grad.shape)) + + ngrad.reshape(-1)[j] = nder + + wrong_percentage = int(100 * len(wrong_positions) / np.prod(grad.shape)) + + dist = np.sqrt(np.sum((ngrad - grad) ** 2)) + grad_norm = np.sqrt(np.sum(ngrad ** 2)) + + if not (np.isfinite(dist) and np.isfinite(grad_norm)): + raise ValueError( + "NaN or infinity detected during numerical gradient checking wrt '{}'\n" + "analytical grad = {}\n numerical grad = {}\n".format(x_name, grad, ngrad) + ) + + # we multiply atol by this number to make it more universal for different sizes + sqrt_n = np.sqrt(float(np.prod(grad.shape))) + + if dist > atol * sqrt_n + rtol * grad_norm: + raise AssertionError( + "Analytical and numerical grads wrt '{}' differ too much\n" + "analytical grad = {}\n numerical grad = {}\n" + "{}% of elements differ, first 10 of wrong positions: {}\n" + "distance > atol*sqrt(n) + rtol*grad_norm\n" + "distance {} > {}*{} + {}*{}".format( + x_name, + grad, + ngrad, + wrong_percentage, + wrong_positions[:10], + dist, + atol, + sqrt_n, + rtol, + grad_norm, + ) + ) + + max_diff = np.max(np.abs(ngrad - grad)) + avg_diff = np.mean(np.abs(ngrad - grad)) + logging.info( + "Numerical grad test wrt '%s' of shape %s passes, " + "dist = %f, max_diff = %f, avg_diff = %f", + x_name, + grad.shape, + dist, + max_diff, + avg_diff, + ) + + +def assert_prim_expr_equal(lhs, rhs): + """Assert that lhs and rhs are equal to each other. + + Parameters + ---------- + lhs : tvm.tir.PrimExpr + The left operand. + + rhs : tvm.tir.PrimExpr + The right operand. + """ + ana = tvm.arith.Analyzer() + res = ana.simplify(lhs - rhs) + equal = isinstance(res, tvm.tir.IntImm) and res.value == 0 + if not equal: + raise ValueError("{} and {} are not equal".format(lhs, rhs)) + + +def check_bool_expr_is_true(bool_expr, vranges, cond=None): + """Check that bool_expr holds given the condition cond + for every value of free variables from vranges. + + For example, 2x > 4y solves to x > 2y given x in (0, 10) and y in (0, 10) + here bool_expr is x > 2y, vranges is {x: (0, 10), y: (0, 10)}, cond is 2x > 4y + We create iterations to check, + for x in range(10): + for y in range(10): + assert !(2x > 4y) || (x > 2y) + + Parameters + ---------- + bool_expr : tvm.ir.PrimExpr + Boolean expression to check + vranges: Dict[tvm.tir.expr.Var, tvm.ir.Range] + Free variables and their ranges + cond: tvm.ir.PrimExpr + extra conditions that need to be satisfied. + """ + if cond is not None: + bool_expr = tvm.te.any(tvm.tir.Not(cond), bool_expr) + + def _run_expr(expr, vranges): + """Evaluate expr for every value of free variables + given by vranges and return the tensor of results. 
+ """ + + def _compute_body(*us): + vmap = {v: u + r.min for (v, r), u in zip(vranges.items(), us)} + return tvm.tir.stmt_functor.substitute(expr, vmap) + + A = tvm.te.compute([r.extent.value for v, r in vranges.items()], _compute_body) + args = [tvm.nd.empty(A.shape, A.dtype)] + sch = tvm.te.create_schedule(A.op) + mod = tvm.build(sch, [A]) + mod(*args) + return args[0].numpy() + + res = _run_expr(bool_expr, vranges) + if not np.all(res): + indices = list(np.argwhere(res == 0)[0]) + counterex = [(str(v), i + r.min) for (v, r), i in zip(vranges.items(), indices)] + counterex = sorted(counterex, key=lambda x: x[0]) + counterex = ", ".join([v + " = " + str(i) for v, i in counterex]) + ana = tvm.arith.Analyzer() + raise AssertionError( + "Expression {}\nis not true on {}\n" + "Counterexample: {}".format(ana.simplify(bool_expr), vranges, counterex) + ) + + +def check_int_constraints_trans_consistency(constraints_trans, vranges=None): + """Check IntConstraintsTransform is a bijective transformation. + + Parameters + ---------- + constraints_trans : arith.IntConstraintsTransform + Integer constraints transformation + vranges: Dict[tvm.tir.Var, tvm.ir.Range] + Free variables and their ranges + """ + if vranges is None: + vranges = {} + + def _check_forward(constraints1, constraints2, varmap, backvarmap): + ana = tvm.arith.Analyzer() + all_vranges = vranges.copy() + all_vranges.update({v: r for v, r in constraints1.ranges.items()}) + + # Check that the transformation is injective + cond_on_vars = tvm.tir.const(1, "bool") + for v in constraints1.variables: + if v in varmap: + # variable mapping is consistent + v_back = ana.simplify(tvm.tir.stmt_functor.substitute(varmap[v], backvarmap)) + cond_on_vars = tvm.te.all(cond_on_vars, v == v_back) + # Also we have to check that the new relations are true when old relations are true + cond_subst = tvm.tir.stmt_functor.substitute( + tvm.te.all(tvm.tir.const(1, "bool"), *constraints2.relations), backvarmap + ) + # We have to include relations from vranges too + for v in constraints2.variables: + if v in constraints2.ranges: + r = constraints2.ranges[v] + range_cond = tvm.te.all(v >= r.min, v < r.min + r.extent) + range_cond = tvm.tir.stmt_functor.substitute(range_cond, backvarmap) + cond_subst = tvm.te.all(cond_subst, range_cond) + cond_subst = ana.simplify(cond_subst) + check_bool_expr_is_true( + tvm.te.all(cond_subst, cond_on_vars), + all_vranges, + cond=tvm.te.all(tvm.tir.const(1, "bool"), *constraints1.relations), + ) + + _check_forward( + constraints_trans.src, + constraints_trans.dst, + constraints_trans.src_to_dst, + constraints_trans.dst_to_src, + ) + _check_forward( + constraints_trans.dst, + constraints_trans.src, + constraints_trans.dst_to_src, + constraints_trans.src_to_dst, + ) + + +def _get_targets(target_str=None): + if target_str is None: + target_str = os.environ.get("TVM_TEST_TARGETS", "") + + if len(target_str) == 0: + target_str = DEFAULT_TEST_TARGETS + + target_names = set(t.strip() for t in target_str.split(";") if t.strip()) + + targets = [] + for target in target_names: + target_kind = target.split()[0] + is_enabled = tvm.runtime.enabled(target_kind) + is_runnable = is_enabled and tvm.device(target_kind).exist + targets.append( + { + "target": target, + "target_kind": target_kind, + "is_enabled": is_enabled, + "is_runnable": is_runnable, + } + ) + + if all(not t["is_runnable"] for t in targets): + if tvm.runtime.enabled("llvm"): + logging.warning( + "None of the following targets are supported by this build of TVM: %s." 
+ " Try setting TVM_TEST_TARGETS to a supported target. Defaulting to llvm.", + target_str, + ) + return _get_targets("llvm") + + raise TVMError( + "None of the following targets are supported by this build of TVM: %s." + " Try setting TVM_TEST_TARGETS to a supported target." + " Cannot default to llvm, as it is not enabled." % target_str + ) + + return targets + + +DEFAULT_TEST_TARGETS = ( + "llvm;cuda;opencl;metal;rocm;vulkan -from_device=0;nvptx;" + "llvm -device=arm_cpu;opencl -device=mali,aocl_sw_emu" +) + + +def device_enabled(target): + """Check if a target should be used when testing. + + It is recommended that you use :py:func:`tvm.testing.parametrize_targets` + instead of manually checking if a target is enabled. + + This allows the user to control which devices they are testing against. In + tests, this should be used to check if a device should be used when said + device is an optional part of the test. + + Parameters + ---------- + target : str + Target string to check against + + Returns + ------- + bool + Whether or not the device associated with this target is enabled. + + Example + ------- + >>> @tvm.testing.uses_gpu + >>> def test_mytest(): + >>> for target in ["cuda", "llvm"]: + >>> if device_enabled(target): + >>> test_body... + + Here, `test_body` will only be reached by with `target="cuda"` on gpu test + nodes and `target="llvm"` on cpu test nodes. + """ + assert isinstance(target, str), "device_enabled requires a target as a string" + # only check if device name is found, sometime there are extra flags + target_kind = target.split(" ")[0] + return any(target_kind == t["target_kind"] for t in _get_targets() if t["is_runnable"]) + + +def enabled_targets(): + """Get all enabled targets with associated devices. + + In most cases, you should use :py:func:`tvm.testing.parametrize_targets` instead of + this function. + + In this context, enabled means that TVM was built with support for + this target, the target name appears in the TVM_TEST_TARGETS + environment variable, and a suitable device for running this + target exists. If TVM_TEST_TARGETS is not set, it defaults to + variable DEFAULT_TEST_TARGETS in this module. + + If you use this function in a test, you **must** decorate the test with + :py:func:`tvm.testing.uses_gpu` (otherwise it will never be run on the gpu). + + Returns + ------- + targets: list + A list of pairs of all enabled devices and the associated context + + """ + return [(t["target"], tvm.device(t["target"])) for t in _get_targets() if t["is_runnable"]] + + +def _compose(args, decs): + """Helper to apply multiple markers""" + if len(args) > 0: + f = args[0] + for d in reversed(decs): + f = d(f) + return f + return decs + + +def uses_gpu(*args): + """Mark to differentiate tests that use the GPU in some capacity. + + These tests will be run on CPU-only test nodes and on test nodes with GPUs. + To mark a test that must have a GPU present to run, use + :py:func:`tvm.testing.requires_gpu`. + + Parameters + ---------- + f : function + Function to mark + """ + _uses_gpu = [pytest.mark.gpu] + return _compose(args, _uses_gpu) + + +def requires_gpu(*args): + """Mark a test as requiring a GPU to run. + + Tests with this mark will not be run unless a gpu is present. 
+ + Parameters + ---------- + f : function + Function to mark + """ + _requires_gpu = [ + pytest.mark.skipif( + not tvm.cuda().exist + and not tvm.rocm().exist + and not tvm.opencl().exist + and not tvm.metal().exist + and not tvm.vulkan().exist, + reason="No GPU present", + ), + *uses_gpu(), + ] + return _compose(args, _requires_gpu) + + +def requires_cuda(*args): + """Mark a test as requiring the CUDA runtime. + + This also marks the test as requiring a cuda gpu. + + Parameters + ---------- + f : function + Function to mark + """ + _requires_cuda = [ + pytest.mark.cuda, + pytest.mark.skipif(not device_enabled("cuda"), reason="CUDA support not enabled"), + *requires_gpu(), + ] + return _compose(args, _requires_cuda) + + +def requires_nvptx(*args): + """Mark a test as requiring the NVPTX compilation on the CUDA runtime + + This also marks the test as requiring a cuda gpu, and requiring + LLVM support. + + Parameters + ---------- + f : function + Function to mark + + """ + _requires_nvptx = [ + pytest.mark.skipif(not device_enabled("nvptx"), reason="NVPTX support not enabled"), + *requires_llvm(), + *requires_gpu(), + ] + return _compose(args, _requires_nvptx) + + +def requires_cudagraph(*args): + """Mark a test as requiring the CUDA Graph Feature + + This also marks the test as requiring cuda + + Parameters + ---------- + f : function + Function to mark + """ + _requires_cudagraph = [ + pytest.mark.skipif( + not nvcc.have_cudagraph(), reason="CUDA Graph is not supported in this environment" + ), + *requires_cuda(), + ] + return _compose(args, _requires_cudagraph) + + +def requires_opencl(*args): + """Mark a test as requiring the OpenCL runtime. + + This also marks the test as requiring a gpu. + + Parameters + ---------- + f : function + Function to mark + """ + _requires_opencl = [ + pytest.mark.opencl, + pytest.mark.skipif(not device_enabled("opencl"), reason="OpenCL support not enabled"), + *requires_gpu(), + ] + return _compose(args, _requires_opencl) + + +def requires_rocm(*args): + """Mark a test as requiring the rocm runtime. + + This also marks the test as requiring a gpu. + + Parameters + ---------- + f : function + Function to mark + """ + _requires_rocm = [ + pytest.mark.rocm, + pytest.mark.skipif(not device_enabled("rocm"), reason="rocm support not enabled"), + *requires_gpu(), + ] + return _compose(args, _requires_rocm) + + +def requires_metal(*args): + """Mark a test as requiring the metal runtime. + + This also marks the test as requiring a gpu. + + Parameters + ---------- + f : function + Function to mark + """ + _requires_metal = [ + pytest.mark.metal, + pytest.mark.skipif(not device_enabled("metal"), reason="metal support not enabled"), + *requires_gpu(), + ] + return _compose(args, _requires_metal) + + +def requires_vulkan(*args): + """Mark a test as requiring the vulkan runtime. + + This also marks the test as requiring a gpu. + + Parameters + ---------- + f : function + Function to mark + """ + _requires_vulkan = [ + pytest.mark.vulkan, + pytest.mark.skipif(not device_enabled("vulkan"), reason="vulkan support not enabled"), + *requires_gpu(), + ] + return _compose(args, _requires_vulkan) + + +def requires_tensorcore(*args): + """Mark a test as requiring a tensorcore to run. + + Tests with this mark will not be run unless a tensorcore is present. 
+ + Parameters + ---------- + f : function + Function to mark + """ + _requires_tensorcore = [ + pytest.mark.tensorcore, + pytest.mark.skipif( + not tvm.cuda().exist or not nvcc.have_tensorcore(tvm.cuda(0).compute_version), + reason="No tensorcore present", + ), + *requires_gpu(), + ] + return _compose(args, _requires_tensorcore) + + +def requires_llvm(*args): + """Mark a test as requiring llvm to run. + + Parameters + ---------- + f : function + Function to mark + """ + _requires_llvm = [ + pytest.mark.llvm, + pytest.mark.skipif(not device_enabled("llvm"), reason="LLVM support not enabled"), + ] + return _compose(args, _requires_llvm) + + +def requires_micro(*args): + """Mark a test as requiring microTVM to run. + + Parameters + ---------- + f : function + Function to mark + """ + _requires_micro = [ + pytest.mark.skipif( + tvm.support.libinfo().get("USE_MICRO", "OFF") != "ON", + reason="MicroTVM support not enabled. Set USE_MICRO=ON in config.cmake to enable.", + ) + ] + return _compose(args, _requires_micro) + + +def requires_rpc(*args): + """Mark a test as requiring rpc to run. + + Parameters + ---------- + f : function + Function to mark + """ + _requires_rpc = [ + pytest.mark.skipif( + tvm.support.libinfo().get("USE_RPC", "OFF") != "ON", + reason="RPC support not enabled. Set USE_RPC=ON in config.cmake to enable.", + ) + ] + return _compose(args, _requires_rpc) + + +def _target_to_requirement(target): + # mapping from target to decorator + if target.startswith("cuda"): + return requires_cuda() + if target.startswith("rocm"): + return requires_rocm() + if target.startswith("vulkan"): + return requires_vulkan() + if target.startswith("nvptx"): + return requires_nvptx() + if target.startswith("metal"): + return requires_metal() + if target.startswith("opencl"): + return requires_opencl() + if target.startswith("llvm"): + return requires_llvm() + return [] + + +def _pytest_target_params(targets, excluded_targets=None, xfail_targets=None): + # Include unrunnable targets here. They get skipped by the + # pytest.mark.skipif in _target_to_requirement(), showing up as + # skipped tests instead of being hidden entirely. + if targets is None: + if excluded_targets is None: + excluded_targets = set() + + if xfail_targets is None: + xfail_targets = set() + + target_marks = [] + for t in _get_targets(): + # Excluded targets aren't included in the params at all. + if t["target_kind"] not in excluded_targets: + + # Known failing targets are included, but are marked + # as expected to fail. + extra_marks = [] + if t["target_kind"] in xfail_targets: + extra_marks.append( + pytest.mark.xfail( + reason='Known failing test for target "{}"'.format(t["target_kind"]) + ) + ) + + target_marks.append((t["target"], extra_marks)) + + else: + target_marks = [(target, []) for target in targets] + + return [ + pytest.param(target, marks=_target_to_requirement(target) + extra_marks) + for target, extra_marks in target_marks + ] + + +def _auto_parametrize_target(metafunc): + """Automatically applies parametrize_targets + + Used if a test function uses the "target" fixture, but isn't + already marked with @tvm.testing.parametrize_targets. Intended + for use in the pytest_generate_tests() handler of a conftest.py + file. 
+ + """ + if "target" in metafunc.fixturenames: + parametrized_args = [ + arg.strip() + for mark in metafunc.definition.iter_markers("parametrize") + for arg in mark.args[0].split(",") + ] + + if "target" not in parametrized_args: + # Check if the function is marked with either excluded or + # known failing targets. + excluded_targets = getattr(metafunc.function, "tvm_excluded_targets", []) + xfail_targets = getattr(metafunc.function, "tvm_known_failing_targets", []) + metafunc.parametrize( + "target", + _pytest_target_params(None, excluded_targets, xfail_targets), + scope="session", + ) + + +def parametrize_targets(*args): + """Parametrize a test over a specific set of targets. + + Use this decorator when you want your test to be run over a + specific set of targets and devices. It is intended for use where + a test is applicable only to a specific target, and is + inapplicable to any others (e.g. verifying target-specific + assembly code matches known assembly code). In most + circumstances, :py:func:`tvm.testing.exclude_targets` or + :py:func:`tvm.testing.known_failing_targets` should be used + instead. + + If used as a decorator without arguments, the test will be + parametrized over all targets in + :py:func:`tvm.testing.enabled_targets`. This behavior is + automatically enabled for any target that accepts arguments of + ``target`` or ``dev``, so the explicit use of the bare decorator + is no longer needed, and is maintained for backwards + compatibility. + + Parameters + ---------- + f : function + Function to parametrize. Must be of the form `def test_xxxxxxxxx(target, dev)`:, + where `xxxxxxxxx` is any name. + targets : list[str], optional + Set of targets to run against. If not supplied, + :py:func:`tvm.testing.enabled_targets` will be used. + + Example + ------- + >>> @tvm.testing.parametrize_targets("llvm", "cuda") + >>> def test_mytest(target, dev): + >>> ... # do something + """ + + def wrap(targets): + def func(f): + return pytest.mark.parametrize( + "target", _pytest_target_params(targets), scope="session" + )(f) + + return func + + if len(args) == 1 and callable(args[0]): + return wrap(None)(args[0]) + return wrap(args) + + +def exclude_targets(*args): + """Exclude a test from running on a particular target. + + Use this decorator when you want your test to be run over a + variety of targets and devices (including cpu and gpu devices), + but want to exclude some particular target or targets. For + example, a test may wish to be run against all targets in + tvm.testing.enabled_targets(), except for a particular target that + does not support the capabilities. + + Applies pytest.mark.skipif to the targets given. + + Parameters + ---------- + f : function + Function to parametrize. Must be of the form `def test_xxxxxxxxx(target, dev)`:, + where `xxxxxxxxx` is any name. + targets : list[str] + Set of targets to exclude. + + Example + ------- + >>> @tvm.testing.exclude_targets("cuda") + >>> def test_mytest(target, dev): + >>> ... # do something + + Or + + >>> @tvm.testing.exclude_targets("llvm", "cuda") + >>> def test_mytest(target, dev): + >>> ... # do something + + """ + + def wraps(func): + func.tvm_excluded_targets = args + return func + + return wraps + + +def known_failing_targets(*args): + """Skip a test that is known to fail on a particular target. + + Use this decorator when you want your test to be run over a + variety of targets and devices (including cpu and gpu devices), + but know that it fails for some targets. 
For example, a newly + implemented runtime may not support all features being tested, and + should be excluded. + + Applies pytest.mark.xfail to the targets given. + + Parameters + ---------- + f : function + Function to parametrize. Must be of the form `def test_xxxxxxxxx(target, dev)`:, + where `xxxxxxxxx` is any name. + targets : list[str] + Set of targets to skip. + + Example + ------- + >>> @tvm.testing.known_failing_targets("cuda") + >>> def test_mytest(target, dev): + >>> ... # do something + + Or + + >>> @tvm.testing.known_failing_targets("llvm", "cuda") + >>> def test_mytest(target, dev): + >>> ... # do something + + """ + + def wraps(func): + func.tvm_known_failing_targets = args + return func + + return wraps + + +def parameter(*values, ids=None): + """Convenience function to define pytest parametrized fixtures. + + Declaring a variable using ``tvm.testing.parameter`` will define a + parametrized pytest fixture that can be used by test + functions. This is intended for cases that have no setup cost, + such as strings, integers, tuples, etc. For cases that have a + significant setup cost, please use :py:func:`tvm.testing.fixture` + instead. + + If a test function accepts multiple parameters defined using + ``tvm.testing.parameter``, then the test will be run using every + combination of those parameters. + + The parameter definition applies to all tests in a module. If a + specific test should have different values for the parameter, that + test should be marked with ``@pytest.mark.parametrize``. + + Parameters + ---------- + values + A list of parameter values. A unit test that accepts this + parameter as an argument will be run once for each parameter + given. + + ids : List[str], optional + A list of names for the parameters. If None, pytest will + generate a name from the value. These generated names may not + be readable/useful for composite types such as tuples. + + Returns + ------- + function + A function output from pytest.fixture. + + Example + ------- + >>> size = tvm.testing.parameter(1, 10, 100) + >>> def test_using_size(size): + >>> ... # Test code here + + Or + + >>> shape = tvm.testing.parameter((5,10), (512,1024), ids=['small','large']) + >>> def test_using_size(shape): + >>> ... # Test code here + + """ + + # Optional cls parameter in case a parameter is defined inside a + # class scope. + @pytest.fixture(params=values, ids=ids) + def as_fixture(*_cls, request): + return request.param + + return as_fixture + + +_parametrize_group = 0 + + +def parameters(*value_sets): + """Convenience function to define pytest parametrized fixtures. + + Declaring a variable using tvm.testing.parameters will define a + parametrized pytest fixture that can be used by test + functions. Like :py:func:`tvm.testing.parameter`, this is intended + for cases that have no setup cost, such as strings, integers, + tuples, etc. For cases that have a significant setup cost, please + use :py:func:`tvm.testing.fixture` instead. + + Unlike :py:func:`tvm.testing.parameter`, if a test function + accepts multiple parameters defined using a single call to + ``tvm.testing.parameters``, then the test will only be run once + for each set of parameters, not for all combinations of + parameters. + + These parameter definitions apply to all tests in a module. If a + specific test should have different values for some parameters, + that test should be marked with ``@pytest.mark.parametrize``. + + Parameters + ---------- + values : List[tuple] + A list of parameter value sets. 
Each set of values represents + a single combination of values to be tested. A unit test that + accepts parameters defined will be run once for every set of + parameters in the list. + + Returns + ------- + List[function] + Function outputs from pytest.fixture. These should be unpacked + into individual named parameters. + + Example + ------- + >>> size, dtype = tvm.testing.parameters( (16,'float32'), (512,'float16') ) + >>> def test_feature_x(size, dtype): + >>> # Test code here + >>> assert( (size,dtype) in [(16,'float32'), (512,'float16')]) + + """ + global _parametrize_group + parametrize_group = _parametrize_group + _parametrize_group += 1 + + outputs = [] + for param_values in zip(*value_sets): + + # Optional cls parameter in case a parameter is defined inside a + # class scope. + def fixture_func(*_cls, request): + return request.param + + fixture_func.parametrize_group = parametrize_group + fixture_func.parametrize_values = param_values + outputs.append(pytest.fixture(fixture_func)) + + return outputs + + +def _parametrize_correlated_parameters(metafunc): + parametrize_needed = collections.defaultdict(list) + + for name, fixturedefs in metafunc.definition._fixtureinfo.name2fixturedefs.items(): + fixturedef = fixturedefs[-1] + if hasattr(fixturedef.func, "parametrize_group") and hasattr( + fixturedef.func, "parametrize_values" + ): + group = fixturedef.func.parametrize_group + values = fixturedef.func.parametrize_values + parametrize_needed[group].append((name, values)) + + for parametrize_group in parametrize_needed.values(): + if len(parametrize_group) == 1: + name, values = parametrize_group[0] + metafunc.parametrize(name, values, indirect=True) + else: + names = ",".join(name for name, values in parametrize_group) + value_sets = zip(*[values for name, values in parametrize_group]) + metafunc.parametrize(names, value_sets, indirect=True) + + +def fixture(func=None, *, cache_return_value=False): + """Convenience function to define pytest fixtures. + + This should be used as a decorator to mark functions that set up + state before a function. The return value of that fixture + function is then accessible by test functions as that accept it as + a parameter. + + Fixture functions can accept parameters defined with + :py:func:`tvm.testing.parameter`. + + By default, the setup will be performed once for each unit test + that uses a fixture, to ensure that unit tests are independent. + If the setup is expensive to perform, then the + cache_return_value=True argument can be passed to cache the setup. + The fixture function will be run only once (or once per parameter, + if used with tvm.testing.parameter), and the same return value + will be passed to all tests that use it. If the environment + variable TVM_TEST_DISABLE_CACHE is set to a non-zero value, it + will disable this feature and no caching will be performed. + + Example + ------- + >>> @tvm.testing.fixture + >>> def cheap_setup(): + >>> return 5 # Setup code here. + >>> + >>> def test_feature_x(target, dev, cheap_setup) + >>> assert(cheap_setup == 5) # Run test here + + Or + + >>> size = tvm.testing.parameter(1, 10, 100) + >>> + >>> @tvm.testing.fixture + >>> def cheap_setup(size): + >>> return 5*size # Setup code here, based on size. 
+ >>> + >>> def test_feature_x(cheap_setup): + >>> assert(cheap_setup in [5, 50, 500]) + + Or + + >>> @tvm.testing.fixture(cache_return_value=True) + >>> def expensive_setup(): + >>> time.sleep(10) # Setup code here + >>> return 5 + >>> + >>> def test_feature_x(target, dev, expensive_setup): + >>> assert(expensive_setup == 5) + + """ + + force_disable_cache = bool(int(os.environ.get("TVM_TEST_DISABLE_CACHE", "0"))) + cache_return_value = cache_return_value and not force_disable_cache + + # Deliberately at function scope, so that caching can track how + # many times the fixture has been used. If used, the cache gets + # cleared after the fixture is no longer needed. + scope = "function" + + def wraps(func): + if cache_return_value: + func = _fixture_cache(func) + func = pytest.fixture(func, scope=scope) + return func + + if func is None: + return wraps + + return wraps(func) + + +def _fixture_cache(func): + cache = {} + + # Can't use += on a bound method's property. Therefore, this is a + # list rather than a variable so that it can be accessed from the + # pytest_collection_modifyitems(). + num_uses_remaining = [0] + + # Using functools.lru_cache would require the function arguments + # to be hashable, which wouldn't allow caching fixtures that + # depend on numpy arrays. For example, a fixture that takes a + # numpy array as input, then calculates uses a slow method to + # compute a known correct output for that input. Therefore, + # including a fallback for serializable types. + def get_cache_key(*args, **kwargs): + try: + hash((args, kwargs)) + return (args, kwargs) + except TypeError as e: + pass + + try: + return pickle.dumps((args, kwargs)) + except TypeError as e: + raise TypeError( + "TVM caching of fixtures requires arguments to the fixture " + "to be either hashable or serializable" + ) from e + + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + cache_key = get_cache_key(*args, **kwargs) + + try: + cached_value = cache[cache_key] + except KeyError: + cached_value = cache[cache_key] = func(*args, **kwargs) + + try: + yield copy.deepcopy(cached_value) + except TypeError as e: + rfc_url = ( + "https://github.com/apache/tvm-rfcs/blob/main/rfcs/" + "0007-parametrized-unit-tests.md#unresolved-questions" + ) + message = ( + "TVM caching of fixtures can only be used on serializable data types, not {}.\n" + "Please see {} for details/discussion." + ).format(type(cached_value), rfc_url) + raise TypeError(message) from e + + finally: + # Clear the cache once all tests that use a particular fixture + # have completed. + num_uses_remaining[0] -= 1 + if not num_uses_remaining[0]: + cache.clear() + + # Set in the pytest_collection_modifyitems() + wrapper.num_uses_remaining = num_uses_remaining + + return wrapper + + +def _count_num_fixture_uses(items): + # Helper function, counts the number of tests that use each cached + # fixture. Should be called from pytest_collection_modifyitems(). + for item in items: + is_skipped = item.get_closest_marker("skip") or any( + mark.args[0] for mark in item.iter_markers("skipif") + ) + if is_skipped: + continue + + for fixturedefs in item._fixtureinfo.name2fixturedefs.values(): + # Only increment the active fixturedef, in a name has been overridden. + fixturedef = fixturedefs[-1] + if hasattr(fixturedef.func, "num_uses_remaining"): + fixturedef.func.num_uses_remaining[0] += 1 + + +def _remove_global_fixture_definitions(items): + # Helper function, removes fixture definitions from the global + # variables of the modules they were defined in. 
This is intended + # to improve readability of error messages by giving a NameError + # if a test function accesses a pytest fixture but doesn't include + # it as an argument. Should be called from + # pytest_collection_modifyitems(). + + modules = set(item.module for item in items) + + for module in modules: + for name in dir(module): + obj = getattr(module, name) + if hasattr(obj, "_pytestfixturefunction") and isinstance( + obj._pytestfixturefunction, _pytest.fixtures.FixtureFunctionMarker + ): + delattr(module, name) + + +def identity_after(x, sleep): + """Testing function to return identity after sleep + + Parameters + ---------- + x : int + The input value. + + sleep : float + The amount of time to sleep + + Returns + ------- + x : object + The original value + """ + if sleep: + time.sleep(sleep) + return x + + +def terminate_self(): + """Testing function to terminate the process.""" + sys.exit(-1) + + +tvm._ffi._init_api("testing", __name__) diff --git a/tests/python/unittest/test_auto_scheduler_compute_dag.py b/tests/python/unittest/test_auto_scheduler_compute_dag.py index ea1326de0def..81ee5cabbfbc 100644 --- a/tests/python/unittest/test_auto_scheduler_compute_dag.py +++ b/tests/python/unittest/test_auto_scheduler_compute_dag.py @@ -23,7 +23,7 @@ from tvm import topi from tvm import auto_scheduler, te -from tvm.testing.auto_scheduler_common import ( +from tvm.testing.auto_scheduler import ( get_tiled_matmul, invalid_compute_definition, matmul_auto_scheduler_test, diff --git a/tests/python/unittest/test_auto_scheduler_cost_model.py b/tests/python/unittest/test_auto_scheduler_cost_model.py index d8fc61cefe57..50e3ceb6f5fa 100644 --- a/tests/python/unittest/test_auto_scheduler_cost_model.py +++ b/tests/python/unittest/test_auto_scheduler_cost_model.py @@ -24,7 +24,7 @@ import tvm from tvm import auto_scheduler -from tvm.testing.auto_scheduler_common import matmul_auto_scheduler_test +from tvm.testing.auto_scheduler import matmul_auto_scheduler_test def get_sample_records(number): diff --git a/tests/python/unittest/test_auto_scheduler_evolutionary_search.py b/tests/python/unittest/test_auto_scheduler_evolutionary_search.py index 37b802d013a5..b5c99c0f05fd 100644 --- a/tests/python/unittest/test_auto_scheduler_evolutionary_search.py +++ b/tests/python/unittest/test_auto_scheduler_evolutionary_search.py @@ -18,7 +18,7 @@ import tvm import pytest -from tvm.testing.auto_scheduler_common import matmul_auto_scheduler_test +from tvm.testing.auto_scheduler import matmul_auto_scheduler_test from tvm import auto_scheduler, te from tvm.auto_scheduler.cost_model.cost_model import PythonBasedModel diff --git a/tests/python/unittest/test_auto_scheduler_feature.py b/tests/python/unittest/test_auto_scheduler_feature.py index 8b42d79417a4..96090e328328 100644 --- a/tests/python/unittest/test_auto_scheduler_feature.py +++ b/tests/python/unittest/test_auto_scheduler_feature.py @@ -23,7 +23,7 @@ import tvm from tvm import te, auto_scheduler -from tvm.testing.auto_scheduler_common import matmul_auto_scheduler_test +from tvm.testing.auto_scheduler import matmul_auto_scheduler_test def fequal(a, b): diff --git a/tests/python/unittest/test_auto_scheduler_layout_rewrite.py b/tests/python/unittest/test_auto_scheduler_layout_rewrite.py index 83f779955f29..39673fad2495 100644 --- a/tests/python/unittest/test_auto_scheduler_layout_rewrite.py +++ b/tests/python/unittest/test_auto_scheduler_layout_rewrite.py @@ -26,7 +26,7 @@ from tvm import topi from tvm import auto_scheduler, te -from 
tvm.testing.auto_scheduler_common import get_tiled_matmul, matmul_auto_scheduler_test +from tvm.testing.auto_scheduler import get_tiled_matmul, matmul_auto_scheduler_test def test_apply_steps_with_layout_rewrite(): diff --git a/tests/python/unittest/test_auto_scheduler_loop_state.py b/tests/python/unittest/test_auto_scheduler_loop_state.py index f528387a163c..0965ed9efbac 100644 --- a/tests/python/unittest/test_auto_scheduler_loop_state.py +++ b/tests/python/unittest/test_auto_scheduler_loop_state.py @@ -23,7 +23,7 @@ from tvm import auto_scheduler, te from tvm import topi -from tvm.testing.auto_scheduler_common import ( +from tvm.testing.auto_scheduler import ( matmul_auto_scheduler_test, conv2d_nchw_bn_relu_auto_scheduler_test, ) diff --git a/tests/python/unittest/test_auto_scheduler_measure.py b/tests/python/unittest/test_auto_scheduler_measure.py index 63142feb3086..9eae3dd33672 100644 --- a/tests/python/unittest/test_auto_scheduler_measure.py +++ b/tests/python/unittest/test_auto_scheduler_measure.py @@ -26,7 +26,7 @@ import tempfile import tvm.testing import pickle -from tvm.testing.auto_scheduler_common import matmul_auto_scheduler_test +from tvm.testing.auto_scheduler import matmul_auto_scheduler_test from tvm.auto_scheduler import workload_registry diff --git a/tests/python/unittest/test_auto_scheduler_search_policy.py b/tests/python/unittest/test_auto_scheduler_search_policy.py index 009efe728e7e..a9f6596a8548 100644 --- a/tests/python/unittest/test_auto_scheduler_search_policy.py +++ b/tests/python/unittest/test_auto_scheduler_search_policy.py @@ -27,7 +27,7 @@ from tvm import auto_scheduler from tvm.auto_scheduler.utils import get_const_tuple -from tvm.testing.auto_scheduler_common import ( +from tvm.testing.auto_scheduler import ( matmul_auto_scheduler_test, zero_rank_compute_auto_scheduler_test, zero_rank_reduce_auto_scheduler_test, diff --git a/tests/python/unittest/test_auto_scheduler_search_task.py b/tests/python/unittest/test_auto_scheduler_search_task.py index a5cd44f14cb8..f23b02c24298 100644 --- a/tests/python/unittest/test_auto_scheduler_search_task.py +++ b/tests/python/unittest/test_auto_scheduler_search_task.py @@ -24,7 +24,7 @@ import tvm.testing from tvm import auto_scheduler from tvm.auto_scheduler.utils import get_const_tuple -from tvm.testing.auto_scheduler_common import ( +from tvm.testing.auto_scheduler import ( matmul_auto_scheduler_test, zero_rank_compute_auto_scheduler_test, zero_rank_reduce_auto_scheduler_test, diff --git a/tests/python/unittest/test_auto_scheduler_sketch_generation.py b/tests/python/unittest/test_auto_scheduler_sketch_generation.py index 5e32ddf9b3bf..6d2f870ca14d 100644 --- a/tests/python/unittest/test_auto_scheduler_sketch_generation.py +++ b/tests/python/unittest/test_auto_scheduler_sketch_generation.py @@ -27,7 +27,7 @@ from tvm.auto_scheduler import _ffi_api from tvm.auto_scheduler.loop_state import Stage -from tvm.testing.auto_scheduler_common import ( +from tvm.testing.auto_scheduler import ( matmul_auto_scheduler_test, double_matmul_auto_scheduler_test, conv2d_nchw_bn_relu_auto_scheduler_test, diff --git a/tests/python/unittest/test_auto_scheduler_task_scheduler.py b/tests/python/unittest/test_auto_scheduler_task_scheduler.py index a8dc4f3f899f..a3f356929dd1 100644 --- a/tests/python/unittest/test_auto_scheduler_task_scheduler.py +++ b/tests/python/unittest/test_auto_scheduler_task_scheduler.py @@ -25,7 +25,7 @@ import tvm.testing from tvm import auto_scheduler -from tvm.testing.auto_scheduler_common import 
matmul_auto_scheduler_test +from tvm.testing.auto_scheduler import matmul_auto_scheduler_test @tvm.testing.requires_llvm From 3100d99b58c93c4d4d23751e7e2f52c8b3994a4d Mon Sep 17 00:00:00 2001 From: shingjan Date: Tue, 3 Aug 2021 11:29:59 -0700 Subject: [PATCH 04/38] remove wild card matching for auto_scheduler_common --- python/tvm/testing/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py index 942ed64b2250..baa81926cf83 100644 --- a/python/tvm/testing/__init__.py +++ b/python/tvm/testing/__init__.py @@ -16,4 +16,3 @@ # under the License. # pylint: disable=redefined-builtin, wildcard-import from .utils import * -from .auto_scheduler import * From 4b3735403a099f7cbd39041359fe47aa790e91d7 Mon Sep 17 00:00:00 2001 From: shingjan Date: Tue, 3 Aug 2021 13:08:36 -0700 Subject: [PATCH 05/38] disable invalid name --- python/tvm/testing/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py index baa81926cf83..b69b4c10bedd 100644 --- a/python/tvm/testing/__init__.py +++ b/python/tvm/testing/__init__.py @@ -14,5 +14,6 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -# pylint: disable=redefined-builtin, wildcard-import +# pylint: disable=redefined-builtin, wildcard-import, invalid-name from .utils import * +from .auto_scheduler import * From 8f659bf26a8198666286b23ea5a762e63a7ab5ef Mon Sep 17 00:00:00 2001 From: shingjan Date: Tue, 3 Aug 2021 13:42:48 -0700 Subject: [PATCH 06/38] nit --- python/tvm/testing/__init__.py | 2 +- python/tvm/testing/auto_scheduler.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py index b69b4c10bedd..942ed64b2250 100644 --- a/python/tvm/testing/__init__.py +++ b/python/tvm/testing/__init__.py @@ -14,6 +14,6 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -# pylint: disable=redefined-builtin, wildcard-import, invalid-name +# pylint: disable=redefined-builtin, wildcard-import from .utils import * from .auto_scheduler import * diff --git a/python/tvm/testing/auto_scheduler.py b/python/tvm/testing/auto_scheduler.py index 4890268c907b..1a13dfd6b05c 100644 --- a/python/tvm/testing/auto_scheduler.py +++ b/python/tvm/testing/auto_scheduler.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
- +# pylint: disable=invalid-name """Common functions for auto_scheduler test cases""" import tvm from tvm import auto_scheduler, te, topi From d7d9af66a67a4d0bcf7a42d7b20be14329a4045e Mon Sep 17 00:00:00 2001 From: shingjan Date: Tue, 3 Aug 2021 13:47:46 -0700 Subject: [PATCH 07/38] address comments --- python/tvm/auto_scheduler/measure.py | 1 - python/tvm/auto_scheduler/utils.py | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/python/tvm/auto_scheduler/measure.py b/python/tvm/auto_scheduler/measure.py index 8d762602bfd1..7bc0c3a6ce79 100644 --- a/python/tvm/auto_scheduler/measure.py +++ b/python/tvm/auto_scheduler/measure.py @@ -991,7 +991,6 @@ def local_run( enable_cpu_cache_flush, verbose, ), - add_thread_wrapper=True, ) if isinstance(res, TimeoutError): if verbose >= 1: diff --git a/python/tvm/auto_scheduler/utils.py b/python/tvm/auto_scheduler/utils.py index dcac6eaa7deb..0f174991e8a9 100644 --- a/python/tvm/auto_scheduler/utils.py +++ b/python/tvm/auto_scheduler/utils.py @@ -287,9 +287,7 @@ def wrapper(): return res[0] -def call_func_with_timeout( - timeout, func, args=(), kwargs=None, add_thread_wrapper=False -): # pylint: disable=unused-argument +def call_func_with_timeout(timeout, func, args=(), kwargs=None): # pylint: disable=unused-argument """Call a function with timeout""" process = PopenWorker() process.send(func, args, kwargs, timeout) From 7bb1aaa1825cfbf283d13db7408db9dba4961ff9 Mon Sep 17 00:00:00 2001 From: shingjan Date: Tue, 3 Aug 2021 13:57:24 -0700 Subject: [PATCH 08/38] linting --- python/tvm/testing/auto_scheduler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/testing/auto_scheduler.py b/python/tvm/testing/auto_scheduler.py index 1a13dfd6b05c..bc335c82d324 100644 --- a/python/tvm/testing/auto_scheduler.py +++ b/python/tvm/testing/auto_scheduler.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -# pylint: disable=invalid-name +# pylint: disable=invalid-name, missing-function-docstring """Common functions for auto_scheduler test cases""" import tvm from tvm import auto_scheduler, te, topi From 1b46d98baa074099f760a3fd2c90e01db651e470 Mon Sep 17 00:00:00 2001 From: shingjan Date: Tue, 3 Aug 2021 14:02:06 -0700 Subject: [PATCH 09/38] add module docstring --- python/tvm/testing/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py index 942ed64b2250..d9040d3ee51c 100644 --- a/python/tvm/testing/__init__.py +++ b/python/tvm/testing/__init__.py @@ -15,5 +15,6 @@ # specific language governing permissions and limitations # under the License. # pylint: disable=redefined-builtin, wildcard-import +"""Utility Python functions for TVM testing""" from .utils import * from .auto_scheduler import * From b339852978538cca1908dfcb0fead0bb2642382e Mon Sep 17 00:00:00 2001 From: shingjan Date: Tue, 3 Aug 2021 14:40:44 -0700 Subject: [PATCH 10/38] remove __init__.py in testing --- python/tvm/testing/__init__.py | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 python/tvm/testing/__init__.py diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py deleted file mode 100644 index d9040d3ee51c..000000000000 --- a/python/tvm/testing/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=redefined-builtin, wildcard-import -"""Utility Python functions for TVM testing""" -from .utils import * -from .auto_scheduler import * From 51976f041f8f7984c3287e0df367edcae041f796 Mon Sep 17 00:00:00 2001 From: shingjan Date: Tue, 3 Aug 2021 14:45:55 -0700 Subject: [PATCH 11/38] address Junru's comment --- python/tvm/contrib/popen_pool.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/tvm/contrib/popen_pool.py b/python/tvm/contrib/popen_pool.py index c091da637e2b..68c21ef5f212 100644 --- a/python/tvm/contrib/popen_pool.py +++ b/python/tvm/contrib/popen_pool.py @@ -276,7 +276,9 @@ class PopenPoolExecutor: behavior of multiprocessing.pool(). """ - def __init__(self, max_workers=os.cpu_count(), timeout=None): + def __init__(self, max_workers=None, timeout=None): + if max_workers is None: + max_workers = os.cpu_count() # Use an internal thread pool to send to popen workers self._threadpool = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) self._timeout = timeout From 41182bb949c2c769f3359ec16e6bcb01c16dc08f Mon Sep 17 00:00:00 2001 From: shingjan Date: Tue, 3 Aug 2021 15:42:23 -0700 Subject: [PATCH 12/38] add __init__.py --- python/tvm/testing/__init__.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 python/tvm/testing/__init__.py diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py new file mode 100644 index 000000000000..c960d7d34a4b --- /dev/null +++ b/python/tvm/testing/__init__.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=redefined-builtin, wildcard-import +"""Utility Python functions for TVM testing""" +from utils import * +from . 
import auto_scheduler From 6432d6cd39a75bebcce08ea58094f02f8c780860 Mon Sep 17 00:00:00 2001 From: shingjan Date: Tue, 3 Aug 2021 15:44:32 -0700 Subject: [PATCH 13/38] remove testing.py --- python/tvm/testing.py | 1381 ----------------------------------------- 1 file changed, 1381 deletions(-) delete mode 100644 python/tvm/testing.py diff --git a/python/tvm/testing.py b/python/tvm/testing.py deleted file mode 100644 index 9515189815e9..000000000000 --- a/python/tvm/testing.py +++ /dev/null @@ -1,1381 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# pylint: disable=invalid-name,unnecessary-comprehension -""" TVM testing utilities - -Testing Markers -*************** - -We use pytest markers to specify the requirements of test functions. Currently -there is a single distinction that matters for our testing environment: does -the test require a gpu. For tests that require just a gpu or just a cpu, we -have the decorator :py:func:`requires_gpu` that enables the test when a gpu is -available. To avoid running tests that don't require a gpu on gpu nodes, this -decorator also sets the pytest marker `gpu` so we can use select the gpu subset -of tests (using `pytest -m gpu`). - -Unfortunately, many tests are written like this: - -.. python:: - - def test_something(): - for target in all_targets(): - do_something() - -The test uses both gpu and cpu targets, so the test needs to be run on both cpu -and gpu nodes. But we still want to only run the cpu targets on the cpu testing -node. The solution is to mark these tests with the gpu marker so they will be -run on the gpu nodes. But we also modify all_targets (renamed to -enabled_targets) so that it only returns gpu targets on gpu nodes and cpu -targets on cpu nodes (using an environment variable). - -Instead of using the all_targets function, future tests that would like to -test against a variety of targets should use the -:py:func:`tvm.testing.parametrize_targets` functionality. This allows us -greater control over which targets are run on which testing nodes. - -If in the future we want to add a new type of testing node (for example -fpgas), we need to add a new marker in `tests/python/pytest.ini` and a new -function in this module. Then targets using this node should be added to the -`TVM_TEST_TARGETS` environment variable in the CI. 
-""" -import collections -import copy -import functools -import logging -import os -import sys -import time -import pickle -import pytest -import _pytest -import numpy as np -import tvm -import tvm.arith -import tvm.tir -import tvm.te -import tvm._ffi - -from tvm.contrib import nvcc, cudnn -from tvm.error import TVMError - - -def assert_allclose(actual, desired, rtol=1e-7, atol=1e-7): - """Version of np.testing.assert_allclose with `atol` and `rtol` fields set - in reasonable defaults. - - Arguments `actual` and `desired` are not interchangeable, since the function - compares the `abs(actual-desired)` with `atol+rtol*abs(desired)`. Since we - often allow `desired` to be close to zero, we generally want non-zero `atol`. - """ - actual = np.asanyarray(actual) - desired = np.asanyarray(desired) - np.testing.assert_allclose(actual.shape, desired.shape) - np.testing.assert_allclose(actual, desired, rtol=rtol, atol=atol, verbose=True) - - -def check_numerical_grads( - function, input_values, grad_values, function_value=None, delta=1e-3, atol=1e-2, rtol=0.1 -): - """A helper function that checks that numerical gradients of a function are - equal to gradients computed in some different way (analytical gradients). - - Numerical gradients are computed using finite difference approximation. To - reduce the number of function evaluations, the number of points used is - gradually increased if the error value is too high (up to 5 points). - - Parameters - ---------- - function - A function that takes inputs either as positional or as keyword - arguments (either `function(*input_values)` or `function(**input_values)` - should be correct) and returns a scalar result. Should accept numpy - ndarrays. - - input_values : Dict[str, numpy.ndarray] or List[numpy.ndarray] - A list of values or a dict assigning values to variables. Represents the - point at which gradients should be computed. - - grad_values : Dict[str, numpy.ndarray] or List[numpy.ndarray] - Gradients computed using a different method. - - function_value : float, optional - Should be equal to `function(**input_values)`. - - delta : float, optional - A small number used for numerical computation of partial derivatives. - The default 1e-3 is a good choice for float32. - - atol : float, optional - Absolute tolerance. Gets multiplied by `sqrt(n)` where n is the size of a - gradient. - - rtol : float, optional - Relative tolerance. 
- """ - # If input_values is a list then function accepts positional arguments - # In this case transform it to a function taking kwargs of the form {"0": ..., "1": ...} - if not isinstance(input_values, dict): - input_len = len(input_values) - input_values = {str(idx): val for idx, val in enumerate(input_values)} - - def _function(_input_len=input_len, _orig_function=function, **kwargs): - return _orig_function(*(kwargs[str(i)] for i in range(input_len))) - - function = _function - - grad_values = {str(idx): val for idx, val in enumerate(grad_values)} - - if function_value is None: - function_value = function(**input_values) - - # a helper to modify j-th element of val by a_delta - def modify(val, j, a_delta): - val = val.copy() - val.reshape(-1)[j] = val.reshape(-1)[j] + a_delta - return val - - # numerically compute a partial derivative with respect to j-th element of the var `name` - def derivative(x_name, j, a_delta): - modified_values = { - n: modify(val, j, a_delta) if n == x_name else val for n, val in input_values.items() - } - return (function(**modified_values) - function_value) / a_delta - - def compare_derivative(j, n_der, grad): - der = grad.reshape(-1)[j] - return np.abs(n_der - der) < atol + rtol * np.abs(n_der) - - for x_name, grad in grad_values.items(): - if grad.shape != input_values[x_name].shape: - raise AssertionError( - "Gradient wrt '{}' has unexpected shape {}, expected {} ".format( - x_name, grad.shape, input_values[x_name].shape - ) - ) - - ngrad = np.zeros_like(grad) - - wrong_positions = [] - - # compute partial derivatives for each position in this variable - for j in range(np.prod(grad.shape)): - # forward difference approximation - nder = derivative(x_name, j, delta) - - # if the derivative is not equal to the analytical one, try to use more - # precise and expensive methods - if not compare_derivative(j, nder, grad): - # central difference approximation - nder = (derivative(x_name, j, -delta) + nder) / 2 - - if not compare_derivative(j, nder, grad): - # central difference approximation using h = delta/2 - cnder2 = ( - derivative(x_name, j, delta / 2) + derivative(x_name, j, -delta / 2) - ) / 2 - # five-point derivative - nder = (4 * cnder2 - nder) / 3 - - # if the derivatives still don't match, add this position to the - # list of wrong positions - if not compare_derivative(j, nder, grad): - wrong_positions.append(np.unravel_index(j, grad.shape)) - - ngrad.reshape(-1)[j] = nder - - wrong_percentage = int(100 * len(wrong_positions) / np.prod(grad.shape)) - - dist = np.sqrt(np.sum((ngrad - grad) ** 2)) - grad_norm = np.sqrt(np.sum(ngrad ** 2)) - - if not (np.isfinite(dist) and np.isfinite(grad_norm)): - raise ValueError( - "NaN or infinity detected during numerical gradient checking wrt '{}'\n" - "analytical grad = {}\n numerical grad = {}\n".format(x_name, grad, ngrad) - ) - - # we multiply atol by this number to make it more universal for different sizes - sqrt_n = np.sqrt(float(np.prod(grad.shape))) - - if dist > atol * sqrt_n + rtol * grad_norm: - raise AssertionError( - "Analytical and numerical grads wrt '{}' differ too much\n" - "analytical grad = {}\n numerical grad = {}\n" - "{}% of elements differ, first 10 of wrong positions: {}\n" - "distance > atol*sqrt(n) + rtol*grad_norm\n" - "distance {} > {}*{} + {}*{}".format( - x_name, - grad, - ngrad, - wrong_percentage, - wrong_positions[:10], - dist, - atol, - sqrt_n, - rtol, - grad_norm, - ) - ) - - max_diff = np.max(np.abs(ngrad - grad)) - avg_diff = np.mean(np.abs(ngrad - grad)) - logging.info( - 
"Numerical grad test wrt '%s' of shape %s passes, " - "dist = %f, max_diff = %f, avg_diff = %f", - x_name, - grad.shape, - dist, - max_diff, - avg_diff, - ) - - -def assert_prim_expr_equal(lhs, rhs): - """Assert lhs and rhs equals to each iother. - - Parameters - ---------- - lhs : tvm.tir.PrimExpr - The left operand. - - rhs : tvm.tir.PrimExpr - The left operand. - """ - ana = tvm.arith.Analyzer() - res = ana.simplify(lhs - rhs) - equal = isinstance(res, tvm.tir.IntImm) and res.value == 0 - if not equal: - raise ValueError("{} and {} are not equal".format(lhs, rhs)) - - -def check_bool_expr_is_true(bool_expr, vranges, cond=None): - """Check that bool_expr holds given the condition cond - for every value of free variables from vranges. - - for example, 2x > 4y solves to x > 2y given x in (0, 10) and y in (0, 10) - here bool_expr is x > 2y, vranges is {x: (0, 10), y: (0, 10)}, cond is 2x > 4y - We creates iterations to check, - for x in range(10): - for y in range(10): - assert !(2x > 4y) || (x > 2y) - - Parameters - ---------- - bool_expr : tvm.ir.PrimExpr - Boolean expression to check - vranges: Dict[tvm.tir.expr.Var, tvm.ir.Range] - Free variables and their ranges - cond: tvm.ir.PrimExpr - extra conditions needs to be satisfied. - """ - if cond is not None: - bool_expr = tvm.te.any(tvm.tir.Not(cond), bool_expr) - - def _run_expr(expr, vranges): - """Evaluate expr for every value of free variables - given by vranges and return the tensor of results. - """ - - def _compute_body(*us): - vmap = {v: u + r.min for (v, r), u in zip(vranges.items(), us)} - return tvm.tir.stmt_functor.substitute(expr, vmap) - - A = tvm.te.compute([r.extent.value for v, r in vranges.items()], _compute_body) - args = [tvm.nd.empty(A.shape, A.dtype)] - sch = tvm.te.create_schedule(A.op) - mod = tvm.build(sch, [A]) - mod(*args) - return args[0].numpy() - - res = _run_expr(bool_expr, vranges) - if not np.all(res): - indices = list(np.argwhere(res == 0)[0]) - counterex = [(str(v), i + r.min) for (v, r), i in zip(vranges.items(), indices)] - counterex = sorted(counterex, key=lambda x: x[0]) - counterex = ", ".join([v + " = " + str(i) for v, i in counterex]) - ana = tvm.arith.Analyzer() - raise AssertionError( - "Expression {}\nis not true on {}\n" - "Counterexample: {}".format(ana.simplify(bool_expr), vranges, counterex) - ) - - -def check_int_constraints_trans_consistency(constraints_trans, vranges=None): - """Check IntConstraintsTransform is a bijective transformation. 
- - Parameters - ---------- - constraints_trans : arith.IntConstraintsTransform - Integer constraints transformation - vranges: Dict[tvm.tir.Var, tvm.ir.Range] - Free variables and their ranges - """ - if vranges is None: - vranges = {} - - def _check_forward(constraints1, constraints2, varmap, backvarmap): - ana = tvm.arith.Analyzer() - all_vranges = vranges.copy() - all_vranges.update({v: r for v, r in constraints1.ranges.items()}) - - # Check that the transformation is injective - cond_on_vars = tvm.tir.const(1, "bool") - for v in constraints1.variables: - if v in varmap: - # variable mapping is consistent - v_back = ana.simplify(tvm.tir.stmt_functor.substitute(varmap[v], backvarmap)) - cond_on_vars = tvm.te.all(cond_on_vars, v == v_back) - # Also we have to check that the new relations are true when old relations are true - cond_subst = tvm.tir.stmt_functor.substitute( - tvm.te.all(tvm.tir.const(1, "bool"), *constraints2.relations), backvarmap - ) - # We have to include relations from vranges too - for v in constraints2.variables: - if v in constraints2.ranges: - r = constraints2.ranges[v] - range_cond = tvm.te.all(v >= r.min, v < r.min + r.extent) - range_cond = tvm.tir.stmt_functor.substitute(range_cond, backvarmap) - cond_subst = tvm.te.all(cond_subst, range_cond) - cond_subst = ana.simplify(cond_subst) - check_bool_expr_is_true( - tvm.te.all(cond_subst, cond_on_vars), - all_vranges, - cond=tvm.te.all(tvm.tir.const(1, "bool"), *constraints1.relations), - ) - - _check_forward( - constraints_trans.src, - constraints_trans.dst, - constraints_trans.src_to_dst, - constraints_trans.dst_to_src, - ) - _check_forward( - constraints_trans.dst, - constraints_trans.src, - constraints_trans.dst_to_src, - constraints_trans.src_to_dst, - ) - - -def _get_targets(target_str=None): - if target_str is None: - target_str = os.environ.get("TVM_TEST_TARGETS", "") - # Use dict instead of set for de-duplication so that the - # targets stay in the order specified. - target_names = list({t.strip(): None for t in target_str.split(";") if t.strip()}) - - if not target_names: - target_names = DEFAULT_TEST_TARGETS - - targets = [] - for target in target_names: - target_kind = target.split()[0] - is_enabled = tvm.runtime.enabled(target_kind) - is_runnable = is_enabled and tvm.device(target_kind).exist - targets.append( - { - "target": target, - "target_kind": target_kind, - "is_enabled": is_enabled, - "is_runnable": is_runnable, - } - ) - - if all(not t["is_runnable"] for t in targets): - if tvm.runtime.enabled("llvm"): - logging.warning( - "None of the following targets are supported by this build of TVM: %s." - " Try setting TVM_TEST_TARGETS to a supported target. Defaulting to llvm.", - target_str, - ) - return _get_targets("llvm") - - raise TVMError( - "None of the following targets are supported by this build of TVM: %s." - " Try setting TVM_TEST_TARGETS to a supported target." - " Cannot default to llvm, as it is not enabled." % target_str - ) - - return targets - - -DEFAULT_TEST_TARGETS = [ - "llvm", - "llvm -device=arm_cpu", - "cuda", - "cuda -model=unknown -libs=cudnn", - "nvptx", - "vulkan -from_device=0", - "opencl", - "opencl -device=mali,aocl_sw_emu", - "opencl -device=intel_graphics", - "metal", - "rocm", -] - - -def device_enabled(target): - """Check if a target should be used when testing. - - It is recommended that you use :py:func:`tvm.testing.parametrize_targets` - instead of manually checking if a target is enabled. - - This allows the user to control which devices they are testing against. 
In - tests, this should be used to check if a device should be used when said - device is an optional part of the test. - - Parameters - ---------- - target : str - Target string to check against - - Returns - ------- - bool - Whether or not the device associated with this target is enabled. - - Example - ------- - >>> @tvm.testing.uses_gpu - >>> def test_mytest(): - >>> for target in ["cuda", "llvm"]: - >>> if device_enabled(target): - >>> test_body... - - Here, `test_body` will only be reached by with `target="cuda"` on gpu test - nodes and `target="llvm"` on cpu test nodes. - """ - assert isinstance(target, str), "device_enabled requires a target as a string" - # only check if device name is found, sometime there are extra flags - target_kind = target.split(" ")[0] - return any(target_kind == t["target_kind"] for t in _get_targets() if t["is_runnable"]) - - -def enabled_targets(): - """Get all enabled targets with associated devices. - - In most cases, you should use :py:func:`tvm.testing.parametrize_targets` instead of - this function. - - In this context, enabled means that TVM was built with support for - this target, the target name appears in the TVM_TEST_TARGETS - environment variable, and a suitable device for running this - target exists. If TVM_TEST_TARGETS is not set, it defaults to - variable DEFAULT_TEST_TARGETS in this module. - - If you use this function in a test, you **must** decorate the test with - :py:func:`tvm.testing.uses_gpu` (otherwise it will never be run on the gpu). - - Returns - ------- - targets: list - A list of pairs of all enabled devices and the associated context - - """ - return [(t["target"], tvm.device(t["target"])) for t in _get_targets() if t["is_runnable"]] - - -def _compose(args, decs): - """Helper to apply multiple markers""" - if len(args) > 0: - f = args[0] - for d in reversed(decs): - f = d(f) - return f - return decs - - -def uses_gpu(*args): - """Mark to differentiate tests that use the GPU in some capacity. - - These tests will be run on CPU-only test nodes and on test nodes with GPUs. - To mark a test that must have a GPU present to run, use - :py:func:`tvm.testing.requires_gpu`. - - Parameters - ---------- - f : function - Function to mark - """ - _uses_gpu = [pytest.mark.gpu] - return _compose(args, _uses_gpu) - - -def requires_gpu(*args): - """Mark a test as requiring a GPU to run. - - Tests with this mark will not be run unless a gpu is present. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_gpu = [ - pytest.mark.skipif( - not tvm.cuda().exist - and not tvm.rocm().exist - and not tvm.opencl().exist - and not tvm.metal().exist - and not tvm.vulkan().exist, - reason="No GPU present", - ), - *uses_gpu(), - ] - return _compose(args, _requires_gpu) - - -def requires_cuda(*args): - """Mark a test as requiring the CUDA runtime. - - This also marks the test as requiring a cuda gpu. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_cuda = [ - pytest.mark.cuda, - pytest.mark.skipif(not device_enabled("cuda"), reason="CUDA support not enabled"), - *requires_gpu(), - ] - return _compose(args, _requires_cuda) - - -def requires_cudnn(*args): - """Mark a test as requiring the cuDNN library. - - This also marks the test as requiring a cuda gpu. 
- - Parameters - ---------- - f : function - Function to mark - """ - - requirements = [ - pytest.mark.skipif( - not cudnn.exists(), reason="cuDNN library not enabled, or not installed" - ), - *requires_cuda(), - ] - return _compose(args, requirements) - - -def requires_nvptx(*args): - """Mark a test as requiring the NVPTX compilation on the CUDA runtime - - This also marks the test as requiring a cuda gpu, and requiring - LLVM support. - - Parameters - ---------- - f : function - Function to mark - - """ - _requires_nvptx = [ - pytest.mark.skipif(not device_enabled("nvptx"), reason="NVPTX support not enabled"), - *requires_llvm(), - *requires_gpu(), - ] - return _compose(args, _requires_nvptx) - - -def requires_cudagraph(*args): - """Mark a test as requiring the CUDA Graph Feature - - This also marks the test as requiring cuda - - Parameters - ---------- - f : function - Function to mark - """ - _requires_cudagraph = [ - pytest.mark.skipif( - not nvcc.have_cudagraph(), reason="CUDA Graph is not supported in this environment" - ), - *requires_cuda(), - ] - return _compose(args, _requires_cudagraph) - - -def requires_opencl(*args): - """Mark a test as requiring the OpenCL runtime. - - This also marks the test as requiring a gpu. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_opencl = [ - pytest.mark.opencl, - pytest.mark.skipif(not device_enabled("opencl"), reason="OpenCL support not enabled"), - *requires_gpu(), - ] - return _compose(args, _requires_opencl) - - -def requires_rocm(*args): - """Mark a test as requiring the rocm runtime. - - This also marks the test as requiring a gpu. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_rocm = [ - pytest.mark.rocm, - pytest.mark.skipif(not device_enabled("rocm"), reason="rocm support not enabled"), - *requires_gpu(), - ] - return _compose(args, _requires_rocm) - - -def requires_metal(*args): - """Mark a test as requiring the metal runtime. - - This also marks the test as requiring a gpu. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_metal = [ - pytest.mark.metal, - pytest.mark.skipif(not device_enabled("metal"), reason="metal support not enabled"), - *requires_gpu(), - ] - return _compose(args, _requires_metal) - - -def requires_vulkan(*args): - """Mark a test as requiring the vulkan runtime. - - This also marks the test as requiring a gpu. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_vulkan = [ - pytest.mark.vulkan, - pytest.mark.skipif(not device_enabled("vulkan"), reason="vulkan support not enabled"), - *requires_gpu(), - ] - return _compose(args, _requires_vulkan) - - -def requires_tensorcore(*args): - """Mark a test as requiring a tensorcore to run. - - Tests with this mark will not be run unless a tensorcore is present. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_tensorcore = [ - pytest.mark.tensorcore, - pytest.mark.skipif( - not tvm.cuda().exist or not nvcc.have_tensorcore(tvm.cuda(0).compute_version), - reason="No tensorcore present", - ), - *requires_gpu(), - ] - return _compose(args, _requires_tensorcore) - - -def requires_llvm(*args): - """Mark a test as requiring llvm to run. 
- - Parameters - ---------- - f : function - Function to mark - """ - _requires_llvm = [ - pytest.mark.llvm, - pytest.mark.skipif(not device_enabled("llvm"), reason="LLVM support not enabled"), - ] - return _compose(args, _requires_llvm) - - -def requires_micro(*args): - """Mark a test as requiring microTVM to run. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_micro = [ - pytest.mark.skipif( - tvm.support.libinfo().get("USE_MICRO", "OFF") != "ON", - reason="MicroTVM support not enabled. Set USE_MICRO=ON in config.cmake to enable.", - ) - ] - return _compose(args, _requires_micro) - - -def requires_rpc(*args): - """Mark a test as requiring rpc to run. - - Parameters - ---------- - f : function - Function to mark - """ - _requires_rpc = [ - pytest.mark.skipif( - tvm.support.libinfo().get("USE_RPC", "OFF") != "ON", - reason="RPC support not enabled. Set USE_RPC=ON in config.cmake to enable.", - ) - ] - return _compose(args, _requires_rpc) - - -def _target_to_requirement(target): - if isinstance(target, str): - target = tvm.target.Target(target) - - # mapping from target to decorator - if target.kind.name == "cuda" and "cudnn" in target.attrs.get("libs", []): - return requires_cudnn() - if target.kind.name == "cuda": - return requires_cuda() - if target.kind.name == "rocm": - return requires_rocm() - if target.kind.name == "vulkan": - return requires_vulkan() - if target.kind.name == "nvptx": - return requires_nvptx() - if target.kind.name == "metal": - return requires_metal() - if target.kind.name == "opencl": - return requires_opencl() - if target.kind.name == "llvm": - return requires_llvm() - return [] - - -def _pytest_target_params(targets, excluded_targets=None, xfail_targets=None): - # Include unrunnable targets here. They get skipped by the - # pytest.mark.skipif in _target_to_requirement(), showing up as - # skipped tests instead of being hidden entirely. - if targets is None: - if excluded_targets is None: - excluded_targets = set() - - if xfail_targets is None: - xfail_targets = set() - - target_marks = [] - for t in _get_targets(): - # Excluded targets aren't included in the params at all. - if t["target_kind"] not in excluded_targets: - - # Known failing targets are included, but are marked - # as expected to fail. - extra_marks = [] - if t["target_kind"] in xfail_targets: - extra_marks.append( - pytest.mark.xfail( - reason='Known failing test for target "{}"'.format(t["target_kind"]) - ) - ) - - target_marks.append((t["target"], extra_marks)) - - else: - target_marks = [(target, []) for target in targets] - - return [ - pytest.param(target, marks=_target_to_requirement(target) + extra_marks) - for target, extra_marks in target_marks - ] - - -def _auto_parametrize_target(metafunc): - """Automatically applies parametrize_targets - - Used if a test function uses the "target" fixture, but isn't - already marked with @tvm.testing.parametrize_targets. Intended - for use in the pytest_generate_tests() handler of a conftest.py - file. - - """ - - def update_parametrize_target_arg( - argnames, - argvalues, - *args, - **kwargs, - ): - args = [arg.strip() for arg in argnames.split(",") if arg.strip()] - if "target" in args: - target_i = args.index("target") - - new_argvalues = [] - for argvalue in argvalues: - - if isinstance(argvalue, _pytest.mark.structures.ParameterSet): - # The parametrized value is already a - # pytest.param, so track any marks already - # defined. 
- param_set = argvalue.values - target = param_set[target_i] - additional_marks = argvalue.marks - elif len(args) == 1: - # Single value parametrization, argvalue is a list of values. - target = argvalue - param_set = (target,) - additional_marks = [] - else: - # Multiple correlated parameters, argvalue is a list of tuple of values. - param_set = argvalue - target = param_set[target_i] - additional_marks = [] - - new_argvalues.append( - pytest.param( - *param_set, marks=_target_to_requirement(target) + additional_marks - ) - ) - - try: - argvalues[:] = new_argvalues - except TypeError as e: - pyfunc = metafunc.definition.function - filename = pyfunc.__code__.co_filename - line_number = pyfunc.__code__.co_firstlineno - msg = ( - f"Unit test {metafunc.function.__name__} ({filename}:{line_number}) " - "is parametrized using a tuple of parameters instead of a list " - "of parameters." - ) - raise TypeError(msg) from e - - if "target" in metafunc.fixturenames: - # Update any explicit use of @pytest.mark.parmaetrize to - # parametrize over targets. This adds the appropriate - # @tvm.testing.requires_* markers for each target. - for mark in metafunc.definition.iter_markers("parametrize"): - update_parametrize_target_arg(*mark.args, **mark.kwargs) - - # Check if any explicit parametrizations exist, and apply one - # if they do not. If the function is marked with either - # excluded or known failing targets, use these to determine - # the targets to be used. - parametrized_args = [ - arg.strip() - for mark in metafunc.definition.iter_markers("parametrize") - for arg in mark.args[0].split(",") - ] - if "target" not in parametrized_args: - excluded_targets = getattr(metafunc.function, "tvm_excluded_targets", []) - xfail_targets = getattr(metafunc.function, "tvm_known_failing_targets", []) - metafunc.parametrize( - "target", - _pytest_target_params(None, excluded_targets, xfail_targets), - scope="session", - ) - - -def parametrize_targets(*args): - """Parametrize a test over a specific set of targets. - - Use this decorator when you want your test to be run over a - specific set of targets and devices. It is intended for use where - a test is applicable only to a specific target, and is - inapplicable to any others (e.g. verifying target-specific - assembly code matches known assembly code). In most - circumstances, :py:func:`tvm.testing.exclude_targets` or - :py:func:`tvm.testing.known_failing_targets` should be used - instead. - - If used as a decorator without arguments, the test will be - parametrized over all targets in - :py:func:`tvm.testing.enabled_targets`. This behavior is - automatically enabled for any target that accepts arguments of - ``target`` or ``dev``, so the explicit use of the bare decorator - is no longer needed, and is maintained for backwards - compatibility. - - Parameters - ---------- - f : function - Function to parametrize. Must be of the form `def test_xxxxxxxxx(target, dev)`:, - where `xxxxxxxxx` is any name. - targets : list[str], optional - Set of targets to run against. If not supplied, - :py:func:`tvm.testing.enabled_targets` will be used. - - Example - ------- - >>> @tvm.testing.parametrize_targets("llvm", "cuda") - >>> def test_mytest(target, dev): - >>> ... # do something - """ - - # Backwards compatibility, when used as a decorator with no - # arguments implicitly parametrizes over "target". The - # parametrization is now handled by _auto_parametrize_target, so - # this use case can just return the decorated function. 
- if len(args) == 1 and callable(args[0]): - return args[0] - - return pytest.mark.parametrize("target", list(args), scope="session") - - -def exclude_targets(*args): - """Exclude a test from running on a particular target. - - Use this decorator when you want your test to be run over a - variety of targets and devices (including cpu and gpu devices), - but want to exclude some particular target or targets. For - example, a test may wish to be run against all targets in - tvm.testing.enabled_targets(), except for a particular target that - does not support the capabilities. - - Applies pytest.mark.skipif to the targets given. - - Parameters - ---------- - f : function - Function to parametrize. Must be of the form `def test_xxxxxxxxx(target, dev)`:, - where `xxxxxxxxx` is any name. - targets : list[str] - Set of targets to exclude. - - Example - ------- - >>> @tvm.testing.exclude_targets("cuda") - >>> def test_mytest(target, dev): - >>> ... # do something - - Or - - >>> @tvm.testing.exclude_targets("llvm", "cuda") - >>> def test_mytest(target, dev): - >>> ... # do something - - """ - - def wraps(func): - func.tvm_excluded_targets = args - return func - - return wraps - - -def known_failing_targets(*args): - """Skip a test that is known to fail on a particular target. - - Use this decorator when you want your test to be run over a - variety of targets and devices (including cpu and gpu devices), - but know that it fails for some targets. For example, a newly - implemented runtime may not support all features being tested, and - should be excluded. - - Applies pytest.mark.xfail to the targets given. - - Parameters - ---------- - f : function - Function to parametrize. Must be of the form `def test_xxxxxxxxx(target, dev)`:, - where `xxxxxxxxx` is any name. - targets : list[str] - Set of targets to skip. - - Example - ------- - >>> @tvm.testing.known_failing_targets("cuda") - >>> def test_mytest(target, dev): - >>> ... # do something - - Or - - >>> @tvm.testing.known_failing_targets("llvm", "cuda") - >>> def test_mytest(target, dev): - >>> ... # do something - - """ - - def wraps(func): - func.tvm_known_failing_targets = args - return func - - return wraps - - -def parameter(*values, ids=None): - """Convenience function to define pytest parametrized fixtures. - - Declaring a variable using ``tvm.testing.parameter`` will define a - parametrized pytest fixture that can be used by test - functions. This is intended for cases that have no setup cost, - such as strings, integers, tuples, etc. For cases that have a - significant setup cost, please use :py:func:`tvm.testing.fixture` - instead. - - If a test function accepts multiple parameters defined using - ``tvm.testing.parameter``, then the test will be run using every - combination of those parameters. - - The parameter definition applies to all tests in a module. If a - specific test should have different values for the parameter, that - test should be marked with ``@pytest.mark.parametrize``. - - Parameters - ---------- - values - A list of parameter values. A unit test that accepts this - parameter as an argument will be run once for each parameter - given. - - ids : List[str], optional - A list of names for the parameters. If None, pytest will - generate a name from the value. These generated names may not - be readable/useful for composite types such as tuples. - - Returns - ------- - function - A function output from pytest.fixture. 
- - Example - ------- - >>> size = tvm.testing.parameter(1, 10, 100) - >>> def test_using_size(size): - >>> ... # Test code here - - Or - - >>> shape = tvm.testing.parameter((5,10), (512,1024), ids=['small','large']) - >>> def test_using_size(shape): - >>> ... # Test code here - - """ - - # Optional cls parameter in case a parameter is defined inside a - # class scope. - @pytest.fixture(params=values, ids=ids) - def as_fixture(*_cls, request): - return request.param - - return as_fixture - - -_parametrize_group = 0 - - -def parameters(*value_sets): - """Convenience function to define pytest parametrized fixtures. - - Declaring a variable using tvm.testing.parameters will define a - parametrized pytest fixture that can be used by test - functions. Like :py:func:`tvm.testing.parameter`, this is intended - for cases that have no setup cost, such as strings, integers, - tuples, etc. For cases that have a significant setup cost, please - use :py:func:`tvm.testing.fixture` instead. - - Unlike :py:func:`tvm.testing.parameter`, if a test function - accepts multiple parameters defined using a single call to - ``tvm.testing.parameters``, then the test will only be run once - for each set of parameters, not for all combinations of - parameters. - - These parameter definitions apply to all tests in a module. If a - specific test should have different values for some parameters, - that test should be marked with ``@pytest.mark.parametrize``. - - Parameters - ---------- - values : List[tuple] - A list of parameter value sets. Each set of values represents - a single combination of values to be tested. A unit test that - accepts parameters defined will be run once for every set of - parameters in the list. - - Returns - ------- - List[function] - Function outputs from pytest.fixture. These should be unpacked - into individual named parameters. - - Example - ------- - >>> size, dtype = tvm.testing.parameters( (16,'float32'), (512,'float16') ) - >>> def test_feature_x(size, dtype): - >>> # Test code here - >>> assert( (size,dtype) in [(16,'float32'), (512,'float16')]) - - """ - global _parametrize_group - parametrize_group = _parametrize_group - _parametrize_group += 1 - - outputs = [] - for param_values in zip(*value_sets): - - # Optional cls parameter in case a parameter is defined inside a - # class scope. - def fixture_func(*_cls, request): - return request.param - - fixture_func.parametrize_group = parametrize_group - fixture_func.parametrize_values = param_values - outputs.append(pytest.fixture(fixture_func)) - - return outputs - - -def _parametrize_correlated_parameters(metafunc): - parametrize_needed = collections.defaultdict(list) - - for name, fixturedefs in metafunc.definition._fixtureinfo.name2fixturedefs.items(): - fixturedef = fixturedefs[-1] - if hasattr(fixturedef.func, "parametrize_group") and hasattr( - fixturedef.func, "parametrize_values" - ): - group = fixturedef.func.parametrize_group - values = fixturedef.func.parametrize_values - parametrize_needed[group].append((name, values)) - - for parametrize_group in parametrize_needed.values(): - if len(parametrize_group) == 1: - name, values = parametrize_group[0] - metafunc.parametrize(name, values, indirect=True) - else: - names = ",".join(name for name, values in parametrize_group) - value_sets = zip(*[values for name, values in parametrize_group]) - metafunc.parametrize(names, value_sets, indirect=True) - - -def fixture(func=None, *, cache_return_value=False): - """Convenience function to define pytest fixtures. 
- - This should be used as a decorator to mark functions that set up - state before a function. The return value of that fixture - function is then accessible by test functions as that accept it as - a parameter. - - Fixture functions can accept parameters defined with - :py:func:`tvm.testing.parameter`. - - By default, the setup will be performed once for each unit test - that uses a fixture, to ensure that unit tests are independent. - If the setup is expensive to perform, then the - cache_return_value=True argument can be passed to cache the setup. - The fixture function will be run only once (or once per parameter, - if used with tvm.testing.parameter), and the same return value - will be passed to all tests that use it. If the environment - variable TVM_TEST_DISABLE_CACHE is set to a non-zero value, it - will disable this feature and no caching will be performed. - - Example - ------- - >>> @tvm.testing.fixture - >>> def cheap_setup(): - >>> return 5 # Setup code here. - >>> - >>> def test_feature_x(target, dev, cheap_setup) - >>> assert(cheap_setup == 5) # Run test here - - Or - - >>> size = tvm.testing.parameter(1, 10, 100) - >>> - >>> @tvm.testing.fixture - >>> def cheap_setup(size): - >>> return 5*size # Setup code here, based on size. - >>> - >>> def test_feature_x(cheap_setup): - >>> assert(cheap_setup in [5, 50, 500]) - - Or - - >>> @tvm.testing.fixture(cache_return_value=True) - >>> def expensive_setup(): - >>> time.sleep(10) # Setup code here - >>> return 5 - >>> - >>> def test_feature_x(target, dev, expensive_setup): - >>> assert(expensive_setup == 5) - - """ - - force_disable_cache = bool(int(os.environ.get("TVM_TEST_DISABLE_CACHE", "0"))) - cache_return_value = cache_return_value and not force_disable_cache - - # Deliberately at function scope, so that caching can track how - # many times the fixture has been used. If used, the cache gets - # cleared after the fixture is no longer needed. - scope = "function" - - def wraps(func): - if cache_return_value: - func = _fixture_cache(func) - func = pytest.fixture(func, scope=scope) - return func - - if func is None: - return wraps - - return wraps(func) - - -def _fixture_cache(func): - cache = {} - - # Can't use += on a bound method's property. Therefore, this is a - # list rather than a variable so that it can be accessed from the - # pytest_collection_modifyitems(). - num_uses_remaining = [0] - - # Using functools.lru_cache would require the function arguments - # to be hashable, which wouldn't allow caching fixtures that - # depend on numpy arrays. For example, a fixture that takes a - # numpy array as input, then calculates uses a slow method to - # compute a known correct output for that input. Therefore, - # including a fallback for serializable types. 
- def get_cache_key(*args, **kwargs): - try: - hash((args, kwargs)) - return (args, kwargs) - except TypeError as e: - pass - - try: - return pickle.dumps((args, kwargs)) - except TypeError as e: - raise TypeError( - "TVM caching of fixtures requires arguments to the fixture " - "to be either hashable or serializable" - ) from e - - @functools.wraps(func) - def wrapper(*args, **kwargs): - try: - cache_key = get_cache_key(*args, **kwargs) - - try: - cached_value = cache[cache_key] - except KeyError: - cached_value = cache[cache_key] = func(*args, **kwargs) - - try: - yield copy.deepcopy(cached_value) - except TypeError as e: - rfc_url = ( - "https://github.com/apache/tvm-rfcs/blob/main/rfcs/" - "0007-parametrized-unit-tests.md#unresolved-questions" - ) - message = ( - "TVM caching of fixtures can only be used on serializable data types, not {}.\n" - "Please see {} for details/discussion." - ).format(type(cached_value), rfc_url) - raise TypeError(message) from e - - finally: - # Clear the cache once all tests that use a particular fixture - # have completed. - num_uses_remaining[0] -= 1 - if not num_uses_remaining[0]: - cache.clear() - - # Set in the pytest_collection_modifyitems() - wrapper.num_uses_remaining = num_uses_remaining - - return wrapper - - -def _count_num_fixture_uses(items): - # Helper function, counts the number of tests that use each cached - # fixture. Should be called from pytest_collection_modifyitems(). - for item in items: - is_skipped = item.get_closest_marker("skip") or any( - mark.args[0] for mark in item.iter_markers("skipif") - ) - if is_skipped: - continue - - for fixturedefs in item._fixtureinfo.name2fixturedefs.values(): - # Only increment the active fixturedef, in a name has been overridden. - fixturedef = fixturedefs[-1] - if hasattr(fixturedef.func, "num_uses_remaining"): - fixturedef.func.num_uses_remaining[0] += 1 - - -def _remove_global_fixture_definitions(items): - # Helper function, removes fixture definitions from the global - # variables of the modules they were defined in. This is intended - # to improve readability of error messages by giving a NameError - # if a test function accesses a pytest fixture but doesn't include - # it as an argument. Should be called from - # pytest_collection_modifyitems(). - - modules = set(item.module for item in items) - - for module in modules: - for name in dir(module): - obj = getattr(module, name) - if hasattr(obj, "_pytestfixturefunction") and isinstance( - obj._pytestfixturefunction, _pytest.fixtures.FixtureFunctionMarker - ): - delattr(module, name) - - -def identity_after(x, sleep): - """Testing function to return identity after sleep - - Parameters - ---------- - x : int - The input value. - - sleep : float - The amount of time to sleep - - Returns - ------- - x : object - The original value - """ - if sleep: - time.sleep(sleep) - return x - - -def terminate_self(): - """Testing function to terminate the process.""" - sys.exit(-1) - - -tvm._ffi._init_api("testing", __name__) From b76d1aa338c8c4fd01df618450b60d28ea299b53 Mon Sep 17 00:00:00 2001 From: shingjan Date: Tue, 3 Aug 2021 16:53:21 -0700 Subject: [PATCH 14/38] get subpackage to work --- python/tvm/testing/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py index c960d7d34a4b..ce34c6e35556 100644 --- a/python/tvm/testing/__init__.py +++ b/python/tvm/testing/__init__.py @@ -16,5 +16,5 @@ # under the License. 
# pylint: disable=redefined-builtin, wildcard-import """Utility Python functions for TVM testing""" -from utils import * +from .utils import * from . import auto_scheduler From 5a227193b85c973428efc4944a2bf3646989a5c2 Mon Sep 17 00:00:00 2001 From: shingjan Date: Wed, 4 Aug 2021 11:16:20 -0700 Subject: [PATCH 15/38] avoid wild card matching --- python/tvm/testing/__init__.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py index ce34c6e35556..803202d395a1 100644 --- a/python/tvm/testing/__init__.py +++ b/python/tvm/testing/__init__.py @@ -16,5 +16,12 @@ # under the License. # pylint: disable=redefined-builtin, wildcard-import """Utility Python functions for TVM testing""" -from .utils import * -from . import auto_scheduler +from . import ( + assert_allclose, + assert_prim_expr_equal, + auto_scheduler, + device_enabled, + enabled_targets, + parameters, + utils, +) From 32c5e2265e84427c425caa6f66e6298e5ff47af0 Mon Sep 17 00:00:00 2001 From: shingjan Date: Wed, 4 Aug 2021 11:42:26 -0700 Subject: [PATCH 16/38] resolve self import --- python/tvm/testing/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py index 803202d395a1..fe27471e9735 100644 --- a/python/tvm/testing/__init__.py +++ b/python/tvm/testing/__init__.py @@ -16,12 +16,12 @@ # under the License. # pylint: disable=redefined-builtin, wildcard-import """Utility Python functions for TVM testing""" -from . import ( +from .utils import ( assert_allclose, assert_prim_expr_equal, - auto_scheduler, device_enabled, enabled_targets, parameters, - utils, ) + +from . import auto_scheduler From 2598c361d838f03247b895d064f037dee20809d5 Mon Sep 17 00:00:00 2001 From: shingjan Date: Wed, 4 Aug 2021 11:53:38 -0700 Subject: [PATCH 17/38] more dependencies --- python/tvm/testing/__init__.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py index fe27471e9735..69463a6ba01f 100644 --- a/python/tvm/testing/__init__.py +++ b/python/tvm/testing/__init__.py @@ -19,9 +19,33 @@ from .utils import ( assert_allclose, assert_prim_expr_equal, + check_numerical_grads, device_enabled, + device_test, + echo, enabled_targets, + exclude_targets, + fixture, + parameter, parameters, + parametrize_targets, + uses_gpu, + known_failing_targets, + object_use_count, + requires_cuda, + requires_cudagraph, + requires_gpu, + requires_llvm, + requires_rocm, + requires_rpc, + requires_tensorcore, + requires_metal, + requires_micro, + requires_opencl, + _auto_parametrize_target, + _count_num_fixture_uses, + _remove_global_fixture_definitions, + _parametrize_correlated_parameters, ) from . 
import auto_scheduler From 3c172b5d6b6abf91b5cb389a69f58499d4e333ff Mon Sep 17 00:00:00 2001 From: shingjan Date: Wed, 4 Aug 2021 14:24:20 -0700 Subject: [PATCH 18/38] fix ci issues --- python/tvm/autotvm/tuner/xgboost_cost_model.py | 4 +--- python/tvm/testing/__init__.py | 4 ++++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/python/tvm/autotvm/tuner/xgboost_cost_model.py b/python/tvm/autotvm/tuner/xgboost_cost_model.py index 8670210db1ee..9316c34dc59e 100644 --- a/python/tvm/autotvm/tuner/xgboost_cost_model.py +++ b/python/tvm/autotvm/tuner/xgboost_cost_model.py @@ -166,9 +166,7 @@ def _reset_pool(self, space, target, task): def _close_pool(self): if self.pool: - self.pool.terminate() - self.pool.join() - self.pool = None + del self.pool def _get_pool(self): if self.upper_model: diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py index 69463a6ba01f..d616917fbb1d 100644 --- a/python/tvm/testing/__init__.py +++ b/python/tvm/testing/__init__.py @@ -19,6 +19,8 @@ from .utils import ( assert_allclose, assert_prim_expr_equal, + check_bool_expr_is_true, + check_int_constraints_trans_consistency, check_numerical_grads, device_enabled, device_test, @@ -42,6 +44,8 @@ requires_metal, requires_micro, requires_opencl, + test_raise_error_callback, + test_wrap_callback, _auto_parametrize_target, _count_num_fixture_uses, _remove_global_fixture_definitions, From d94fb07d908731aa6e7b15fddcddef2a7a31131f Mon Sep 17 00:00:00 2001 From: shingjan Date: Wed, 4 Aug 2021 15:36:55 -0700 Subject: [PATCH 19/38] fixing ci issues --- python/tvm/autotvm/tuner/xgboost_cost_model.py | 4 ++-- python/tvm/testing/__init__.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python/tvm/autotvm/tuner/xgboost_cost_model.py b/python/tvm/autotvm/tuner/xgboost_cost_model.py index 9316c34dc59e..50e6769c0a60 100644 --- a/python/tvm/autotvm/tuner/xgboost_cost_model.py +++ b/python/tvm/autotvm/tuner/xgboost_cost_model.py @@ -246,7 +246,7 @@ def fit_log(self, records, plan_size, min_seed_records=500): feature_extract_func = _extract_curve_feature_log else: raise RuntimeError("Invalid feature type: " + self.fea_type) - res = pool.map(feature_extract_func, data) + res = pool.map_with_error_catching(feature_extract_func, data) # filter out feature with different shapes fea_len = len(self._get_feature([0])[0]) @@ -328,7 +328,7 @@ def _get_feature(self, indexes): pool = self._get_pool() # If we are forking, we can pass arguments in globals for better performance if multiprocessing.get_start_method(False) == "fork": - feas = pool.map(self.feature_extract_func, need_extract) + feas = pool.map_with_error_catching(self.feature_extract_func, need_extract) else: args = [(self.space.get(x), self.target, self.task) for x in need_extract] feas = pool.map(self.feature_extract_func, args) diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py index d616917fbb1d..59aabfd6c50d 100644 --- a/python/tvm/testing/__init__.py +++ b/python/tvm/testing/__init__.py @@ -44,6 +44,7 @@ requires_metal, requires_micro, requires_opencl, + test_check_eq_callback, test_raise_error_callback, test_wrap_callback, _auto_parametrize_target, From 5cb628e1e4a91893f2c68ccfd8866e80604c9a2a Mon Sep 17 00:00:00 2001 From: shingjan Date: Wed, 4 Aug 2021 16:27:15 -0700 Subject: [PATCH 20/38] last xgboost error --- python/tvm/autotvm/tuner/xgboost_cost_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/autotvm/tuner/xgboost_cost_model.py 
b/python/tvm/autotvm/tuner/xgboost_cost_model.py index 50e6769c0a60..fc45a504c354 100644 --- a/python/tvm/autotvm/tuner/xgboost_cost_model.py +++ b/python/tvm/autotvm/tuner/xgboost_cost_model.py @@ -331,7 +331,7 @@ def _get_feature(self, indexes): feas = pool.map_with_error_catching(self.feature_extract_func, need_extract) else: args = [(self.space.get(x), self.target, self.task) for x in need_extract] - feas = pool.map(self.feature_extract_func, args) + feas = pool.map_with_error_catching(self.feature_extract_func, args) for i, fea in zip(need_extract, feas): fea_cache[i] = fea From 2e06c35d2eb77abad0ac5f41185347dfc57ee01d Mon Sep 17 00:00:00 2001 From: shingjan Date: Wed, 4 Aug 2021 16:36:39 -0700 Subject: [PATCH 21/38] revert changes to xgboost_cost_model --- python/tvm/autotvm/tuner/xgboost_cost_model.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/python/tvm/autotvm/tuner/xgboost_cost_model.py b/python/tvm/autotvm/tuner/xgboost_cost_model.py index fc45a504c354..81904354c5fd 100644 --- a/python/tvm/autotvm/tuner/xgboost_cost_model.py +++ b/python/tvm/autotvm/tuner/xgboost_cost_model.py @@ -23,7 +23,6 @@ import numpy as np -from ...contrib import popen_pool from .. import feature from ..utils import get_rank from .metric import max_curve, recall_curve, cover_curve @@ -162,11 +161,13 @@ def _reset_pool(self, space, target, task): _extract_space = space _extract_target = target _extract_task = task - self.pool = popen_pool.PopenPoolExecutor(self.num_threads) + self.pool = multiprocessing.Pool(self.num_threads) def _close_pool(self): if self.pool: - del self.pool + self.pool.terminate() + self.pool.join() + self.pool = None def _get_pool(self): if self.upper_model: @@ -246,7 +247,7 @@ def fit_log(self, records, plan_size, min_seed_records=500): feature_extract_func = _extract_curve_feature_log else: raise RuntimeError("Invalid feature type: " + self.fea_type) - res = pool.map_with_error_catching(feature_extract_func, data) + res = pool.map(feature_extract_func, data) # filter out feature with different shapes fea_len = len(self._get_feature([0])[0]) @@ -328,10 +329,10 @@ def _get_feature(self, indexes): pool = self._get_pool() # If we are forking, we can pass arguments in globals for better performance if multiprocessing.get_start_method(False) == "fork": - feas = pool.map_with_error_catching(self.feature_extract_func, need_extract) + feas = pool.map(self.feature_extract_func, need_extract) else: args = [(self.space.get(x), self.target, self.task) for x in need_extract] - feas = pool.map_with_error_catching(self.feature_extract_func, args) + feas = pool.map(self.feature_extract_func, args) for i, fea in zip(need_extract, feas): fea_cache[i] = fea From e9619438af9382d298747e776af565505ff052f5 Mon Sep 17 00:00:00 2001 From: shingjan Date: Thu, 5 Aug 2021 12:12:56 -0700 Subject: [PATCH 22/38] add ErrorTest and _ffi_api --- python/tvm/testing/__init__.py | 48 +++++++++------------------------- 1 file changed, 12 insertions(+), 36 deletions(-) diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py index 59aabfd6c50d..6b79eefae015 100644 --- a/python/tvm/testing/__init__.py +++ b/python/tvm/testing/__init__.py @@ -16,41 +16,17 @@ # under the License. 
# pylint: disable=redefined-builtin, wildcard-import """Utility Python functions for TVM testing""" -from .utils import ( - assert_allclose, - assert_prim_expr_equal, - check_bool_expr_is_true, - check_int_constraints_trans_consistency, - check_numerical_grads, - device_enabled, - device_test, - echo, - enabled_targets, - exclude_targets, - fixture, - parameter, - parameters, - parametrize_targets, - uses_gpu, - known_failing_targets, - object_use_count, - requires_cuda, - requires_cudagraph, - requires_gpu, - requires_llvm, - requires_rocm, - requires_rpc, - requires_tensorcore, - requires_metal, - requires_micro, - requires_opencl, - test_check_eq_callback, - test_raise_error_callback, - test_wrap_callback, - _auto_parametrize_target, - _count_num_fixture_uses, - _remove_global_fixture_definitions, - _parametrize_correlated_parameters, -) +from .utils import assert_allclose, assert_prim_expr_equal, check_bool_expr_is_true +from .utils import check_int_constraints_trans_consistency, check_numerical_grads +from .utils import device_enabled, enabled_targets, exclude_targets +from .utils import fixture, parameter, parameters, parametrize_targets, uses_gpu +from .utils import known_failing_targets, requires_cuda, requires_cudagraph +from .utils import requires_gpu, requires_llvm, requires_rocm, requires_rpc, requires_tensorcore +from .utils import requires_metal, requires_rocm, requires_rpc, requires_tensorcore, requires_metal +from .utils import requires_micro, requires_opencl +from .utils import device_test, echo, ErrorTest, object_use_count +from .utils import test_check_eq_callback, test_raise_error_callback, test_wrap_callback +from .utils import _auto_parametrize_target, _count_num_fixture_uses +from .utils import _remove_global_fixture_definitions, _parametrize_correlated_parameters from . import auto_scheduler From 0aa7ba1e7773511ecde0c2e92ecaae75e3604924 Mon Sep 17 00:00:00 2001 From: shingjan Date: Thu, 5 Aug 2021 12:27:02 -0700 Subject: [PATCH 23/38] reorg --- python/tvm/testing/__init__.py | 6 ++++-- python/tvm/testing/_ffi_api.py | 21 +++++++++++++++++++++ python/tvm/testing/utils.py | 3 --- 3 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 python/tvm/testing/_ffi_api.py diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py index 6b79eefae015..8340e7e25404 100644 --- a/python/tvm/testing/__init__.py +++ b/python/tvm/testing/__init__.py @@ -24,9 +24,11 @@ from .utils import requires_gpu, requires_llvm, requires_rocm, requires_rpc, requires_tensorcore from .utils import requires_metal, requires_rocm, requires_rpc, requires_tensorcore, requires_metal from .utils import requires_micro, requires_opencl -from .utils import device_test, echo, ErrorTest, object_use_count -from .utils import test_check_eq_callback, test_raise_error_callback, test_wrap_callback from .utils import _auto_parametrize_target, _count_num_fixture_uses from .utils import _remove_global_fixture_definitions, _parametrize_correlated_parameters +from ._ffi_api import nop, echo, device_test, run_check_signal, object_use_count +from ._ffi_api import test_wrap_callback, test_raise_error_callback, test_check_eq_callback +from ._ffi_api import ErrorTest, FrontendTestModule + from . 
import auto_scheduler diff --git a/python/tvm/testing/_ffi_api.py b/python/tvm/testing/_ffi_api.py new file mode 100644 index 000000000000..2252758e2e58 --- /dev/null +++ b/python/tvm/testing/_ffi_api.py @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""FFI APIs for tvm.testing""" +import tvm._ffi + + +tvm._ffi._init_api("testing", __name__) \ No newline at end of file diff --git a/python/tvm/testing/utils.py b/python/tvm/testing/utils.py index 79518ac24984..6ab616984718 100644 --- a/python/tvm/testing/utils.py +++ b/python/tvm/testing/utils.py @@ -1285,6 +1285,3 @@ def identity_after(x, sleep): def terminate_self(): """Testing function to terminate the process.""" sys.exit(-1) - - -tvm._ffi._init_api("testing", __name__) From 108402c6709d561d637d6a785367755322ca1367 Mon Sep 17 00:00:00 2001 From: shingjan Date: Thu, 5 Aug 2021 13:07:28 -0700 Subject: [PATCH 24/38] format _ffi_api.py --- python/tvm/testing/_ffi_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/testing/_ffi_api.py b/python/tvm/testing/_ffi_api.py index 2252758e2e58..56a77223b767 100644 --- a/python/tvm/testing/_ffi_api.py +++ b/python/tvm/testing/_ffi_api.py @@ -18,4 +18,4 @@ import tvm._ffi -tvm._ffi._init_api("testing", __name__) \ No newline at end of file +tvm._ffi._init_api("testing", __name__) From 43b289404f3e25ec5c26d46523d11fd93beb65ae Mon Sep 17 00:00:00 2001 From: shingjan Date: Thu, 5 Aug 2021 13:12:28 -0700 Subject: [PATCH 25/38] fix reimported --- python/tvm/testing/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py index 8340e7e25404..343c1c5d7b3b 100644 --- a/python/tvm/testing/__init__.py +++ b/python/tvm/testing/__init__.py @@ -21,9 +21,8 @@ from .utils import device_enabled, enabled_targets, exclude_targets from .utils import fixture, parameter, parameters, parametrize_targets, uses_gpu from .utils import known_failing_targets, requires_cuda, requires_cudagraph -from .utils import requires_gpu, requires_llvm, requires_rocm, requires_rpc, requires_tensorcore -from .utils import requires_metal, requires_rocm, requires_rpc, requires_tensorcore, requires_metal -from .utils import requires_micro, requires_opencl +from .utils import requires_gpu, requires_llvm, requires_rocm, requires_rpc +from .utils import requires_tensorcore, requires_metal, requires_micro, requires_opencl from .utils import _auto_parametrize_target, _count_num_fixture_uses from .utils import _remove_global_fixture_definitions, _parametrize_correlated_parameters From 10aeba9076d97fcb9c863fcbc428396b68c1c1a0 Mon Sep 17 00:00:00 2001 From: shingjan Date: Fri, 6 Aug 2021 18:08:36 -0700 Subject: [PATCH 26/38] all changes --- python/tvm/auto_scheduler/__init__.py | 1 
+ python/tvm/auto_scheduler/measure.py | 27 ++- .../tvm/auto_scheduler/task_input_buffer.py | 210 ++++++++++++++++++ 3 files changed, 233 insertions(+), 5 deletions(-) create mode 100644 python/tvm/auto_scheduler/task_input_buffer.py diff --git a/python/tvm/auto_scheduler/__init__.py b/python/tvm/auto_scheduler/__init__.py index ff6d82a0242c..69729631b0ed 100644 --- a/python/tvm/auto_scheduler/__init__.py +++ b/python/tvm/auto_scheduler/__init__.py @@ -26,6 +26,7 @@ from . import relay_integration from . import search_policy from . import search_task +from . import task_input_buffer from . import task_scheduler from . import utils from . import workload_registry diff --git a/python/tvm/auto_scheduler/measure.py b/python/tvm/auto_scheduler/measure.py index 7bc0c3a6ce79..741e549e3689 100644 --- a/python/tvm/auto_scheduler/measure.py +++ b/python/tvm/auto_scheduler/measure.py @@ -36,6 +36,7 @@ import shutil import tempfile import multiprocessing +from multiprocessing.pool import ThreadPool import logging import tvm._ffi @@ -62,6 +63,11 @@ deserialize_workload_registry_entry, ) +from .task_input_buffer import ( + serialize_task_input_buffer, + deserialize_task_input_buffer, +) + # pylint: disable=invalid-name logger = logging.getLogger("auto_scheduler") @@ -139,12 +145,14 @@ def serialize(self): return [ _ffi_api.SerializeMeasureInput(self), serialize_workload_registry_entry(self.task.workload_key), + serialize_task_input_buffer(self.task.workload_key), ] @staticmethod def deserialize(data): inp = _ffi_api.DeserializeMeasureInput(data[0]) deserialize_workload_registry_entry(data[1]) + deserialize_task_input_buffer(data[2]) return recover_measure_input(inp) @@ -702,7 +710,7 @@ def local_builder_build(inputs, timeout, n_parallel, build_func="default", verbo The build results of these MeasureInputs. """ # This pool is not doing computationally intensive work, so we can use threads - pool = multiprocessing.pool.ThreadPool(n_parallel) + pool = ThreadPool(n_parallel) tuple_res = pool.map( local_build_worker, [ @@ -820,6 +828,7 @@ def prepare_input_map(args): def _timed_eval_func( inp_serialized, build_res, + tensor_input_map, number, repeat, min_repeat_ms, @@ -863,7 +872,8 @@ def _timed_eval_func( random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True) assert random_fill, "Please make sure USE_RANDOM is ON in the config.cmake" - tensor_input_map = prepare_input_map(build_res.args) if task_input_names else {} + if not task_input_names: + tensor_input_map = {} args = [] task_inputs_count = 0 for arg in build_res.args: @@ -968,6 +978,7 @@ def local_run( measure_results = [] assert len(inputs) == len(build_results), "Measure input size should be equal to build results" + tensor_input_map = prepare_input_map(build_results.args) for inp, build_res in zip(inputs, build_results): if build_res.error_no != 0: res = ( @@ -984,6 +995,7 @@ def local_run( args=( inp.serialize(), build_res, + tensor_input_map, number, repeat, min_repeat_ms, @@ -1027,6 +1039,7 @@ def _timed_rpc_run( key, host, port, + tensor_input_map, priority, timeout, number, @@ -1079,7 +1092,8 @@ def _timed_rpc_run( random_fill ), "Please make sure USE_RANDOM is ON in the config.cmake on the remote devices" - tensor_input_map = prepare_input_map(build_res.args) if task_input_names else {} + if not task_input_names: + tensor_input_map = {} args = [] task_inputs_count = 0 for arg in build_res.args: @@ -1151,7 +1165,7 @@ def _rpc_run_worker(args): res : MeasureResult The measure result of this Runner thread. 
""" - _, build_res, _, _, _, _, timeout, _, _, _, _, _, verbose = args + _, build_res, _, _, _, _, _, timeout, _, _, _, _, _, verbose = args if build_res.error_no != MeasureErrorNo.NO_ERROR: return ( (MAX_FLOAT,), @@ -1193,6 +1207,7 @@ def rpc_runner_run( key, host, port, + tensor_input_map, priority=1, n_parallel=1, timeout=10, @@ -1258,7 +1273,8 @@ def rpc_runner_run( """ assert len(inputs) == len(build_results), "Measure input size should be equal to build results" # This pool is not doing computationally intensive work, so we can use threads - pool = multiprocessing.pool.ThreadPool(n_parallel) + pool = ThreadPool(n_parallel) + tensor_input_map = prepare_input_map(build_results.args) tuple_res = pool.map( _rpc_run_worker, [ @@ -1276,6 +1292,7 @@ def rpc_runner_run( cooldown_interval, enable_cpu_cache_flush, verbose, + tensor_input_map, ) for inp, build_res in zip(inputs, build_results) ], diff --git a/python/tvm/auto_scheduler/task_input_buffer.py b/python/tvm/auto_scheduler/task_input_buffer.py new file mode 100644 index 000000000000..ef787216ad0c --- /dev/null +++ b/python/tvm/auto_scheduler/task_input_buffer.py @@ -0,0 +1,210 @@ +""" The definiton of SearchTask """ +import os +import numpy as np + +from tvm.runtime import ndarray +from tvm.runtime._ffi_node_api import LoadJSON, SaveJSON + + +# The map stores special registered buffer for measurement. +# This can be used for sparse workloads when we cannot use random tensors for measurment. +# { +# "workload_key_0": { +# "task_input_0": Tensor(...), +# "task_input_1": Tensor(...) +# }, +# "workload_key_1": { +# "task_input_2": Tensor(...), +# "task_input_3": Tensor(...) +# }, +# ... +# } +TASK_INPUT_BUFFER_TABLE = {} + + +def _save_buffer_to_file(buffer_name, buffer_data): + """Save the current Tensor buffer to a numpy file. + + File name will be: {buffer_name}.{buffer_shape}_{buffer_data_type}.npy + """ + np_data = buffer_data.numpy() + + buffer_name += "." + for i in np_data.shape: + buffer_name += "%d_" % (i) + buffer_name += "%s" % (np_data.dtype) + buffer_name += ".npy" + + np_data.tofile(buffer_name, " ") + + +def _try_load_buffer_from_file(buffer_name): + """Try to load buffer from a numpy file, if not found, return None. + + File name has a same format as `_save_buffer_to_file`. + """ + filelist = os.listdir() + + for file in filelist: + if file.startswith(buffer_name + "."): + meta_info = file.split(".")[-2].split("_") + shape = [int(i) for i in meta_info[:-1]] + dtype = meta_info[-1] + buffer_data = np.fromfile(file, dtype=dtype, sep=" ") + buffer_data = buffer_data.reshape(shape) + return ndarray.array(buffer_data) + + return None + + +def register_task_input_buffer( + workload_key, + input_name, + input_data, + overwrite=False, + save_to_file=False, +): + """Register special buffer for measurement. + + Parameters + ---------- + workload_key : str + The workload key of the SearchTask. + + input_name : str + The name of input buffer. + + input_data : tvm.nd.NDArray + The input Tensor data. + + overwrite : bool = False + Whether to overwrite the data if a name has already registered. + + save_to_file : bool = False + Whether to save the data to a local file as well. This can be reused to resume the last + tuning process. + + Returns + ------- + tvm.nd.NDArray + The actual registered Tensor data of this input_name. With `overwrite` set to False, will + return the original one if the name has already registered before. 
+ """ + global TASK_INPUT_BUFFER_TABLE + + if workload_key not in TASK_INPUT_BUFFER_TABLE: + TASK_INPUT_BUFFER_TABLE[workload_key] = {} + input_table = TASK_INPUT_BUFFER_TABLE[workload_key] + + if not overwrite: + if input_name not in input_table.keys(): + # Try to load buffer data from local file + tensor_from_file = _try_load_buffer_from_file(input_name) + if tensor_from_file: + input_table[input_name] = tensor_from_file + elif input_name in input_table.keys(): + raise RuntimeError( + "Tensor %s exists in TASK_INPUT_BUFFER_TABLE, %s" + % (input_name, "set overwrite to True or this Tensor will not be registered") + ) + + input_table[input_name] = input_data + if save_to_file: + _save_buffer_to_file(input_name, input_data) + return input_data + + +def get_task_input_buffer(workload_key, input_name): + """Get special buffer for measurement. + + The buffers are registered by `register_task_input_buffer`. + + Parameters + ---------- + workload_key : str + The workload key of the SearchTask. + + input_name : str + The name of input buffer. + + Returns + ------- + tvm.nd.NDArray + The registered input buffer. + """ + global TASK_INPUT_BUFFER_TABLE + + if workload_key not in TASK_INPUT_BUFFER_TABLE: + TASK_INPUT_BUFFER_TABLE[workload_key] = {} + input_table = TASK_INPUT_BUFFER_TABLE[workload_key] + + if input_name not in input_table: + # Try to load buffer data from local file + tensor_from_file = _try_load_buffer_from_file(input_name) + if tensor_from_file: + input_table[input_name] = tensor_from_file + + # Then check for the default table, the input names extracted from a relay model will be + # stored here for we're not able to get the workload_key at that time + if input_name not in input_table: + input_table = TASK_INPUT_BUFFER_TABLE["default"] + + if input_name in input_table: + return input_table[input_name] + + raise ValueError( + "%s not found in TASK_INPUT_BUFFER_TABLE, " % (input_name) + + "should provide with `SearchTask(..., task_inputs={...})`" + ) + + +def serialize_task_input_buffer(workload_key): + """ + Serialize a task input buffer entry. + + This is used when the start method of multiprocessing is spawn. + We need to serialize the task input buffer table it in the new processes. + + Parameters + ---------- + workload_key : str + The workload key + + Returns + ------- + data: Tuple + The serialized pickable data + """ + sname = workload_key + + # the return value of get_task_input_buffer is tvm.ndarray + # convert it to np.array to make it picklable, + global TASK_INPUT_BUFFER_TABLE + + if workload_key not in TASK_INPUT_BUFFER_TABLE: + TASK_INPUT_BUFFER_TABLE[workload_key] = {} + svalue = TASK_INPUT_BUFFER_TABLE[workload_key] + if not callable(svalue): + # pylint: disable=assignment-from-no-return + svalue = SaveJSON(svalue) + + return sname, svalue + + +def deserialize_task_input_buffer(data): + """ + Deserialize a task input buffer entry. 
+ This should be used along with :code:`serialize_task_input_buffer_table` + + Parameters + ---------- + data: Tuple + The return value of :code:`serialize_task_input_buffer_table` + """ + global TASK_INPUT_BUFFER_TABLE + + name, value = data + # pylint: disable=assignment-from-no-return + if not callable(value): + value = LoadJSON(value) + TASK_INPUT_BUFFER_TABLE[name] = value From 615450ac78741dea5ed2ab8b3f0be4c396b29758 Mon Sep 17 00:00:00 2001 From: shingjan Date: Mon, 9 Aug 2021 14:42:57 -0700 Subject: [PATCH 27/38] fix measure tests --- python/tvm/auto_scheduler/measure.py | 139 ++++++++++++++------------- 1 file changed, 71 insertions(+), 68 deletions(-) diff --git a/python/tvm/auto_scheduler/measure.py b/python/tvm/auto_scheduler/measure.py index 741e549e3689..f761731239f8 100644 --- a/python/tvm/auto_scheduler/measure.py +++ b/python/tvm/auto_scheduler/measure.py @@ -825,10 +825,56 @@ def prepare_input_map(args): return tensor_input_map +def prepare_runner_args(inp, build_res): + """This function prepare the arguments for local/rpc runner in main process + Parameters + ---------- + inp : MeasureInput + Measure input to be measured. + + build_res : BuildResult + Build result to be measured. + + Returns + ------- + List[NDArray, None] : + List of NDArray of task input buffer. None if argument not present in task_input_names. + + """ + # pylint: disable=import-outside-toplevel + from .search_task import get_task_input_buffer # lazily import to avoid recursive dependency + + task_input_names = inp.task.task_input_names + dev = ndarray.device(str(inp.task.target), 0) + tensor_input_map = prepare_input_map(build_res.args) + if not task_input_names: + tensor_input_map = {} + args = [] + task_inputs_count = 0 + for arg in build_res.args: + if arg in tensor_input_map: + tensor_name = tensor_input_map[arg] + if tensor_name in task_input_names: + temp_ndarray = get_task_input_buffer(inp.task.workload_key, tensor_name) + # convert tvm.NDArray to picklable numpy.ndarray + args.append(ndarray.NDArray.numpy(temp_ndarray)) + task_inputs_count += 1 + else: + raise ValueError( + "%s not found in task_inputs, " % (tensor_name) + + "should provide with `SearchTask(..., task_inputs={...})`" + ) + else: + args.append(None) + if task_inputs_count != len(task_input_names): + raise RuntimeError("task_inputs not fully matched, check if there's any unexpected error") + return args + + def _timed_eval_func( inp_serialized, build_res, - tensor_input_map, + args, number, repeat, min_repeat_ms, @@ -836,11 +882,7 @@ def _timed_eval_func( enable_cpu_cache_flush, verbose, ): - # pylint: disable=import-outside-toplevel - from .search_task import get_task_input_buffer # lazily import to avoid recursive dependency - inp = MeasureInput.deserialize(inp_serialized) - task_input_names = inp.task.task_input_names tic = time.time() error_no = 0 error_msg = None @@ -871,34 +913,17 @@ def _timed_eval_func( try: random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True) assert random_fill, "Please make sure USE_RANDOM is ON in the config.cmake" - - if not task_input_names: - tensor_input_map = {} - args = [] - task_inputs_count = 0 - for arg in build_res.args: - if arg in tensor_input_map: - tensor_name = tensor_input_map[arg] - if tensor_name in task_input_names: - args.append( - ndarray.array( - get_task_input_buffer(inp.task.workload_key, tensor_name), dev - ) - ) - task_inputs_count += 1 - else: - raise ValueError( - "%s not found in task_inputs, " % (tensor_name) - + "should provide with `SearchTask(..., 
task_inputs={...})`" - ) - else: - empty_array = ndarray.empty(get_const_tuple(arg.shape), arg.dtype, dev) + assert len(args) == len(build_res.args) + for idx in range(len(args)): + if args[idx] == None: + build_res_arg = build_res.args[idx] + empty_array = ndarray.empty( + get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev + ) random_fill(empty_array) - args.append(empty_array) - if task_inputs_count != len(task_input_names): - raise RuntimeError( - "task_inputs not fully matched, check if there's any unexpected error" - ) + args[idx] = empty_array + else: + args[idx] = ndarray.array(args[idx], dev) dev.sync() costs = time_f(*args).results # pylint: disable=broad-except @@ -978,7 +1003,6 @@ def local_run( measure_results = [] assert len(inputs) == len(build_results), "Measure input size should be equal to build results" - tensor_input_map = prepare_input_map(build_results.args) for inp, build_res in zip(inputs, build_results): if build_res.error_no != 0: res = ( @@ -989,13 +1013,14 @@ def local_run( time.time(), ) else: + args = prepare_runner_args(inp, build_res) res = call_func_with_timeout( timeout, _timed_eval_func, args=( inp.serialize(), build_res, - tensor_input_map, + args, number, repeat, min_repeat_ms, @@ -1036,10 +1061,10 @@ def local_run( def _timed_rpc_run( inp_serialized, build_res, + args, key, host, port, - tensor_input_map, priority, timeout, number, @@ -1049,11 +1074,7 @@ def _timed_rpc_run( enable_cpu_cache_flush, verbose, ): - # pylint: disable=import-outside-toplevel - from .search_task import get_task_input_buffer # lazily import to avoid recursive dependency - inp = MeasureInput.deserialize(inp_serialized) - task_input_names = inp.task.task_input_names tic = time.time() error_no = 0 error_msg = None @@ -1092,33 +1113,17 @@ def _timed_rpc_run( random_fill ), "Please make sure USE_RANDOM is ON in the config.cmake on the remote devices" - if not task_input_names: - tensor_input_map = {} - args = [] - task_inputs_count = 0 - for arg in build_res.args: - if arg in tensor_input_map: - tensor_name = tensor_input_map[arg] - if tensor_name in task_input_names: - args.append( - ndarray.array( - get_task_input_buffer(inp.task.workload_key, tensor_name), dev - ) - ) - task_inputs_count += 1 - else: - raise ValueError( - "%s not found in task_inputs, " % (tensor_name) - + "should provide with `SearchTask(..., task_inputs={...})`" - ) - else: - empty_array = ndarray.empty(get_const_tuple(arg.shape), arg.dtype, dev) + assert len(args) == len(build_res.args) + for idx in range(len(args)): + if args[idx] == None: + build_res_arg = build_res.args[idx] + empty_array = ndarray.empty( + get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev + ) random_fill(empty_array) - args.append(empty_array) - if task_inputs_count != len(task_input_names): - logger.warning( - "task_inputs not fully matched, check if there's any unexpected error" - ) + args[idx] = empty_array + else: + args[idx] = ndarray.array(args[idx], dev) dev.sync() # First run for check that the kernel is correct @@ -1207,7 +1212,6 @@ def rpc_runner_run( key, host, port, - tensor_input_map, priority=1, n_parallel=1, timeout=10, @@ -1274,13 +1278,13 @@ def rpc_runner_run( assert len(inputs) == len(build_results), "Measure input size should be equal to build results" # This pool is not doing computationally intensive work, so we can use threads pool = ThreadPool(n_parallel) - tensor_input_map = prepare_input_map(build_results.args) tuple_res = pool.map( _rpc_run_worker, [ ( inp.serialize(), build_res, + 
prepare_runner_args(inp, build_res), key, host, port, @@ -1292,7 +1296,6 @@ def rpc_runner_run( cooldown_interval, enable_cpu_cache_flush, verbose, - tensor_input_map, ) for inp, build_res in zip(inputs, build_results) ], From 1849e297f5602e1b6de00b240cd79cb983da5527 Mon Sep 17 00:00:00 2001 From: shingjan Date: Mon, 9 Aug 2021 14:51:06 -0700 Subject: [PATCH 28/38] restore tvm.python.testing.py --- python/tvm/testing/utils.py | 253 +++++++++++++++++------------------- 1 file changed, 118 insertions(+), 135 deletions(-) diff --git a/python/tvm/testing/utils.py b/python/tvm/testing/utils.py index 6ab616984718..7aa448a64f4a 100644 --- a/python/tvm/testing/utils.py +++ b/python/tvm/testing/utils.py @@ -17,10 +17,8 @@ # pylint: disable=invalid-name,unnecessary-comprehension """ TVM testing utilities - Testing Markers *************** - We use pytest markers to specify the requirements of test functions. Currently there is a single distinction that matters for our testing environment: does the test require a gpu. For tests that require just a gpu or just a cpu, we @@ -28,27 +26,21 @@ available. To avoid running tests that don't require a gpu on gpu nodes, this decorator also sets the pytest marker `gpu` so we can use select the gpu subset of tests (using `pytest -m gpu`). - Unfortunately, many tests are written like this: - .. python:: - def test_something(): for target in all_targets(): do_something() - The test uses both gpu and cpu targets, so the test needs to be run on both cpu and gpu nodes. But we still want to only run the cpu targets on the cpu testing node. The solution is to mark these tests with the gpu marker so they will be run on the gpu nodes. But we also modify all_targets (renamed to enabled_targets) so that it only returns gpu targets on gpu nodes and cpu targets on cpu nodes (using an environment variable). - Instead of using the all_targets function, future tests that would like to test against a variety of targets should use the :py:func:`tvm.testing.parametrize_targets` functionality. This allows us greater control over which targets are run on which testing nodes. - If in the future we want to add a new type of testing node (for example fpgas), we need to add a new marker in `tests/python/pytest.ini` and a new function in this module. Then targets using this node should be added to the @@ -70,14 +62,14 @@ def test_something(): import tvm.tir import tvm.te import tvm._ffi -from tvm.contrib import nvcc + +from tvm.contrib import nvcc, cudnn from tvm.error import TVMError def assert_allclose(actual, desired, rtol=1e-7, atol=1e-7): """Version of np.testing.assert_allclose with `atol` and `rtol` fields set in reasonable defaults. - Arguments `actual` and `desired` are not interchangeable, since the function compares the `abs(actual-desired)` with `atol+rtol*abs(desired)`. Since we often allow `desired` to be close to zero, we generally want non-zero `atol`. @@ -93,11 +85,9 @@ def check_numerical_grads( ): """A helper function that checks that numerical gradients of a function are equal to gradients computed in some different way (analytical gradients). - Numerical gradients are computed using finite difference approximation. To reduce the number of function evaluations, the number of points used is gradually increased if the error value is too high (up to 5 points). - Parameters ---------- function @@ -105,25 +95,19 @@ def check_numerical_grads( arguments (either `function(*input_values)` or `function(**input_values)` should be correct) and returns a scalar result. 
Should accept numpy ndarrays. - input_values : Dict[str, numpy.ndarray] or List[numpy.ndarray] A list of values or a dict assigning values to variables. Represents the point at which gradients should be computed. - grad_values : Dict[str, numpy.ndarray] or List[numpy.ndarray] Gradients computed using a different method. - function_value : float, optional Should be equal to `function(**input_values)`. - delta : float, optional A small number used for numerical computation of partial derivatives. The default 1e-3 is a good choice for float32. - atol : float, optional Absolute tolerance. Gets multiplied by `sqrt(n)` where n is the size of a gradient. - rtol : float, optional Relative tolerance. """ @@ -247,12 +231,10 @@ def compare_derivative(j, n_der, grad): def assert_prim_expr_equal(lhs, rhs): """Assert lhs and rhs equals to each iother. - Parameters ---------- lhs : tvm.tir.PrimExpr The left operand. - rhs : tvm.tir.PrimExpr The left operand. """ @@ -266,14 +248,12 @@ def assert_prim_expr_equal(lhs, rhs): def check_bool_expr_is_true(bool_expr, vranges, cond=None): """Check that bool_expr holds given the condition cond for every value of free variables from vranges. - for example, 2x > 4y solves to x > 2y given x in (0, 10) and y in (0, 10) here bool_expr is x > 2y, vranges is {x: (0, 10), y: (0, 10)}, cond is 2x > 4y We creates iterations to check, for x in range(10): for y in range(10): assert !(2x > 4y) || (x > 2y) - Parameters ---------- bool_expr : tvm.ir.PrimExpr @@ -317,7 +297,6 @@ def _compute_body(*us): def check_int_constraints_trans_consistency(constraints_trans, vranges=None): """Check IntConstraintsTransform is a bijective transformation. - Parameters ---------- constraints_trans : arith.IntConstraintsTransform @@ -375,11 +354,12 @@ def _check_forward(constraints1, constraints2, varmap, backvarmap): def _get_targets(target_str=None): if target_str is None: target_str = os.environ.get("TVM_TEST_TARGETS", "") + # Use dict instead of set for de-duplication so that the + # targets stay in the order specified. + target_names = list({t.strip(): None for t in target_str.split(";") if t.strip()}) - if len(target_str) == 0: - target_str = DEFAULT_TEST_TARGETS - - target_names = set(t.strip() for t in target_str.split(";") if t.strip()) + if not target_names: + target_names = DEFAULT_TEST_TARGETS targets = [] for target in target_names: @@ -413,32 +393,36 @@ def _get_targets(target_str=None): return targets -DEFAULT_TEST_TARGETS = ( - "llvm;cuda;opencl;metal;rocm;vulkan -from_device=0;nvptx;" - "llvm -device=arm_cpu;opencl -device=mali,aocl_sw_emu" -) +DEFAULT_TEST_TARGETS = [ + "llvm", + "llvm -device=arm_cpu", + "cuda", + "cuda -model=unknown -libs=cudnn", + "nvptx", + "vulkan -from_device=0", + "opencl", + "opencl -device=mali,aocl_sw_emu", + "opencl -device=intel_graphics", + "metal", + "rocm", +] def device_enabled(target): """Check if a target should be used when testing. - It is recommended that you use :py:func:`tvm.testing.parametrize_targets` instead of manually checking if a target is enabled. - This allows the user to control which devices they are testing against. In tests, this should be used to check if a device should be used when said device is an optional part of the test. - Parameters ---------- target : str Target string to check against - Returns ------- bool Whether or not the device associated with this target is enabled. 
- Example ------- >>> @tvm.testing.uses_gpu @@ -446,7 +430,6 @@ def device_enabled(target): >>> for target in ["cuda", "llvm"]: >>> if device_enabled(target): >>> test_body... - Here, `test_body` will only be reached by with `target="cuda"` on gpu test nodes and `target="llvm"` on cpu test nodes. """ @@ -458,24 +441,19 @@ def device_enabled(target): def enabled_targets(): """Get all enabled targets with associated devices. - In most cases, you should use :py:func:`tvm.testing.parametrize_targets` instead of this function. - In this context, enabled means that TVM was built with support for this target, the target name appears in the TVM_TEST_TARGETS environment variable, and a suitable device for running this target exists. If TVM_TEST_TARGETS is not set, it defaults to variable DEFAULT_TEST_TARGETS in this module. - If you use this function in a test, you **must** decorate the test with :py:func:`tvm.testing.uses_gpu` (otherwise it will never be run on the gpu). - Returns ------- targets: list A list of pairs of all enabled devices and the associated context - """ return [(t["target"], tvm.device(t["target"])) for t in _get_targets() if t["is_runnable"]] @@ -492,11 +470,9 @@ def _compose(args, decs): def uses_gpu(*args): """Mark to differentiate tests that use the GPU in some capacity. - These tests will be run on CPU-only test nodes and on test nodes with GPUs. To mark a test that must have a GPU present to run, use :py:func:`tvm.testing.requires_gpu`. - Parameters ---------- f : function @@ -508,9 +484,7 @@ def uses_gpu(*args): def requires_gpu(*args): """Mark a test as requiring a GPU to run. - Tests with this mark will not be run unless a gpu is present. - Parameters ---------- f : function @@ -532,9 +506,7 @@ def requires_gpu(*args): def requires_cuda(*args): """Mark a test as requiring the CUDA runtime. - This also marks the test as requiring a cuda gpu. - Parameters ---------- f : function @@ -548,17 +520,32 @@ def requires_cuda(*args): return _compose(args, _requires_cuda) +def requires_cudnn(*args): + """Mark a test as requiring the cuDNN library. + This also marks the test as requiring a cuda gpu. + Parameters + ---------- + f : function + Function to mark + """ + + requirements = [ + pytest.mark.skipif( + not cudnn.exists(), reason="cuDNN library not enabled, or not installed" + ), + *requires_cuda(), + ] + return _compose(args, requirements) + + def requires_nvptx(*args): """Mark a test as requiring the NVPTX compilation on the CUDA runtime - This also marks the test as requiring a cuda gpu, and requiring LLVM support. - Parameters ---------- f : function Function to mark - """ _requires_nvptx = [ pytest.mark.skipif(not device_enabled("nvptx"), reason="NVPTX support not enabled"), @@ -570,9 +557,7 @@ def requires_nvptx(*args): def requires_cudagraph(*args): """Mark a test as requiring the CUDA Graph Feature - This also marks the test as requiring cuda - Parameters ---------- f : function @@ -589,9 +574,7 @@ def requires_cudagraph(*args): def requires_opencl(*args): """Mark a test as requiring the OpenCL runtime. - This also marks the test as requiring a gpu. - Parameters ---------- f : function @@ -607,9 +590,7 @@ def requires_opencl(*args): def requires_rocm(*args): """Mark a test as requiring the rocm runtime. - This also marks the test as requiring a gpu. - Parameters ---------- f : function @@ -625,9 +606,7 @@ def requires_rocm(*args): def requires_metal(*args): """Mark a test as requiring the metal runtime. - This also marks the test as requiring a gpu. 
- Parameters ---------- f : function @@ -643,9 +622,7 @@ def requires_metal(*args): def requires_vulkan(*args): """Mark a test as requiring the vulkan runtime. - This also marks the test as requiring a gpu. - Parameters ---------- f : function @@ -661,9 +638,7 @@ def requires_vulkan(*args): def requires_tensorcore(*args): """Mark a test as requiring a tensorcore to run. - Tests with this mark will not be run unless a tensorcore is present. - Parameters ---------- f : function @@ -682,7 +657,6 @@ def requires_tensorcore(*args): def requires_llvm(*args): """Mark a test as requiring llvm to run. - Parameters ---------- f : function @@ -697,7 +671,6 @@ def requires_llvm(*args): def requires_micro(*args): """Mark a test as requiring microTVM to run. - Parameters ---------- f : function @@ -714,7 +687,6 @@ def requires_micro(*args): def requires_rpc(*args): """Mark a test as requiring rpc to run. - Parameters ---------- f : function @@ -730,20 +702,25 @@ def requires_rpc(*args): def _target_to_requirement(target): + if isinstance(target, str): + target = tvm.target.Target(target) + # mapping from target to decorator - if target.startswith("cuda"): + if target.kind.name == "cuda" and "cudnn" in target.attrs.get("libs", []): + return requires_cudnn() + if target.kind.name == "cuda": return requires_cuda() - if target.startswith("rocm"): + if target.kind.name == "rocm": return requires_rocm() - if target.startswith("vulkan"): + if target.kind.name == "vulkan": return requires_vulkan() - if target.startswith("nvptx"): + if target.kind.name == "nvptx": return requires_nvptx() - if target.startswith("metal"): + if target.kind.name == "metal": return requires_metal() - if target.startswith("opencl"): + if target.kind.name == "opencl": return requires_opencl() - if target.startswith("llvm"): + if target.kind.name == "llvm": return requires_llvm() return [] @@ -787,23 +764,79 @@ def _pytest_target_params(targets, excluded_targets=None, xfail_targets=None): def _auto_parametrize_target(metafunc): """Automatically applies parametrize_targets - Used if a test function uses the "target" fixture, but isn't already marked with @tvm.testing.parametrize_targets. Intended for use in the pytest_generate_tests() handler of a conftest.py file. - """ + + def update_parametrize_target_arg( + argnames, + argvalues, + *args, + **kwargs, + ): + args = [arg.strip() for arg in argnames.split(",") if arg.strip()] + if "target" in args: + target_i = args.index("target") + + new_argvalues = [] + for argvalue in argvalues: + + if isinstance(argvalue, _pytest.mark.structures.ParameterSet): + # The parametrized value is already a + # pytest.param, so track any marks already + # defined. + param_set = argvalue.values + target = param_set[target_i] + additional_marks = argvalue.marks + elif len(args) == 1: + # Single value parametrization, argvalue is a list of values. + target = argvalue + param_set = (target,) + additional_marks = [] + else: + # Multiple correlated parameters, argvalue is a list of tuple of values. 
+ param_set = argvalue + target = param_set[target_i] + additional_marks = [] + + new_argvalues.append( + pytest.param( + *param_set, marks=_target_to_requirement(target) + additional_marks + ) + ) + + try: + argvalues[:] = new_argvalues + except TypeError as e: + pyfunc = metafunc.definition.function + filename = pyfunc.__code__.co_filename + line_number = pyfunc.__code__.co_firstlineno + msg = ( + f"Unit test {metafunc.function.__name__} ({filename}:{line_number}) " + "is parametrized using a tuple of parameters instead of a list " + "of parameters." + ) + raise TypeError(msg) from e + if "target" in metafunc.fixturenames: + # Update any explicit use of @pytest.mark.parmaetrize to + # parametrize over targets. This adds the appropriate + # @tvm.testing.requires_* markers for each target. + for mark in metafunc.definition.iter_markers("parametrize"): + update_parametrize_target_arg(*mark.args, **mark.kwargs) + + # Check if any explicit parametrizations exist, and apply one + # if they do not. If the function is marked with either + # excluded or known failing targets, use these to determine + # the targets to be used. parametrized_args = [ arg.strip() for mark in metafunc.definition.iter_markers("parametrize") for arg in mark.args[0].split(",") ] - if "target" not in parametrized_args: - # Check if the function is marked with either excluded or - # known failing targets. excluded_targets = getattr(metafunc.function, "tvm_excluded_targets", []) xfail_targets = getattr(metafunc.function, "tvm_known_failing_targets", []) metafunc.parametrize( @@ -815,7 +848,6 @@ def _auto_parametrize_target(metafunc): def parametrize_targets(*args): """Parametrize a test over a specific set of targets. - Use this decorator when you want your test to be run over a specific set of targets and devices. It is intended for use where a test is applicable only to a specific target, and is @@ -824,7 +856,6 @@ def parametrize_targets(*args): circumstances, :py:func:`tvm.testing.exclude_targets` or :py:func:`tvm.testing.known_failing_targets` should be used instead. - If used as a decorator without arguments, the test will be parametrized over all targets in :py:func:`tvm.testing.enabled_targets`. This behavior is @@ -832,7 +863,6 @@ def parametrize_targets(*args): ``target`` or ``dev``, so the explicit use of the bare decorator is no longer needed, and is maintained for backwards compatibility. - Parameters ---------- f : function @@ -841,7 +871,6 @@ def parametrize_targets(*args): targets : list[str], optional Set of targets to run against. If not supplied, :py:func:`tvm.testing.enabled_targets` will be used. - Example ------- >>> @tvm.testing.parametrize_targets("llvm", "cuda") @@ -849,31 +878,25 @@ def parametrize_targets(*args): >>> ... # do something """ - def wrap(targets): - def func(f): - return pytest.mark.parametrize( - "target", _pytest_target_params(targets), scope="session" - )(f) - - return func - + # Backwards compatibility, when used as a decorator with no + # arguments implicitly parametrizes over "target". The + # parametrization is now handled by _auto_parametrize_target, so + # this use case can just return the decorated function. if len(args) == 1 and callable(args[0]): - return wrap(None)(args[0]) - return wrap(args) + return args[0] + + return pytest.mark.parametrize("target", list(args), scope="session") def exclude_targets(*args): """Exclude a test from running on a particular target. 
- Use this decorator when you want your test to be run over a variety of targets and devices (including cpu and gpu devices), but want to exclude some particular target or targets. For example, a test may wish to be run against all targets in tvm.testing.enabled_targets(), except for a particular target that does not support the capabilities. - Applies pytest.mark.skipif to the targets given. - Parameters ---------- f : function @@ -881,19 +904,15 @@ def exclude_targets(*args): where `xxxxxxxxx` is any name. targets : list[str] Set of targets to exclude. - Example ------- >>> @tvm.testing.exclude_targets("cuda") >>> def test_mytest(target, dev): >>> ... # do something - Or - >>> @tvm.testing.exclude_targets("llvm", "cuda") >>> def test_mytest(target, dev): >>> ... # do something - """ def wraps(func): @@ -905,15 +924,12 @@ def wraps(func): def known_failing_targets(*args): """Skip a test that is known to fail on a particular target. - Use this decorator when you want your test to be run over a variety of targets and devices (including cpu and gpu devices), but know that it fails for some targets. For example, a newly implemented runtime may not support all features being tested, and should be excluded. - Applies pytest.mark.xfail to the targets given. - Parameters ---------- f : function @@ -921,19 +937,15 @@ def known_failing_targets(*args): where `xxxxxxxxx` is any name. targets : list[str] Set of targets to skip. - Example ------- >>> @tvm.testing.known_failing_targets("cuda") >>> def test_mytest(target, dev): >>> ... # do something - Or - >>> @tvm.testing.known_failing_targets("llvm", "cuda") >>> def test_mytest(target, dev): >>> ... # do something - """ def wraps(func): @@ -945,51 +957,41 @@ def wraps(func): def parameter(*values, ids=None): """Convenience function to define pytest parametrized fixtures. - Declaring a variable using ``tvm.testing.parameter`` will define a parametrized pytest fixture that can be used by test functions. This is intended for cases that have no setup cost, such as strings, integers, tuples, etc. For cases that have a significant setup cost, please use :py:func:`tvm.testing.fixture` instead. - If a test function accepts multiple parameters defined using ``tvm.testing.parameter``, then the test will be run using every combination of those parameters. - The parameter definition applies to all tests in a module. If a specific test should have different values for the parameter, that test should be marked with ``@pytest.mark.parametrize``. - Parameters ---------- values A list of parameter values. A unit test that accepts this parameter as an argument will be run once for each parameter given. - ids : List[str], optional A list of names for the parameters. If None, pytest will generate a name from the value. These generated names may not be readable/useful for composite types such as tuples. - Returns ------- function A function output from pytest.fixture. - Example ------- >>> size = tvm.testing.parameter(1, 10, 100) >>> def test_using_size(size): >>> ... # Test code here - Or - >>> shape = tvm.testing.parameter((5,10), (512,1024), ids=['small','large']) >>> def test_using_size(shape): >>> ... # Test code here - """ # Optional cls parameter in case a parameter is defined inside a @@ -1006,24 +1008,20 @@ def as_fixture(*_cls, request): def parameters(*value_sets): """Convenience function to define pytest parametrized fixtures. - Declaring a variable using tvm.testing.parameters will define a parametrized pytest fixture that can be used by test functions. 
Like :py:func:`tvm.testing.parameter`, this is intended for cases that have no setup cost, such as strings, integers, tuples, etc. For cases that have a significant setup cost, please use :py:func:`tvm.testing.fixture` instead. - Unlike :py:func:`tvm.testing.parameter`, if a test function accepts multiple parameters defined using a single call to ``tvm.testing.parameters``, then the test will only be run once for each set of parameters, not for all combinations of parameters. - These parameter definitions apply to all tests in a module. If a specific test should have different values for some parameters, that test should be marked with ``@pytest.mark.parametrize``. - Parameters ---------- values : List[tuple] @@ -1031,20 +1029,17 @@ def parameters(*value_sets): a single combination of values to be tested. A unit test that accepts parameters defined will be run once for every set of parameters in the list. - Returns ------- List[function] Function outputs from pytest.fixture. These should be unpacked into individual named parameters. - Example ------- >>> size, dtype = tvm.testing.parameters( (16,'float32'), (512,'float16') ) >>> def test_feature_x(size, dtype): >>> # Test code here >>> assert( (size,dtype) in [(16,'float32'), (512,'float16')]) - """ global _parametrize_group parametrize_group = _parametrize_group @@ -1089,15 +1084,12 @@ def _parametrize_correlated_parameters(metafunc): def fixture(func=None, *, cache_return_value=False): """Convenience function to define pytest fixtures. - This should be used as a decorator to mark functions that set up state before a function. The return value of that fixture function is then accessible by test functions as that accept it as a parameter. - Fixture functions can accept parameters defined with :py:func:`tvm.testing.parameter`. - By default, the setup will be performed once for each unit test that uses a fixture, to ensure that unit tests are independent. If the setup is expensive to perform, then the @@ -1107,7 +1099,6 @@ def fixture(func=None, *, cache_return_value=False): will be passed to all tests that use it. If the environment variable TVM_TEST_DISABLE_CACHE is set to a non-zero value, it will disable this feature and no caching will be performed. - Example ------- >>> @tvm.testing.fixture @@ -1116,9 +1107,7 @@ def fixture(func=None, *, cache_return_value=False): >>> >>> def test_feature_x(target, dev, cheap_setup) >>> assert(cheap_setup == 5) # Run test here - Or - >>> size = tvm.testing.parameter(1, 10, 100) >>> >>> @tvm.testing.fixture @@ -1127,9 +1116,7 @@ def fixture(func=None, *, cache_return_value=False): >>> >>> def test_feature_x(cheap_setup): >>> assert(cheap_setup in [5, 50, 500]) - Or - >>> @tvm.testing.fixture(cache_return_value=True) >>> def expensive_setup(): >>> time.sleep(10) # Setup code here @@ -1137,7 +1124,6 @@ def fixture(func=None, *, cache_return_value=False): >>> >>> def test_feature_x(target, dev, expensive_setup): >>> assert(expensive_setup == 5) - """ force_disable_cache = bool(int(os.environ.get("TVM_TEST_DISABLE_CACHE", "0"))) @@ -1263,15 +1249,12 @@ def _remove_global_fixture_definitions(items): def identity_after(x, sleep): """Testing function to return identity after sleep - Parameters ---------- x : int The input value. 
- sleep : float The amount of time to sleep - Returns ------- x : object From ea27936287c658bd2582cac81fd905c9b084d609 Mon Sep 17 00:00:00 2001 From: shingjan Date: Mon, 9 Aug 2021 14:56:28 -0700 Subject: [PATCH 29/38] restore python.tvm.testing.utils.py --- python/tvm/testing/utils.py | 108 ++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/python/tvm/testing/utils.py b/python/tvm/testing/utils.py index 7aa448a64f4a..71ab0770d64e 100644 --- a/python/tvm/testing/utils.py +++ b/python/tvm/testing/utils.py @@ -17,8 +17,10 @@ # pylint: disable=invalid-name,unnecessary-comprehension """ TVM testing utilities + Testing Markers *************** + We use pytest markers to specify the requirements of test functions. Currently there is a single distinction that matters for our testing environment: does the test require a gpu. For tests that require just a gpu or just a cpu, we @@ -26,21 +28,27 @@ available. To avoid running tests that don't require a gpu on gpu nodes, this decorator also sets the pytest marker `gpu` so we can use select the gpu subset of tests (using `pytest -m gpu`). + Unfortunately, many tests are written like this: + .. python:: + def test_something(): for target in all_targets(): do_something() + The test uses both gpu and cpu targets, so the test needs to be run on both cpu and gpu nodes. But we still want to only run the cpu targets on the cpu testing node. The solution is to mark these tests with the gpu marker so they will be run on the gpu nodes. But we also modify all_targets (renamed to enabled_targets) so that it only returns gpu targets on gpu nodes and cpu targets on cpu nodes (using an environment variable). + Instead of using the all_targets function, future tests that would like to test against a variety of targets should use the :py:func:`tvm.testing.parametrize_targets` functionality. This allows us greater control over which targets are run on which testing nodes. + If in the future we want to add a new type of testing node (for example fpgas), we need to add a new marker in `tests/python/pytest.ini` and a new function in this module. Then targets using this node should be added to the @@ -70,6 +78,7 @@ def test_something(): def assert_allclose(actual, desired, rtol=1e-7, atol=1e-7): """Version of np.testing.assert_allclose with `atol` and `rtol` fields set in reasonable defaults. + Arguments `actual` and `desired` are not interchangeable, since the function compares the `abs(actual-desired)` with `atol+rtol*abs(desired)`. Since we often allow `desired` to be close to zero, we generally want non-zero `atol`. @@ -85,9 +94,11 @@ def check_numerical_grads( ): """A helper function that checks that numerical gradients of a function are equal to gradients computed in some different way (analytical gradients). + Numerical gradients are computed using finite difference approximation. To reduce the number of function evaluations, the number of points used is gradually increased if the error value is too high (up to 5 points). + Parameters ---------- function @@ -95,19 +106,25 @@ def check_numerical_grads( arguments (either `function(*input_values)` or `function(**input_values)` should be correct) and returns a scalar result. Should accept numpy ndarrays. + input_values : Dict[str, numpy.ndarray] or List[numpy.ndarray] A list of values or a dict assigning values to variables. Represents the point at which gradients should be computed. 
+ grad_values : Dict[str, numpy.ndarray] or List[numpy.ndarray] Gradients computed using a different method. + function_value : float, optional Should be equal to `function(**input_values)`. + delta : float, optional A small number used for numerical computation of partial derivatives. The default 1e-3 is a good choice for float32. + atol : float, optional Absolute tolerance. Gets multiplied by `sqrt(n)` where n is the size of a gradient. + rtol : float, optional Relative tolerance. """ @@ -231,10 +248,12 @@ def compare_derivative(j, n_der, grad): def assert_prim_expr_equal(lhs, rhs): """Assert lhs and rhs equals to each iother. + Parameters ---------- lhs : tvm.tir.PrimExpr The left operand. + rhs : tvm.tir.PrimExpr The left operand. """ @@ -248,12 +267,14 @@ def assert_prim_expr_equal(lhs, rhs): def check_bool_expr_is_true(bool_expr, vranges, cond=None): """Check that bool_expr holds given the condition cond for every value of free variables from vranges. + for example, 2x > 4y solves to x > 2y given x in (0, 10) and y in (0, 10) here bool_expr is x > 2y, vranges is {x: (0, 10), y: (0, 10)}, cond is 2x > 4y We creates iterations to check, for x in range(10): for y in range(10): assert !(2x > 4y) || (x > 2y) + Parameters ---------- bool_expr : tvm.ir.PrimExpr @@ -297,6 +318,7 @@ def _compute_body(*us): def check_int_constraints_trans_consistency(constraints_trans, vranges=None): """Check IntConstraintsTransform is a bijective transformation. + Parameters ---------- constraints_trans : arith.IntConstraintsTransform @@ -410,19 +432,24 @@ def _get_targets(target_str=None): def device_enabled(target): """Check if a target should be used when testing. + It is recommended that you use :py:func:`tvm.testing.parametrize_targets` instead of manually checking if a target is enabled. + This allows the user to control which devices they are testing against. In tests, this should be used to check if a device should be used when said device is an optional part of the test. + Parameters ---------- target : str Target string to check against + Returns ------- bool Whether or not the device associated with this target is enabled. + Example ------- >>> @tvm.testing.uses_gpu @@ -430,6 +457,7 @@ def device_enabled(target): >>> for target in ["cuda", "llvm"]: >>> if device_enabled(target): >>> test_body... + Here, `test_body` will only be reached by with `target="cuda"` on gpu test nodes and `target="llvm"` on cpu test nodes. """ @@ -441,19 +469,24 @@ def device_enabled(target): def enabled_targets(): """Get all enabled targets with associated devices. + In most cases, you should use :py:func:`tvm.testing.parametrize_targets` instead of this function. + In this context, enabled means that TVM was built with support for this target, the target name appears in the TVM_TEST_TARGETS environment variable, and a suitable device for running this target exists. If TVM_TEST_TARGETS is not set, it defaults to variable DEFAULT_TEST_TARGETS in this module. + If you use this function in a test, you **must** decorate the test with :py:func:`tvm.testing.uses_gpu` (otherwise it will never be run on the gpu). + Returns ------- targets: list A list of pairs of all enabled devices and the associated context + """ return [(t["target"], tvm.device(t["target"])) for t in _get_targets() if t["is_runnable"]] @@ -470,9 +503,11 @@ def _compose(args, decs): def uses_gpu(*args): """Mark to differentiate tests that use the GPU in some capacity. + These tests will be run on CPU-only test nodes and on test nodes with GPUs. 
To mark a test that must have a GPU present to run, use :py:func:`tvm.testing.requires_gpu`. + Parameters ---------- f : function @@ -484,7 +519,9 @@ def uses_gpu(*args): def requires_gpu(*args): """Mark a test as requiring a GPU to run. + Tests with this mark will not be run unless a gpu is present. + Parameters ---------- f : function @@ -506,7 +543,9 @@ def requires_gpu(*args): def requires_cuda(*args): """Mark a test as requiring the CUDA runtime. + This also marks the test as requiring a cuda gpu. + Parameters ---------- f : function @@ -522,7 +561,9 @@ def requires_cuda(*args): def requires_cudnn(*args): """Mark a test as requiring the cuDNN library. + This also marks the test as requiring a cuda gpu. + Parameters ---------- f : function @@ -540,12 +581,15 @@ def requires_cudnn(*args): def requires_nvptx(*args): """Mark a test as requiring the NVPTX compilation on the CUDA runtime + This also marks the test as requiring a cuda gpu, and requiring LLVM support. + Parameters ---------- f : function Function to mark + """ _requires_nvptx = [ pytest.mark.skipif(not device_enabled("nvptx"), reason="NVPTX support not enabled"), @@ -557,7 +601,9 @@ def requires_nvptx(*args): def requires_cudagraph(*args): """Mark a test as requiring the CUDA Graph Feature + This also marks the test as requiring cuda + Parameters ---------- f : function @@ -574,7 +620,9 @@ def requires_cudagraph(*args): def requires_opencl(*args): """Mark a test as requiring the OpenCL runtime. + This also marks the test as requiring a gpu. + Parameters ---------- f : function @@ -590,7 +638,9 @@ def requires_opencl(*args): def requires_rocm(*args): """Mark a test as requiring the rocm runtime. + This also marks the test as requiring a gpu. + Parameters ---------- f : function @@ -606,7 +656,9 @@ def requires_rocm(*args): def requires_metal(*args): """Mark a test as requiring the metal runtime. + This also marks the test as requiring a gpu. + Parameters ---------- f : function @@ -622,7 +674,9 @@ def requires_metal(*args): def requires_vulkan(*args): """Mark a test as requiring the vulkan runtime. + This also marks the test as requiring a gpu. + Parameters ---------- f : function @@ -638,7 +692,9 @@ def requires_vulkan(*args): def requires_tensorcore(*args): """Mark a test as requiring a tensorcore to run. + Tests with this mark will not be run unless a tensorcore is present. + Parameters ---------- f : function @@ -657,6 +713,7 @@ def requires_tensorcore(*args): def requires_llvm(*args): """Mark a test as requiring llvm to run. + Parameters ---------- f : function @@ -671,6 +728,7 @@ def requires_llvm(*args): def requires_micro(*args): """Mark a test as requiring microTVM to run. + Parameters ---------- f : function @@ -687,6 +745,7 @@ def requires_micro(*args): def requires_rpc(*args): """Mark a test as requiring rpc to run. + Parameters ---------- f : function @@ -764,10 +823,12 @@ def _pytest_target_params(targets, excluded_targets=None, xfail_targets=None): def _auto_parametrize_target(metafunc): """Automatically applies parametrize_targets + Used if a test function uses the "target" fixture, but isn't already marked with @tvm.testing.parametrize_targets. Intended for use in the pytest_generate_tests() handler of a conftest.py file. + """ def update_parametrize_target_arg( @@ -848,6 +909,7 @@ def update_parametrize_target_arg( def parametrize_targets(*args): """Parametrize a test over a specific set of targets. + Use this decorator when you want your test to be run over a specific set of targets and devices. 
It is intended for use where a test is applicable only to a specific target, and is @@ -856,6 +918,7 @@ def parametrize_targets(*args): circumstances, :py:func:`tvm.testing.exclude_targets` or :py:func:`tvm.testing.known_failing_targets` should be used instead. + If used as a decorator without arguments, the test will be parametrized over all targets in :py:func:`tvm.testing.enabled_targets`. This behavior is @@ -863,6 +926,7 @@ def parametrize_targets(*args): ``target`` or ``dev``, so the explicit use of the bare decorator is no longer needed, and is maintained for backwards compatibility. + Parameters ---------- f : function @@ -871,6 +935,7 @@ def parametrize_targets(*args): targets : list[str], optional Set of targets to run against. If not supplied, :py:func:`tvm.testing.enabled_targets` will be used. + Example ------- >>> @tvm.testing.parametrize_targets("llvm", "cuda") @@ -890,13 +955,16 @@ def parametrize_targets(*args): def exclude_targets(*args): """Exclude a test from running on a particular target. + Use this decorator when you want your test to be run over a variety of targets and devices (including cpu and gpu devices), but want to exclude some particular target or targets. For example, a test may wish to be run against all targets in tvm.testing.enabled_targets(), except for a particular target that does not support the capabilities. + Applies pytest.mark.skipif to the targets given. + Parameters ---------- f : function @@ -904,15 +972,19 @@ def exclude_targets(*args): where `xxxxxxxxx` is any name. targets : list[str] Set of targets to exclude. + Example ------- >>> @tvm.testing.exclude_targets("cuda") >>> def test_mytest(target, dev): >>> ... # do something + Or + >>> @tvm.testing.exclude_targets("llvm", "cuda") >>> def test_mytest(target, dev): >>> ... # do something + """ def wraps(func): @@ -924,12 +996,15 @@ def wraps(func): def known_failing_targets(*args): """Skip a test that is known to fail on a particular target. + Use this decorator when you want your test to be run over a variety of targets and devices (including cpu and gpu devices), but know that it fails for some targets. For example, a newly implemented runtime may not support all features being tested, and should be excluded. + Applies pytest.mark.xfail to the targets given. + Parameters ---------- f : function @@ -937,15 +1012,19 @@ def known_failing_targets(*args): where `xxxxxxxxx` is any name. targets : list[str] Set of targets to skip. + Example ------- >>> @tvm.testing.known_failing_targets("cuda") >>> def test_mytest(target, dev): >>> ... # do something + Or + >>> @tvm.testing.known_failing_targets("llvm", "cuda") >>> def test_mytest(target, dev): >>> ... # do something + """ def wraps(func): @@ -957,41 +1036,51 @@ def wraps(func): def parameter(*values, ids=None): """Convenience function to define pytest parametrized fixtures. + Declaring a variable using ``tvm.testing.parameter`` will define a parametrized pytest fixture that can be used by test functions. This is intended for cases that have no setup cost, such as strings, integers, tuples, etc. For cases that have a significant setup cost, please use :py:func:`tvm.testing.fixture` instead. + If a test function accepts multiple parameters defined using ``tvm.testing.parameter``, then the test will be run using every combination of those parameters. + The parameter definition applies to all tests in a module. If a specific test should have different values for the parameter, that test should be marked with ``@pytest.mark.parametrize``. 
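# For illustration, a hypothetical test combining the target decorators documented
# above; stacking exclude_targets and known_failing_targets like this is an
# assumption of the sketch, and the test name and target strings are made up.
import tvm.testing

@tvm.testing.known_failing_targets("vulkan")
@tvm.testing.exclude_targets("cuda")
def test_dense_schedule(target, dev):
    ...  # runs on every enabled target except cuda; expected to fail on vulkan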
+ Parameters ---------- values A list of parameter values. A unit test that accepts this parameter as an argument will be run once for each parameter given. + ids : List[str], optional A list of names for the parameters. If None, pytest will generate a name from the value. These generated names may not be readable/useful for composite types such as tuples. + Returns ------- function A function output from pytest.fixture. + Example ------- >>> size = tvm.testing.parameter(1, 10, 100) >>> def test_using_size(size): >>> ... # Test code here + Or + >>> shape = tvm.testing.parameter((5,10), (512,1024), ids=['small','large']) >>> def test_using_size(shape): >>> ... # Test code here + """ # Optional cls parameter in case a parameter is defined inside a @@ -1008,20 +1097,24 @@ def as_fixture(*_cls, request): def parameters(*value_sets): """Convenience function to define pytest parametrized fixtures. + Declaring a variable using tvm.testing.parameters will define a parametrized pytest fixture that can be used by test functions. Like :py:func:`tvm.testing.parameter`, this is intended for cases that have no setup cost, such as strings, integers, tuples, etc. For cases that have a significant setup cost, please use :py:func:`tvm.testing.fixture` instead. + Unlike :py:func:`tvm.testing.parameter`, if a test function accepts multiple parameters defined using a single call to ``tvm.testing.parameters``, then the test will only be run once for each set of parameters, not for all combinations of parameters. + These parameter definitions apply to all tests in a module. If a specific test should have different values for some parameters, that test should be marked with ``@pytest.mark.parametrize``. + Parameters ---------- values : List[tuple] @@ -1029,17 +1122,20 @@ def parameters(*value_sets): a single combination of values to be tested. A unit test that accepts parameters defined will be run once for every set of parameters in the list. + Returns ------- List[function] Function outputs from pytest.fixture. These should be unpacked into individual named parameters. + Example ------- >>> size, dtype = tvm.testing.parameters( (16,'float32'), (512,'float16') ) >>> def test_feature_x(size, dtype): >>> # Test code here >>> assert( (size,dtype) in [(16,'float32'), (512,'float16')]) + """ global _parametrize_group parametrize_group = _parametrize_group @@ -1084,12 +1180,15 @@ def _parametrize_correlated_parameters(metafunc): def fixture(func=None, *, cache_return_value=False): """Convenience function to define pytest fixtures. + This should be used as a decorator to mark functions that set up state before a function. The return value of that fixture function is then accessible by test functions as that accept it as a parameter. + Fixture functions can accept parameters defined with :py:func:`tvm.testing.parameter`. + By default, the setup will be performed once for each unit test that uses a fixture, to ensure that unit tests are independent. If the setup is expensive to perform, then the @@ -1099,6 +1198,7 @@ def fixture(func=None, *, cache_return_value=False): will be passed to all tests that use it. If the environment variable TVM_TEST_DISABLE_CACHE is set to a non-zero value, it will disable this feature and no caching will be performed. 
+ Example ------- >>> @tvm.testing.fixture @@ -1107,7 +1207,9 @@ def fixture(func=None, *, cache_return_value=False): >>> >>> def test_feature_x(target, dev, cheap_setup) >>> assert(cheap_setup == 5) # Run test here + Or + >>> size = tvm.testing.parameter(1, 10, 100) >>> >>> @tvm.testing.fixture @@ -1116,7 +1218,9 @@ def fixture(func=None, *, cache_return_value=False): >>> >>> def test_feature_x(cheap_setup): >>> assert(cheap_setup in [5, 50, 500]) + Or + >>> @tvm.testing.fixture(cache_return_value=True) >>> def expensive_setup(): >>> time.sleep(10) # Setup code here @@ -1124,6 +1228,7 @@ def fixture(func=None, *, cache_return_value=False): >>> >>> def test_feature_x(target, dev, expensive_setup): >>> assert(expensive_setup == 5) + """ force_disable_cache = bool(int(os.environ.get("TVM_TEST_DISABLE_CACHE", "0"))) @@ -1249,12 +1354,15 @@ def _remove_global_fixture_definitions(items): def identity_after(x, sleep): """Testing function to return identity after sleep + Parameters ---------- x : int The input value. + sleep : float The amount of time to sleep + Returns ------- x : object From a6e06ee4a987baa059fa5eb2c9dc246dc97f585a Mon Sep 17 00:00:00 2001 From: shingjan Date: Mon, 9 Aug 2021 15:11:45 -0700 Subject: [PATCH 30/38] remove task_input_buffer --- python/tvm/auto_scheduler/measure.py | 6 - .../tvm/auto_scheduler/task_input_buffer.py | 210 ------------------ 2 files changed, 216 deletions(-) delete mode 100644 python/tvm/auto_scheduler/task_input_buffer.py diff --git a/python/tvm/auto_scheduler/measure.py b/python/tvm/auto_scheduler/measure.py index f761731239f8..ef2518fee419 100644 --- a/python/tvm/auto_scheduler/measure.py +++ b/python/tvm/auto_scheduler/measure.py @@ -63,10 +63,6 @@ deserialize_workload_registry_entry, ) -from .task_input_buffer import ( - serialize_task_input_buffer, - deserialize_task_input_buffer, -) # pylint: disable=invalid-name logger = logging.getLogger("auto_scheduler") @@ -145,14 +141,12 @@ def serialize(self): return [ _ffi_api.SerializeMeasureInput(self), serialize_workload_registry_entry(self.task.workload_key), - serialize_task_input_buffer(self.task.workload_key), ] @staticmethod def deserialize(data): inp = _ffi_api.DeserializeMeasureInput(data[0]) deserialize_workload_registry_entry(data[1]) - deserialize_task_input_buffer(data[2]) return recover_measure_input(inp) diff --git a/python/tvm/auto_scheduler/task_input_buffer.py b/python/tvm/auto_scheduler/task_input_buffer.py deleted file mode 100644 index ef787216ad0c..000000000000 --- a/python/tvm/auto_scheduler/task_input_buffer.py +++ /dev/null @@ -1,210 +0,0 @@ -""" The definiton of SearchTask """ -import os -import numpy as np - -from tvm.runtime import ndarray -from tvm.runtime._ffi_node_api import LoadJSON, SaveJSON - - -# The map stores special registered buffer for measurement. -# This can be used for sparse workloads when we cannot use random tensors for measurment. -# { -# "workload_key_0": { -# "task_input_0": Tensor(...), -# "task_input_1": Tensor(...) -# }, -# "workload_key_1": { -# "task_input_2": Tensor(...), -# "task_input_3": Tensor(...) -# }, -# ... -# } -TASK_INPUT_BUFFER_TABLE = {} - - -def _save_buffer_to_file(buffer_name, buffer_data): - """Save the current Tensor buffer to a numpy file. - - File name will be: {buffer_name}.{buffer_shape}_{buffer_data_type}.npy - """ - np_data = buffer_data.numpy() - - buffer_name += "." 
- for i in np_data.shape: - buffer_name += "%d_" % (i) - buffer_name += "%s" % (np_data.dtype) - buffer_name += ".npy" - - np_data.tofile(buffer_name, " ") - - -def _try_load_buffer_from_file(buffer_name): - """Try to load buffer from a numpy file, if not found, return None. - - File name has a same format as `_save_buffer_to_file`. - """ - filelist = os.listdir() - - for file in filelist: - if file.startswith(buffer_name + "."): - meta_info = file.split(".")[-2].split("_") - shape = [int(i) for i in meta_info[:-1]] - dtype = meta_info[-1] - buffer_data = np.fromfile(file, dtype=dtype, sep=" ") - buffer_data = buffer_data.reshape(shape) - return ndarray.array(buffer_data) - - return None - - -def register_task_input_buffer( - workload_key, - input_name, - input_data, - overwrite=False, - save_to_file=False, -): - """Register special buffer for measurement. - - Parameters - ---------- - workload_key : str - The workload key of the SearchTask. - - input_name : str - The name of input buffer. - - input_data : tvm.nd.NDArray - The input Tensor data. - - overwrite : bool = False - Whether to overwrite the data if a name has already registered. - - save_to_file : bool = False - Whether to save the data to a local file as well. This can be reused to resume the last - tuning process. - - Returns - ------- - tvm.nd.NDArray - The actual registered Tensor data of this input_name. With `overwrite` set to False, will - return the original one if the name has already registered before. - """ - global TASK_INPUT_BUFFER_TABLE - - if workload_key not in TASK_INPUT_BUFFER_TABLE: - TASK_INPUT_BUFFER_TABLE[workload_key] = {} - input_table = TASK_INPUT_BUFFER_TABLE[workload_key] - - if not overwrite: - if input_name not in input_table.keys(): - # Try to load buffer data from local file - tensor_from_file = _try_load_buffer_from_file(input_name) - if tensor_from_file: - input_table[input_name] = tensor_from_file - elif input_name in input_table.keys(): - raise RuntimeError( - "Tensor %s exists in TASK_INPUT_BUFFER_TABLE, %s" - % (input_name, "set overwrite to True or this Tensor will not be registered") - ) - - input_table[input_name] = input_data - if save_to_file: - _save_buffer_to_file(input_name, input_data) - return input_data - - -def get_task_input_buffer(workload_key, input_name): - """Get special buffer for measurement. - - The buffers are registered by `register_task_input_buffer`. - - Parameters - ---------- - workload_key : str - The workload key of the SearchTask. - - input_name : str - The name of input buffer. - - Returns - ------- - tvm.nd.NDArray - The registered input buffer. 
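# For context, a minimal sketch of how the register/get pair above was typically
# driven; the workload key and input name are hypothetical and the data is random.
import numpy as np
from tvm.runtime import ndarray

data = ndarray.array(np.random.rand(128, 128).astype("float32"))
register_task_input_buffer("hypothetical_sparse_workload", "w_data", data)
buf = get_task_input_buffer("hypothetical_sparse_workload", "w_data")
np.testing.assert_allclose(buf.numpy(), data.numpy())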
- """ - global TASK_INPUT_BUFFER_TABLE - - if workload_key not in TASK_INPUT_BUFFER_TABLE: - TASK_INPUT_BUFFER_TABLE[workload_key] = {} - input_table = TASK_INPUT_BUFFER_TABLE[workload_key] - - if input_name not in input_table: - # Try to load buffer data from local file - tensor_from_file = _try_load_buffer_from_file(input_name) - if tensor_from_file: - input_table[input_name] = tensor_from_file - - # Then check for the default table, the input names extracted from a relay model will be - # stored here for we're not able to get the workload_key at that time - if input_name not in input_table: - input_table = TASK_INPUT_BUFFER_TABLE["default"] - - if input_name in input_table: - return input_table[input_name] - - raise ValueError( - "%s not found in TASK_INPUT_BUFFER_TABLE, " % (input_name) - + "should provide with `SearchTask(..., task_inputs={...})`" - ) - - -def serialize_task_input_buffer(workload_key): - """ - Serialize a task input buffer entry. - - This is used when the start method of multiprocessing is spawn. - We need to serialize the task input buffer table it in the new processes. - - Parameters - ---------- - workload_key : str - The workload key - - Returns - ------- - data: Tuple - The serialized pickable data - """ - sname = workload_key - - # the return value of get_task_input_buffer is tvm.ndarray - # convert it to np.array to make it picklable, - global TASK_INPUT_BUFFER_TABLE - - if workload_key not in TASK_INPUT_BUFFER_TABLE: - TASK_INPUT_BUFFER_TABLE[workload_key] = {} - svalue = TASK_INPUT_BUFFER_TABLE[workload_key] - if not callable(svalue): - # pylint: disable=assignment-from-no-return - svalue = SaveJSON(svalue) - - return sname, svalue - - -def deserialize_task_input_buffer(data): - """ - Deserialize a task input buffer entry. - This should be used along with :code:`serialize_task_input_buffer_table` - - Parameters - ---------- - data: Tuple - The return value of :code:`serialize_task_input_buffer_table` - """ - global TASK_INPUT_BUFFER_TABLE - - name, value = data - # pylint: disable=assignment-from-no-return - if not callable(value): - value = LoadJSON(value) - TASK_INPUT_BUFFER_TABLE[name] = value From 9991d3fd6e19797471523ceac5c759cfc396a4cb Mon Sep 17 00:00:00 2001 From: shingjan Date: Mon, 9 Aug 2021 15:33:02 -0700 Subject: [PATCH 31/38] linting and naming convention updated --- python/tvm/auto_scheduler/__init__.py | 1 - python/tvm/auto_scheduler/measure.py | 11 ++++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/tvm/auto_scheduler/__init__.py b/python/tvm/auto_scheduler/__init__.py index 69729631b0ed..ff6d82a0242c 100644 --- a/python/tvm/auto_scheduler/__init__.py +++ b/python/tvm/auto_scheduler/__init__.py @@ -26,7 +26,6 @@ from . import relay_integration from . import search_policy from . import search_task -from . import task_input_buffer from . import task_scheduler from . import utils from . 
import workload_registry diff --git a/python/tvm/auto_scheduler/measure.py b/python/tvm/auto_scheduler/measure.py index ef2518fee419..b5dcfd6a8708 100644 --- a/python/tvm/auto_scheduler/measure.py +++ b/python/tvm/auto_scheduler/measure.py @@ -839,7 +839,6 @@ def prepare_runner_args(inp, build_res): from .search_task import get_task_input_buffer # lazily import to avoid recursive dependency task_input_names = inp.task.task_input_names - dev = ndarray.device(str(inp.task.target), 0) tensor_input_map = prepare_input_map(build_res.args) if not task_input_names: tensor_input_map = {} @@ -849,9 +848,9 @@ def prepare_runner_args(inp, build_res): if arg in tensor_input_map: tensor_name = tensor_input_map[arg] if tensor_name in task_input_names: - temp_ndarray = get_task_input_buffer(inp.task.workload_key, tensor_name) + task_input_buffer = get_task_input_buffer(inp.task.workload_key, tensor_name) # convert tvm.NDArray to picklable numpy.ndarray - args.append(ndarray.NDArray.numpy(temp_ndarray)) + args.append(task_input_buffer.numpy()) task_inputs_count += 1 else: raise ValueError( @@ -908,8 +907,9 @@ def _timed_eval_func( random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True) assert random_fill, "Please make sure USE_RANDOM is ON in the config.cmake" assert len(args) == len(build_res.args) + # pylint: disable=consider-using-enumerate for idx in range(len(args)): - if args[idx] == None: + if args[idx] is None: build_res_arg = build_res.args[idx] empty_array = ndarray.empty( get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev @@ -1108,8 +1108,9 @@ def _timed_rpc_run( ), "Please make sure USE_RANDOM is ON in the config.cmake on the remote devices" assert len(args) == len(build_res.args) + # pylint: disable=consider-using-enumerate for idx in range(len(args)): - if args[idx] == None: + if args[idx] is None: build_res_arg = build_res.args[idx] empty_array = ndarray.empty( get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev From 1d7e25fa486824914a2e91b4ae49a7aa89416e31 Mon Sep 17 00:00:00 2001 From: shingjan Date: Mon, 9 Aug 2021 16:36:42 -0700 Subject: [PATCH 32/38] address comments and update __init__ --- python/tvm/auto_scheduler/measure.py | 5 ++--- python/tvm/testing/__init__.py | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/tvm/auto_scheduler/measure.py b/python/tvm/auto_scheduler/measure.py index b5dcfd6a8708..939c79ed3932 100644 --- a/python/tvm/auto_scheduler/measure.py +++ b/python/tvm/auto_scheduler/measure.py @@ -63,7 +63,6 @@ deserialize_workload_registry_entry, ) - # pylint: disable=invalid-name logger = logging.getLogger("auto_scheduler") @@ -831,8 +830,8 @@ def prepare_runner_args(inp, build_res): Returns ------- - List[NDArray, None] : - List of NDArray of task input buffer. None if argument not present in task_input_names. + List[Optional[numpy.ndarray]] : + List of numpy.ndarray representation of task input buffer. None if argument not present in task_input_names. 
""" # pylint: disable=import-outside-toplevel diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py index 343c1c5d7b3b..c32931e7144f 100644 --- a/python/tvm/testing/__init__.py +++ b/python/tvm/testing/__init__.py @@ -25,6 +25,7 @@ from .utils import requires_tensorcore, requires_metal, requires_micro, requires_opencl from .utils import _auto_parametrize_target, _count_num_fixture_uses from .utils import _remove_global_fixture_definitions, _parametrize_correlated_parameters +from .utils import _pytest_target_params from ._ffi_api import nop, echo, device_test, run_check_signal, object_use_count from ._ffi_api import test_wrap_callback, test_raise_error_callback, test_check_eq_callback From 10288a69d1370ef8d1dbaad8288308767fac4a3b Mon Sep 17 00:00:00 2001 From: shingjan Date: Mon, 9 Aug 2021 17:03:07 -0700 Subject: [PATCH 33/38] address comments --- python/tvm/auto_scheduler/measure.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/tvm/auto_scheduler/measure.py b/python/tvm/auto_scheduler/measure.py index 939c79ed3932..dc8a0363e8a2 100644 --- a/python/tvm/auto_scheduler/measure.py +++ b/python/tvm/auto_scheduler/measure.py @@ -819,7 +819,9 @@ def prepare_input_map(args): def prepare_runner_args(inp, build_res): - """This function prepare the arguments for local/rpc runner in main process + """This function prepares the pre-defined arguments in `TASK_INPUT_BUFFER_TABLE` for local/rpc + runner in main process + Parameters ---------- inp : MeasureInput @@ -831,7 +833,8 @@ def prepare_runner_args(inp, build_res): Returns ------- List[Optional[numpy.ndarray]] : - List of numpy.ndarray representation of task input buffer. None if argument not present in task_input_names. + List of arguments for running the program. If the argument does not have a pre-defined input + buffer, None is added to the list as a placeholder. 
""" # pylint: disable=import-outside-toplevel From ed51463a51a3a65f55e2851e1695cef5c711fa4c Mon Sep 17 00:00:00 2001 From: shingjan Date: Mon, 9 Aug 2021 18:34:57 -0700 Subject: [PATCH 34/38] two imports added --- python/tvm/testing/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/testing/__init__.py b/python/tvm/testing/__init__.py index c32931e7144f..bd1ada4fa284 100644 --- a/python/tvm/testing/__init__.py +++ b/python/tvm/testing/__init__.py @@ -25,7 +25,7 @@ from .utils import requires_tensorcore, requires_metal, requires_micro, requires_opencl from .utils import _auto_parametrize_target, _count_num_fixture_uses from .utils import _remove_global_fixture_definitions, _parametrize_correlated_parameters -from .utils import _pytest_target_params +from .utils import _pytest_target_params, identity_after, terminate_self from ._ffi_api import nop, echo, device_test, run_check_signal, object_use_count from ._ffi_api import test_wrap_callback, test_raise_error_callback, test_check_eq_callback From 211932ae66372ced18a0e478952a97eea790f4b9 Mon Sep 17 00:00:00 2001 From: Wuwei Lin Date: Tue, 10 Aug 2021 15:27:50 -0700 Subject: [PATCH 35/38] Persist PopenWorker --- python/tvm/auto_scheduler/measure.py | 9 +++++++-- python/tvm/auto_scheduler/utils.py | 9 +++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/python/tvm/auto_scheduler/measure.py b/python/tvm/auto_scheduler/measure.py index dc8a0363e8a2..1f0825c16a19 100644 --- a/python/tvm/auto_scheduler/measure.py +++ b/python/tvm/auto_scheduler/measure.py @@ -45,6 +45,7 @@ from tvm.ir import transform from tvm.autotvm.measure.measure_methods import set_cuda_target_arch from tvm.contrib import tar, ndk +from tvm.contrib.popen_pool import PopenWorker from tvm.target import Target @@ -665,7 +666,8 @@ def local_build_worker(args): ) build_func = BuildFunc.build_func - res = call_func_with_timeout(timeout, _timed_func, args=(inp, build_func, verbose)) + worker = PopenWorker() + res = call_func_with_timeout(worker, timeout, _timed_func, args=(inp, build_func, verbose)) if isinstance(res, TimeoutError): if verbose >= 1: print(".T", end="", flush=True) # Build timeout @@ -999,6 +1001,7 @@ def local_run( measure_results = [] assert len(inputs) == len(build_results), "Measure input size should be equal to build results" + worker = PopenWorker() for inp, build_res in zip(inputs, build_results): if build_res.error_no != 0: res = ( @@ -1011,6 +1014,7 @@ def local_run( else: args = prepare_runner_args(inp, build_res) res = call_func_with_timeout( + worker, timeout, _timed_eval_func, args=( @@ -1177,7 +1181,8 @@ def _rpc_run_worker(args): time.time(), ) - res = call_func_with_timeout(timeout, _timed_rpc_run, args=args) + worker = PopenWorker() + res = call_func_with_timeout(worker, timeout, _timed_rpc_run, args=args) if isinstance(res, TimeoutError): if verbose >= 1: print("*T", end="") # Run timeout diff --git a/python/tvm/auto_scheduler/utils.py b/python/tvm/auto_scheduler/utils.py index 0f174991e8a9..b62a1b00ee70 100644 --- a/python/tvm/auto_scheduler/utils.py +++ b/python/tvm/auto_scheduler/utils.py @@ -37,7 +37,6 @@ from tvm.tir import expr from tvm.tir.transform import Simplify from tvm.ir.transform import Sequential -from tvm.contrib.popen_pool import PopenWorker from ..te import Tensor, placeholder @@ -287,13 +286,11 @@ def wrapper(): return res[0] -def call_func_with_timeout(timeout, func, args=(), kwargs=None): # pylint: disable=unused-argument +def call_func_with_timeout(worker, timeout, func, 
args=(), kwargs=None): # pylint: disable=unused-argument """Call a function with timeout""" - process = PopenWorker() - process.send(func, args, kwargs, timeout) - + worker.send(func, args, kwargs, timeout) try: - res = process.recv() + res = worker.recv() except Exception: # pylint: disable=broad-except res = Exception(make_traceback_info()) From a3274cdc2c472820e9405b2f5c2984d5e9e5e286 Mon Sep 17 00:00:00 2001 From: shingjan Date: Tue, 10 Aug 2021 16:48:25 -0700 Subject: [PATCH 36/38] linting --- python/tvm/auto_scheduler/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/tvm/auto_scheduler/utils.py b/python/tvm/auto_scheduler/utils.py index b62a1b00ee70..9919bcb470ee 100644 --- a/python/tvm/auto_scheduler/utils.py +++ b/python/tvm/auto_scheduler/utils.py @@ -286,7 +286,9 @@ def wrapper(): return res[0] -def call_func_with_timeout(worker, timeout, func, args=(), kwargs=None): # pylint: disable=unused-argument +def call_func_with_timeout( + worker, timeout, func, args=(), kwargs=None +): # pylint: disable=unused-argument """Call a function with timeout""" worker.send(func, args, kwargs, timeout) try: From 193709ba0055e099f5b5262709b4ab905ac99db5 Mon Sep 17 00:00:00 2001 From: Wuwei Lin Date: Wed, 11 Aug 2021 09:40:58 -0700 Subject: [PATCH 37/38] Use PopenpoolExecutor --- python/tvm/auto_scheduler/measure.py | 78 ++++++++++++++-------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/python/tvm/auto_scheduler/measure.py b/python/tvm/auto_scheduler/measure.py index 1f0825c16a19..89838a4f8826 100644 --- a/python/tvm/auto_scheduler/measure.py +++ b/python/tvm/auto_scheduler/measure.py @@ -45,7 +45,7 @@ from tvm.ir import transform from tvm.autotvm.measure.measure_methods import set_cuda_target_arch from tvm.contrib import tar, ndk -from tvm.contrib.popen_pool import PopenWorker +from tvm.contrib.popen_pool import PopenWorker, PopenPoolExecutor, StatusKind from tvm.target import Target @@ -601,7 +601,7 @@ class MeasureErrorNo(object): UNKNOWN_ERROR = 8 # Unknown error -def _timed_func(inp_serialized, build_func, verbose): +def _local_build_worker(inp_serialized, build_func, verbose): tic = time.time() inp = MeasureInput.deserialize(inp_serialized) task = inp.task @@ -666,16 +666,12 @@ def local_build_worker(args): ) build_func = BuildFunc.build_func - worker = PopenWorker() - res = call_func_with_timeout(worker, timeout, _timed_func, args=(inp, build_func, verbose)) - if isinstance(res, TimeoutError): - if verbose >= 1: - print(".T", end="", flush=True) # Build timeout - res = None, [], MeasureErrorNo.BUILD_TIMEOUT, None, timeout - elif isinstance(res, Exception): + try: + res = _local_build_worker(inp, build_func, verbose) + except Exception: if verbose >= 1: print(".E", end="", flush=True) # Build error - res = None, [], MeasureErrorNo.COMPILE_HOST, str(res), timeout + res = None, [], MeasureErrorNo.COMPILE_HOST, make_traceback_info(), timeout return res @@ -704,9 +700,8 @@ def local_builder_build(inputs, timeout, n_parallel, build_func="default", verbo res : List[BuildResult] The build results of these MeasureInputs. 
""" - # This pool is not doing computationally intensive work, so we can use threads - pool = ThreadPool(n_parallel) - tuple_res = pool.map( + executor = PopenPoolExecutor() + tuple_res = executor.map_with_error_catching( local_build_worker, [ ( @@ -718,13 +713,16 @@ def local_builder_build(inputs, timeout, n_parallel, build_func="default", verbo for i in inputs ], ) - pool.terminate() - pool.join() - del pool results = [] for res in tuple_res: - results.append(BuildResult(*res)) + if res.status == StatusKind.COMPLETE: + results.append(BuildResult(*res.value)) + else: + assert res.status == StatusKind.TIMEOUT + if verbose >= 1: + print(".T", end="", flush=True) # Build timeout + results.append(BuildResult(None, [], MeasureErrorNo.BUILD_TIMEOUT, None, timeout)) return results @@ -1058,7 +1056,7 @@ def local_run( return measure_results -def _timed_rpc_run( +def _rpc_run( inp_serialized, build_res, args, @@ -1181,25 +1179,15 @@ def _rpc_run_worker(args): time.time(), ) - worker = PopenWorker() - res = call_func_with_timeout(worker, timeout, _timed_rpc_run, args=args) - if isinstance(res, TimeoutError): - if verbose >= 1: - print("*T", end="") # Run timeout - res = ( - (MAX_FLOAT,), - MeasureErrorNo.RUN_TIMEOUT, - None, - build_res.time_cost + timeout, - time.time(), - ) - elif isinstance(res, Exception): + try: + res = _rpc_run(*args) + except Exception: if verbose >= 1: print("*E", end="") # Run error res = ( (MAX_FLOAT,), MeasureErrorNo.RUNTIME_DEVICE, - str(res), + make_traceback_info(), build_res.time_cost + timeout, time.time(), ) @@ -1279,8 +1267,8 @@ def rpc_runner_run( """ assert len(inputs) == len(build_results), "Measure input size should be equal to build results" # This pool is not doing computationally intensive work, so we can use threads - pool = ThreadPool(n_parallel) - tuple_res = pool.map( + executor = PopenPoolExecutor(n_parallel) + tuple_res = executor.map_with_error_catching( _rpc_run_worker, [ ( @@ -1302,13 +1290,25 @@ def rpc_runner_run( for inp, build_res in zip(inputs, build_results) ], ) - pool.terminate() - pool.join() - del pool results = [] - for res in tuple_res: - results.append(MeasureResult(*res)) + for i, res in enumerate(tuple_res): + if res.status == StatusKind.COMPLETE: + results.append(MeasureResult(*res.value)) + else: + assert res.status == StatusKind.TIMEOUT + if verbose >= 1: + print("*T", end="") # Run timeout + build_res = build_results[i] + results.append( + MeasureResult( + (MAX_FLOAT,), + MeasureErrorNo.RUN_TIMEOUT, + None, + build_res.time_cost + timeout, + time.time(), + ) + ) if verbose >= 1: print("") From 00d94768ba24dfab95ddeec71a3714c1b7e1c00a Mon Sep 17 00:00:00 2001 From: shingjan Date: Wed, 11 Aug 2021 13:50:20 -0700 Subject: [PATCH 38/38] linting --- python/tvm/auto_scheduler/measure.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/tvm/auto_scheduler/measure.py b/python/tvm/auto_scheduler/measure.py index 89838a4f8826..a202e837bc9b 100644 --- a/python/tvm/auto_scheduler/measure.py +++ b/python/tvm/auto_scheduler/measure.py @@ -36,7 +36,6 @@ import shutil import tempfile import multiprocessing -from multiprocessing.pool import ThreadPool import logging import tvm._ffi @@ -668,6 +667,7 @@ def local_build_worker(args): try: res = _local_build_worker(inp, build_func, verbose) + # pylint: disable=broad-except except Exception: if verbose >= 1: print(".E", end="", flush=True) # Build error @@ -700,7 +700,7 @@ def local_builder_build(inputs, timeout, n_parallel, build_func="default", verbo res : 
List[BuildResult] The build results of these MeasureInputs. """ - executor = PopenPoolExecutor() + executor = PopenPoolExecutor(n_parallel, timeout) tuple_res = executor.map_with_error_catching( local_build_worker, [ @@ -1181,6 +1181,7 @@ def _rpc_run_worker(args): try: res = _rpc_run(*args) + # pylint: disable=broad-except except Exception: if verbose >= 1: print("*E", end="") # Run error
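# A minimal end-to-end sketch of the PopenPoolExecutor pattern adopted in this
# series, assuming only that tvm is importable; the worker function, pool size,
# and timeout are placeholders standing in for local_build_worker/_rpc_run_worker.
import time
from tvm.contrib.popen_pool import PopenPoolExecutor, StatusKind

def slow_square(x):
    time.sleep(0.01 * x)
    return x * x

executor = PopenPoolExecutor(max_workers=4, timeout=5)
results = []
for res in executor.map_with_error_catching(slow_square, range(8)):
    if res.status == StatusKind.COMPLETE:
        results.append(res.value)
    else:
        # A timed-out or crashed worker: record a sentinel, analogous to the
        # BUILD_TIMEOUT / RUN_TIMEOUT fallback results constructed above.
        results.append(None)
print(results)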