From cb8d04fa5211565c0d65cf5c7609107bc0b42e34 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Tue, 6 Dec 2022 17:32:15 +0900 Subject: [PATCH 01/11] [MetaSchedule] Restore num_threads argument in tune_relay --- python/tvm/meta_schedule/relay_integration.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/tvm/meta_schedule/relay_integration.py b/python/tvm/meta_schedule/relay_integration.py index df76684d2d42..d759cf853e79 100644 --- a/python/tvm/meta_schedule/relay_integration.py +++ b/python/tvm/meta_schedule/relay_integration.py @@ -249,6 +249,7 @@ def tune_relay( strategy: SearchStrategy.SearchStrategyType = "evolutionary", seed: Optional[int] = None, module_equality: str = "structural", + num_threads: Optional[int] = None, ) -> Database: """Tune a Relay program. @@ -296,6 +297,8 @@ def tune_relay( given module. The "ignore-ndarray" varint is used for the extracted blocks or in case no anchor block is found. For the definition of the anchor block, see tir/analysis/analysis.py. + num_threads : Optional[int] + The number of threads to use. Returns ------- @@ -308,6 +311,7 @@ def tune_relay( space=space, strategy=strategy, seed=seed, + num_threads=num_threads, ) return tune_tasks( tasks=tasks, From e56451ce7b7205ef93f93a79f2212e3b8938066b Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Tue, 6 Dec 2022 17:57:52 +0900 Subject: [PATCH 02/11] pass num_threads to XGBModel --- python/tvm/meta_schedule/cost_model/cost_model.py | 5 +++++ python/tvm/meta_schedule/cost_model/xgb_model.py | 7 ++++++- python/tvm/meta_schedule/tune.py | 8 +++++++- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/python/tvm/meta_schedule/cost_model/cost_model.py b/python/tvm/meta_schedule/cost_model/cost_model.py index f139fcc4e4b3..3f7d9eeafff6 100644 --- a/python/tvm/meta_schedule/cost_model/cost_model.py +++ b/python/tvm/meta_schedule/cost_model/cost_model.py @@ -126,6 +126,11 @@ def create( if kind == "xgb": return XGBModel(*args, **kwargs) # type: ignore + + if "num_threads" in kwargs: + # num_threads is only relevant for XGBModel. + kwargs.pop("num_threads") + if kind == "random": return RandomModel(*args, **kwargs) # type: ignore if kind == "mlp": diff --git a/python/tvm/meta_schedule/cost_model/xgb_model.py b/python/tvm/meta_schedule/cost_model/xgb_model.py index 0a2786c6abe0..bef36805e726 100644 --- a/python/tvm/meta_schedule/cost_model/xgb_model.py +++ b/python/tvm/meta_schedule/cost_model/xgb_model.py @@ -333,6 +333,7 @@ def __init__( verbose_eval: int = 25, average_peak_n: int = 32, adaptive_training: bool = True, + num_threads: Optional[int] = None, ): super().__init__() if not isinstance(extractor, FeatureExtractor): @@ -342,7 +343,11 @@ def __init__( # model-related if config.nthread is None: # use physical core number - config = config._replace(nthread=cpu_count(logical=False)) + if num_threads is None: + config = config._replace(nthread=cpu_count(logical=False)) + else: + config = config._replace(nthread=num_threads) + self.config = config # behavior of randomness self.num_warmup_samples = num_warmup_samples diff --git a/python/tvm/meta_schedule/tune.py b/python/tvm/meta_schedule/tune.py index a69c8f126272..c13d652ef7bd 100644 --- a/python/tvm/meta_schedule/tune.py +++ b/python/tvm/meta_schedule/tune.py @@ -86,10 +86,16 @@ def tune_tasks( database : Database The database with all tuning records """ + if len(tasks) == 0: + raise ValueError("No tasks to tune." 
) + if len(tasks) != len(task_weights): raise ValueError( f"Length of tasks ({len(tasks)}) and task_weights ({len(task_weights)}) do not match." ) + + num_threads = tasks[0].num_threads + if max_trials_per_task is None: max_trials_per_task = max_trials_global if not isinstance(builder, Builder): @@ -101,7 +107,7 @@ def tune_tasks( elif not isinstance(database, Database): database = Database.create(database, module_equality=module_equality) if not isinstance(cost_model, CostModel): - cost_model = CostModel.create(cost_model) + cost_model = CostModel.create(cost_model, num_threads=num_threads) if isinstance(measure_callbacks, MeasureCallback): measure_callbacks = [measure_callbacks] elif measure_callbacks == "default": From bb7b06bad83fa4047334fa3ad32327e085f3556a Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Tue, 6 Dec 2022 01:06:36 -0800 Subject: [PATCH 03/11] fix default --- python/tvm/meta_schedule/relay_integration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tvm/meta_schedule/relay_integration.py b/python/tvm/meta_schedule/relay_integration.py index d759cf853e79..3649ec255458 100644 --- a/python/tvm/meta_schedule/relay_integration.py +++ b/python/tvm/meta_schedule/relay_integration.py @@ -249,7 +249,7 @@ def tune_relay( strategy: SearchStrategy.SearchStrategyType = "evolutionary", seed: Optional[int] = None, module_equality: str = "structural", - num_threads: Optional[int] = None, + num_threads: Union[Literal["physical", "logical"], int] = "physical", ) -> Database: """Tune a Relay program. @@ -297,7 +297,7 @@ def tune_relay( given module. The "ignore-ndarray" varint is used for the extracted blocks or in case no anchor block is found. For the definition of the anchor block, see tir/analysis/analysis.py. - num_threads : Optional[int] + num_threads : Union[Literal["physical", "logical"], int] The number of threads to use. 
Returns From 788f33acd9213f2091c30a06b85fd0483c616655 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Tue, 6 Dec 2022 02:08:27 -0800 Subject: [PATCH 04/11] pass num_threads as max_workers to Builder and Runner --- python/tvm/contrib/hexagon/meta_schedule.py | 12 ++++++++---- python/tvm/meta_schedule/runner/runner.py | 2 ++ python/tvm/meta_schedule/tune.py | 4 ++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/python/tvm/contrib/hexagon/meta_schedule.py b/python/tvm/contrib/hexagon/meta_schedule.py index dcc7d232d8c4..5589591dc3d0 100644 --- a/python/tvm/contrib/hexagon/meta_schedule.py +++ b/python/tvm/contrib/hexagon/meta_schedule.py @@ -128,7 +128,8 @@ def _worker_func(hexagon_launcher, evaluator_config, alloc_repeat, artifact_path return costs -def get_hexagon_local_builder(pass_context: tvm.transform.PassContext = None): +def get_hexagon_local_builder(pass_context: tvm.transform.PassContext = None, + max_workers: Optional[int] = None): """Return Hexagon-compatible Builder for meta schedule.""" def export_func(mod): @@ -143,13 +144,15 @@ def default_build_with_context( return tvm_build(mod, target=target) if pass_context is not None: - return LocalBuilder(f_build=default_build_with_context, f_export=export_func) + return LocalBuilder(f_build=default_build_with_context, f_export=export_func, + max_workers=max_workers) else: - return LocalBuilder(f_export=export_func) + return LocalBuilder(f_export=export_func, max_workers=max_workers) def get_hexagon_rpc_runner( - hexagon_launcher: HexagonLauncherRPC, number=3, repeat=1, min_repeat_ms=100 + hexagon_launcher: HexagonLauncherRPC, number=3, repeat=1, min_repeat_ms=100, + max_workers: Optional[int] = None ): """Return Hexagon-compatible RPC Runner for meta schedule. @@ -180,4 +183,5 @@ def get_hexagon_rpc_runner( return HexagonRPCRunner( hexagon_launcher, evaluator_config, + max_workers=max_workers ) diff --git a/python/tvm/meta_schedule/runner/runner.py b/python/tvm/meta_schedule/runner/runner.py index 1753d8b4abf9..1a8f78414e91 100644 --- a/python/tvm/meta_schedule/runner/runner.py +++ b/python/tvm/meta_schedule/runner/runner.py @@ -194,6 +194,8 @@ def create( # pylint: disable=keyword-arg-before-vararg from . 
import LocalRunner, RPCRunner # pylint: disable=import-outside-toplevel if kind == "local": + if "max_workers" in kwargs: + kwargs.pop("max_workers") return LocalRunner(*args, **kwargs) # type: ignore elif kind == "rpc": return RPCRunner(*args, **kwargs) # type: ignore diff --git a/python/tvm/meta_schedule/tune.py b/python/tvm/meta_schedule/tune.py index c13d652ef7bd..d10aeb322e7c 100644 --- a/python/tvm/meta_schedule/tune.py +++ b/python/tvm/meta_schedule/tune.py @@ -99,9 +99,9 @@ def tune_tasks( if max_trials_per_task is None: max_trials_per_task = max_trials_global if not isinstance(builder, Builder): - builder = Builder.create(builder) + builder = Builder.create(builder, max_workers=num_threads) if not isinstance(runner, Runner): - runner = Runner.create(runner) + runner = Runner.create(runner, max_workers=num_threads) if database == "json": database = Database.create(database, work_dir=work_dir, module_equality=module_equality) elif not isinstance(database, Database): From 5b3a93b0b8e36037803c1ac415dd74752bc769f2 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Tue, 6 Dec 2022 02:19:07 -0800 Subject: [PATCH 05/11] add test --- .../test_hexagon/metaschedule_e2e/test_resnet50_int8.py | 8 ++++++-- tests/python/contrib/test_hexagon/test_meta_schedule.py | 6 ++++-- .../unittest/test_meta_schedule_relay_integration.py | 1 + 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/python/contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py b/tests/python/contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py index e15b0a4e7ddb..a49361375935 100644 --- a/tests/python/contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py +++ b/tests/python/contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py @@ -33,6 +33,7 @@ get_hexagon_rpc_runner, ) from tvm.meta_schedule import postproc, schedule_rule +from tvm.meta_schedule.utils import cpu_count from tvm.tir.schedule import BlockRV, Schedule from tvm.tir.schedule.analysis import has_block from tvm.tir.tensor_intrin.hexagon import ( @@ -110,6 +111,8 @@ def tune_vrmpy_auto_tensorize(mod, params, hexagon_launcher): # task extraction and relay.build(...). mod = mod.with_attr("executor", EXECUTOR) + num_threads = cpu_count(physical=True) + with tempfile.TemporaryDirectory() as work_dir: database = ms.relay_integration.tune_relay( mod=mod, @@ -125,8 +128,8 @@ def tune_vrmpy_auto_tensorize(mod, params, hexagon_launcher): # num_trials_per_iter=32, # max_trials_per_task=128, # strategy="evolutionary", - builder=get_hexagon_local_builder(), - runner=get_hexagon_rpc_runner(hexagon_launcher, number=20), + builder=get_hexagon_local_builder(max_workers=num_threads), + runner=get_hexagon_rpc_runner(hexagon_launcher, number=20, max_workers=num_threads), space=ms.space_generator.PostOrderApply( sch_rules=sch_rules, postprocs=postprocs, @@ -137,6 +140,7 @@ def tune_vrmpy_auto_tensorize(mod, params, hexagon_launcher): # It reduces the number of conv2d tuning tasks in the int8 resnet50 model # from 36 to 23, with negligible performance difference. 
module_equality="anchor-block", + num_threads=num_threads, ) return ms.relay_integration.compile_relay( database=database, diff --git a/tests/python/contrib/test_hexagon/test_meta_schedule.py b/tests/python/contrib/test_hexagon/test_meta_schedule.py index a83a3b279a7f..d3d9adb6bea9 100644 --- a/tests/python/contrib/test_hexagon/test_meta_schedule.py +++ b/tests/python/contrib/test_hexagon/test_meta_schedule.py @@ -73,8 +73,10 @@ def test_builder_runner(hexagon_launcher): mod = MatmulModule - builder = get_hexagon_local_builder() - runner = get_hexagon_rpc_runner(hexagon_launcher, number=1, repeat=1, min_repeat_ms=0) + max_workers = 4 + builder = get_hexagon_local_builder(max_workers=max_workers) + runner = get_hexagon_rpc_runner(hexagon_launcher, number=1, repeat=1, min_repeat_ms=0, + max_workers=max_workers) (builder_result,) = builder.build([BuilderInput(mod, get_hexagon_target("v68"))]) assert builder_result.artifact_path is not None diff --git a/tests/python/unittest/test_meta_schedule_relay_integration.py b/tests/python/unittest/test_meta_schedule_relay_integration.py index 021db0f86ad2..b462f0c8660e 100644 --- a/tests/python/unittest/test_meta_schedule_relay_integration.py +++ b/tests/python/unittest/test_meta_schedule_relay_integration.py @@ -742,6 +742,7 @@ def _test_anchor_tuning(target): max_trials_global=4, strategy="replay-trace", module_equality=module_equality, + num_threads=4, ) lib = ms.relay_integration.compile_relay(database, mod, target, params) From d42a14abb13d2d7552a2f37c214d9fd0d42082ce Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Tue, 6 Dec 2022 02:32:33 -0800 Subject: [PATCH 06/11] clean up --- python/tvm/contrib/hexagon/meta_schedule.py | 23 ++++++------ python/tvm/meta_schedule/tune.py | 2 +- .../metaschedule_e2e/test_resnet50_int8.py | 35 +++++++++---------- .../test_hexagon/test_meta_schedule.py | 5 +-- 4 files changed, 32 insertions(+), 33 deletions(-) diff --git a/python/tvm/contrib/hexagon/meta_schedule.py b/python/tvm/contrib/hexagon/meta_schedule.py index 5589591dc3d0..6e1541e498a9 100644 --- a/python/tvm/contrib/hexagon/meta_schedule.py +++ b/python/tvm/contrib/hexagon/meta_schedule.py @@ -128,8 +128,9 @@ def _worker_func(hexagon_launcher, evaluator_config, alloc_repeat, artifact_path return costs -def get_hexagon_local_builder(pass_context: tvm.transform.PassContext = None, - max_workers: Optional[int] = None): +def get_hexagon_local_builder( + pass_context: tvm.transform.PassContext = None, max_workers: Optional[int] = None +): """Return Hexagon-compatible Builder for meta schedule.""" def export_func(mod): @@ -144,15 +145,19 @@ def default_build_with_context( return tvm_build(mod, target=target) if pass_context is not None: - return LocalBuilder(f_build=default_build_with_context, f_export=export_func, - max_workers=max_workers) + return LocalBuilder( + f_build=default_build_with_context, f_export=export_func, max_workers=max_workers + ) else: return LocalBuilder(f_export=export_func, max_workers=max_workers) def get_hexagon_rpc_runner( - hexagon_launcher: HexagonLauncherRPC, number=3, repeat=1, min_repeat_ms=100, - max_workers: Optional[int] = None + hexagon_launcher: HexagonLauncherRPC, + number=3, + repeat=1, + min_repeat_ms=100, + max_workers: Optional[int] = None, ): """Return Hexagon-compatible RPC Runner for meta schedule. 
@@ -180,8 +185,4 @@ def get_hexagon_rpc_runner( enable_cpu_cache_flush=False, ) - return HexagonRPCRunner( - hexagon_launcher, - evaluator_config, - max_workers=max_workers - ) + return HexagonRPCRunner(hexagon_launcher, evaluator_config, max_workers=max_workers) diff --git a/python/tvm/meta_schedule/tune.py b/python/tvm/meta_schedule/tune.py index d10aeb322e7c..f0869b5b6464 100644 --- a/python/tvm/meta_schedule/tune.py +++ b/python/tvm/meta_schedule/tune.py @@ -87,7 +87,7 @@ def tune_tasks( The database with all tuning records """ if len(tasks) == 0: - raise ValueError("No tasks to tune." ) + raise ValueError("No tasks to tune.") if len(tasks) != len(task_weights): raise ValueError( diff --git a/tests/python/contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py b/tests/python/contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py index a49361375935..f2a7a81bf965 100644 --- a/tests/python/contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py +++ b/tests/python/contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py @@ -45,10 +45,24 @@ from ..infrastructure import get_hexagon_target MODEL_JSON = "resnet50_int8.json" +MODEL_PARAMS = "resnet50_int8.params" EXECUTOR = relay.backend.Executor("graph", {"link-params": True}) TARGET_LLVM = tvm.target.Target("llvm") TARGET_HEXAGON = get_hexagon_target("v68") -MODEL_PARAMS = "resnet50_int8.params" + + +def load_model(): + """Load renset50 model.""" + if not os.path.exists(MODEL_JSON): + pytest.skip(msg="Run python export_models.py first.") + + with open(MODEL_JSON, "r") as file: + mod = tvm.ir.load_json(file.read()) + + with open(MODEL_PARAMS, "rb") as file: + params = relay.load_param_dict(file.read()) + + return mod, params def tune_vrmpy_auto_tensorize(mod, params, hexagon_launcher): @@ -160,11 +174,8 @@ def test_resnet50(hexagon_launcher): if not os.path.exists(MODEL_JSON): pytest.skip(msg="Run python export_models.py first.") - with open(MODEL_JSON, "r") as file: - mod = tvm.ir.load_json(file.read()) + mod, params = load_model() - with open(MODEL_PARAMS, "rb") as file: - params = relay.load_param_dict(file.read()) inp = np.random.randn(1, 3, 224, 224).astype("float32") input_name = "image" @@ -235,20 +246,6 @@ def evaluate_mod(hexagon_launcher, hexagon_lowered, llvm_lowered, input_name, in np.testing.assert_allclose(ref_result, output, atol=1e-4, rtol=1e-5) -def load_model(): - """Load renset50 model.""" - if not os.path.exists(MODEL_JSON): - pytest.skip(msg="Run python export_models.py first.") - - with open(MODEL_JSON, "r") as file: - mod = tvm.ir.load_json(file.read()) - - with open(MODEL_PARAMS, "rb") as file: - params = relay.load_param_dict(file.read()) - - return mod, params - - def _schedule_packed_8x8x32_conv2d(): """Manually schedule a conv2d block, created from TE compute op via CreatePrimFunc, using 8x8x32 packed layout. 
diff --git a/tests/python/contrib/test_hexagon/test_meta_schedule.py b/tests/python/contrib/test_hexagon/test_meta_schedule.py index d3d9adb6bea9..1089f0f03589 100644 --- a/tests/python/contrib/test_hexagon/test_meta_schedule.py +++ b/tests/python/contrib/test_hexagon/test_meta_schedule.py @@ -75,8 +75,9 @@ def test_builder_runner(hexagon_launcher): max_workers = 4 builder = get_hexagon_local_builder(max_workers=max_workers) - runner = get_hexagon_rpc_runner(hexagon_launcher, number=1, repeat=1, min_repeat_ms=0, - max_workers=max_workers) + runner = get_hexagon_rpc_runner( + hexagon_launcher, number=1, repeat=1, min_repeat_ms=0, max_workers=max_workers + ) (builder_result,) = builder.build([BuilderInput(mod, get_hexagon_target("v68"))]) assert builder_result.artifact_path is not None From 71452b59a4a6f7d328d6bbd1a2498f938a374d3f Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Wed, 7 Dec 2022 04:13:44 +0900 Subject: [PATCH 07/11] fix kwarg --- .../contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py b/tests/python/contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py index f2a7a81bf965..7f3bc588aeb2 100644 --- a/tests/python/contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py +++ b/tests/python/contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py @@ -125,7 +125,7 @@ def tune_vrmpy_auto_tensorize(mod, params, hexagon_launcher): # task extraction and relay.build(...). mod = mod.with_attr("executor", EXECUTOR) - num_threads = cpu_count(physical=True) + num_threads = cpu_count(logical=False) with tempfile.TemporaryDirectory() as work_dir: database = ms.relay_integration.tune_relay( From 892e571d8f1b63fa002d4b867673d02527e8fba7 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Thu, 8 Dec 2022 11:12:07 +0900 Subject: [PATCH 08/11] num_threads -> num_tuning_cores --- .../tvm/meta_schedule/cost_model/cost_model.py | 6 +++--- python/tvm/meta_schedule/cost_model/xgb_model.py | 6 +++--- python/tvm/meta_schedule/relay_integration.py | 16 ++++++++-------- python/tvm/meta_schedule/tir_integration.py | 8 ++++---- python/tvm/meta_schedule/tune.py | 8 ++++---- .../metaschedule_e2e/test_resnet50_int8.py | 8 ++++---- .../test_meta_schedule_relay_integration.py | 2 +- 7 files changed, 27 insertions(+), 27 deletions(-) diff --git a/python/tvm/meta_schedule/cost_model/cost_model.py b/python/tvm/meta_schedule/cost_model/cost_model.py index 3f7d9eeafff6..c0f6ea5fb9e1 100644 --- a/python/tvm/meta_schedule/cost_model/cost_model.py +++ b/python/tvm/meta_schedule/cost_model/cost_model.py @@ -127,9 +127,9 @@ def create( if kind == "xgb": return XGBModel(*args, **kwargs) # type: ignore - if "num_threads" in kwargs: - # num_threads is only relevant for XGBModel. - kwargs.pop("num_threads") + if "num_tuning_cores" in kwargs: + # num_tuning_cores is only relevant for XGBModel. 
+ kwargs.pop("num_tuning_cores") if kind == "random": return RandomModel(*args, **kwargs) # type: ignore diff --git a/python/tvm/meta_schedule/cost_model/xgb_model.py b/python/tvm/meta_schedule/cost_model/xgb_model.py index bef36805e726..901e18ce3fa5 100644 --- a/python/tvm/meta_schedule/cost_model/xgb_model.py +++ b/python/tvm/meta_schedule/cost_model/xgb_model.py @@ -333,7 +333,7 @@ def __init__( verbose_eval: int = 25, average_peak_n: int = 32, adaptive_training: bool = True, - num_threads: Optional[int] = None, + num_tuning_cores: Optional[int] = None, ): super().__init__() if not isinstance(extractor, FeatureExtractor): @@ -343,10 +343,10 @@ def __init__( # model-related if config.nthread is None: # use physical core number - if num_threads is None: + if num_tuning_cores is None: config = config._replace(nthread=cpu_count(logical=False)) else: - config = config._replace(nthread=num_threads) + config = config._replace(nthread=num_tuning_cores) self.config = config # behavior of randomness diff --git a/python/tvm/meta_schedule/relay_integration.py b/python/tvm/meta_schedule/relay_integration.py index 3649ec255458..4bf54a6ac8dc 100644 --- a/python/tvm/meta_schedule/relay_integration.py +++ b/python/tvm/meta_schedule/relay_integration.py @@ -180,7 +180,7 @@ def extracted_tasks_to_tune_contexts( work_dir: str, space: SpaceGenerator.SpaceGeneratorType = "post-order-apply", strategy: SearchStrategy.SearchStrategyType = "evolutionary", - num_threads: Union[Literal["physical", "logical"], int] = "physical", + num_tuning_cores: Union[Literal["physical", "logical"], int] = "physical", seed: Optional[int] = None, ) -> Tuple[List[TuneContext], List[float]]: """Convert ExtractedTask to TuneContext. @@ -195,8 +195,8 @@ def extracted_tasks_to_tune_contexts( The space generator to use. strategy : SearchStrategy.SearchStrategyType The search strategy to use. - num_threads : Union[Literal["physical", "logical"], int] - The number of threads to use in multi-threaded search algorithm. + num_tuning_cores : Union[Literal["physical", "logical"], int] + The number of CPU cores to use during tuning. seed : Optional[int] The random seed to use. @@ -223,7 +223,7 @@ def extracted_tasks_to_tune_contexts( task_name=task.task_name, logger=logger, rand_state=rand_state, - num_threads=num_threads, + num_threads=num_tuning_cores, ).clone() ) task_weights.append(task.weight) @@ -249,7 +249,7 @@ def tune_relay( strategy: SearchStrategy.SearchStrategyType = "evolutionary", seed: Optional[int] = None, module_equality: str = "structural", - num_threads: Union[Literal["physical", "logical"], int] = "physical", + num_tuning_cores: Union[Literal["physical", "logical"], int] = "physical", ) -> Database: """Tune a Relay program. @@ -297,8 +297,8 @@ def tune_relay( given module. The "ignore-ndarray" varint is used for the extracted blocks or in case no anchor block is found. For the definition of the anchor block, see tir/analysis/analysis.py. - num_threads : Union[Literal["physical", "logical"], int] - The number of threads to use. + num_tuning_cores : Union[Literal["physical", "logical"], int] + The number of CPU cores to use during Tuning. 
Returns ------- @@ -311,7 +311,7 @@ def tune_relay( space=space, strategy=strategy, seed=seed, - num_threads=num_threads, + num_tuning_cores=num_tuning_cores, ) return tune_tasks( tasks=tasks, diff --git a/python/tvm/meta_schedule/tir_integration.py b/python/tvm/meta_schedule/tir_integration.py index 975987ebcb67..f3d505c28b0e 100644 --- a/python/tvm/meta_schedule/tir_integration.py +++ b/python/tvm/meta_schedule/tir_integration.py @@ -54,7 +54,7 @@ def tune_tir( space: SpaceGenerator.SpaceGeneratorType = "post-order-apply", strategy: SearchStrategy.SearchStrategyType = "evolutionary", task_name: str = "main", - num_threads: Union[Literal["physical", "logical"], int] = "physical", + num_tuning_cores: Union[Literal["physical", "logical"], int] = "physical", seed: Optional[int] = None, ) -> Database: """Tune a TIR function. @@ -89,8 +89,8 @@ def tune_tir( The search strategy. task_name : str The name of the task. - num_threads : Union[Literal["physical", "logical"], int] - The number of threads to use. + num_tuning_cores : Union[Literal["physical", "logical"], int] + The number of CPU cores to use during tuning. seed : Optional[int] The seed for the random number generator. @@ -111,7 +111,7 @@ def tune_tir( task_name=task_name, logger=logger, rand_state=seed, - num_threads=num_threads, + num_threads=num_tuning_cores, ).clone() ], task_weights=[1.0], diff --git a/python/tvm/meta_schedule/tune.py b/python/tvm/meta_schedule/tune.py index f0869b5b6464..3bcb827a9727 100644 --- a/python/tvm/meta_schedule/tune.py +++ b/python/tvm/meta_schedule/tune.py @@ -94,20 +94,20 @@ def tune_tasks( f"Length of tasks ({len(tasks)}) and task_weights ({len(task_weights)}) do not match." ) - num_threads = tasks[0].num_threads + num_cores = tasks[0].num_cores if max_trials_per_task is None: max_trials_per_task = max_trials_global if not isinstance(builder, Builder): - builder = Builder.create(builder, max_workers=num_threads) + builder = Builder.create(builder, max_workers=num_cores) if not isinstance(runner, Runner): - runner = Runner.create(runner, max_workers=num_threads) + runner = Runner.create(runner, max_workers=num_cores) if database == "json": database = Database.create(database, work_dir=work_dir, module_equality=module_equality) elif not isinstance(database, Database): database = Database.create(database, module_equality=module_equality) if not isinstance(cost_model, CostModel): - cost_model = CostModel.create(cost_model, num_threads=num_threads) + cost_model = CostModel.create(cost_model, num_tuning_cores=num_cores) if isinstance(measure_callbacks, MeasureCallback): measure_callbacks = [measure_callbacks] elif measure_callbacks == "default": diff --git a/tests/python/contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py b/tests/python/contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py index 7f3bc588aeb2..1e01cb28a749 100644 --- a/tests/python/contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py +++ b/tests/python/contrib/test_hexagon/metaschedule_e2e/test_resnet50_int8.py @@ -125,7 +125,7 @@ def tune_vrmpy_auto_tensorize(mod, params, hexagon_launcher): # task extraction and relay.build(...). 
mod = mod.with_attr("executor", EXECUTOR) - num_threads = cpu_count(logical=False) + num_cores = cpu_count(logical=False) with tempfile.TemporaryDirectory() as work_dir: database = ms.relay_integration.tune_relay( @@ -142,8 +142,8 @@ def tune_vrmpy_auto_tensorize(mod, params, hexagon_launcher): # num_trials_per_iter=32, # max_trials_per_task=128, # strategy="evolutionary", - builder=get_hexagon_local_builder(max_workers=num_threads), - runner=get_hexagon_rpc_runner(hexagon_launcher, number=20, max_workers=num_threads), + builder=get_hexagon_local_builder(max_workers=num_cores), + runner=get_hexagon_rpc_runner(hexagon_launcher, number=20, max_workers=num_cores), space=ms.space_generator.PostOrderApply( sch_rules=sch_rules, postprocs=postprocs, @@ -154,7 +154,7 @@ def tune_vrmpy_auto_tensorize(mod, params, hexagon_launcher): # It reduces the number of conv2d tuning tasks in the int8 resnet50 model # from 36 to 23, with negligible performance difference. module_equality="anchor-block", - num_threads=num_threads, + num_tuning_cores=num_cores, ) return ms.relay_integration.compile_relay( database=database, diff --git a/tests/python/unittest/test_meta_schedule_relay_integration.py b/tests/python/unittest/test_meta_schedule_relay_integration.py index b462f0c8660e..062da0b00ca3 100644 --- a/tests/python/unittest/test_meta_schedule_relay_integration.py +++ b/tests/python/unittest/test_meta_schedule_relay_integration.py @@ -742,7 +742,7 @@ def _test_anchor_tuning(target): max_trials_global=4, strategy="replay-trace", module_equality=module_equality, - num_threads=4, + num_tuning_cores=4, ) lib = ms.relay_integration.compile_relay(database, mod, target, params) From bc78b009207b9442a549690a6567c6d80193215a Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Wed, 7 Dec 2022 18:17:40 -0800 Subject: [PATCH 09/11] typo --- python/tvm/meta_schedule/tune.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/meta_schedule/tune.py b/python/tvm/meta_schedule/tune.py index 3bcb827a9727..0c4035844c71 100644 --- a/python/tvm/meta_schedule/tune.py +++ b/python/tvm/meta_schedule/tune.py @@ -94,7 +94,7 @@ def tune_tasks( f"Length of tasks ({len(tasks)}) and task_weights ({len(task_weights)}) do not match." 
) - num_cores = tasks[0].num_cores + num_cores = tasks[0].num_threads if max_trials_per_task is None: max_trials_per_task = max_trials_global From eee7db8e4a78dc8ab52dfbf8ce26a912e73a585e Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Thu, 8 Dec 2022 11:25:33 +0900 Subject: [PATCH 10/11] num_threads -> num_tuning_cores in contrib/torch --- python/tvm/contrib/torch/as_torch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tvm/contrib/torch/as_torch.py b/python/tvm/contrib/torch/as_torch.py index 918ce3ff3b6a..c4ca88adf738 100644 --- a/python/tvm/contrib/torch/as_torch.py +++ b/python/tvm/contrib/torch/as_torch.py @@ -67,7 +67,7 @@ def tune( space: ms.SpaceGenerator.SpaceGeneratorType = "post-order-apply", strategy: ms.SearchStrategy.SearchStrategyType = "replay-trace", task_name: str = "main", - num_threads: Union[Literal["physical", "logical"], int] = "physical", + num_tuning_cores: Union[Literal["physical", "logical"], int] = "physical", seed: Optional[int] = None, ) -> None: """ @@ -100,7 +100,7 @@ def tune( space=space, strategy=strategy, task_name=task_name, - num_threads=num_threads, + num_tuning_cores=num_tuning_cores, seed=seed, ) sch = ms.tir_integration.compile_tir(database, self.ir_module, target) From 725944b88582b75e3ff08879b0203f24d95b791d Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Thu, 8 Dec 2022 16:14:57 +0900 Subject: [PATCH 11/11] typo in document --- python/tvm/meta_schedule/relay_integration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/meta_schedule/relay_integration.py b/python/tvm/meta_schedule/relay_integration.py index 4bf54a6ac8dc..0b8705aafea9 100644 --- a/python/tvm/meta_schedule/relay_integration.py +++ b/python/tvm/meta_schedule/relay_integration.py @@ -298,7 +298,7 @@ def tune_relay( blocks or in case no anchor block is found. For the definition of the anchor block, see tir/analysis/analysis.py. num_tuning_cores : Union[Literal["physical", "logical"], int] - The number of CPU cores to use during Tuning. + The number of CPU cores to use during tuning. Returns -------
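
A minimal usage sketch of the num_tuning_cores knob this series adds. This is
an illustration, not part of the series itself: mod, params, and target are
assumed to be defined elsewhere (e.g. a Relay module imported from a
frontend), and max_trials_global is an arbitrary choice.

    import tempfile

    from tvm import meta_schedule as ms
    from tvm.meta_schedule.utils import cpu_count

    # Pin tuning to the physical core count; this mirrors the "physical"
    # default that tune_relay uses after this series.
    num_cores = cpu_count(logical=False)

    with tempfile.TemporaryDirectory() as work_dir:
        database = ms.relay_integration.tune_relay(
            mod=mod,
            params=params,
            target=target,
            work_dir=work_dir,
            max_trials_global=128,
            # Forwarded to each TuneContext, to Builder/Runner creation as
            # max_workers (LocalRunner pops and ignores it), and to
            # XGBModel's nthread setting.
            num_tuning_cores=num_cores,
        )
        lib = ms.relay_integration.compile_relay(database, mod, target, params)

For Hexagon, where the builder and runner are constructed up front rather than
by Builder.create/Runner.create, the same core count is passed explicitly via
get_hexagon_local_builder(max_workers=...) and
get_hexagon_rpc_runner(..., max_workers=...), as the resnet50 int8 test in
this series does.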