14 changes: 9 additions & 5 deletions python/tvm/autotvm/task/task.py
@@ -23,15 +23,14 @@
"""
import numpy as np

from tvm.target import Target
from tvm import runtime
from tvm.ir import container
from tvm.target import Target
from tvm.te import placeholder, tensor
from tvm.tir import expr
from tvm.te import tensor, placeholder


from ..util import get_const_int, get_const_tuple
from .dispatcher import DispatchContext, ApplyConfig
from .dispatcher import ApplyConfig, DispatchContext
from .space import ConfigSpace


@@ -173,6 +172,8 @@ def __getstate__(self):
         # some unpickable local task functions.
         # So we only pickle the name of the function
         # and restore the function by name when unpickling it.
+        import cloudpickle  # pylint: disable=import-outside-toplevel
+
         return {
             "name": self.name,
             "args": self.args,
@@ -181,14 +182,17 @@ def __getstate__(self):
"flop": self.flop,
"target": self.target,
"target_host": self.target_host,
"func": cloudpickle.dumps(self.func),
}

def __setstate__(self, state):
import cloudpickle # pylint: disable=import-outside-toplevel

self.name = state["name"]
self.args = state["args"]
self.kwargs = state["kwargs"]
self.config_space = state["config_space"]
self.func = _lookup_task(state["name"])
self.func = cloudpickle.loads(state["func"])
self.flop = state["flop"]
self.target = state["target"]
self.target_host = state["target_host"]
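The switch from _lookup_task to cloudpickle matters because the standard pickle module serializes functions by qualified name only, which fails for the locally defined task functions mentioned in the comment above. Below is a minimal sketch (not part of the PR; the helper names are hypothetical) of the failure mode cloudpickle avoids:

import pickle

import cloudpickle


def make_task_func(scale):
    # Locally defined task functions like this closure cannot be pickled
    # by name, which is what the standard pickle module requires.
    def task_func(x):
        return x * scale

    return task_func


func = make_task_func(2)

try:
    pickle.dumps(func)
except (pickle.PicklingError, AttributeError) as err:
    print("pickle failed:", err)  # can't pickle a local function by name

restored = cloudpickle.loads(cloudpickle.dumps(func))  # serializes by value
assert restored(21) == 42
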
50 changes: 36 additions & 14 deletions python/tvm/autotvm/tuner/xgboost_cost_model.py
@@ -153,7 +153,10 @@ def _reset_pool(self, space, target, task):

         self._close_pool()
 
-        # use global variable to pass common arguments
+        # Use global variables to pass common arguments. They are only used
+        # when new processes are started with fork. We have to set the globals
+        # before we create the pool, so that processes in the pool get the
+        # correct globals.
         global _extract_space, _extract_target, _extract_task
         _extract_space = space
         _extract_target = target
@@ -324,7 +327,12 @@ def _get_feature(self, indexes):

         if need_extract:
             pool = self._get_pool()
-            feas = pool.map(self.feature_extract_func, need_extract)
+            # If we are forking, we can pass arguments in globals for better performance
+            if multiprocessing.get_start_method(False) == "fork":
+                feas = pool.map(self.feature_extract_func, need_extract)
+            else:
+                args = [(self.space.get(x), self.target, self.task) for x in need_extract]
+                feas = pool.map(self.feature_extract_func, args)
             for i, fea in zip(need_extract, feas):
                 fea_cache[i] = fea

@@ -344,18 +352,24 @@ def __del__(self):
         self._close_pool()
 
 
+# Global variables for passing arguments to extract functions.
 _extract_space = None
 _extract_target = None
 _extract_task = None
 
 
-def _extract_itervar_feature_index(index):
+def _extract_itervar_feature_index(args):
     """extract iteration var feature for an index in extract_space"""
     try:
-        config = _extract_space.get(index)
-        with _extract_target:
-            sch, args = _extract_task.instantiate(config)
-        fea = feature.get_itervar_feature_flatten(sch, args, take_log=True)
+        if multiprocessing.get_start_method(False) == "fork":
+            config = _extract_space.get(args)
+            with _extract_target:
+                sch, fargs = _extract_task.instantiate(config)
+        else:
+            config, target, task = args
+            with target:
+                sch, fargs = task.instantiate(config)
+        fea = feature.get_itervar_feature_flatten(sch, fargs, take_log=True)
         fea = np.concatenate((fea, list(config.get_other_option().values())))
         return fea
     except Exception:  # pylint: disable=broad-except
@@ -381,10 +395,13 @@ def _extract_itervar_feature_log(arg):
         return None
 
 
-def _extract_knob_feature_index(index):
+def _extract_knob_feature_index(args):
     """extract knob feature for an index in extract_space"""
     try:
-        config = _extract_space.get(index)
+        if multiprocessing.get_start_method(False) == "fork":
+            config = _extract_space.get(args)
+        else:
+            config = args[0]
         return config.get_flatten_feature()
     except Exception:  # pylint: disable=broad-except
         return None
@@ -408,13 +425,18 @@ def _extract_knob_feature_log(arg):
         return None
 
 
-def _extract_curve_feature_index(index):
+def _extract_curve_feature_index(args):
    """extract sampled curve feature for an index in extract_space"""
     try:
-        config = _extract_space.get(index)
-        with _extract_target:
-            sch, args = _extract_task.instantiate(config)
-        fea = feature.get_buffer_curve_sample_flatten(sch, args, sample_n=20)
+        if multiprocessing.get_start_method(False) == "fork":
+            config = _extract_space.get(args)
+            with _extract_target:
+                sch, fargs = _extract_task.instantiate(config)
+        else:
+            config, target, task = args
+            with target:
+                sch, fargs = task.instantiate(config)
+        fea = feature.get_buffer_curve_sample_flatten(sch, fargs, sample_n=20)
         fea = np.concatenate((fea, list(config.get_other_option().values())))
         return np.array(fea)
     except Exception:  # pylint: disable=broad-except
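The pattern repeated in these extractors is the heart of the change: under fork, worker processes inherit module globals set before the pool is created, so only a small index has to cross the process boundary; under spawn, workers start from a fresh interpreter, so every argument must be pickled and passed explicitly, which is slower per item and why the fork fast path is kept. A self-contained sketch of the idea (hypothetical names, not the TVM code):

import multiprocessing

_shared_items = None  # inherited by workers only when the pool is forked


def _work(arg):
    # Under fork, `arg` is an index into the inherited global; under
    # spawn, `arg` carries the data itself, since globals are not shared.
    if multiprocessing.get_start_method(False) == "fork":
        item = _shared_items[arg]
    else:
        item = arg
    return item * item


def run(items):
    global _shared_items
    _shared_items = items  # must be set before the pool is created
    with multiprocessing.Pool(2) as pool:
        if multiprocessing.get_start_method(False) == "fork":
            return pool.map(_work, range(len(items)))
        return pool.map(_work, items)


if __name__ == "__main__":
    print(run([1, 2, 3]))  # [1, 4, 9]
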
14 changes: 14 additions & 0 deletions tests/python/unittest/test_autotvm_index_tuner.py
@@ -16,6 +16,7 @@
 # under the License.
 """Test index based tuners"""
 
+import multiprocessing
 from test_autotvm_common import DummyRunner, get_sample_task
 from tvm import autotvm
 from tvm.autotvm.tuner import GridSearchTuner, RandomTuner
@@ -43,6 +44,18 @@ def test_gridsearch_tuner():
     assert not tuner.has_next()
 
 
+def grid_search_spawn():
+    assert multiprocessing.get_start_method(False) == "spawn"
+    test_gridsearch_tuner()
+
+
+def test_grid_search_tuner_spawn():
+    ctx = multiprocessing.get_context("spawn")
+    p = ctx.Process(target=grid_search_spawn)
+    p.start()
+    p.join()
+
+
 def test_random_tuner():
     """Test RandomTuner"""

@@ -65,4 +78,5 @@ def test_random_tuner():

 if __name__ == "__main__":
     test_gridsearch_tuner()
+    test_grid_search_tuner_spawn()
     test_random_tuner()
15 changes: 15 additions & 0 deletions tests/python/unittest/test_autotvm_measure.py
@@ -16,6 +16,7 @@
 # under the License.
 """Test builder and runner"""
 import logging
+import multiprocessing
 import time
 
 import numpy as np
@@ -46,6 +47,19 @@ def test_task_tuner_without_measurement():
     assert tuner.best_flops > 1
 
 
+def task_tuner_spawn():
+    assert multiprocessing.get_start_method(False) == "spawn"
+    test_task_tuner_without_measurement()
+
+
+def test_task_tuner_without_measurement_spawn():
+    # Subprocesses inherit the start method of their parent
+    ctx = multiprocessing.get_context("spawn")
+    p = ctx.Process(target=task_tuner_spawn)
+    p.start()
+    p.join()
+
+
 def test_check_correctness():
     task, target = get_sample_task()

@@ -77,4 +91,5 @@ def _callback_wrong(tuner, measure_inputs, measure_results):
     logging.basicConfig(level=logging.INFO)
 
     test_task_tuner_without_measurement()
+    test_task_tuner_without_measurement_spawn()
     test_check_correctness()
15 changes: 15 additions & 0 deletions tests/python/unittest/test_autotvm_xgboost_model.py
@@ -16,6 +16,7 @@
 # under the License.
 import time
 
+import multiprocessing
 import numpy as np
 
 import tvm
@@ -43,6 +44,19 @@ def test_fit():
     upper_model.fit(xs, ys, plan_size=32)
 
 
+def fit_spawn():
+    assert multiprocessing.get_start_method(False) == "spawn"
+    test_fit()
+
+
+def test_fit_spawn():
+    # Subprocesses inherit the start method of their parent
+    ctx = multiprocessing.get_context("spawn")
+    p = ctx.Process(target=fit_spawn)
+    p.start()
+    p.join()
+
+
 def test_tuner():
     task, target = get_sample_task()
     records = get_sample_records(n=100)
@@ -53,4 +67,5 @@ def test_tuner():

 if __name__ == "__main__":
     test_fit()
+    test_fit_spawn()
     test_tuner()
4 changes: 4 additions & 0 deletions tutorials/auto_scheduler/tune_conv2d_layer_cuda.py
@@ -30,6 +30,10 @@
 find a good schedule in the space.
 
 We use a convolution layer as an example in this tutorial.
+
+Note that this tutorial will not run on Windows or recent versions of macOS. To
+get it to run, you will need to wrap the body of this tutorial in an
+:code:`if __name__ == "__main__":` block.
 """
 
 import os
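The guard this note asks for looks like the following minimal sketch (the tutorial body itself is elided):

def main():
    # ... tutorial body: define the workload, create the tuner, run tuning ...
    pass


if __name__ == "__main__":
    # Prevents re-execution of the tutorial body when multiprocessing's
    # spawn start method (the default on Windows and recent macOS)
    # re-imports this file in worker processes.
    main()
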
4 changes: 4 additions & 0 deletions tutorials/auto_scheduler/tune_matmul_x86.py
@@ -27,6 +27,10 @@
 find a good schedule in the space.
 
 We use matrix multiplication as an example in this tutorial.
+
+Note that this tutorial will not run on Windows or recent versions of macOS. To
+get it to run, you will need to wrap the body of this tutorial in an
+:code:`if __name__ == "__main__":` block.
 """
 
 import os
4 changes: 4 additions & 0 deletions tutorials/autotvm/tune_conv2d_cuda.py
@@ -22,6 +22,10 @@
 This is an advanced tutorial for writing high performance tunable template for
 NVIDIA GPU. By running auto-tuner on this template, we can outperform the
 vendor provided library CuDNN in many cases.
+
+Note that this tutorial will not run on Windows or recent versions of macOS. To
+get it to run, you will need to wrap the body of this tutorial in an
+:code:`if __name__ == "__main__":` block.
 """
 
 ######################################################################
4 changes: 4 additions & 0 deletions tutorials/autotvm/tune_relay_arm.py
@@ -35,6 +35,10 @@
 We also released pre-tuned parameters for some arm devices. You can go to
 `ARM CPU Benchmark <https://github.com/apache/incubator-tvm/wiki/Benchmark#arm-cpu>`_
 to see the results.
+
+Note that this tutorial will not run on Windows or recent versions of macOS. To
+get it to run, you will need to wrap the body of this tutorial in an
+:code:`if __name__ == "__main__":` block.
 """
 
 ######################################################################
4 changes: 4 additions & 0 deletions tutorials/autotvm/tune_relay_cuda.py
@@ -33,6 +33,10 @@
 We also released pre-tuned parameters for some NVIDIA GPUs. You can go to
 `NVIDIA GPU Benchmark <https://github.com/apache/incubator-tvm/wiki/Benchmark#nvidia-gpu>`_
 to see the results.
+
+Note that this tutorial will not run on Windows or recent versions of macOS. To
+get it to run, you will need to wrap the body of this tutorial in an
+:code:`if __name__ == "__main__":` block.
 """
 
 ######################################################################
4 changes: 4 additions & 0 deletions tutorials/autotvm/tune_relay_mobile_gpu.py
@@ -33,6 +33,10 @@
 We also released pre-tuned parameters for some arm devices. You can go to
 `Mobile GPU Benchmark <https://github.com/apache/incubator-tvm/wiki/Benchmark#mobile-gpu>`_
 to see the results.
+
+Note that this tutorial will not run on Windows or recent versions of macOS. To
+get it to run, you will need to wrap the body of this tutorial in an
+:code:`if __name__ == "__main__":` block.
 """
 
 ######################################################################
4 changes: 4 additions & 0 deletions tutorials/autotvm/tune_relay_x86.py
@@ -23,6 +23,10 @@

 This is a tutorial about how to tune convolution neural network
 for x86 CPU.
+
+Note that this tutorial will not run on Windows or recent versions of macOS. To
+get it to run, you will need to wrap the body of this tutorial in an
+:code:`if __name__ == "__main__":` block.
 """
 import os
 import numpy as np
4 changes: 4 additions & 0 deletions tutorials/autotvm/tune_simple_template.py
@@ -26,6 +26,10 @@
 The second step is running a search algorithm to explore through this space.
 In this tutorial, you can learn how to perform these two steps in TVM.
 The whole workflow is illustrated by a matrix multiplication example.
+
+Note that this tutorial will not run on Windows or recent versions of macOS. To
+get it to run, you will need to wrap the body of this tutorial in an
+:code:`if __name__ == "__main__":` block.
 """
 
 ######################################################################