From 79131a1f20f43d1d05e56142ced584f2700c6648 Mon Sep 17 00:00:00 2001 From: Tristan Konolige Date: Tue, 27 Oct 2020 16:29:37 -0700 Subject: [PATCH] [FIX][AUTOTVM] Make autotvm work with spawn Like #6671, this PR fixes autotvm when using the spawn start method for multiprocessing. I've added tests to make sure that things work with spawn in CI. --- python/tvm/autotvm/task/task.py | 14 ++++-- .../tvm/autotvm/tuner/xgboost_cost_model.py | 50 +++++++++++++------ .../unittest/test_autotvm_index_tuner.py | 14 ++++++ tests/python/unittest/test_autotvm_measure.py | 15 ++++++ .../unittest/test_autotvm_xgboost_model.py | 15 ++++++ .../auto_scheduler/tune_conv2d_layer_cuda.py | 4 ++ tutorials/auto_scheduler/tune_matmul_x86.py | 4 ++ tutorials/autotvm/tune_conv2d_cuda.py | 4 ++ tutorials/autotvm/tune_relay_arm.py | 4 ++ tutorials/autotvm/tune_relay_cuda.py | 4 ++ tutorials/autotvm/tune_relay_mobile_gpu.py | 4 ++ tutorials/autotvm/tune_relay_x86.py | 4 ++ tutorials/autotvm/tune_simple_template.py | 4 ++ 13 files changed, 121 insertions(+), 19 deletions(-) diff --git a/python/tvm/autotvm/task/task.py b/python/tvm/autotvm/task/task.py index a7cb9a095765..8822ba971e4c 100644 --- a/python/tvm/autotvm/task/task.py +++ b/python/tvm/autotvm/task/task.py @@ -23,15 +23,14 @@ """ import numpy as np -from tvm.target import Target from tvm import runtime from tvm.ir import container +from tvm.target import Target +from tvm.te import placeholder, tensor from tvm.tir import expr -from tvm.te import tensor, placeholder - from ..util import get_const_int, get_const_tuple -from .dispatcher import DispatchContext, ApplyConfig +from .dispatcher import ApplyConfig, DispatchContext from .space import ConfigSpace @@ -173,6 +172,8 @@ def __getstate__(self): # some unpickable local task functions. # So we only pickle the name of the function # and restore the function by name when unpickling it. + import cloudpickle # pylint: disable=import-outside-toplevel + return { "name": self.name, "args": self.args, @@ -181,14 +182,17 @@ def __getstate__(self): "flop": self.flop, "target": self.target, "target_host": self.target_host, + "func": cloudpickle.dumps(self.func), } def __setstate__(self, state): + import cloudpickle # pylint: disable=import-outside-toplevel + self.name = state["name"] self.args = state["args"] self.kwargs = state["kwargs"] self.config_space = state["config_space"] - self.func = _lookup_task(state["name"]) + self.func = cloudpickle.loads(state["func"]) self.flop = state["flop"] self.target = state["target"] self.target_host = state["target_host"] diff --git a/python/tvm/autotvm/tuner/xgboost_cost_model.py b/python/tvm/autotvm/tuner/xgboost_cost_model.py index 7b9df1c99373..f66764c42520 100644 --- a/python/tvm/autotvm/tuner/xgboost_cost_model.py +++ b/python/tvm/autotvm/tuner/xgboost_cost_model.py @@ -153,7 +153,10 @@ def _reset_pool(self, space, target, task): self._close_pool() - # use global variable to pass common arguments + # Use global variables to pass common arguments. These are only used when + # new processes are started with fork. We have to set the globals + # before we create the pool, so that processes in the pool get the + # correct globals.
global _extract_space, _extract_target, _extract_task _extract_space = space _extract_target = target @@ -324,7 +327,12 @@ def _get_feature(self, indexes): if need_extract: pool = self._get_pool() - feas = pool.map(self.feature_extract_func, need_extract) + # If we are forking, we can pass arguments in globals for better performance + if multiprocessing.get_start_method(False) == "fork": + feas = pool.map(self.feature_extract_func, need_extract) + else: + args = [(self.space.get(x), self.target, self.task) for x in need_extract] + feas = pool.map(self.feature_extract_func, args) for i, fea in zip(need_extract, feas): fea_cache[i] = fea @@ -344,18 +352,24 @@ def __del__(self): self._close_pool() +# Global variables for passing arguments to extract functions. _extract_space = None _extract_target = None _extract_task = None -def _extract_itervar_feature_index(index): +def _extract_itervar_feature_index(args): """extract iteration var feature for an index in extract_space""" try: - config = _extract_space.get(index) - with _extract_target: - sch, args = _extract_task.instantiate(config) - fea = feature.get_itervar_feature_flatten(sch, args, take_log=True) + if multiprocessing.get_start_method(False) == "fork": + config = _extract_space.get(args) + with _extract_target: + sch, fargs = _extract_task.instantiate(config) + else: + config, target, task = args + with target: + sch, fargs = task.instantiate(config) + fea = feature.get_itervar_feature_flatten(sch, fargs, take_log=True) fea = np.concatenate((fea, list(config.get_other_option().values()))) return fea except Exception: # pylint: disable=broad-except @@ -381,10 +395,13 @@ def _extract_itervar_feature_log(arg): return None -def _extract_knob_feature_index(index): +def _extract_knob_feature_index(args): """extract knob feature for an index in extract_space""" try: - config = _extract_space.get(index) + if multiprocessing.get_start_method(False) == "fork": + config = _extract_space.get(args) + else: + config = args[0] return config.get_flatten_feature() except Exception: # pylint: disable=broad-except return None @@ -408,13 +425,18 @@ def _extract_knob_feature_log(arg): return None -def _extract_curve_feature_index(index): +def _extract_curve_feature_index(args): """extract sampled curve feature for an index in extract_space""" try: - config = _extract_space.get(index) - with _extract_target: - sch, args = _extract_task.instantiate(config) - fea = feature.get_buffer_curve_sample_flatten(sch, args, sample_n=20) + if multiprocessing.get_start_method(False) == "fork": + config = _extract_space.get(args) + with _extract_target: + sch, fargs = _extract_task.instantiate(config) + else: + config, target, task = args + with target: + sch, fargs = task.instantiate(config) + fea = feature.get_buffer_curve_sample_flatten(sch, fargs, sample_n=20) fea = np.concatenate((fea, list(config.get_other_option().values()))) return np.array(fea) except Exception: # pylint: disable=broad-except diff --git a/tests/python/unittest/test_autotvm_index_tuner.py b/tests/python/unittest/test_autotvm_index_tuner.py index 05f12118e6af..c433d8fb7297 100644 --- a/tests/python/unittest/test_autotvm_index_tuner.py +++ b/tests/python/unittest/test_autotvm_index_tuner.py @@ -16,6 +16,7 @@ # under the License. 
"""Test index based tuners""" +import multiprocessing from test_autotvm_common import DummyRunner, get_sample_task from tvm import autotvm from tvm.autotvm.tuner import GridSearchTuner, RandomTuner @@ -43,6 +44,18 @@ def test_gridsearch_tuner(): assert not tuner.has_next() +def grid_search_spawn(): + assert multiprocessing.get_spawn_method(False) == "spawn" + test_gridsearch_tuner() + + +def test_grid_search_tuner_spawn(): + ctx = multiprocessing.get_context("spawn") + p = ctx.Process(target=test_gridsearch_tuner) + p.start() + p.join() + + def test_random_tuner(): """Test RandomTuner""" @@ -65,4 +78,5 @@ def test_random_tuner(): if __name__ == "__main__": test_gridsearch_tuner() + test_gridsearch_tuner_spawn() test_random_tuner() diff --git a/tests/python/unittest/test_autotvm_measure.py b/tests/python/unittest/test_autotvm_measure.py index c8760d2be1b4..1a18d6122bf0 100644 --- a/tests/python/unittest/test_autotvm_measure.py +++ b/tests/python/unittest/test_autotvm_measure.py @@ -16,6 +16,7 @@ # under the License. """Test builder and runner""" import logging +import multiprocessing import time import numpy as np @@ -46,6 +47,19 @@ def test_task_tuner_without_measurement(): assert tuner.best_flops > 1 +def task_tuner_spawn(): + assert multiprocessing.get_start_method(False) == "spawn" + test_task_tuner_without_measurement() + + +def test_task_tuner_without_measurement_spawn(): + # Subprocesses inherit the spawn method of their parents + ctx = multiprocessing.get_context("spawn") + p = ctx.Process(target=task_tuner_spawn) + p.start() + p.join() + + def test_check_correctness(): task, target = get_sample_task() @@ -77,4 +91,5 @@ def _callback_wrong(tuner, measure_inputs, measure_results): logging.basicConfig(level=logging.INFO) test_task_tuner_without_measurement() + test_task_tuner_without_measurement_spawn() test_check_correctness() diff --git a/tests/python/unittest/test_autotvm_xgboost_model.py b/tests/python/unittest/test_autotvm_xgboost_model.py index 5789a9fad4d5..58b2a4d66344 100644 --- a/tests/python/unittest/test_autotvm_xgboost_model.py +++ b/tests/python/unittest/test_autotvm_xgboost_model.py @@ -16,6 +16,7 @@ # under the License. import time +import multiprocessing import numpy as np import tvm @@ -43,6 +44,19 @@ def test_fit(): upper_model.fit(xs, ys, plan_size=32) +def fit_spawn(): + assert multiprocessing.get_start_method(False) == "spawn" + test_fit() + + +def test_fit_spawn(): + # Subprocesses inherit the spawn method of their parents + ctx = multiprocessing.get_context("spawn") + p = ctx.Process(target=test_fit) + p.start() + p.join() + + def test_tuner(): task, target = get_sample_task() records = get_sample_records(n=100) @@ -53,4 +67,5 @@ def test_tuner(): if __name__ == "__main__": test_fit() + test_fit_spawn() test_tuner() diff --git a/tutorials/auto_scheduler/tune_conv2d_layer_cuda.py b/tutorials/auto_scheduler/tune_conv2d_layer_cuda.py index 10a2d1b44144..42273bf72891 100644 --- a/tutorials/auto_scheduler/tune_conv2d_layer_cuda.py +++ b/tutorials/auto_scheduler/tune_conv2d_layer_cuda.py @@ -30,6 +30,10 @@ find a good schedule in the space. We use a convolution layer as an example in this tutorial. + +Note that this tutorial will not run on Windows or recent versions of macOS. To +get it to run, you will need to wrap the body of this tutorial in a :code:`if +__name__ == "__main__":` block. 
""" import os diff --git a/tutorials/auto_scheduler/tune_matmul_x86.py b/tutorials/auto_scheduler/tune_matmul_x86.py index 81f2e71ff8f7..0f2ebe0e09a4 100644 --- a/tutorials/auto_scheduler/tune_matmul_x86.py +++ b/tutorials/auto_scheduler/tune_matmul_x86.py @@ -27,6 +27,10 @@ find a good schedule in the space. We use matrix multiplication as an example in this tutorial. + +Note that this tutorial will not run on Windows or recent versions of macOS. To +get it to run, you will need to wrap the body of this tutorial in a :code:`if +__name__ == "__main__":` block. """ import os diff --git a/tutorials/autotvm/tune_conv2d_cuda.py b/tutorials/autotvm/tune_conv2d_cuda.py index ce9c19860ff4..b307077905d3 100644 --- a/tutorials/autotvm/tune_conv2d_cuda.py +++ b/tutorials/autotvm/tune_conv2d_cuda.py @@ -22,6 +22,10 @@ This is an advanced tutorial for writing high performance tunable template for NVIDIA GPU. By running auto-tuner on this template, we can outperform the vendor provided library CuDNN in many cases. + +Note that this tutorial will not run on Windows or recent versions of macOS. To +get it to run, you will need to wrap the body of this tutorial in a :code:`if +__name__ == "__main__":` block. """ ###################################################################### diff --git a/tutorials/autotvm/tune_relay_arm.py b/tutorials/autotvm/tune_relay_arm.py index f024ba4f201a..31fda54a9a7e 100644 --- a/tutorials/autotvm/tune_relay_arm.py +++ b/tutorials/autotvm/tune_relay_arm.py @@ -35,6 +35,10 @@ We also released pre-tuned parameters for some arm devices. You can go to `ARM CPU Benchmark `_ to see the results. + +Note that this tutorial will not run on Windows or recent versions of macOS. To +get it to run, you will need to wrap the body of this tutorial in a :code:`if +__name__ == "__main__":` block. """ ###################################################################### diff --git a/tutorials/autotvm/tune_relay_cuda.py b/tutorials/autotvm/tune_relay_cuda.py index 4636103a22e2..e86430767b31 100644 --- a/tutorials/autotvm/tune_relay_cuda.py +++ b/tutorials/autotvm/tune_relay_cuda.py @@ -33,6 +33,10 @@ We also released pre-tuned parameters for some NVIDIA GPUs. You can go to `NVIDIA GPU Benchmark `_ to see the results. + +Note that this tutorial will not run on Windows or recent versions of macOS. To +get it to run, you will need to wrap the body of this tutorial in a :code:`if +__name__ == "__main__":` block. """ ###################################################################### diff --git a/tutorials/autotvm/tune_relay_mobile_gpu.py b/tutorials/autotvm/tune_relay_mobile_gpu.py index 61254662c463..9a112e134f4f 100644 --- a/tutorials/autotvm/tune_relay_mobile_gpu.py +++ b/tutorials/autotvm/tune_relay_mobile_gpu.py @@ -33,6 +33,10 @@ We also released pre-tuned parameters for some arm devices. You can go to `Mobile GPU Benchmark `_ to see the results. + +Note that this tutorial will not run on Windows or recent versions of macOS. To +get it to run, you will need to wrap the body of this tutorial in a :code:`if +__name__ == "__main__":` block. """ ###################################################################### diff --git a/tutorials/autotvm/tune_relay_x86.py b/tutorials/autotvm/tune_relay_x86.py index 1dd947fefd25..b1b7ca29e46a 100644 --- a/tutorials/autotvm/tune_relay_x86.py +++ b/tutorials/autotvm/tune_relay_x86.py @@ -23,6 +23,10 @@ This is a tutorial about how to tune convolution neural network for x86 CPU. 
+ +Note that this tutorial will not run on Windows or recent versions of macOS. To +get it to run, you will need to wrap the body of this tutorial in an :code:`if +__name__ == "__main__":` block. """ import os import numpy as np diff --git a/tutorials/autotvm/tune_simple_template.py b/tutorials/autotvm/tune_simple_template.py index 357abf19a09c..b5167b3c72ab 100644 --- a/tutorials/autotvm/tune_simple_template.py +++ b/tutorials/autotvm/tune_simple_template.py @@ -26,6 +26,10 @@ The second step is running a search algorithm to explore through this space. In this tutorial, you can learn how to perform these two steps in TVM. The whole workflow is illustrated by a matrix multiplication example. + +Note that this tutorial will not run on Windows or recent versions of macOS. To +get it to run, you will need to wrap the body of this tutorial in an :code:`if +__name__ == "__main__":` block. """ ######################################################################
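
Reviewer note: below is a minimal, self-contained sketch of the two behaviors this patch works around; it is not part of the patch, and `make_task`/`run` are illustrative names, not TVM APIs. First, the stdlib pickle cannot serialize local functions or closures, which is why Task.__getstate__/__setstate__ now round-trip self.func through cloudpickle. Second, under the spawn start method each worker re-imports __main__, which is why the tutorials now advise wrapping their bodies in an `if __name__ == "__main__":` block.

import multiprocessing

import cloudpickle


def make_task():
    # A closure like this is what the stdlib pickle rejects; it is the
    # reason Task.__getstate__ serializes self.func with cloudpickle.
    n = 3
    return lambda x: x * n


def run(payload_and_arg):
    payload, arg = payload_and_arg
    func = cloudpickle.loads(payload)  # restore the function in the worker
    return func(arg)


if __name__ == "__main__":
    # Under spawn, each worker re-imports __main__, so any unguarded
    # top-level code would re-execute in every worker process.
    multiprocessing.set_start_method("spawn")
    payload = cloudpickle.dumps(make_task())
    with multiprocessing.Pool(2) as pool:
        print(pool.map(run, [(payload, i) for i in range(4)]))  # [0, 3, 6, 9]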