diff --git a/python/tvm/build_module.py b/python/tvm/build_module.py index 8e0d16286d6a..2bb7442bab76 100755 --- a/python/tvm/build_module.py +++ b/python/tvm/build_module.py @@ -379,92 +379,32 @@ def lower(sch, return stmt return ir_pass.MakeAPI(stmt, name, arg_list, 0, cfg.restricted_func) -def build(sch, - args=None, - target=None, - target_host=None, - name="default_function", - binds=None, - postpone_host_codegen=False): - """Build a function with arguments as signature. Code will be generated - for a device specified by the target. For homogeneous execution, a module - that contains both host and device code is returned. For heterogeneous - execution, a list of lowered functions for the host and a module containing - device code are returned, but actual code generation for the host module is - postponed after code generation is finished for all devices. + +def _build_for_device(flist, target, target_host): + """Build the lowered functions for a device with the given compilation + target. Parameters ---------- - sch : tvm.Schedule, or LoweredFunc - The schedule to be builded - - args : list of Buffer or Tensor or Var, optional - The argument lists to the function. + flist : list of LoweredFunc + The schedule to be built. - target : str or :any:`tvm.target.Target`, optional + target : str or :any:`tvm.target.Target` The target and option of the compilation. - target_host : str or :any:`tvm.target.Target` optional - Host compilation target, if target is device. - When TVM compiles device specific program such as CUDA, - we also need host(CPU) side code to interact with the driver - setup the dimensions and parameters correctly. - target_host is used to specify the host side codegen target. - By default, llvm is used if it is enabled, - otherwise a stackvm intepreter is used. - - name : str, optional - The name of result function. - - binds : dict, optional - Dictionary that maps the binding of symbolic buffer to Tensor. 
- By default, a new buffer is created for each tensor in the argument. - - postpone_host_codegen : bool, optional - A bool value that indicates if code generation for the host module - should be postponed. This variable is set to be true for heterogeneous - execution. Otherwise, it is defaulted to false. + target_host : str or :any:`tvm.target.Target` + The host compilation target. Returns ------- - ret : tvm.module, or (list of LoweredFunc, tvm.module) tuple - A module that combines both host and device code is returned when - postpone_host_codegen is not set. Otherwise, a list of lowered - functions for the host and a module contains only device code are - returned. + fhost : list of LoweredFunc + A list of lowered functions for the host. - Note - ---- - See the note on :any:`tvm.target` on target string format. + mdev : tvm.module + A module that contains device code. """ - if isinstance(sch, schedule.Schedule): - if args is None: - raise ValueError("args must be given for build from schedule") - flist = lower(sch, args, - name=name, - binds=binds) - if isinstance(flist, container.LoweredFunc): - flist = [flist] - elif isinstance(sch, container.LoweredFunc): - if args: - raise ValueError("args must be done when build from LoweredFunc") - flist = [sch] - elif isinstance(sch, (list, tuple, container.Array)): - flist = sch - else: - raise ValueError("sch have to be Schedule, LoweredFunc or list of LoweredFunc") - fname_set = set() - for x in flist: - if not isinstance(x, container.LoweredFunc): - raise ValueError("sch have to be Schedule, LoweredFunc or list of LoweredFunc") - if x.name in fname_set: - raise ValueError("Duplicate function name %s" % x.name) - fname_set.add(x.name) - - target = _target.current_target() if target is None else target - target = _target.create(target) if target else _target.create("llvm") + target = _target.create(target) device_type = ndarray.context(target.target_name, 0).device_type - fhost = [] fdevice = [] for func in flist: @@ 
def build(inputs,
          args=None,
          target=None,
          target_host=None,
          name="default_function",
          binds=None):
    """Build a function with arguments as signature.

    Code is generated for every compilation target found in ``inputs``. The
    host-side functions of all targets are compiled into one host module and
    every non-empty device module is imported into it.

    Parameters
    ----------
    inputs : tvm.Schedule, LoweredFunc, list of LoweredFunc, or dict of target to LoweredFunc list
        The schedule or the lowered function(s) to be built. When a dict (or
        Map) is given, each key is a compilation target (str or
        :any:`tvm.target.Target`) and each value is the list of lowered
        functions to build for that target.

    args : list of Buffer or Tensor or Var, optional
        The argument lists to the function. Required when ``inputs`` is a
        Schedule and must be left empty when ``inputs`` is a LoweredFunc.

    target : str or :any:`tvm.target.Target`, optional
        The target and option of the compilation. Only used when ``inputs``
        is not a dict; in that case it falls back to the current target and
        then to "llvm".

    target_host : str or :any:`tvm.target.Target` optional
        Host compilation target, if target is device.
        When TVM compiles device specific program such as CUDA,
        we also need host(CPU) side code to interact with the driver
        setup the dimensions and parameters correctly.
        target_host is used to specify the host side codegen target.
        By default, the first CPU target found among the targets to build is
        used; otherwise llvm is used if it is enabled, otherwise a stackvm
        interpreter is used.

    name : str, optional
        The name of result function. Only used when ``inputs`` is a Schedule.

    binds : dict, optional
        Dictionary that maps the binding of symbolic buffer to Tensor.
        By default, a new buffer is created for each tensor in the argument.
        Only used when ``inputs`` is a Schedule.

    Returns
    -------
    ret : tvm.module
        A module that combines both host and device code.

    Raises
    ------
    ValueError
        If ``inputs`` has an unsupported type, if ``args`` is missing for a
        Schedule or given for a LoweredFunc, if a dict key is not a str or
        Target, if a function to build is not a LoweredFunc, or if two
        functions of the same target share a name.

    Examples
    --------
    There are two typical example uses of this function depending on the type
    of the argument `inputs`:

    1. it is a lowered function (or a list of lowered functions):

    .. code-block:: python

        n = 2
        A = tvm.placeholder((n,), name='A')
        B = tvm.placeholder((n,), name='B')
        C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
        s = tvm.create_schedule(C.op)
        f = tvm.lower(s, [A, B, C], name="test_add")
        m = tvm.build(f, target="llvm")

    2. it is a dict of compilation target to list of lowered functions:

    .. code-block:: python

        n = 2
        A = tvm.placeholder((n,), name='A')
        B = tvm.placeholder((n,), name='B')
        C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
        s1 = tvm.create_schedule(C.op)
        s2 = topi.cpp.cuda.schedule_injective("cuda", [C])
        f1 = tvm.lower(s1, [A, B, C], name="test_add1")
        f2 = tvm.lower(s2, [A, B, C], name="test_add2")
        m = tvm.build({"llvm": [f1], "cuda": [f2]}, target_host="llvm")

    Note
    ----
    See the note on :any:`tvm.target` on target string format.
    """
    # Normalize every non-dict input form into a flat list of lowered
    # functions; dict inputs are taken as-is further below.
    if isinstance(inputs, schedule.Schedule):
        if args is None:
            raise ValueError("args must be given for build from schedule")
        flist = lower(inputs, args,
                      name=name,
                      binds=binds)
        if isinstance(flist, container.LoweredFunc):
            flist = [flist]
    elif isinstance(inputs, container.LoweredFunc):
        if args:
            # The function is already lowered, so its signature is fixed.
            raise ValueError("args must be None when build from LoweredFunc.")
        flist = [inputs]
    elif isinstance(inputs, (list, tuple, container.Array)):
        flist = inputs
    elif not isinstance(inputs, (dict, container.Map)):
        raise ValueError("inputs must be Schedule, LoweredFunc, list of "
                         "LoweredFunc, or dict of target to list of "
                         "LoweredFunc.")

    if not isinstance(inputs, (dict, container.Map)):
        # Single-target build: wrap the function list into a one-entry
        # target -> functions mapping so both input forms share one path.
        target = _target.current_target() if target is None else target
        target = target if target else "llvm"
        target_flist = {target: flist}
    else:
        target_flist = inputs

    # Validate the mapping before any code generation starts.
    for tar, flist in target_flist.items():
        if not isinstance(tar, (str, _target.Target)):
            raise ValueError("The key of inputs must be str or "
                             "_target.Target when inputs is dict.")
        # Function names are only checked for uniqueness within one target.
        # NOTE(review): host functions of all targets are merged into one
        # module below - confirm whether duplicates across targets should be
        # rejected here as well.
        fname_set = set()
        for x in flist:
            if not isinstance(x, container.LoweredFunc):
                raise ValueError("inputs must be Schedule, LoweredFunc, list "
                                 "of LoweredFunc, or dict of str to list of "
                                 "LoweredFunc.")
            if x.name in fname_set:
                raise ValueError("Duplicate function name %s" % x.name)
            fname_set.add(x.name)

    if not target_host:
        # Prefer the first target that runs on the CPU as the host target.
        for tar, _ in target_flist.items():
            tar = _target.create(tar)
            device_type = ndarray.context(tar.target_name, 0).device_type
            if device_type == ndarray.cpu(0).device_type:
                target_host = tar
                break
    if not target_host:
        # No CPU target among the inputs: fall back to a default host backend.
        target_host = "llvm" if module.enabled("llvm") else "stackvm"

    fhost_all = []
    device_modules = []
    for tar, flist in target_flist.items():
        fhost, mdev = _build_for_device(flist, tar, target_host)
        # Save the current lowered functions of the host and the device module.
        fhost_all += fhost
        device_modules.append(mdev)

    # Generate a unified host module.
    mhost = codegen.build_module(fhost_all, str(target_host))

    # Import all modules; targets that produced no device code yield a falsy
    # module and are skipped.
    for mdev in device_modules:
        if mdev:
            mhost.import_module(mdev)
    return mhost
tensor_c = tvm.placeholder(shape, name="C") elemwise_sub = tvm.compute(shape, lambda *i: copy_add_sub(*i) @@ -350,15 +335,10 @@ def check_device(device, target_device): lower_sub = tvm.lower(sub_schedule, [copy_add_sub, tensor_c, elemwise_sub], name="elemwise_sub") - host_funcs_sub, lib_sub = tvm.build(lower_sub, target=target_host, - postpone_host_codegen=True) - host_funcs = host_funcs_add + host_funcs_sub - mhost = tvm.codegen.build_module(host_funcs, target_host) - if lib_add: - mhost.import_module(lib_add) - if lib_sub: - mhost.import_module(lib_sub) + target_flist = {target_device: [lower_add0, lower_add1], target_host: + [lower_sub]} + mhost = tvm.build(target_flist, target_host=target_host) ctx = [host_ctx, device_ctx] params = {} params["A"] = tensor_a = np.random.uniform(