Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 45 additions & 20 deletions tests/python/relay/benchmarking/benchmark_vm.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,20 @@
from tvm.contrib import graph_runtime
from tvm import relay
from tvm.relay import testing
from tvm.relay import vm
from tvm.relay import vmobj as _obj


def benchmark_execution(mod,
params,
measure=False,
measure=True,
data_shape=(1, 3, 224, 224),
out_shape=(1, 1000),
dtype='float32'):
def get_tvm_output(mod, data, params, target, ctx, dtype='float32'):
with relay.build_config(opt_level=1):
dtype='float32',
model="unknown"):
def get_graph_runtime_output(mod, data, params, target, ctx,
dtype='float32', number=2, repeat=20):
with relay.build_config(opt_level=3):
graph, lib, params = relay.build(mod, target, params=params)

m = graph_runtime.create(graph, lib, ctx)
Expand All @@ -41,60 +45,81 @@ def get_tvm_output(mod, data, params, target, ctx, dtype='float32'):
out = m.get_output(0, tvm.nd.empty(out_shape, dtype))

if measure:
print("Evaluate graph runtime inference time cost...")
print("Evaluate graph runtime inference cost of {} on "
"{}".format(model, repr(ctx)))
ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=20)
# Measure in millisecond.
prof_res = np.array(ftimer().results) * 1000
print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
print("Mean graph runtime inference time (std dev): %.2f ms (%.2f ms)" %
(np.mean(prof_res), np.std(prof_res)))

return out.asnumpy()

def get_vm_output(mod, data, params, target, ctx, dtype='float32',
                  number=2, repeat=20):
    """Compile ``mod`` for the Relay VM, run it once and return the output.

    ``measure`` and ``model`` are read from the enclosing
    ``benchmark_execution`` scope. When ``measure`` is true, the VM's
    "invoke" entry is timed with ``number``/``repeat`` and the mean and
    standard deviation are printed in milliseconds.

    Parameters
    ----------
    mod : the Relay module to compile.
    data : the input passed to the VM's ``run`` call.
    params : parameters bound at compile time via ``vm.compile``.
    target : compilation target passed to ``vm.compile``.
    ctx : context the VM is initialised on and timed against.
    dtype : dtype the returned numpy array is cast to.
    number, repeat : forwarded to ``time_evaluator``.

    Returns
    -------
    The VM result converted to a numpy array of ``dtype``.
    """
    with relay.build_config(opt_level=3):
        exe = vm.compile(mod, target, params=params)
        rly_vm = vm.VirtualMachine(exe)
        rly_vm.init(ctx)
        result = rly_vm.run(data)

    if measure:
        print("Evaluate vm inference cost of {} on {}".format(model,
                                                              repr(ctx)))
        ftimer = rly_vm.mod.time_evaluator("invoke", ctx, number=number,
                                           repeat=repeat)
        # Measure in millisecond.
        prof_res = np.array(ftimer("main", _obj.Tensor(data)).results) * 1000
        print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))

    return result.asnumpy().astype(dtype)

# random input
data = np.random.uniform(size=data_shape).astype(dtype)
target = "llvm"
ctx = tvm.cpu(0)

tvm_out = get_tvm_output(mod, tvm.nd.array(data.astype(dtype)), params,
target, ctx, dtype)
vm_out = get_tvm_vm_output(mod, tvm.nd.array(data.astype(dtype)), params,
target, ctx, dtype)
tvm_out = get_graph_runtime_output(mod, tvm.nd.array(data.astype(dtype)),
params, target, ctx, dtype)
vm_out = get_vm_output(mod, tvm.nd.array(data.astype(dtype)), params,
target, ctx, dtype)
tvm.testing.assert_allclose(vm_out, tvm_out, rtol=1e-5, atol=1e-5)


def test_mlp():
    """Benchmark the MLP workload on a (1, 1, 28, 28) input with 10 outputs."""
    image_shape = (1, 1, 28, 28)
    mod, params = testing.mlp.get_workload(1)
    # Single labelled run; the unlabelled call it replaced is dropped so the
    # model is not built and timed twice.
    benchmark_execution(mod, params, data_shape=image_shape, out_shape=(1, 10),
                        model="mlp")


def test_vgg():
    """Benchmark the VGG workloads built with 11 and 16 layers."""
    for n in [11, 16]:
        mod, params = testing.vgg.get_workload(1, num_layers=n)
        # Label each run with its depth so the printed timings are
        # distinguishable.
        model = "vgg" + str(n)
        benchmark_execution(mod, params, model=model)


def test_resnet():
    """Benchmark the ResNet workloads built with 18 and 50 layers."""
    for n in [18, 50]:
        mod, params = testing.resnet.get_workload(batch_size=1, num_layers=n)
        model = "resnet" + str(n)
        # `measure` now defaults to True in benchmark_execution, so the old
        # positional `True` argument is no longer needed.
        benchmark_execution(mod, params, model=model)


def test_squeezenet():
    """Benchmark the SqueezeNet workloads for versions 1.0 and 1.1."""
    for version in ['1.0', '1.1']:
        mod, params = testing.squeezenet.get_workload(version=version)
        # `version` is already a string, so it can be appended directly.
        model = "squeezenet" + version
        benchmark_execution(mod, params, model=model)


def test_inception_v3():
    """Benchmark the Inception v3 workload on a (1, 3, 299, 299) input."""
    image_shape = (3, 299, 299)
    mod, params = testing.inception_v3.get_workload(image_shape=image_shape)
    # The workload takes the per-image shape; the benchmark input adds the
    # leading batch dimension of 1.
    benchmark_execution(mod, params, data_shape=(1, 3, 299, 299),
                        model="inception_v3")


def test_dqn():
Expand All @@ -112,7 +137,7 @@ def test_dcgan():

def test_mobilenet():
    """Benchmark the MobileNet workload with batch size 1."""
    mod, params = testing.mobilenet.get_workload(batch_size=1)
    # Single labelled run using benchmark_execution's default shapes.
    benchmark_execution(mod, params, model="mobilenet")

# TODO: enable when the low building performance (several minutes) fixed.
def test_mobilenet_nhwc():
Expand All @@ -124,7 +149,7 @@ def test_mobilenet_nhwc():

def test_densenet():
    """Benchmark the DenseNet workload with batch size 1."""
    mod, params = testing.densenet.get_workload(batch_size=1)
    # Single labelled run using benchmark_execution's default shapes.
    benchmark_execution(mod, params, model="densenet")


if __name__ == '__main__':
Expand Down