Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions python/tvm/driver/tvmc/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ def add_run_parser(subparsers, main_parser):
"Profiling may also have an impact on inference time, "
"making it take longer to be generated. (non-micro devices only)",
)
parser.add_argument(
"--end-to-end",
action="store_true",
help="Measure data transfers as well as model execution. This can provide a "
"more realistic performance measurement in many cases.",
)
parser.add_argument(
"--repeat", metavar="N", type=int, default=1, help="run the model n times. Defaults to '1'"
)
Expand Down Expand Up @@ -262,6 +268,7 @@ def drive_run(args):
repeat=args.repeat,
number=args.number,
profile=args.profile,
end_to_end=args.end_to_end,
options=options,
)

Expand Down Expand Up @@ -400,6 +407,7 @@ def run_module(
repeat: int = 10,
number: int = 10,
profile: bool = False,
end_to_end: bool = False,
options: dict = None,
):
"""Run a compiled graph executor module locally or remotely with
Expand Down Expand Up @@ -435,6 +443,10 @@ def run_module(
The number of runs to measure within each repeat.
profile : bool
Whether to profile the run with the debug runtime.
end_to_end : bool
Whether to measure the time of memory copies as well as model
execution. Turning this on can provide a more realistic estimate
of how long running the model in production would take.

Returns
-------
Expand Down Expand Up @@ -557,8 +569,13 @@ def run_module(
module.run()
times = []
else:
# call the benchmarking function of the executor
times = module.benchmark(dev, number=number, repeat=repeat)
# Call the benchmarking function of the executor.
# Optionally measure e2e data transfers from the
# CPU to device memory overheads (e.g. PCIE
# overheads if the device is a discrete GPU).
if end_to_end:
dev = session.cpu()
times = module.benchmark(dev, number=number, repeat=repeat, end_to_end=end_to_end)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should use `session.cpu()` instead of `dev`, since we need to include the copy overheads from CPU memory to, e.g., GPU memory.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for noting that; I added an `if` that sets `dev` to the CPU when `end_to_end` is on.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great, thank you!


logger.debug("Collecting the output tensors.")
num_outputs = module.get_num_outputs()
Expand Down
2 changes: 1 addition & 1 deletion tests/python/driver/tvmc/test_command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_tvmc_cl_workflow(keras_simple, tmpdir_factory):

# Test running the model
output_path = os.path.join(tmpdir, "predictions.npz")
run_str = f"tvmc run --outputs {output_path} {package_path}"
run_str = f"tvmc run --end-to-end --outputs {output_path} {package_path}"
run_args = run_str.split(" ")[1:]
_main(run_args)
assert os.path.exists(output_path)
2 changes: 1 addition & 1 deletion tests/python/driver/tvmc/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_tvmc_workflow(keras_simple):
tvmc_model = tvmc.load(keras_simple)
tuning_records = tvmc.tune(tvmc_model, target="llvm", enable_autoscheduler=True, trials=2)
tvmc_package = tvmc.compile(tvmc_model, tuning_records=tuning_records, target="llvm")
result = tvmc.run(tvmc_package, device="cpu")
result = tvmc.run(tvmc_package, device="cpu", end_to_end=True)
assert type(tvmc_model) is TVMCModel
assert type(tvmc_package) is TVMCPackage
assert type(result) is TVMCResult
Expand Down