From fa08cd38b43a0e1ed1729f10cee9747e407238d6 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Tue, 15 Feb 2022 09:27:59 -0800 Subject: [PATCH 1/5] Add an end_to_end benchmarking argument to TVMC run. --- python/tvm/driver/tvmc/runner.py | 19 +++++++++++++++++-- tests/python/driver/tvmc/test_model.py | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/python/tvm/driver/tvmc/runner.py b/python/tvm/driver/tvmc/runner.py index 4f1be94f6523..83883b1923d6 100644 --- a/python/tvm/driver/tvmc/runner.py +++ b/python/tvm/driver/tvmc/runner.py @@ -103,6 +103,12 @@ def add_run_parser(subparsers, main_parser): "Profiling may also have an impact on inference time, " "making it take longer to be generated. (non-micro devices only)", ) + parser.add_argument( + "--end-to-end", + action="store_true", + help="Measure data transfers as well as model execution. This can provide a " + "more realistic performance measurement in many cases.", + ) parser.add_argument( "--repeat", metavar="N", type=int, default=1, help="run the model n times. Defaults to '1'" ) @@ -262,6 +268,7 @@ def drive_run(args): repeat=args.repeat, number=args.number, profile=args.profile, + end_to_end=args.end_to_end, options=options, ) @@ -400,6 +407,7 @@ def run_module( repeat: int = 10, number: int = 10, profile: bool = False, + end_to_end: bool = False, options: dict = None, ): """Run a compiled graph executor module locally or remotely with @@ -435,6 +443,10 @@ def run_module( The number of runs to measure within each repeat. profile : bool Whether to profile the run with the debug runtime. + end_to_end : bool + Whether to measure the time of memory copies as well as model + execution. Turning this on can provide a more realistic estimate + of how long running the model in production would take. Returns ------- @@ -557,8 +569,11 @@ def run_module( module.run() times = [] else: - # call the benchmarking function of the executor - times = module.benchmark(dev, number=number, repeat=repeat) + # Call the benchmarking function of the executor. + # Optionally measure e2e data transfers from the + # CPU to device memory overheads (e.g. PCIE + # overheads if the device is a discrete GPU). + times = module.benchmark(dev, number=number, repeat=repeat, end_to_end=end_to_end) logger.debug("Collecting the output tensors.") num_outputs = module.get_num_outputs() diff --git a/tests/python/driver/tvmc/test_model.py b/tests/python/driver/tvmc/test_model.py index fd2637a85f1f..d0d398b75521 100644 --- a/tests/python/driver/tvmc/test_model.py +++ b/tests/python/driver/tvmc/test_model.py @@ -30,7 +30,7 @@ def test_tvmc_workflow(keras_simple): tvmc_model = tvmc.load(keras_simple) tuning_records = tvmc.tune(tvmc_model, target="llvm", enable_autoscheduler=True, trials=2) tvmc_package = tvmc.compile(tvmc_model, tuning_records=tuning_records, target="llvm") - result = tvmc.run(tvmc_package, device="cpu") + result = tvmc.run(tvmc_package, device="cpu", end_to_end=True) assert type(tvmc_model) is TVMCModel assert type(tvmc_package) is TVMCPackage assert type(result) is TVMCResult From c5df67a6a2ab1087cc52785af464f21732ca6d09 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Tue, 15 Feb 2022 09:33:06 -0800 Subject: [PATCH 2/5] Add command line test. 
--- tests/python/driver/tvmc/test_command_line.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/driver/tvmc/test_command_line.py b/tests/python/driver/tvmc/test_command_line.py index 66a32160522b..6830cf0503c0 100644 --- a/tests/python/driver/tvmc/test_command_line.py +++ b/tests/python/driver/tvmc/test_command_line.py @@ -47,7 +47,7 @@ def test_tvmc_cl_workflow(keras_simple, tmpdir_factory): # Test running the model output_path = os.path.join(tmpdir, "predictions.npz") - run_str = f"tvmc run --outputs {output_path} {package_path}" + run_str = f"tvmc run --end-to-end --outputs {output_path} {package_path}" run_args = run_str.split(" ")[1:] _main(run_args) assert os.path.exists(output_path) From 2541f74bf6972f070dc60740d520195f8ed2ab01 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Tue, 15 Feb 2022 09:34:35 -0800 Subject: [PATCH 3/5] Fix comment syntax. --- python/tvm/driver/tvmc/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/driver/tvmc/runner.py b/python/tvm/driver/tvmc/runner.py index 83883b1923d6..1f7831501fcf 100644 --- a/python/tvm/driver/tvmc/runner.py +++ b/python/tvm/driver/tvmc/runner.py @@ -570,7 +570,7 @@ def run_module( times = [] else: # Call the benchmarking function of the executor. - # Optionally measure e2e data transfers from the + # Optionally measure e2e data transfers from the # CPU to device memory overheads (e.g. PCIE # overheads if the device is a discrete GPU). times = module.benchmark(dev, number=number, repeat=repeat, end_to_end=end_to_end) From 5b25d8b55fa36c18591bc7f507eeb5655f1f26e6 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Tue, 15 Feb 2022 10:44:15 -0800 Subject: [PATCH 4/5] Set device to cpu if end_to_end is on. --- python/tvm/driver/tvmc/runner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/tvm/driver/tvmc/runner.py b/python/tvm/driver/tvmc/runner.py index 1f7831501fcf..1ffb400fc9c9 100644 --- a/python/tvm/driver/tvmc/runner.py +++ b/python/tvm/driver/tvmc/runner.py @@ -573,6 +573,8 @@ def run_module( # Optionally measure e2e data transfers from the # CPU to device memory overheads (e.g. PCIE # overheads if the device is a discrete GPU). + if end_to_end: + dev = session.cpu() times = module.benchmark(dev, number=number, repeat=repeat, end_to_end=end_to_end) logger.debug("Collecting the output tensors.") From 8e5f5d60da5841c4b93b3027fef304ff6fb60ecd Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Wed, 16 Feb 2022 09:12:35 -0800 Subject: [PATCH 5/5] Tickle CI
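Usage sketch (illustration only, not part of the patch series): with these patches applied, the new end-to-end benchmarking mode can be enabled either through the Python API or the command line. The model file "model.onnx", output path "predictions.npz", and package name "compiled_package.tar" below are hypothetical placeholders; the calls mirror the workflows exercised in test_model.py and test_command_line.py above.

    # Python API: benchmark including host<->device data transfers.
    from tvm.driver import tvmc

    tvmc_model = tvmc.load("model.onnx")                  # hypothetical model file
    tvmc_package = tvmc.compile(tvmc_model, target="llvm")
    result = tvmc.run(tvmc_package, device="cpu", end_to_end=True)
    # result is a TVMCResult; with end_to_end=True the reported times include
    # copying inputs to the device and outputs back to the host.

    # Command line: the new --end-to-end flag on `tvmc run`.
    #   tvmc run --end-to-end --outputs predictions.npz compiled_package.tar

Note that when end_to_end is enabled the benchmark is driven from session.cpu() (see patch 4/5), so the host-side copy overhead is captured in the measurement rather than only the device-side kernel time.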