Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions python/tvm/driver/tvmc/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ def add_run_parser(subparsers, main_parser):
"Profiling may also have an impact on inference time, "
"making it take longer to be generated. (non-micro devices only)",
)
parser.add_argument(
"--end-to-end",
action="store_true",
help="Measure data transfers as well as model execution. This can provide a "
"more realistic performance measurement in many cases.",
)
parser.add_argument(
"--repeat", metavar="N", type=int, default=1, help="run the model n times. Defaults to '1'"
)
Expand Down Expand Up @@ -262,6 +268,7 @@ def drive_run(args):
repeat=args.repeat,
number=args.number,
profile=args.profile,
end_to_end=args.end_to_end,
options=options,
)

Expand Down Expand Up @@ -400,6 +407,7 @@ def run_module(
repeat: int = 10,
number: int = 10,
profile: bool = False,
end_to_end: bool = False,
options: dict = None,
):
"""Run a compiled graph executor module locally or remotely with
Expand Down Expand Up @@ -435,6 +443,10 @@ def run_module(
The number of runs to measure within each repeat.
profile : bool
Whether to profile the run with the debug runtime.
end_to_end : bool
Whether to measure the time of memory copies as well as model
execution. Turning this on can provide a more realistic estimate
of how long running the model in production would take.

Returns
-------
Expand Down Expand Up @@ -557,8 +569,13 @@ def run_module(
module.run()
times = []
else:
# call the benchmarking function of the executor
times = module.benchmark(dev, number=number, repeat=repeat)
# Call the benchmarking function of the executor.
# Optionally measure e2e data transfers from the
# CPU to device memory overheads (e.g. PCIE
# overheads if the device is a discrete GPU).
if end_to_end:
dev = session.cpu()
times = module.benchmark(dev, number=number, repeat=repeat, end_to_end=end_to_end)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should use `session.cpu()` instead of `dev`, since we need to include the copy overheads from CPU memory to, e.g., GPU memory.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for noting that; I added an `if` that sets `dev` to the CPU when `end_to_end` is on.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great, thank you!


logger.debug("Collecting the output tensors.")
num_outputs = module.get_num_outputs()
Expand Down
2 changes: 1 addition & 1 deletion tests/python/driver/tvmc/test_command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_tvmc_cl_workflow(keras_simple, tmpdir_factory):

# Test running the model
output_path = os.path.join(tmpdir, "predictions.npz")
run_str = f"tvmc run --outputs {output_path} {package_path}"
run_str = f"tvmc run --end-to-end --outputs {output_path} {package_path}"
run_args = run_str.split(" ")[1:]
_main(run_args)
assert os.path.exists(output_path)
2 changes: 1 addition & 1 deletion tests/python/driver/tvmc/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_tvmc_workflow(keras_simple):
tvmc_model = tvmc.load(keras_simple)
tuning_records = tvmc.tune(tvmc_model, target="llvm", enable_autoscheduler=True, trials=2)
tvmc_package = tvmc.compile(tvmc_model, tuning_records=tuning_records, target="llvm")
result = tvmc.run(tvmc_package, device="cpu")
result = tvmc.run(tvmc_package, device="cpu", end_to_end=True)
assert type(tvmc_model) is TVMCModel
assert type(tvmc_package) is TVMCPackage
assert type(result) is TVMCResult
Expand Down