From fa08cd38b43a0e1ed1729f10cee9747e407238d6 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Tue, 15 Feb 2022 09:27:59 -0800 Subject: [PATCH 1/5] Add an end_to_end benchmarking argument to TVMC run. --- python/tvm/driver/tvmc/runner.py | 19 +++++++++++++++++-- tests/python/driver/tvmc/test_model.py | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/python/tvm/driver/tvmc/runner.py b/python/tvm/driver/tvmc/runner.py index 4f1be94f6523..83883b1923d6 100644 --- a/python/tvm/driver/tvmc/runner.py +++ b/python/tvm/driver/tvmc/runner.py @@ -103,6 +103,12 @@ def add_run_parser(subparsers, main_parser): "Profiling may also have an impact on inference time, " "making it take longer to be generated. (non-micro devices only)", ) + parser.add_argument( + "--end-to-end", + action="store_true", + help="Measure data transfers as well as model execution. This can provide a " + "more realistic performance measurement in many cases.", + ) parser.add_argument( "--repeat", metavar="N", type=int, default=1, help="run the model n times. Defaults to '1'" ) @@ -262,6 +268,7 @@ def drive_run(args): repeat=args.repeat, number=args.number, profile=args.profile, + end_to_end=args.end_to_end, options=options, ) @@ -400,6 +407,7 @@ def run_module( repeat: int = 10, number: int = 10, profile: bool = False, + end_to_end: bool = False, options: dict = None, ): """Run a compiled graph executor module locally or remotely with @@ -435,6 +443,10 @@ def run_module( The number of runs to measure within each repeat. profile : bool Whether to profile the run with the debug runtime. + end_to_end : bool + Whether to measure the time of memory copies as well as model + execution. Turning this on can provide a more realistic estimate + of how long running the model in production would take. Returns ------- @@ -557,8 +569,11 @@ def run_module( module.run() times = [] else: - # call the benchmarking function of the executor - times = module.benchmark(dev, number=number, repeat=repeat) + # Call the benchmarking function of the executor. + # Optionally measure e2e data transfers from the + # CPU to device memory overheads (e.g. PCIE + # overheads if the device is a discrete GPU). + times = module.benchmark(dev, number=number, repeat=repeat, end_to_end=end_to_end) logger.debug("Collecting the output tensors.") num_outputs = module.get_num_outputs() diff --git a/tests/python/driver/tvmc/test_model.py b/tests/python/driver/tvmc/test_model.py index fd2637a85f1f..d0d398b75521 100644 --- a/tests/python/driver/tvmc/test_model.py +++ b/tests/python/driver/tvmc/test_model.py @@ -30,7 +30,7 @@ def test_tvmc_workflow(keras_simple): tvmc_model = tvmc.load(keras_simple) tuning_records = tvmc.tune(tvmc_model, target="llvm", enable_autoscheduler=True, trials=2) tvmc_package = tvmc.compile(tvmc_model, tuning_records=tuning_records, target="llvm") - result = tvmc.run(tvmc_package, device="cpu") + result = tvmc.run(tvmc_package, device="cpu", end_to_end=True) assert type(tvmc_model) is TVMCModel assert type(tvmc_package) is TVMCPackage assert type(result) is TVMCResult From c5df67a6a2ab1087cc52785af464f21732ca6d09 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Tue, 15 Feb 2022 09:33:06 -0800 Subject: [PATCH 2/5] Add command line test. 
--- tests/python/driver/tvmc/test_command_line.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/driver/tvmc/test_command_line.py b/tests/python/driver/tvmc/test_command_line.py index 66a32160522b..6830cf0503c0 100644 --- a/tests/python/driver/tvmc/test_command_line.py +++ b/tests/python/driver/tvmc/test_command_line.py @@ -47,7 +47,7 @@ def test_tvmc_cl_workflow(keras_simple, tmpdir_factory): # Test running the model output_path = os.path.join(tmpdir, "predictions.npz") - run_str = f"tvmc run --outputs {output_path} {package_path}" + run_str = f"tvmc run --end-to-end --outputs {output_path} {package_path}" run_args = run_str.split(" ")[1:] _main(run_args) assert os.path.exists(output_path) From 2541f74bf6972f070dc60740d520195f8ed2ab01 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Tue, 15 Feb 2022 09:34:35 -0800 Subject: [PATCH 3/5] Fix comment syntax. --- python/tvm/driver/tvmc/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/driver/tvmc/runner.py b/python/tvm/driver/tvmc/runner.py index 83883b1923d6..1f7831501fcf 100644 --- a/python/tvm/driver/tvmc/runner.py +++ b/python/tvm/driver/tvmc/runner.py @@ -570,7 +570,7 @@ def run_module( times = [] else: # Call the benchmarking function of the executor. - # Optionally measure e2e data transfers from the + # Optionally measure e2e data transfers from the # CPU to device memory overheads (e.g. PCIE # overheads if the device is a discrete GPU). times = module.benchmark(dev, number=number, repeat=repeat, end_to_end=end_to_end) From 5b25d8b55fa36c18591bc7f507eeb5655f1f26e6 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Tue, 15 Feb 2022 10:44:15 -0800 Subject: [PATCH 4/5] Set device to cpu if end_to_end is on. --- python/tvm/driver/tvmc/runner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/tvm/driver/tvmc/runner.py b/python/tvm/driver/tvmc/runner.py index 1f7831501fcf..1ffb400fc9c9 100644 --- a/python/tvm/driver/tvmc/runner.py +++ b/python/tvm/driver/tvmc/runner.py @@ -573,6 +573,8 @@ def run_module( # Optionally measure e2e data transfers from the # CPU to device memory overheads (e.g. PCIE # overheads if the device is a discrete GPU). + if end_to_end: + dev = session.cpu() times = module.benchmark(dev, number=number, repeat=repeat, end_to_end=end_to_end) logger.debug("Collecting the output tensors.") From 8e5f5d60da5841c4b93b3027fef304ff6fb60ecd Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Wed, 16 Feb 2022 09:12:35 -0800 Subject: [PATCH 5/5] Tickle CI
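Usage sketch (illustration only, not part of the patch series): with these patches applied, the new end-to-end benchmarking mode can be enabled either through the Python API or the command line. The model file "model.onnx", output path "predictions.npz", and package name "compiled_package.tar" below are hypothetical placeholders; the calls mirror the workflows exercised in test_model.py and test_command_line.py above.

    # Python API: benchmark including host<->device data transfers.
    from tvm.driver import tvmc

    tvmc_model = tvmc.load("model.onnx")                  # hypothetical model file
    tvmc_package = tvmc.compile(tvmc_model, target="llvm")
    result = tvmc.run(tvmc_package, device="cpu", end_to_end=True)
    # result is a TVMCResult; with end_to_end=True the reported times include
    # copying inputs to the device and outputs back to the host.

    # Command line: the new --end-to-end flag on `tvmc run`.
    #   tvmc run --end-to-end --outputs predictions.npz compiled_package.tar

Note that when end_to_end is enabled the benchmark is driven from session.cpu() (see patch 4/5), so the host-side copy overhead is captured in the measurement rather than only the device-side kernel time.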