From 9f80c67c48b9c081772fc79f96b57fd926cefb3a Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 12 Mar 2021 09:51:21 -0800 Subject: [PATCH 01/16] GraphRuntime -> GraphExecutor --- .../apache/tvm/android/demo/MainActivity.java | 2 +- apps/bundle_deploy/README.md | 6 +- apps/bundle_deploy/bundle.c | 20 +-- apps/bundle_deploy/bundle_static.c | 22 ++-- apps/howto_deploy/cpp_deploy.cc | 4 +- docs/dev/microtvm_design.rst | 10 +- include/tvm/runtime/crt/error_codes.h | 8 +- include/tvm/runtime/crt/graph_runtime.h | 28 ++-- .../tvm/runtime/crt/graph_runtime_module.h | 2 +- .../org/apache/tvm/contrib/GraphRuntime.java | 2 +- .../apache/tvm/contrib/GraphRuntimeTest.java | 8 +- .../tvm/auto_scheduler/relay_integration.py | 4 +- python/tvm/autotvm/task/relay_integration.py | 6 +- python/tvm/contrib/graph_runtime.py | 6 +- python/tvm/micro/model_library_format.py | 4 +- python/tvm/micro/session.py | 4 +- .../relay/backend/graph_runtime_codegen.py | 4 +- .../relay/backend/graph_runtime_factory.py | 2 +- python/tvm/relay/build_module.py | 4 +- src/relay/backend/build_module.cc | 2 +- src/relay/backend/graph_runtime_codegen.cc | 20 +-- src/runtime/crt/graph_runtime/graph_runtime.c | 122 +++++++++--------- .../graph_runtime_module.c | 58 ++++----- src/runtime/crt/host/main.cc | 4 +- .../internal/graph_runtime/graph_runtime.h | 46 +++---- .../cuda_graph/graph_runtime_cuda_graph.cc | 14 +- .../graph/debug/graph_runtime_debug.cc | 20 +-- src/runtime/graph/graph_runtime.cc | 52 ++++---- src/runtime/graph/graph_runtime.h | 10 +- src/runtime/graph/graph_runtime_factory.cc | 32 ++--- src/runtime/graph/graph_runtime_factory.h | 12 +- .../micro/standalone/utvm_graph_runtime.cc | 14 +- .../micro/standalone/utvm_graph_runtime.h | 10 +- src/runtime/micro/standalone/utvm_runtime.cc | 10 +- tests/python/unittest/test_link_params.py | 4 +- .../test_micro_model_library_format.py | 2 +- web/src/runtime.ts | 8 +- 37 files changed, 293 insertions(+), 293 deletions(-) diff --git a/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java b/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java index 38c135a1edc4..dd3ae565ca8e 100644 --- a/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java +++ b/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java @@ -343,7 +343,7 @@ protected void onPostExecute(Integer status) { dialog.dismiss(); } if (status != 0) { - showDialog("Error", "Fail to predict image, GraphRuntime exception"); + showDialog("Error", "Fail to predict image, GraphExecutor exception"); } } } diff --git a/apps/bundle_deploy/README.md b/apps/bundle_deploy/README.md index a52d3a78f9c9..96b3d0f4edc5 100644 --- a/apps/bundle_deploy/README.md +++ b/apps/bundle_deploy/README.md @@ -20,9 +20,9 @@ How to Bundle TVM Modules ========================= This folder contains an example on how to bundle a TVM module (with the required -interpreter runtime modules such as `runtime::GraphRuntime`, the graph JSON, and +interpreter runtime modules such as `runtime::GraphExecutor`, the graph JSON, and the params) into a single, self-contained shared object (`bundle.so`) which -exposes a C API wrapping the appropriate `runtime::GraphRuntime` instance. +exposes a C API wrapping the appropriate `runtime::GraphExecutor` instance. 
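For reference, the exported C API can be driven roughly as follows. This is a
minimal sketch, not part of the bundle itself: the `build/bundle.so` path, the
`"data"` input name, and the 1x3x224x224 fp32 / 1x1000 shapes are assumptions
about the model, and reading `graph.json`/`params.bin` from disk is elided. The
`tvm_runtime_*` entry points (and the `uint64_t` params-size argument) are the
ones defined in `bundle.c` below.

```cpp
#include <dlfcn.h>

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

#include <dlpack/dlpack.h>

int main() {
  void* lib = dlopen("build/bundle.so", RTLD_LAZY | RTLD_LOCAL);
  if (lib == nullptr) {
    std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return 1;
  }

  // Resolve the C API exposed by the bundle (see bundle.c).
  auto create = reinterpret_cast<void* (*)(const char*, const char*, uint64_t)>(
      dlsym(lib, "tvm_runtime_create"));
  auto set_input = reinterpret_cast<void (*)(void*, const char*, DLTensor*)>(
      dlsym(lib, "tvm_runtime_set_input"));
  auto run = reinterpret_cast<void (*)(void*)>(dlsym(lib, "tvm_runtime_run"));
  auto get_output = reinterpret_cast<void (*)(void*, int32_t, DLTensor*)>(
      dlsym(lib, "tvm_runtime_get_output"));
  auto destroy = reinterpret_cast<void (*)(void*)>(dlsym(lib, "tvm_runtime_destroy"));

  // Contents of graph.json and params.bin would be read from disk here;
  // elided so the sketch stays focused on the API surface.
  std::string json_data;    /* = contents of graph.json */
  std::string params_data;  /* = contents of params.bin */

  void* handle = create(json_data.c_str(), params_data.c_str(), params_data.size());

  // Feed one CPU-resident input tensor and run the graph.
  std::vector<float> input(1 * 3 * 224 * 224, 0.f);
  int64_t in_shape[4] = {1, 3, 224, 224};
  DLTensor in{input.data(), {kDLCPU, 0}, 4, {kDLFloat, 32, 1}, in_shape, nullptr, 0};
  set_input(handle, "data", &in);
  run(handle);

  // Copy output 0 back out of the executor.
  std::vector<float> output(1000);
  int64_t out_shape[2] = {1, 1000};
  DLTensor out{output.data(), {kDLCPU, 0}, 2, {kDLFloat, 32, 1}, out_shape, nullptr, 0};
  get_output(handle, 0, &out);

  destroy(handle);
  dlclose(lib);
  return 0;
}
```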
This is useful for cases where we'd like to avoid deploying the TVM runtime components to the target host in advance - instead, we simply deploy the bundled @@ -50,7 +50,7 @@ This will: parameters - Build a `demo_dynamic` executable that `dlopen`'s `bundle.so` (or `bundle_c.so` in terms of the MISRA-C runtime), instantiates the contained graph runtime, - and invokes the `GraphRuntime::Run` function on a cat image, then prints + and invokes the `GraphExecutor::Run` function on a cat image, then prints the output results. Type the following command to run the sample code with static linking. diff --git a/apps/bundle_deploy/bundle.c b/apps/bundle_deploy/bundle.c index 84740aa25130..b7f7569bd2f9 100644 --- a/apps/bundle_deploy/bundle.c +++ b/apps/bundle_deploy/bundle.c @@ -75,30 +75,30 @@ TVM_DLL void* tvm_runtime_create(const char* json_data, const char* params_data, TVMModuleHandle mod_syslib = TVMArgs_AsModuleHandle(&pf.ret_value, 0); // run modules - TVMGraphRuntime* graph_runtime = NULL; - TVM_CCALL(TVMGraphRuntime_Create(json_data, mod_syslib, &dev, &graph_runtime)); - TVM_CCALL(TVMGraphRuntime_LoadParams(graph_runtime, params.data, params.size)); + TVMGraphExecutor* graph_runtime = NULL; + TVM_CCALL(TVMGraphExecutor_Create(json_data, mod_syslib, &dev, &graph_runtime)); + TVM_CCALL(TVMGraphExecutor_LoadParams(graph_runtime, params.data, params.size)); return graph_runtime; } TVM_DLL void tvm_runtime_destroy(void* runtime) { - TVMGraphRuntime_Release((TVMGraphRuntime**)&runtime); + TVMGraphExecutor_Release((TVMGraphExecutor**)&runtime); } TVM_DLL void tvm_runtime_set_input(void* runtime, const char* name, DLTensor* tensor) { - TVMGraphRuntime* graph_runtime = (TVMGraphRuntime*)runtime; - TVMGraphRuntime_SetInput(graph_runtime, name, tensor); + TVMGraphExecutor* graph_runtime = (TVMGraphExecutor*)runtime; + TVMGraphExecutor_SetInput(graph_runtime, name, tensor); } TVM_DLL void tvm_runtime_run(void* runtime) { - TVMGraphRuntime* graph_runtime = (TVMGraphRuntime*)runtime; - TVMGraphRuntime_Run(graph_runtime); + TVMGraphExecutor* graph_runtime = (TVMGraphExecutor*)runtime; + TVMGraphExecutor_Run(graph_runtime); } TVM_DLL void tvm_runtime_get_output(void* runtime, int32_t index, DLTensor* tensor) { - TVMGraphRuntime* graph_runtime = (TVMGraphRuntime*)runtime; - TVMGraphRuntime_GetOutput(graph_runtime, index, tensor); + TVMGraphExecutor* graph_runtime = (TVMGraphExecutor*)runtime; + TVMGraphExecutor_GetOutput(graph_runtime, index, tensor); } void TVMLogf(const char* msg, ...) 
{ diff --git a/apps/bundle_deploy/bundle_static.c b/apps/bundle_deploy/bundle_static.c index ca75b9e0b2e3..d69123f0f7d6 100644 --- a/apps/bundle_deploy/bundle_static.c +++ b/apps/bundle_deploy/bundle_static.c @@ -75,31 +75,31 @@ TVM_DLL void* tvm_runtime_create(const char* json_data, const char* params_data, TVMModuleHandle mod_syslib = TVMArgs_AsModuleHandle(&pf.ret_value, 0); // run modules - TVMGraphRuntime* graph_runtime = NULL; - TVM_CCALL(TVMGraphRuntime_Create(json_data, mod_syslib, &dev, &graph_runtime)); - TVM_CCALL(TVMGraphRuntime_LoadParams(graph_runtime, params.data, params.size)); + TVMGraphExecutor* graph_runtime = NULL; + TVM_CCALL(TVMGraphExecutor_Create(json_data, mod_syslib, &dev, &graph_runtime)); + TVM_CCALL(TVMGraphExecutor_LoadParams(graph_runtime, params.data, params.size)); return graph_runtime; } TVM_DLL void tvm_runtime_destroy(void* runtime) { - TVMGraphRuntime* graph_runtime = (TVMGraphRuntime*)runtime; - TVMGraphRuntime_Release(&graph_runtime); + TVMGraphExecutor* graph_runtime = (TVMGraphExecutor*)runtime; + TVMGraphExecutor_Release(&graph_runtime); } TVM_DLL void tvm_runtime_set_input(void* runtime, const char* name, DLTensor* tensor) { - TVMGraphRuntime* graph_runtime = (TVMGraphRuntime*)runtime; - TVMGraphRuntime_SetInput(graph_runtime, name, tensor); + TVMGraphExecutor* graph_runtime = (TVMGraphExecutor*)runtime; + TVMGraphExecutor_SetInput(graph_runtime, name, tensor); } TVM_DLL void tvm_runtime_run(void* runtime) { - TVMGraphRuntime* graph_runtime = (TVMGraphRuntime*)runtime; - TVMGraphRuntime_Run(graph_runtime); + TVMGraphExecutor* graph_runtime = (TVMGraphExecutor*)runtime; + TVMGraphExecutor_Run(graph_runtime); } TVM_DLL void tvm_runtime_get_output(void* runtime, int32_t index, DLTensor* tensor) { - TVMGraphRuntime* graph_runtime = (TVMGraphRuntime*)runtime; - TVMGraphRuntime_GetOutput(graph_runtime, index, tensor); + TVMGraphExecutor* graph_runtime = (TVMGraphExecutor*)runtime; + TVMGraphExecutor_GetOutput(graph_runtime, index, tensor); } void TVMLogf(const char* msg, ...) { diff --git a/apps/howto_deploy/cpp_deploy.cc b/apps/howto_deploy/cpp_deploy.cc index f58648c2fb7a..4c828c30ea03 100644 --- a/apps/howto_deploy/cpp_deploy.cc +++ b/apps/howto_deploy/cpp_deploy.cc @@ -83,7 +83,7 @@ void DeploySingleOp() { Verify(mod_syslib, "addonesys"); } -void DeployGraphRuntime() { +void DeployGraphExecutor() { LOG(INFO) << "Running graph runtime..."; // load in the library DLDevice dev{kDLCPU, 0}; @@ -119,6 +119,6 @@ void DeployGraphRuntime() { int main(void) { DeploySingleOp(); - DeployGraphRuntime(); + DeployGraphExecutor(); return 0; } diff --git a/docs/dev/microtvm_design.rst b/docs/dev/microtvm_design.rst index 2c3eeb2faea3..ecbb042f90cc 100644 --- a/docs/dev/microtvm_design.rst +++ b/docs/dev/microtvm_design.rst @@ -213,8 +213,8 @@ In Host-Driven execution, the firmware binary is the following: 4. The TVM RPC server. 5. (optional) Simplified Parameters. -This firmware image is flashed onto the device and a GraphRuntime instance is created on the host. -The GraphRuntime drives execution by sending RPC commands over a UART: +This firmware image is flashed onto the device and a GraphExecutor instance is created on the host. +The GraphExecutor drives execution by sending RPC commands over a UART: .. 
figure:: https://raw.githubusercontent.com/tvmai/web-data/main/images/dev/microtvm_host_driven.svg :align: center @@ -223,7 +223,7 @@ The GraphRuntime drives execution by sending RPC commands over a UART: Standalone Execution ^^^^^^^^^^^^^^^^^^^^ -In Standalone execution, the GraphRuntime is instantiated on device: +In Standalone execution, the GraphExecutor is instantiated on device: .. figure:: https://raw.githubusercontent.com/tvmai/web-data/main/images/dev/microtvm_standalone.svg :align: center @@ -248,7 +248,7 @@ When configuring for host-driven inference or AutoTVM, the remaining tasks are w When configuring for standalone deployment, the firmware needs to: 1. Instantiate the system library by calling the ``runtime.SystemLib`` PackedFunc. -2. Instantiate a GraphRuntime passing the system library module. +2. Instantiate a GraphExecutor passing the system library module. 3. Configure parameters and inputs as needed. 4. Run the model. @@ -267,7 +267,7 @@ For Host-driven model execution, firmware also needs: For Standalone model execution, firmware also needs: -4. The TVM C GraphRuntime library, supplied by TVM as a static library. +4. The TVM C GraphExecutor library, supplied by TVM as a static library. 5. The remaining compiler outputs (Simplified Parameters and Graph JSON). The Automated Build Flow diff --git a/include/tvm/runtime/crt/error_codes.h b/include/tvm/runtime/crt/error_codes.h index 75e49e63e094..de4c13514388 100644 --- a/include/tvm/runtime/crt/error_codes.h +++ b/include/tvm/runtime/crt/error_codes.h @@ -42,7 +42,7 @@ typedef enum { kTvmErrorCategorySession = 4, kTvmErrorCategoryPlatform = 5, kTvmErrorCategoryGenerated = 6, - kTvmErrorCategoryGraphRuntime = 7, + kTvmErrorCategoryGraphExecutor = 7, kTvmErrorCategoryFunctionCall = 8, kTvmErrorCategoryTimeEvaluator = 9, } tvm_crt_error_category_t; @@ -84,9 +84,9 @@ typedef enum { kTvmErrorGeneratedInvalidStorageId = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGenerated, 0), // Graph runtime - kTvmErrorGraphModuleAlreadyCreated = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphRuntime, 0), - kTvmErrorGraphModuleBadContext = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphRuntime, 1), - kTvmErrorGraphModuleNoSuchInput = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphRuntime, 2), + kTvmErrorGraphModuleAlreadyCreated = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphExecutor, 0), + kTvmErrorGraphModuleBadContext = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphExecutor, 1), + kTvmErrorGraphModuleNoSuchInput = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphExecutor, 2), // Function Calls - common problems encountered calling functions. kTvmErrorFunctionCallNumArguments = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionCall, 0), diff --git a/include/tvm/runtime/crt/graph_runtime.h b/include/tvm/runtime/crt/graph_runtime.h index 02c7421d00e8..3c18a9f6e295 100644 --- a/include/tvm/runtime/crt/graph_runtime.h +++ b/include/tvm/runtime/crt/graph_runtime.h @@ -43,7 +43,7 @@ typedef struct TVMOpParam { } TVMOpParam; // Graph attribute -typedef struct TVMGraphRuntimeGraphAttr { +typedef struct TVMGraphExecutorGraphAttr { uint32_t storage_num_not_alloctaed; uint32_t* storage_id; uint32_t* device_index; @@ -52,13 +52,13 @@ typedef struct TVMGraphRuntimeGraphAttr { int64_t* shape; uint32_t* ndim; uint32_t shape_count; -} TVMGraphRuntimeGraphAttr; +} TVMGraphExecutorGraphAttr; -typedef struct TVMGraphRuntime TVMGraphRuntime; +typedef struct TVMGraphExecutor TVMGraphExecutor; // public functions /*! 
- * \brief Allocate a new GraphRuntime with TVMPlatformMemoryAllocate and initialize it. + * \brief Allocate a new GraphExecutor with TVMPlatformMemoryAllocate and initialize it. * * \param sym_json JSON-encoded graph. * \param module_handle TVM Module that exposes the functions to call. @@ -66,16 +66,16 @@ typedef struct TVMGraphRuntime TVMGraphRuntime; * \param runtime Pointer which receives a pointer to the newly-created instance. * \return 0 if successful. */ -int TVMGraphRuntime_Create(const char* sym_json, TVMModuleHandle module_handle, - const DLDevice* devices, TVMGraphRuntime** runtime); +int TVMGraphExecutor_Create(const char* sym_json, TVMModuleHandle module_handle, + const DLDevice* devices, TVMGraphExecutor** runtime); -int TVMGraphRuntime_GetInputIndex(TVMGraphRuntime* runtime, const char* name); +int TVMGraphExecutor_GetInputIndex(TVMGraphExecutor* runtime, const char* name); /*! * \brief get number of input tensors allocated. * \return integer number of tensors available to use. */ -int TVMGraphRuntime_GetNumInputs(); +int TVMGraphExecutor_GetNumInputs(); /*! * \brief set input to the graph based on name. @@ -83,13 +83,13 @@ int TVMGraphRuntime_GetNumInputs(); * \param name The name of the input. * \param data_in The input data. */ -void TVMGraphRuntime_SetInput(TVMGraphRuntime* runtime, const char* name, DLTensor* data_in); +void TVMGraphExecutor_SetInput(TVMGraphExecutor* runtime, const char* name, DLTensor* data_in); /*! * \brief get number of output tensors allocated. * \return integer number of output tensors allocated. */ -int TVMGraphRuntime_GetNumOutputs(); +int TVMGraphExecutor_GetNumOutputs(); /*! * \brief Return NDArray for given output index. @@ -98,7 +98,7 @@ int TVMGraphRuntime_GetNumOutputs(); * \param out The DLTensor corresponding to given output node index. * \return The result of this function execution. */ -int TVMGraphRuntime_GetOutput(TVMGraphRuntime* runtime, const int32_t index, DLTensor* out); +int TVMGraphExecutor_GetOutput(TVMGraphExecutor* runtime, const int32_t index, DLTensor* out); /*! * \brief Load parameters from parameter blob. @@ -107,21 +107,21 @@ int TVMGraphRuntime_GetOutput(TVMGraphRuntime* runtime, const int32_t index, DLT * \param param_size The parameter size. * \return The result of this function execution. */ -int TVMGraphRuntime_LoadParams(TVMGraphRuntime* runtime, const char* param_blob, +int TVMGraphExecutor_LoadParams(TVMGraphExecutor* runtime, const char* param_blob, const uint32_t param_size); /*! * \brief Execute the graph. * \param runtime The graph runtime. */ -void TVMGraphRuntime_Run(TVMGraphRuntime* runtime); +void TVMGraphExecutor_Run(TVMGraphExecutor* runtime); /*! * \brief Release memory associated with the graph runtime. * \param runtime Pointer to graph runtime. * \return 0 if successful */ -int TVMGraphRuntime_Release(TVMGraphRuntime** runtime); +int TVMGraphExecutor_Release(TVMGraphExecutor** runtime); #ifdef __cplusplus } // extern "C" diff --git a/include/tvm/runtime/crt/graph_runtime_module.h b/include/tvm/runtime/crt/graph_runtime_module.h index 04e9184c8b8d..51aea1276ba1 100644 --- a/include/tvm/runtime/crt/graph_runtime_module.h +++ b/include/tvm/runtime/crt/graph_runtime_module.h @@ -33,7 +33,7 @@ extern "C" { /*! * \brief Register the "tvm.graph_runtime.create" constructor PackedFunc. 
*/ -tvm_crt_error_t TVMGraphRuntimeModule_Register(); +tvm_crt_error_t TVMGraphExecutorModule_Register(); #ifdef __cplusplus } // extern "C" diff --git a/jvm/core/src/main/java/org/apache/tvm/contrib/GraphRuntime.java b/jvm/core/src/main/java/org/apache/tvm/contrib/GraphRuntime.java index 5bbd2beb4644..d692f9a2cf08 100644 --- a/jvm/core/src/main/java/org/apache/tvm/contrib/GraphRuntime.java +++ b/jvm/core/src/main/java/org/apache/tvm/contrib/GraphRuntime.java @@ -29,7 +29,7 @@ import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; -public class GraphRuntime { +public class GraphExecutor { /** * Create a runtime executor module given a graph and module. * @param graphJson The graph deployed in json format output by compiler. diff --git a/jvm/core/src/test/java/org/apache/tvm/contrib/GraphRuntimeTest.java b/jvm/core/src/test/java/org/apache/tvm/contrib/GraphRuntimeTest.java index d1760454a468..0a5fa9a67e3a 100644 --- a/jvm/core/src/test/java/org/apache/tvm/contrib/GraphRuntimeTest.java +++ b/jvm/core/src/test/java/org/apache/tvm/contrib/GraphRuntimeTest.java @@ -35,8 +35,8 @@ import static org.junit.Assert.assertArrayEquals; -public class GraphRuntimeTest { - private final Logger logger = LoggerFactory.getLogger(GraphRuntime.class); +public class GraphExecutorTest { + private final Logger logger = LoggerFactory.getLogger(GraphExecutor.class); private static String loadingDir; @BeforeClass @@ -52,7 +52,7 @@ public void test_add_one_local() throws IOException { .useDelimiter("\\Z").next(); Device dev = Device.cpu(); - GraphModule graph = GraphRuntime.create(graphJson, libmod, dev); + GraphModule graph = GraphExecutor.create(graphJson, libmod, dev); long[] shape = new long[]{4}; NDArray arr = NDArray.empty(shape, dev); @@ -92,7 +92,7 @@ public void test_add_one_remote() throws IOException { remote.upload(new File(libPath)); Module mlib = remote.loadModule("graph_addone_lib.so"); - GraphModule graph = GraphRuntime.create(graphJson, mlib, dev); + GraphModule graph = GraphExecutor.create(graphJson, mlib, dev); long[] shape = new long[]{4}; NDArray arr = NDArray.empty(shape, dev); diff --git a/python/tvm/auto_scheduler/relay_integration.py b/python/tvm/auto_scheduler/relay_integration.py index 366d3d021d9e..72582a21c515 100644 --- a/python/tvm/auto_scheduler/relay_integration.py +++ b/python/tvm/auto_scheduler/relay_integration.py @@ -63,11 +63,11 @@ def call_all_topi_funcs(mod, params, target): ): try: opt_mod, _ = relay.optimize(mod, target, params) - grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target) + grc = graph_runtime_codegen.GraphExecutorCodegen(None, target) grc.codegen(opt_mod["main"]) except tvm.TVMError: print( - "Get errors with GraphRuntimeCodegen for task extraction. " + "Get errors with GraphExecutorCodegen for task extraction. " "Fallback to VMCompiler." 
) compiler = relay.vm.VMCompiler() diff --git a/python/tvm/autotvm/task/relay_integration.py b/python/tvm/autotvm/task/relay_integration.py index fe88d1741d60..ddb5205e7f46 100644 --- a/python/tvm/autotvm/task/relay_integration.py +++ b/python/tvm/autotvm/task/relay_integration.py @@ -43,7 +43,7 @@ def _lower(mod, target, params): with vta.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}): mod, _ = relay.optimize(mod, target, params) - grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target) + grc = graph_runtime_codegen.GraphExecutorCodegen(None, target) grc.codegen(mod["main"]) return @@ -53,11 +53,11 @@ def _lower(mod, target, params): # TODO: Currently VM compiler is likely to stack overflow for large models. try: opt_mod, _ = relay.optimize(mod, target, params) - grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target) + grc = graph_runtime_codegen.GraphExecutorCodegen(None, target) grc.codegen(opt_mod["main"]) except tvm.TVMError as e: print( - "Get errors with GraphRuntimeCodegen for task extraction. " + "Get errors with GraphExecutorCodegen for task extraction. " "Fallback to VMCompiler. Error details:\n%s" % str(e) ) compiler = relay.vm.VMCompiler() diff --git a/python/tvm/contrib/graph_runtime.py b/python/tvm/contrib/graph_runtime.py index 2eea188be977..63bd1ebade56 100644 --- a/python/tvm/contrib/graph_runtime.py +++ b/python/tvm/contrib/graph_runtime.py @@ -283,12 +283,12 @@ def load_params(self, params_bytes): self._load_params(bytearray(params_bytes)) def share_params(self, other, params_bytes): - """Share parameters from pre-existing GraphRuntime instance. + """Share parameters from pre-existing GraphExecutor instance. Parameters ---------- - other: GraphRuntime - The parent GraphRuntime from which this instance should share + other: GraphExecutor + The parent GraphExecutor from which this instance should share it's parameters. params_bytes : bytearray The serialized parameter dict (used only for the parameter names). diff --git a/python/tvm/micro/model_library_format.py b/python/tvm/micro/model_library_format.py index 4ce80be647c1..06d0a135735f 100644 --- a/python/tvm/micro/model_library_format.py +++ b/python/tvm/micro/model_library_format.py @@ -117,7 +117,7 @@ def _build_memory_map(graph_json): return memory_map -def export_model_library_format(mod: graph_runtime_factory.GraphRuntimeFactoryModule, file_name): +def export_model_library_format(mod: graph_runtime_factory.GraphExecutorFactoryModule, file_name): """Export the build artifact in Model Library Format. This function creates a .tar archive containing the build artifacts in a standardized @@ -126,7 +126,7 @@ def export_model_library_format(mod: graph_runtime_factory.GraphRuntimeFactoryMo Parameters ---------- - mod : tvm.relay.backend.graph_runtime_factory.GraphRuntimeFactoryModule + mod : tvm.relay.backend.graph_runtime_factory.GraphExecutorFactoryModule The return value of tvm.relay.build, which will be exported into Model Library Format. file_name : str Path to the .tar archive to generate. diff --git a/python/tvm/micro/session.py b/python/tvm/micro/session.py index 8987883abafb..b260bd3bee36 100644 --- a/python/tvm/micro/session.py +++ b/python/tvm/micro/session.py @@ -208,7 +208,7 @@ def create_local_graph_runtime(graph_json_str, mod, device): Returns ------- - tvm.contrib.GraphRuntime : + tvm.contrib.GraphExecutor : A local graph runtime instance that executes on the remote device. 
""" device_type_id = [device.device_type, device.device_id] @@ -237,7 +237,7 @@ def create_local_debug_runtime(graph_json_str, mod, device, dump_root=None): Returns ------- - tvm.contrib.GraphRuntime : + tvm.contrib.GraphExecutor : A local graph runtime instance that executes on the remote device. """ device_type_id = [device.device_type, device.device_id] diff --git a/python/tvm/relay/backend/graph_runtime_codegen.py b/python/tvm/relay/backend/graph_runtime_codegen.py index ec679aee894c..de3de70ae987 100644 --- a/python/tvm/relay/backend/graph_runtime_codegen.py +++ b/python/tvm/relay/backend/graph_runtime_codegen.py @@ -39,11 +39,11 @@ from tvm.tir import expr as _expr -class GraphRuntimeCodegen(object): +class GraphExecutorCodegen(object): """The compiler from Relay to the TVM runtime system.""" def __init__(self, mod, target): - self._mod = _build_module._GraphRuntimeCodegen() + self._mod = _build_module._GraphExecutorCodegen() self._init = self._mod["init"] self._codegen = self._mod["codegen"] self._get_graph_json = self._mod["get_graph_json"] diff --git a/python/tvm/relay/backend/graph_runtime_factory.py b/python/tvm/relay/backend/graph_runtime_factory.py index e92ae710ca0b..a6404e2a0bd0 100644 --- a/python/tvm/relay/backend/graph_runtime_factory.py +++ b/python/tvm/relay/backend/graph_runtime_factory.py @@ -21,7 +21,7 @@ from ...runtime import ndarray -class GraphRuntimeFactoryModule: +class GraphExecutorFactoryModule: """Graph runtime factory module. This is a module of graph runtime factory diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py index efe495e816a2..ac50c56264a8 100644 --- a/python/tvm/relay/build_module.py +++ b/python/tvm/relay/build_module.py @@ -110,7 +110,7 @@ def build(self, mod, target=None, target_host=None, params=None): Returns ------- - factory_module : tvm.relay.backend.graph_runtime_factory.GraphRuntimeFactoryModule + factory_module : tvm.relay.backend.graph_runtime_factory.GraphExecutorFactoryModule The runtime factory for the TVM graph runtime. """ target = _update_target(target) @@ -281,7 +281,7 @@ def build(ir_mod, target=None, target_host=None, params=None, mod_name="default" with tophub_context: bld_mod = BuildModule() graph_json, runtime_mod, params = bld_mod.build(ir_mod, target, target_host, params) - runtime_mod = _graph_runtime_factory.GraphRuntimeFactoryModule( + runtime_mod = _graph_runtime_factory.GraphExecutorFactoryModule( ir_mod, target, graph_json, runtime_mod, mod_name, params ) return runtime_mod diff --git a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc index 08846925bede..be6162e3d58e 100644 --- a/src/relay/backend/build_module.cc +++ b/src/relay/backend/build_module.cc @@ -60,7 +60,7 @@ struct BuildOutput { struct GraphCodegen { public: GraphCodegen() { - auto pf = GetPackedFunc("relay.build_module._GraphRuntimeCodegen"); + auto pf = GetPackedFunc("relay.build_module._GraphExecutorCodegen"); mod = (*pf)(); } ~GraphCodegen() {} diff --git a/src/relay/backend/graph_runtime_codegen.cc b/src/relay/backend/graph_runtime_codegen.cc index 7ed150495104..f9b26e5cf291 100644 --- a/src/relay/backend/graph_runtime_codegen.cc +++ b/src/relay/backend/graph_runtime_codegen.cc @@ -182,9 +182,9 @@ class GraphOpNode : public GraphNode { }; /*! 
\brief Code generator for graph runtime */ -class GraphRuntimeCodegen : public backend::MemoizedExprTranslator> { +class GraphExecutorCodegen : public backend::MemoizedExprTranslator> { public: - GraphRuntimeCodegen(runtime::Module* mod, const TargetsMap& targets) : mod_(mod) { + GraphExecutorCodegen(runtime::Module* mod, const TargetsMap& targets) : mod_(mod) { compile_engine_ = CompileEngine::Global(); targets_ = targets; } @@ -541,7 +541,7 @@ class GraphRuntimeCodegen : public backend::MemoizedExprTranslator& sptr_to_self) { if (name == "init") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { @@ -574,7 +574,7 @@ class GraphRuntimeCodegenModule : public runtime::ModuleNode { targets[dev_type->value] = it.second; } codegen_ = - std::make_shared(reinterpret_cast(mod), targets); + std::make_shared(reinterpret_cast(mod), targets); }); } else if (name == "codegen") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { @@ -619,19 +619,19 @@ class GraphRuntimeCodegenModule : public runtime::ModuleNode { } } - const char* type_key() const final { return "RelayGraphRuntimeCodegenModule"; } + const char* type_key() const final { return "RelayGraphExecutorCodegenModule"; } private: - std::shared_ptr codegen_; + std::shared_ptr codegen_; LoweredOutput output_; }; runtime::Module CreateGraphCodegenMod() { - auto ptr = make_object(); + auto ptr = make_object(); return runtime::Module(ptr); } -TVM_REGISTER_GLOBAL("relay.build_module._GraphRuntimeCodegen") +TVM_REGISTER_GLOBAL("relay.build_module._GraphExecutorCodegen") .set_body([](TVMArgs args, TVMRetValue* rv) { *rv = CreateGraphCodegenMod(); }); } // namespace backend diff --git a/src/runtime/crt/graph_runtime/graph_runtime.c b/src/runtime/crt/graph_runtime/graph_runtime.c index f0a1eb2da870..9431970c21cd 100644 --- a/src/runtime/crt/graph_runtime/graph_runtime.c +++ b/src/runtime/crt/graph_runtime/graph_runtime.c @@ -49,7 +49,7 @@ uint32_t Shape_Accumulate(int64_t* shape, uint32_t ndim) { return accum; } -int NodeEntry_Load(TVMGraphRuntimeNodeEntry* entry, JSONReader* reader) { +int NodeEntry_Load(TVMGraphExecutorNodeEntry* entry, JSONReader* reader) { int status = 0; reader->BeginArray(reader); if (!(reader->NextArrayItem(reader))) { @@ -74,7 +74,7 @@ int NodeEntry_Load(TVMGraphRuntimeNodeEntry* entry, JSONReader* reader) { return status; } -void TVMGraphRuntimeNode_LoadAttrs(TVMGraphRuntimeNode* node, JSONReader* reader, +void TVMGraphExecutorNode_LoadAttrs(TVMGraphExecutorNode* node, JSONReader* reader, TVMOpParam* param) { int bitmask = 0; char key[20], value[120]; @@ -109,7 +109,7 @@ void TVMGraphRuntimeNode_LoadAttrs(TVMGraphRuntimeNode* node, JSONReader* reader } } -int TVMGraphRuntimeNode_Load(TVMGraphRuntimeNode* node, JSONReader* reader) { +int TVMGraphExecutorNode_Load(TVMGraphExecutorNode* node, JSONReader* reader) { int status = 0; reader->BeginObject(reader); int bitmask = 0; @@ -138,7 +138,7 @@ int TVMGraphRuntimeNode_Load(TVMGraphRuntimeNode* node, JSONReader* reader) { break; } DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphRuntimeNodeEntry) * num_inputs, + tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorNodeEntry) * num_inputs, dev, (void**)&node->inputs); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); @@ -150,7 +150,7 @@ int TVMGraphRuntimeNode_Load(TVMGraphRuntimeNode* node, JSONReader* reader) { return -1; } - TVMGraphRuntimeNodeEntry* inputs = node->inputs + count; + 
TVMGraphExecutorNodeEntry* inputs = node->inputs + count; reader->BeginArray(reader); if (!reader->NextArrayItem(reader)) { fprintf(stderr, "invalid json format\n"); @@ -181,7 +181,7 @@ int TVMGraphRuntimeNode_Load(TVMGraphRuntimeNode* node, JSONReader* reader) { } else if (!strcmp(key, "attr") || !strcmp(key, "attrs")) { TVMOpParam param; - TVMGraphRuntimeNode_LoadAttrs(node, reader, ¶m); + TVMGraphExecutorNode_LoadAttrs(node, reader, ¶m); memcpy(&node->param, ¶m, sizeof(param)); } else if (!strcmp(key, "control_deps")) { fprintf(stderr, "do not support key %s", key); @@ -201,15 +201,15 @@ int TVMGraphRuntimeNode_Load(TVMGraphRuntimeNode* node, JSONReader* reader) { return status; } -TVMGraphRuntimeNode TVMGraphRuntimeNodeCreate() { - TVMGraphRuntimeNode node; - memset(&node, 0, sizeof(TVMGraphRuntimeNode)); - node.LoadAttrs = TVMGraphRuntimeNode_LoadAttrs; - node.Load = TVMGraphRuntimeNode_Load; +TVMGraphExecutorNode TVMGraphExecutorNodeCreate() { + TVMGraphExecutorNode node; + memset(&node, 0, sizeof(TVMGraphExecutorNode)); + node.LoadAttrs = TVMGraphExecutorNode_LoadAttrs; + node.Load = TVMGraphExecutorNode_Load; return node; } -int TVMGraphRuntimeNodeRelease(TVMGraphRuntimeNode* node) { +int TVMGraphExecutorNodeRelease(TVMGraphExecutorNode* node) { if (!node) { return 0; } @@ -225,7 +225,7 @@ int TVMGraphRuntimeNodeRelease(TVMGraphRuntimeNode* node) { return 0; } -int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr* attr, JSONReader* reader) { +int TVMGraphExecutorGraphAttr_Load(TVMGraphExecutorGraphAttr* attr, JSONReader* reader) { int status = 0; int bitmask = 0; char key[16], type[16]; @@ -520,7 +520,7 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr* attr, JSONReader* re return status; } -int TVMGraphRuntimeGraphAttr_Release(TVMGraphRuntimeGraphAttr* attr) { +int TVMGraphExecutorGraphAttr_Release(TVMGraphExecutorGraphAttr* attr) { if (!attr) { return 0; } @@ -568,7 +568,7 @@ int TVMGraphRuntimeGraphAttr_Release(TVMGraphRuntimeGraphAttr* attr) { return 0; } -int TVMGraphRuntime_Load(TVMGraphRuntime* runtime, JSONReader* reader) { +int TVMGraphExecutor_Load(TVMGraphExecutor* runtime, JSONReader* reader) { int status = 0; reader->BeginObject(reader); int bitmask = 0; @@ -583,7 +583,7 @@ int TVMGraphRuntime_Load(TVMGraphRuntime* runtime, JSONReader* reader) { break; } DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphRuntimeNode) * num_items, dev, + tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorNode) * num_items, dev, (void**)&runtime->nodes); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); @@ -596,8 +596,8 @@ int TVMGraphRuntime_Load(TVMGraphRuntime* runtime, JSONReader* reader) { status = -1; return status; } - TVMGraphRuntimeNode* node = runtime->nodes + runtime->nodes_count; - status = TVMGraphRuntimeNode_Load(node, reader); + TVMGraphExecutorNode* node = runtime->nodes + runtime->nodes_count; + status = TVMGraphExecutorNode_Load(node, reader); if (status != 0) { fprintf(stderr, "failed to load an element in `nodes` field in graph runtime node.\n"); break; @@ -673,7 +673,7 @@ int TVMGraphRuntime_Load(TVMGraphRuntime* runtime, JSONReader* reader) { break; } DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphRuntimeNodeEntry) * num_items, + tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorNodeEntry) * num_items, dev, (void**)&runtime->outputs); if (err != kTvmErrorNoError) { fprintf(stderr, "memory 
allocate error: %08x", err); @@ -686,7 +686,7 @@ int TVMGraphRuntime_Load(TVMGraphRuntime* runtime, JSONReader* reader) { status = -1; return status; } - TVMGraphRuntimeNodeEntry* entry = runtime->outputs + runtime->outputs_count; + TVMGraphExecutorNodeEntry* entry = runtime->outputs + runtime->outputs_count; status = NodeEntry_Load(entry, reader); if (status != 0) { fprintf(stderr, "Fail to load an element in `heads` field in graph runtime node.\n"); @@ -696,7 +696,7 @@ int TVMGraphRuntime_Load(TVMGraphRuntime* runtime, JSONReader* reader) { } bitmask |= 8; } else if (!strcmp(key, "attrs")) { - status = TVMGraphRuntimeGraphAttr_Load(&(runtime->attrs), reader); + status = TVMGraphExecutorGraphAttr_Load(&(runtime->attrs), reader); if (status != 0) { fprintf(stderr, "Fail to load an element in `heads` field in graph runtime node.\n"); break; @@ -719,7 +719,7 @@ int TVMGraphRuntime_Load(TVMGraphRuntime* runtime, JSONReader* reader) { return status; } -uint32_t TVMGraphRuntime_GetEntryId(TVMGraphRuntime* runtime, uint32_t nid, uint32_t index) { +uint32_t TVMGraphExecutor_GetEntryId(TVMGraphExecutor* runtime, uint32_t nid, uint32_t index) { return runtime->node_row_ptr[nid] + index; } @@ -728,7 +728,7 @@ uint32_t TVMGraphRuntime_GetEntryId(TVMGraphRuntime* runtime, uint32_t nid, uint * \param runtime The graph runtime. * \return the number of input tensors allocated. */ -int TVMGraphRuntime_GetNumInputs(TVMGraphRuntime* runtime) { return runtime->input_nodes_count; } +int TVMGraphExecutor_GetNumInputs(TVMGraphExecutor* runtime) { return runtime->input_nodes_count; } /*! * \brief Get the input index given the name of input. @@ -736,7 +736,7 @@ int TVMGraphRuntime_GetNumInputs(TVMGraphRuntime* runtime) { return runtime->inp * \param name The name of the input. * \return The index of input. */ -int TVMGraphRuntime_GetInputIndex(TVMGraphRuntime* runtime, const char* name) { +int TVMGraphExecutor_GetInputIndex(TVMGraphExecutor* runtime, const char* name) { uint32_t i; int32_t rv = -1; for (i = 0; i < runtime->input_nodes_count; ++i) { @@ -756,12 +756,12 @@ int TVMGraphRuntime_GetInputIndex(TVMGraphRuntime* runtime, const char* name) { * \param name The name of the input. * \param data_in The input data. */ -void TVMGraphRuntime_SetInput(TVMGraphRuntime* runtime, const char* name, DLTensor* data_in) { - uint32_t index = TVMGraphRuntime_GetInputIndex(runtime, name); +void TVMGraphExecutor_SetInput(TVMGraphExecutor* runtime, const char* name, DLTensor* data_in) { + uint32_t index = TVMGraphExecutor_GetInputIndex(runtime, name); if (index >= runtime->input_nodes_count) { fprintf(stderr, "given index is greater than num of input nodes.\n"); } - uint32_t eid = TVMGraphRuntime_GetEntryId(runtime, runtime->input_nodes[index], 0); + uint32_t eid = TVMGraphExecutor_GetEntryId(runtime, runtime->input_nodes[index], 0); runtime->data_entry[eid].dl_tensor.data = data_in->data; } @@ -772,7 +772,7 @@ void TVMGraphRuntime_SetInput(TVMGraphRuntime* runtime, const char* name, DLTens * \param param_size The parameter size. * \return The result of this function execution. 
*/ -int TVMGraphRuntime_LoadParams(TVMGraphRuntime* runtime, const char* param_blob, +int TVMGraphExecutor_LoadParams(TVMGraphExecutor* runtime, const char* param_blob, const uint32_t param_size) { int status = 0; const char* bptr = param_blob; @@ -824,10 +824,10 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime* runtime, const char* param_blob, } for (idx = 0; idx < size; idx++) { - int32_t in_idx = TVMGraphRuntime_GetInputIndex(runtime, names + TVM_CRT_STRLEN_NAME * idx); + int32_t in_idx = TVMGraphExecutor_GetInputIndex(runtime, names + TVM_CRT_STRLEN_NAME * idx); CHECK_GT(in_idx, 0, "Found param for non-existent input: %s\n", names + TVM_CRT_STRLEN_NAME * idx); - uint32_t eid = TVMGraphRuntime_GetEntryId(runtime, runtime->input_nodes[in_idx], 0); + uint32_t eid = TVMGraphExecutor_GetEntryId(runtime, runtime->input_nodes[in_idx], 0); if (!(eid < runtime->data_entry_count)) { fprintf(stderr, "`entry_id`=%d is greater than expected(%d).\n", eid, runtime->data_entry_count); @@ -871,7 +871,7 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime* runtime, const char* param_blob, * \brief Run all the operations one by one. * \param runtime The graph runtime. */ -void TVMGraphRuntime_Run(TVMGraphRuntime* runtime) { +void TVMGraphExecutor_Run(TVMGraphExecutor* runtime) { // setup the array and requirements. uint32_t idx; for (idx = 0; idx < runtime->op_execs_count; ++idx) { @@ -889,13 +889,13 @@ void TVMGraphRuntime_Run(TVMGraphRuntime* runtime) { * \param runtime The graph runtime. * \return the number of output tensors allocated. */ -int TVMGraphRuntime_GetNumOutputs(TVMGraphRuntime* runtime) { return runtime->outputs_count; } +int TVMGraphExecutor_GetNumOutputs(TVMGraphExecutor* runtime) { return runtime->outputs_count; } -int TVMGraphRuntime_GetOutput(TVMGraphRuntime* runtime, const int32_t idx, DLTensor* out) { +int TVMGraphExecutor_GetOutput(TVMGraphExecutor* runtime, const int32_t idx, DLTensor* out) { int status = 0; uint32_t nid = runtime->outputs[idx].node_id; uint32_t index = runtime->outputs[idx].index; - uint32_t eid = TVMGraphRuntime_GetEntryId(runtime, nid, index); + uint32_t eid = TVMGraphExecutor_GetEntryId(runtime, nid, index); // copy data section to allocated output tensor int32_t elem_bytes = out->dtype.bits / 8; @@ -908,7 +908,7 @@ int TVMGraphRuntime_GetOutput(TVMGraphRuntime* runtime, const int32_t idx, DLTen return status; } -int TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { +int TVMGraphExecutor_SetupStorage(TVMGraphExecutor* runtime) { TVMPackedFunc lookup_linked_param; int lookup_linked_param_valid; uint32_t idx; @@ -924,7 +924,7 @@ int TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { } // Grab saved optimization plan from graph. - TVMGraphRuntimeGraphAttr* attrs = &(runtime->attrs); + TVMGraphExecutorGraphAttr* attrs = &(runtime->attrs); DLDataType* vtype = NULL; DLDevice alloc_dev = {kDLCPU, 0}; tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(DLDataType) * attrs->dltype_count, @@ -938,14 +938,14 @@ int TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { } // Size and device type of each storage pool entry. 
- TVMGraphRuntimePoolEntry* pool_entry = NULL; - err = TVMPlatformMemoryAllocate(sizeof(TVMGraphRuntimePoolEntry) * runtime->nodes_count, + TVMGraphExecutorPoolEntry* pool_entry = NULL; + err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorPoolEntry) * runtime->nodes_count, alloc_dev, (void**)&pool_entry); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); return -1; } - memset(pool_entry, 0, sizeof(TVMGraphRuntimePoolEntry) * runtime->nodes_count); + memset(pool_entry, 0, sizeof(TVMGraphExecutorPoolEntry) * runtime->nodes_count); uint32_t pool_entry_count = 0; // Find the maximum space size. for (idx = 0; idx < attrs->shape_count; idx++) { @@ -967,14 +967,14 @@ int TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { } // Allocate the space. - err = TVMPlatformMemoryAllocate(sizeof(TVMGraphRuntimeStorageEntry) * pool_entry_count, alloc_dev, + err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorStorageEntry) * pool_entry_count, alloc_dev, (void**)&runtime->storage_pool); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); return -1; } for (idx = 0; idx < pool_entry_count; idx++) { - TVMGraphRuntimePoolEntry pit = pool_entry[idx]; + TVMGraphExecutorPoolEntry pit = pool_entry[idx]; DLDevice dev = runtime->devices[0]; uint8_t did_find_linked_param = 0; if (lookup_linked_param_valid) { @@ -1042,7 +1042,7 @@ int TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { return 0; } -int TVMGraphRuntime_SetupOpExecs(TVMGraphRuntime* runtime) { +int TVMGraphExecutor_SetupOpExecs(TVMGraphExecutor* runtime) { int status = 0; uint32_t nid, idx; runtime->op_execs_count = runtime->nodes_count; @@ -1055,18 +1055,18 @@ int TVMGraphRuntime_SetupOpExecs(TVMGraphRuntime* runtime) { return status; } for (nid = 0; nid < runtime->nodes_count; nid++) { - const TVMGraphRuntimeNode* inode = runtime->nodes + nid; + const TVMGraphExecutorNode* inode = runtime->nodes + nid; if (strcmp(inode->op_type, "null")) { DLTensorPtr args[TVM_CRT_MAX_ARGS]; uint32_t args_count = 0; for (idx = 0; idx < inode->inputs_count; idx++) { - const TVMGraphRuntimeNodeEntry* entry = inode->inputs + idx; - uint32_t eid = TVMGraphRuntime_GetEntryId(runtime, entry->node_id, entry->index); + const TVMGraphExecutorNodeEntry* entry = inode->inputs + idx; + uint32_t eid = TVMGraphExecutor_GetEntryId(runtime, entry->node_id, entry->index); args[idx] = &(runtime->data_entry[eid].dl_tensor); args_count++; } for (idx = 0; idx < inode->param.num_outputs; idx++) { - uint32_t eid = TVMGraphRuntime_GetEntryId(runtime, nid, idx); + uint32_t eid = TVMGraphExecutor_GetEntryId(runtime, nid, idx); args[args_count] = &(runtime->data_entry[eid].dl_tensor); args_count++; } @@ -1085,7 +1085,7 @@ int TVMGraphRuntime_SetupOpExecs(TVMGraphRuntime* runtime) { printf("tvm_op: creating %s with node_id=%d\n", inode->param.func_name, nid); #endif // TVM_CRT_DEBUG TVMPackedFunc pf; - TVMGraphRuntime_CreateTVMOp(runtime, &(inode->param), args, args_count, inode->inputs_count, + TVMGraphExecutor_CreateTVMOp(runtime, &(inode->param), args, args_count, inode->inputs_count, &pf); runtime->op_execs[nid] = pf; } @@ -1104,7 +1104,7 @@ typedef struct TVMOpArgs { uint32_t shape_data_count; } TVMOpArgs; -int32_t TVMGraphRuntime_CreateTVMOp(TVMGraphRuntime* runtime, const TVMOpParam* param, +int32_t TVMGraphExecutor_CreateTVMOp(TVMGraphExecutor* runtime, const TVMOpParam* param, DLTensorPtr* args, const uint32_t args_count, uint32_t num_inputs, TVMPackedFunc* pf) { int status = 0; @@ -1151,7 +1151,7 @@ 
int32_t TVMGraphRuntime_CreateTVMOp(TVMGraphRuntime* runtime, const TVMOpParam* * executed on. * \return 0 on success. */ -int TVMGraphRuntime_Init(TVMGraphRuntime* runtime, const char* graph_json, +int TVMGraphExecutor_Init(TVMGraphExecutor* runtime, const char* graph_json, TVMModuleHandle module_handle, const DLDevice* devs) { JSONReader reader; tvm_crt_error_t err = JSONReader_Create(graph_json, &reader); @@ -1159,7 +1159,7 @@ int TVMGraphRuntime_Init(TVMGraphRuntime* runtime, const char* graph_json, return -1; } - TVMGraphRuntime_Load(runtime, &reader); + TVMGraphExecutor_Load(runtime, &reader); err = JSONReader_Release(&reader); if (err != kTvmErrorNoError) { return -1; @@ -1168,11 +1168,11 @@ int TVMGraphRuntime_Init(TVMGraphRuntime* runtime, const char* graph_json, runtime->devices[0] = devs[0]; int status; - status = TVMGraphRuntime_SetupStorage(runtime); + status = TVMGraphExecutor_SetupStorage(runtime); if (status != 0) { return status; } - status = TVMGraphRuntime_SetupOpExecs(runtime); + status = TVMGraphExecutor_SetupOpExecs(runtime); if (status != 0) { if (status != 0) { return status; @@ -1184,26 +1184,26 @@ int TVMGraphRuntime_Init(TVMGraphRuntime* runtime, const char* graph_json, return status; } -int TVMGraphRuntime_Create(const char* sym_json, TVMModuleHandle module_handle, - const DLDevice* devs, TVMGraphRuntime** runtime) { +int TVMGraphExecutor_Create(const char* sym_json, TVMModuleHandle module_handle, + const DLDevice* devs, TVMGraphExecutor** runtime) { DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphRuntime), dev, (void**)runtime); + tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutor), dev, (void**)runtime); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); return -1; } - memset(*runtime, 0, sizeof(TVMGraphRuntime)); + memset(*runtime, 0, sizeof(TVMGraphExecutor)); // init - return TVMGraphRuntime_Init(*runtime, sym_json, module_handle, devs); + return TVMGraphExecutor_Init(*runtime, sym_json, module_handle, devs); } -int TVMGraphRuntime_Release(TVMGraphRuntime** pptr) { +int TVMGraphExecutor_Release(TVMGraphExecutor** pptr) { int status = 0; int32_t idx; - TVMGraphRuntime* runtime = (TVMGraphRuntime*)(*pptr); + TVMGraphExecutor* runtime = (TVMGraphExecutor*)(*pptr); for (idx = 0; idx < runtime->nodes_count; ++idx) { - status = TVMGraphRuntimeNodeRelease(&(runtime->nodes[idx])); + status = TVMGraphExecutorNodeRelease(&(runtime->nodes[idx])); if (status != 0) { return status; } @@ -1213,7 +1213,7 @@ int TVMGraphRuntime_Release(TVMGraphRuntime** pptr) { if (status != 0) { return status; } - status = TVMGraphRuntimeGraphAttr_Release(&(runtime->attrs)); + status = TVMGraphExecutorGraphAttr_Release(&(runtime->attrs)); if (status != 0) { return status; } diff --git a/src/runtime/crt/graph_runtime_module/graph_runtime_module.c b/src/runtime/crt/graph_runtime_module/graph_runtime_module.c index 4a61b89528ad..fa741329e5f6 100644 --- a/src/runtime/crt/graph_runtime_module/graph_runtime_module.c +++ b/src/runtime/crt/graph_runtime_module/graph_runtime_module.c @@ -33,12 +33,12 @@ typedef struct { TVMModule mod; - TVMGraphRuntime* runtime; -} GraphRuntimeModule; + TVMGraphExecutor* runtime; +} GraphExecutorModule; -static GraphRuntimeModule graph_runtime; +static GraphExecutorModule graph_runtime; -int32_t TVMGraphRuntimeModule_Create(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, +int32_t TVMGraphExecutorModule_Create(TVMValue* args, int* tcodes, int nargs, 
TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { if (graph_runtime.runtime != NULL) { return kTvmErrorGraphModuleAlreadyCreated; @@ -59,7 +59,7 @@ int32_t TVMGraphRuntimeModule_Create(TVMValue* args, int* tcodes, int nargs, TVM DLDevice dev = {(DLDeviceType)args[2].v_int64, (int)args[3].v_int64}; int ret_value = - TVMGraphRuntime_Create(args[0].v_str, args[1].v_handle, &dev, &graph_runtime.runtime); + TVMGraphExecutor_Create(args[0].v_str, args[1].v_handle, &dev, &graph_runtime.runtime); if (ret_value != 0) { return ret_value; } @@ -68,7 +68,7 @@ int32_t TVMGraphRuntimeModule_Create(TVMValue* args, int* tcodes, int nargs, TVM ret_value = TVMModCreateFromCModule(&graph_runtime.mod, &out); if (ret_value != 0) { ret_tcodes[0] = kTVMNullptr; - TVMGraphRuntime_Release(&graph_runtime.runtime); + TVMGraphExecutor_Release(&graph_runtime.runtime); return ret_value; } @@ -77,7 +77,7 @@ int32_t TVMGraphRuntimeModule_Create(TVMValue* args, int* tcodes, int nargs, TVM return kTvmErrorNoError; } -int32_t TVMGraphRuntimeModule_GetInput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, +int32_t TVMGraphExecutorModule_GetInput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { if (nargs != 1) { return kTvmErrorFunctionCallNumArguments; @@ -87,43 +87,43 @@ int32_t TVMGraphRuntimeModule_GetInput(TVMValue* args, int* tcodes, int nargs, T return kTvmErrorFunctionCallWrongArgType; } - int index = TVMGraphRuntime_GetInputIndex(graph_runtime.runtime, args[0].v_str); + int index = TVMGraphExecutor_GetInputIndex(graph_runtime.runtime, args[0].v_str); if (index < 0) { return kTvmErrorGraphModuleNoSuchInput; } - uint32_t eid = TVMGraphRuntime_GetEntryId(graph_runtime.runtime, + uint32_t eid = TVMGraphExecutor_GetEntryId(graph_runtime.runtime, graph_runtime.runtime->input_nodes[index], 0); ret_values[0].v_handle = (void*)&graph_runtime.runtime->data_entry[eid].dl_tensor; ret_tcodes[0] = kTVMNDArrayHandle; return 0; } -int32_t TVMGraphRuntimeModule_GetNumInputs(TVMValue* args, int* tcodes, int nargs, +int32_t TVMGraphExecutorModule_GetNumInputs(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { if (nargs != 0) { return kTvmErrorFunctionCallNumArguments; } - ret_values[0].v_int64 = TVMGraphRuntime_GetNumInputs(); + ret_values[0].v_int64 = TVMGraphExecutor_GetNumInputs(); ret_tcodes[0] = kTVMArgInt; return 0; } -int32_t TVMGraphRuntimeModule_GetNumOutputs(TVMValue* args, int* tcodes, int nargs, +int32_t TVMGraphExecutorModule_GetNumOutputs(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { if (nargs != 0) { return kTvmErrorFunctionCallNumArguments; } - ret_values[0].v_int64 = TVMGraphRuntime_GetNumOutputs(graph_runtime.runtime); + ret_values[0].v_int64 = TVMGraphExecutor_GetNumOutputs(graph_runtime.runtime); ret_tcodes[0] = kTVMArgInt; return 0; } -int32_t TVMGraphRuntimeModule_GetOutput(TVMValue* args, int* tcodes, int nargs, +int32_t TVMGraphExecutorModule_GetOutput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { if (nargs != 1) { @@ -135,20 +135,20 @@ int32_t TVMGraphRuntimeModule_GetOutput(TVMValue* args, int* tcodes, int nargs, } int output_index = args[0].v_int64; - if (output_index < 0 || output_index > TVMGraphRuntime_GetNumOutputs(graph_runtime.runtime)) { + if (output_index < 0 || output_index > TVMGraphExecutor_GetNumOutputs(graph_runtime.runtime)) { return 
kTvmErrorGraphModuleNoSuchInput; } uint32_t nid = graph_runtime.runtime->outputs[output_index].node_id; uint32_t index = graph_runtime.runtime->outputs[output_index].index; - uint32_t eid = TVMGraphRuntime_GetEntryId(graph_runtime.runtime, nid, index); + uint32_t eid = TVMGraphExecutor_GetEntryId(graph_runtime.runtime, nid, index); ret_values[0].v_handle = (void*)&(graph_runtime.runtime->data_entry[eid].dl_tensor); ret_tcodes[0] = kTVMNDArrayHandle; return 0; } -int32_t TVMGraphRuntimeModule_LoadParams(TVMValue* args, int* tcodes, int nargs, +int32_t TVMGraphExecutorModule_LoadParams(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { if (nargs != 1) { @@ -162,22 +162,22 @@ int32_t TVMGraphRuntimeModule_LoadParams(TVMValue* args, int* tcodes, int nargs, ret_tcodes[0] = kTVMNullptr; TVMByteArray* arr = (TVMByteArray*)args[0].v_handle; - return TVMGraphRuntime_LoadParams(graph_runtime.runtime, arr->data, arr->size); + return TVMGraphExecutor_LoadParams(graph_runtime.runtime, arr->data, arr->size); } -int32_t TVMGraphRuntimeModule_Run(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, +int32_t TVMGraphExecutorModule_Run(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { if (nargs != 0) { return kTvmErrorFunctionCallNumArguments; } - TVMGraphRuntime_Run(graph_runtime.runtime); + TVMGraphExecutor_Run(graph_runtime.runtime); ret_tcodes[0] = kTVMNullptr; return 0; } -int32_t TVMGraphRuntimeModule_SetInput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, +int32_t TVMGraphExecutorModule_SetInput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { if (nargs != 2) { return kTvmErrorFunctionCallNumArguments; @@ -187,23 +187,23 @@ int32_t TVMGraphRuntimeModule_SetInput(TVMValue* args, int* tcodes, int nargs, T return kTvmErrorFunctionCallWrongArgType; } - TVMGraphRuntime_SetInput(graph_runtime.runtime, args[0].v_str, (DLTensor*)args[1].v_handle); + TVMGraphExecutor_SetInput(graph_runtime.runtime, args[0].v_str, (DLTensor*)args[1].v_handle); ret_tcodes[0] = kTVMNullptr; return 0; } -int32_t TVMGraphRuntimeModule_NotImplemented(TVMValue* args, int* tcodes, int nargs, +int32_t TVMGraphExecutorModule_NotImplemented(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { return kTvmErrorFunctionCallNotImplemented; } static const TVMBackendPackedCFunc graph_runtime_registry_funcs[] = { - &TVMGraphRuntimeModule_GetInput, &TVMGraphRuntimeModule_GetNumInputs, - &TVMGraphRuntimeModule_GetNumOutputs, &TVMGraphRuntimeModule_GetOutput, - &TVMGraphRuntimeModule_LoadParams, &TVMGraphRuntimeModule_Run, - &TVMGraphRuntimeModule_SetInput, &TVMGraphRuntimeModule_NotImplemented, + &TVMGraphExecutorModule_GetInput, &TVMGraphExecutorModule_GetNumInputs, + &TVMGraphExecutorModule_GetNumOutputs, &TVMGraphExecutorModule_GetOutput, + &TVMGraphExecutorModule_LoadParams, &TVMGraphExecutorModule_Run, + &TVMGraphExecutorModule_SetInput, &TVMGraphExecutorModule_NotImplemented, }; static const TVMFuncRegistry graph_runtime_registry = { @@ -217,9 +217,9 @@ static const TVMFuncRegistry graph_runtime_registry = { "share_params\0", graph_runtime_registry_funcs}; -tvm_crt_error_t TVMGraphRuntimeModule_Register() { +tvm_crt_error_t TVMGraphExecutorModule_Register() { graph_runtime.mod.registry = &graph_runtime_registry; graph_runtime.runtime = NULL; - return TVMFuncRegisterGlobal("tvm.graph_runtime.create", 
&TVMGraphRuntimeModule_Create, 0); + return TVMFuncRegisterGlobal("tvm.graph_runtime.create", &TVMGraphExecutorModule_Create, 0); } diff --git a/src/runtime/crt/host/main.cc b/src/runtime/crt/host/main.cc index 15e696b59f46..f5548471cd9d 100644 --- a/src/runtime/crt/host/main.cc +++ b/src/runtime/crt/host/main.cc @@ -132,8 +132,8 @@ int main(int argc, char** argv) { utvm_rpc_server_t rpc_server = UTvmRpcServerInit(&UTvmWriteFunc, nullptr); #ifdef TVM_HOST_USE_GRAPH_RUNTIME_MODULE - CHECK_EQ(TVMGraphRuntimeModule_Register(), kTvmErrorNoError, - "failed to register GraphRuntime TVMModule"); + CHECK_EQ(TVMGraphExecutorModule_Register(), kTvmErrorNoError, + "failed to register GraphExecutor TVMModule"); #endif if (TVMFuncRegisterGlobal("tvm.testing.reset_server", (TVMFunctionHandle)&testonly_reset_server, diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h index a64076ab61e0..656e7a77f84c 100644 --- a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h +++ b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h @@ -30,29 +30,29 @@ #include // Memory pool entry. -typedef struct TVMGraphRuntimePoolEntry { +typedef struct TVMGraphExecutorPoolEntry { size_t size; int device_type; int entry_id; -} TVMGraphRuntimePoolEntry; +} TVMGraphExecutorPoolEntry; // Node entry -typedef struct TVMGraphRuntimeNodeEntry { +typedef struct TVMGraphExecutorNodeEntry { uint32_t node_id; uint32_t index; uint32_t version; // JSON Loader void (*Load)(JSONReader* reader); -} TVMGraphRuntimeNodeEntry; +} TVMGraphExecutorNodeEntry; // Storage entry. -typedef struct TVMGraphRuntimeStorageEntry { +typedef struct TVMGraphExecutorStorageEntry { uint8_t is_linked_param; TVMNDArray array; -} TVMGraphRuntimeStorageEntry; +} TVMGraphExecutorStorageEntry; // Node -typedef struct TVMGraphRuntimeNode { +typedef struct TVMGraphExecutorNode { // operator type in string char op_type[16]; // name of the op @@ -60,20 +60,20 @@ typedef struct TVMGraphRuntimeNode { // parameters TVMOpParam param; // inputs - TVMGraphRuntimeNodeEntry* inputs; + TVMGraphExecutorNodeEntry* inputs; // number of inputs size_t inputs_count; // control deps uint32_t control_deps[20]; // JSON Loader - void (*LoadAttrs)(struct TVMGraphRuntimeNode* node, JSONReader* reader, TVMOpParam* param); + void (*LoadAttrs)(struct TVMGraphExecutorNode* node, JSONReader* reader, TVMOpParam* param); // JSON Loader - int (*Load)(struct TVMGraphRuntimeNode* node, JSONReader* reader); -} TVMGraphRuntimeNode; + int (*Load)(struct TVMGraphExecutorNode* node, JSONReader* reader); +} TVMGraphExecutorNode; -typedef struct TVMGraphRuntime { +typedef struct TVMGraphExecutor { /*! \brief The graph nodes. */ - TVMGraphRuntimeNode* nodes; + TVMGraphExecutorNode* nodes; /*! \brief The graph nodes counter. */ uint32_t nodes_count; /*! \brief The argument nodes. */ @@ -83,18 +83,18 @@ typedef struct TVMGraphRuntime { uint32_t* node_row_ptr; uint32_t node_row_ptr_count; /*! \brief Output entries. */ - TVMGraphRuntimeNodeEntry* outputs; + TVMGraphExecutorNodeEntry* outputs; /*! \brief Output entries counter. */ uint32_t outputs_count; /*! \brief Additional graph attributes. */ - TVMGraphRuntimeGraphAttr attrs; + TVMGraphExecutorGraphAttr attrs; /*! \brief The code module that contains both host and device code. */ TVMModuleHandle module_handle; /*! \brief Execution context of all devices including the host. 
*/ DLDevice devices[1]; uint32_t devices_count; /*! \brief Common storage pool for all devices. */ - TVMGraphRuntimeStorageEntry* storage_pool; + TVMGraphExecutorStorageEntry* storage_pool; uint32_t storage_pool_count; /*! \brief Data entry of each node. */ TVMNDArray* data_entry; @@ -102,19 +102,19 @@ typedef struct TVMGraphRuntime { /*! \brief Operator on each node. */ TVMPackedFunc* op_execs; uint32_t op_execs_count; -} TVMGraphRuntime; +} TVMGraphExecutor; typedef DLTensor* DLTensorPtr; // private functions -uint32_t TVMGraphRuntime_GetEntryId(TVMGraphRuntime* runtime, uint32_t nid, uint32_t index); -void TVMGraphRuntime_SetInput(TVMGraphRuntime* runtime, const char* name, DLTensor* data_in); -int TVMGraphRuntime_LoadParams(TVMGraphRuntime* runtime, const char* param_blob, +uint32_t TVMGraphExecutor_GetEntryId(TVMGraphExecutor* runtime, uint32_t nid, uint32_t index); +void TVMGraphExecutor_SetInput(TVMGraphExecutor* runtime, const char* name, DLTensor* data_in); +int TVMGraphExecutor_LoadParams(TVMGraphExecutor* runtime, const char* param_blob, const uint32_t param_size); -void TVMGraphRuntime_Run(TVMGraphRuntime* runtime); -int TVMGraphRuntime_GetOutput(TVMGraphRuntime* runtime, const int32_t idx, DLTensor* out); +void TVMGraphExecutor_Run(TVMGraphExecutor* runtime); +int TVMGraphExecutor_GetOutput(TVMGraphExecutor* runtime, const int32_t idx, DLTensor* out); -int32_t TVMGraphRuntime_CreateTVMOp(TVMGraphRuntime* runtime, const TVMOpParam* param, +int32_t TVMGraphExecutor_CreateTVMOp(TVMGraphExecutor* runtime, const TVMOpParam* param, DLTensorPtr* args, const uint32_t args_count, uint32_t num_inputs, TVMPackedFunc* pf); diff --git a/src/runtime/graph/cuda_graph/graph_runtime_cuda_graph.cc b/src/runtime/graph/cuda_graph/graph_runtime_cuda_graph.cc index 3ad8453c4903..e57a4446bb31 100644 --- a/src/runtime/graph/cuda_graph/graph_runtime_cuda_graph.cc +++ b/src/runtime/graph/cuda_graph/graph_runtime_cuda_graph.cc @@ -32,14 +32,14 @@ namespace runtime { /*! * \brief Graph runtime with CUDA Graph Support. * - * This is the extension of GraphRuntime class used for CUDA graph launch + * This is the extension of GraphExecutor class used for CUDA graph launch * instead of CUDA kernel launch. CUDA graph launch requires CUDA 10.0 or * above, currently there are two ways of constructing CUDA graphs: * (1) Using CUDA stream capture API to capture a series of operations on * CUDA stream, and automatically generates a graph (2) Building a graph * using CUDA graph API manually. This implementation uses stream capture. */ -class GraphRuntimeCudaGraph : public GraphRuntime { +class GraphExecutorCudaGraph : public GraphExecutor { public: /*! * \brief Begin CUDA graph capture on stream, the stream enters capture mode. 
@@ -93,7 +93,7 @@ class GraphRuntimeCudaGraph : public GraphRuntime { cudaGraphExec_t cuda_graph_exec_; }; -PackedFunc GraphRuntimeCudaGraph::GetFunction(const std::string& name, +PackedFunc GraphExecutorCudaGraph::GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) { if (name == "run_cuda_graph") { return PackedFunc( @@ -104,14 +104,14 @@ PackedFunc GraphRuntimeCudaGraph::GetFunction(const std::string& name, } else if (name == "end_capture") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { this->EndCapture(); }); } else { - return GraphRuntime::GetFunction(name, sptr_to_self); + return GraphExecutor::GetFunction(name, sptr_to_self); } } -Module GraphRuntimeCudaGraphCreate(const std::string& sym_json, const tvm::runtime::Module& m, +Module GraphExecutorCudaGraphCreate(const std::string& sym_json, const tvm::runtime::Module& m, const std::vector& devs, PackedFunc lookup_linked_param_func) { - auto exec = make_object(); + auto exec = make_object(); exec->Init(sym_json, m, devs, lookup_linked_param_func); return Module(exec); } @@ -128,7 +128,7 @@ TVM_REGISTER_GLOBAL("tvm.graph_runtime_cuda_graph.create") dev_start_arg++; } - *rv = GraphRuntimeCudaGraphCreate(args[0], args[1], GetAllDevice(args, dev_start_arg), + *rv = GraphExecutorCudaGraphCreate(args[0], args[1], GetAllDevice(args, dev_start_arg), lookup_linked_param_func); }); } // namespace runtime diff --git a/src/runtime/graph/debug/graph_runtime_debug.cc b/src/runtime/graph/debug/graph_runtime_debug.cc index fedaf4f890bc..aa9d0f8de5e4 100644 --- a/src/runtime/graph/debug/graph_runtime_debug.cc +++ b/src/runtime/graph/debug/graph_runtime_debug.cc @@ -37,10 +37,10 @@ namespace runtime { /*! * \brief Graph runtime with debug . * - * This is the extension of GraphRuntime class used for debugging + * This is the extension of GraphExecutor class used for debugging * TVM runtime PackedFunc API. */ -class GraphRuntimeDebug : public GraphRuntime { +class GraphExecutorDebug : public GraphExecutor { public: /*! * \brief Run each operation in the graph and get the time per op for all ops. @@ -58,7 +58,7 @@ class GraphRuntimeDebug : public GraphRuntime { */ std::string RunIndividual(int number, int repeat, int min_repeat_ms) { // warmup run - GraphRuntime::Run(); + GraphExecutor::Run(); std::string tkey = module_->type_key(); std::vector time_sec_per_op(op_execs_.size(), 0); if (tkey == "rpc") { @@ -128,7 +128,7 @@ class GraphRuntimeDebug : public GraphRuntime { << "Don't know how to run op type " << nodes_[index].op_type << " remotely over RPC right now"; - // NOTE: GraphRuntimeDebug expects graph nodes to have an "op" attribute of "tvm_op" or "null" + // NOTE: GraphExecutorDebug expects graph nodes to have an "op" attribute of "tvm_op" or "null" // and "null" is a placeholder node for a parameter or input. return 0; } @@ -235,7 +235,7 @@ class GraphRuntimeDebug : public GraphRuntime { * \param name The function which needs to be invoked. * \param sptr_to_self Packed function pointer. */ -PackedFunc GraphRuntimeDebug::GetFunction(const std::string& name, +PackedFunc GraphExecutorDebug::GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) { // return member functions during query. if (name == "get_output_by_layer") { @@ -261,20 +261,20 @@ PackedFunc GraphRuntimeDebug::GetFunction(const std::string& name, *rv = this->RunIndividual(number, repeat, min_repeat_ms); }); } else { - return GraphRuntime::GetFunction(name, sptr_to_self); + return GraphExecutor::GetFunction(name, sptr_to_self); } } /*! 
- * \brief GraphRuntimeDebugCreate Get the function based on input. + * \brief GraphExecutorDebugCreate Get the function based on input. * \param sym_json The graph symbol in json format. * \param m Compiled module which will be loaded. * \param devs All devices. */ -Module GraphRuntimeDebugCreate(const std::string& sym_json, const tvm::runtime::Module& m, +Module GraphExecutorDebugCreate(const std::string& sym_json, const tvm::runtime::Module& m, const std::vector& devs, PackedFunc lookup_linked_param_func) { - auto exec = make_object(); + auto exec = make_object(); exec->Init(sym_json, m, devs, lookup_linked_param_func); return Module(exec); } @@ -290,7 +290,7 @@ TVM_REGISTER_GLOBAL("tvm.graph_runtime_debug.create").set_body([](TVMArgs args, dev_start_arg++; } - *rv = GraphRuntimeDebugCreate(args[0], args[1], GetAllDevice(args, dev_start_arg), + *rv = GraphExecutorDebugCreate(args[0], args[1], GetAllDevice(args, dev_start_arg), lookup_linked_param_func); }); } // namespace runtime diff --git a/src/runtime/graph/graph_runtime.cc b/src/runtime/graph/graph_runtime.cc index a4320ee3b9c0..84e0fa146693 100644 --- a/src/runtime/graph/graph_runtime.cc +++ b/src/runtime/graph/graph_runtime.cc @@ -53,7 +53,7 @@ inline size_t GetDataAlignment(const DLTensor& arr) { /*! * \brief Run all the operations one by one. */ -void GraphRuntime::Run() { +void GraphExecutor::Run() { // setup the array and requirements. for (size_t i = 0; i < op_execs_.size(); ++i) { if (op_execs_[i]) op_execs_[i](); @@ -68,7 +68,7 @@ void GraphRuntime::Run() { * executed on. * \param lookup_linked_param_func Linked parameter lookup function. Default is nullptr. */ -void GraphRuntime::Init(const std::string& graph_json, tvm::runtime::Module module, +void GraphExecutor::Init(const std::string& graph_json, tvm::runtime::Module module, const std::vector& devs, const PackedFunc lookup_linked_param_func) { std::istringstream is(graph_json); @@ -94,7 +94,7 @@ void GraphRuntime::Init(const std::string& graph_json, tvm::runtime::Module modu * \param name The name of the input. * \return The index of input. */ -int GraphRuntime::GetInputIndex(const std::string& name) { +int GraphExecutor::GetInputIndex(const std::string& name) { auto it = input_map_.find(name); if (it != input_map_.end()) { return it->second; @@ -106,7 +106,7 @@ int GraphRuntime::GetInputIndex(const std::string& name) { * \param index The input index. * \param data_in The input data. */ -void GraphRuntime::SetInput(int index, DLTensor* data_in) { +void GraphExecutor::SetInput(int index, DLTensor* data_in) { ICHECK_LT(static_cast(index), input_nodes_.size()); uint32_t eid = this->entry_id(input_nodes_[index], 0); data_entry_[eid].CopyFrom(data_in); @@ -116,7 +116,7 @@ void GraphRuntime::SetInput(int index, DLTensor* data_in) { * \param index The input index. * \param data_ref The input data that is referred. */ -void GraphRuntime::SetInputZeroCopy(int index, DLTensor* data_ref) { +void GraphExecutor::SetInputZeroCopy(int index, DLTensor* data_ref) { ICHECK_LT(static_cast(index), input_nodes_.size()); uint32_t eid = this->entry_id(input_nodes_[index], 0); const DLTensor* old_t = data_entry_[eid].operator->(); @@ -141,20 +141,20 @@ void GraphRuntime::SetInputZeroCopy(int index, DLTensor* data_ref) { * * \return The number of outputs from graph. */ -int GraphRuntime::NumOutputs() const { return outputs_.size(); } +int GraphExecutor::NumOutputs() const { return outputs_.size(); } /*! * \brief Get the number of inputs * * \return The number of inputs to the graph. 
*/ -int GraphRuntime::NumInputs() const { return input_nodes_.size(); } +int GraphExecutor::NumInputs() const { return input_nodes_.size(); } /*! * \brief Return NDArray for given input index. * \param index The input index. * * \return NDArray corresponding to given input node index. */ -NDArray GraphRuntime::GetInput(int index) const { +NDArray GraphExecutor::GetInput(int index) const { ICHECK_LT(static_cast(index), input_nodes_.size()); uint32_t eid = this->entry_id(input_nodes_[index], 0); return data_entry_[eid]; @@ -165,7 +165,7 @@ NDArray GraphRuntime::GetInput(int index) const { * * \return NDArray corresponding to given output node index. */ -NDArray GraphRuntime::GetOutput(int index) const { +NDArray GraphExecutor::GetOutput(int index) const { ICHECK_LT(static_cast(index), outputs_.size()); uint32_t eid = this->entry_id(outputs_[index]); return data_entry_[eid]; @@ -175,7 +175,7 @@ NDArray GraphRuntime::GetOutput(int index) const { * \param index The output index. * \param data_out the output data. */ -void GraphRuntime::CopyOutputTo(int index, DLTensor* data_out) { +void GraphExecutor::CopyOutputTo(int index, DLTensor* data_out) { ICHECK_LT(static_cast(index), outputs_.size()); uint32_t eid = this->entry_id(outputs_[index]); @@ -193,12 +193,12 @@ void GraphRuntime::CopyOutputTo(int index, DLTensor* data_out) { * \brief Load parameters from parameter blob. * \param param_blob A binary blob of parameter. */ -void GraphRuntime::LoadParams(const std::string& param_blob) { +void GraphExecutor::LoadParams(const std::string& param_blob) { dmlc::MemoryStringStream strm(const_cast(¶m_blob)); this->LoadParams(&strm); } -void GraphRuntime::LoadParams(dmlc::Stream* strm) { +void GraphExecutor::LoadParams(dmlc::Stream* strm) { Map params = ::tvm::runtime::LoadParams(strm); for (auto& p : params) { int in_idx = GetInputIndex(p.first); @@ -208,7 +208,7 @@ void GraphRuntime::LoadParams(dmlc::Stream* strm) { } } -void GraphRuntime::ShareParams(const GraphRuntime& other, dmlc::Stream* strm) { +void GraphExecutor::ShareParams(const GraphExecutor& other, dmlc::Stream* strm) { uint64_t header, reserved; ICHECK(strm->Read(&header)) << "Invalid parameters file format"; ICHECK(header == kTVMNDArrayListMagic) << "Invalid parameters file format"; @@ -233,13 +233,13 @@ void GraphRuntime::ShareParams(const GraphRuntime& other, dmlc::Stream* strm) { this->SetupOpExecs(); } -void GraphRuntime::LinkedNDArrayDeleter(Object* container) { +void GraphExecutor::LinkedNDArrayDeleter(Object* container) { // container is the NDArray::Container which needs to get deleted. // The data member points to global const memory, so it does not need deleting. delete static_cast(container); } -void GraphRuntime::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { +void GraphExecutor::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { Module mod = args[0]; int64_t storage_id = args[1]; DLTensor* template_tensor = args[2]; @@ -266,11 +266,11 @@ void GraphRuntime::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { std::unique_ptr container{new NDArray::Container( static_cast(opaque_handle), shape_vec, template_tensor->dtype, dev)}; - container->SetDeleter(GraphRuntime::LinkedNDArrayDeleter); + container->SetDeleter(GraphExecutor::LinkedNDArrayDeleter); *rv = NDArray(GetObjectPtr(container.release())); } -void GraphRuntime::SetupStorage() { +void GraphExecutor::SetupStorage() { // Grab saved optimization plan from graph. 
std::vector vtype; for (const std::string& s_type : attrs_.dltype) { @@ -352,7 +352,7 @@ void GraphRuntime::SetupStorage() { } } -void GraphRuntime::SetupOpExecs() { +void GraphExecutor::SetupOpExecs() { op_execs_.resize(this->GetNumOfNodes()); input_dltensors_.resize(num_node_entries()); std::unordered_set input_node_eids; @@ -389,9 +389,9 @@ void GraphRuntime::SetupOpExecs() { } } -std::pair, std::shared_ptr > GraphRuntime::CreateTVMOp( +std::pair, std::shared_ptr > GraphExecutor::CreateTVMOp( const TVMOpParam& param, const std::vector& args, size_t num_inputs) { - std::shared_ptr arg_ptr = std::make_shared(); + std::shared_ptr arg_ptr = std::make_shared(); // setup address. arg_ptr->args = args; if (param.flatten_data) { @@ -438,7 +438,7 @@ std::pair, std::shared_ptr > GraphRu return {fexec, arg_ptr}; } -PackedFunc GraphRuntime::GetFunction(const std::string& name, +PackedFunc GraphExecutor::GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) { // Return member functions during query. if (name == "set_input") { @@ -494,20 +494,20 @@ PackedFunc GraphRuntime::GetFunction(const std::string& name, } else if (name == "share_params") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { const auto& module = args[0].operator Module(); - ICHECK_EQ(module.operator->()->type_key(), std::string("GraphRuntime")); + ICHECK_EQ(module.operator->()->type_key(), std::string("GraphExecutor")); const auto& param_blob = args[1].operator std::string(); dmlc::MemoryStringStream strm(const_cast(¶m_blob)); - this->ShareParams(dynamic_cast(*module.operator->()), &strm); + this->ShareParams(dynamic_cast(*module.operator->()), &strm); }); } else { return PackedFunc(); } } -Module GraphRuntimeCreate(const std::string& sym_json, const tvm::runtime::Module& m, +Module GraphExecutorCreate(const std::string& sym_json, const tvm::runtime::Module& m, const std::vector& devs, const PackedFunc lookup_linked_param_func) { - auto exec = make_object(); + auto exec = make_object(); exec->Init(sym_json, m, devs, lookup_linked_param_func); return Module(exec); } @@ -542,7 +542,7 @@ TVM_REGISTER_GLOBAL("tvm.graph_runtime.create").set_body([](TVMArgs args, TVMRet dev_start_arg++; } const auto& devices = GetAllDevice(args, dev_start_arg); - *rv = GraphRuntimeCreate(args[0], args[1], devices, lookup_linked_param_func); + *rv = GraphExecutorCreate(args[0], args[1], devices, lookup_linked_param_func); }); } // namespace runtime } // namespace tvm diff --git a/src/runtime/graph/graph_runtime.h b/src/runtime/graph/graph_runtime.h index aeaee9e3483e..5877ce0231e8 100644 --- a/src/runtime/graph/graph_runtime.h +++ b/src/runtime/graph/graph_runtime.h @@ -61,7 +61,7 @@ struct TVMOpParam { * This runtime can be acccesibly in various language via * TVM runtime PackedFunc API. */ -class TVM_DLL GraphRuntime : public ModuleNode { +class TVM_DLL GraphExecutor : public ModuleNode { struct OpArgs { std::vector args; std::vector arg_values; @@ -81,7 +81,7 @@ class TVM_DLL GraphRuntime : public ModuleNode { /*! * \return The type key of the executor. */ - const char* type_key() const final { return "GraphRuntime"; } + const char* type_key() const final { return "GraphExecutor"; } void Run(); /*! @@ -162,12 +162,12 @@ class TVM_DLL GraphRuntime : public ModuleNode { void LoadParams(const std::string& param_blob); /*! - * \brief Share parameters from pre-existing GraphRuntime instance. 
- * \param other A GraphRuntime instance, previously with |LoadParams| called with the + * \brief Share parameters from pre-existing GraphExecutor instance. + * \param other A GraphExecutor instance, previously with |LoadParams| called with the * identical input |param_blob|. * \param strm The input stream. */ - void ShareParams(const GraphRuntime& other, dmlc::Stream* strm); + void ShareParams(const GraphExecutor& other, dmlc::Stream* strm); /*! * \brief Get total number of nodes. diff --git a/src/runtime/graph/graph_runtime_factory.cc b/src/runtime/graph/graph_runtime_factory.cc index a6f87bac8a6e..d74ef461dfb5 100644 --- a/src/runtime/graph/graph_runtime_factory.cc +++ b/src/runtime/graph/graph_runtime_factory.cc @@ -34,7 +34,7 @@ namespace tvm { namespace runtime { -GraphRuntimeFactory::GraphRuntimeFactory( +GraphExecutorFactory::GraphExecutorFactory( const std::string& graph_json, const std::unordered_map& params, const std::string& module_name) { @@ -43,7 +43,7 @@ GraphRuntimeFactory::GraphRuntimeFactory( module_name_ = module_name; } -PackedFunc GraphRuntimeFactory::GetFunction( +PackedFunc GraphExecutorFactory::GetFunction( const std::string& name, const tvm::runtime::ObjectPtr& sptr_to_self) { if (name == module_name_) { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { @@ -68,7 +68,7 @@ PackedFunc GraphRuntimeFactory::GetFunction( return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { std::unordered_map empty_params{}; auto exec = - make_object(this->graph_json_, empty_params, this->module_name_); + make_object(this->graph_json_, empty_params, this->module_name_); exec->Import(this->imports_[0]); *rv = Module(exec); }); @@ -78,14 +78,14 @@ PackedFunc GraphRuntimeFactory::GetFunction( for (int i = 0; i < args.num_args; ++i) { devices.emplace_back(args[i].operator Device()); } - *rv = this->CudaGraphRuntimeCreate(devices); + *rv = this->CudaGraphExecutorCreate(devices); }); } else { return PackedFunc(); } } -void GraphRuntimeFactory::SaveToBinary(dmlc::Stream* stream) { +void GraphExecutorFactory::SaveToBinary(dmlc::Stream* stream) { stream->Write(graph_json_); std::vector names; std::vector arrays; @@ -103,15 +103,15 @@ void GraphRuntimeFactory::SaveToBinary(dmlc::Stream* stream) { stream->Write(module_name_); } -Module GraphRuntimeFactory::RuntimeCreate(const std::vector& devs) { - auto exec = make_object(); +Module GraphExecutorFactory::RuntimeCreate(const std::vector& devs) { + auto exec = make_object(); exec->Init(this->graph_json_, this->imports_[0], devs, PackedFunc()); // set params SetParams(exec.get(), this->params_); return Module(exec); } -Module GraphRuntimeFactory::DebugRuntimeCreate(const std::vector& devs) { +Module GraphExecutorFactory::DebugRuntimeCreate(const std::vector& devs) { const PackedFunc* pf = tvm::runtime::Registry::Get("tvm.graph_runtime_debug.create"); ICHECK(pf != nullptr) << "Cannot find function tvm.graph_runtime_debug.create in registry. " "Do you enable debug graph runtime build?"; @@ -134,11 +134,11 @@ Module GraphRuntimeFactory::DebugRuntimeCreate(const std::vector& devs) pf->CallPacked(TVMArgs(values.data(), codes.data(), args_size), &rv); Module mod = rv.operator Module(); // debug graph runtime is one child class of graph runtime. 
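The `name == module_name_` branch above is what backs the usual factory idiom in Python, and `DebugRuntimeCreate` is the path behind the debug wrapper. A sketch under the assumption that `mod` and `params` hold a Relay module and its weights:

```python
import tvm
from tvm import relay
from tvm.contrib import graph_executor
from tvm.contrib.debugger import debug_executor  # renamed from debug_runtime

# mod and params are placeholders for a Relay module and its weights.
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target="llvm", params=params)

dev = tvm.cpu(0)
# lib["default"](dev) hits the factory's module_name_ branch and returns a
# GraphExecutor module with the stored graph JSON and parameters applied.
gmod = graph_executor.GraphModule(lib["default"](dev))
gmod.set_input("data", tvm.nd.array(data))
gmod.run()
out = gmod.get_output(0).asnumpy()

# Debug variant (GraphExecutorDebug): per-operator timing via RunIndividual.
# graph_json and compiled_lib are placeholders for the same build artifacts.
dbg = debug_executor.create(graph_json, compiled_lib, dev)
dbg.set_input(**params)
per_op_times = dbg.run_individual(number=10, repeat=3, min_repeat_ms=0)
```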
- SetParams(const_cast(mod.as()), this->params_); + SetParams(const_cast(mod.as()), this->params_); return mod; } -Module GraphRuntimeFactory::CudaGraphRuntimeCreate(const std::vector& devs) { +Module GraphExecutorFactory::CudaGraphExecutorCreate(const std::vector& devs) { const PackedFunc* pf = tvm::runtime::Registry::Get("tvm.graph_runtime_cuda_graph.create"); ICHECK(pf != nullptr) << "Cannot find function tvm.graph_runtime_cuda_graph.create in registry. " "Did you set(USE_GRAPH_RUNTIME_CUGRAPH=ON)?"; @@ -159,11 +159,11 @@ Module GraphRuntimeFactory::CudaGraphRuntimeCreate(const std::vector& de TVMRetValue rv; pf->CallPacked(TVMArgs(values.data(), codes.data(), args_size), &rv); Module mod = rv.operator Module(); - SetParams(const_cast(mod.as()), this->params_); + SetParams(const_cast(mod.as()), this->params_); return mod; } -Module GraphRuntimeFactoryModuleLoadBinary(void* strm) { +Module GraphExecutorFactoryModuleLoadBinary(void* strm) { dmlc::Stream* stream = static_cast(strm); std::string graph_json; std::unordered_map params; @@ -180,7 +180,7 @@ Module GraphRuntimeFactoryModuleLoadBinary(void* strm) { params[names[i]] = temp; } ICHECK(stream->Read(&module_name)); - auto exec = make_object(graph_json, params, module_name); + auto exec = make_object(graph_json, params, module_name); return Module(exec); } @@ -197,13 +197,13 @@ TVM_REGISTER_GLOBAL("tvm.graph_runtime_factory.create").set_body([](TVMArgs args std::string name = args[i].operator String(); params[name] = args[i + 1].operator tvm::runtime::NDArray(); } - auto exec = make_object(args[0], params, args[2]); + auto exec = make_object(args[0], params, args[2]); exec->Import(args[1]); *rv = Module(exec); }); -TVM_REGISTER_GLOBAL("runtime.module.loadbinary_GraphRuntimeFactory") - .set_body_typed(GraphRuntimeFactoryModuleLoadBinary); +TVM_REGISTER_GLOBAL("runtime.module.loadbinary_GraphExecutorFactory") + .set_body_typed(GraphExecutorFactoryModuleLoadBinary); } // namespace runtime } // namespace tvm diff --git a/src/runtime/graph/graph_runtime_factory.h b/src/runtime/graph/graph_runtime_factory.h index 86958218a0f7..b1e4d230da7f 100644 --- a/src/runtime/graph/graph_runtime_factory.h +++ b/src/runtime/graph/graph_runtime_factory.h @@ -42,15 +42,15 @@ namespace tvm { namespace runtime { -class TVM_DLL GraphRuntimeFactory : public runtime::ModuleNode { +class TVM_DLL GraphExecutorFactory : public runtime::ModuleNode { public: /*! - * \brief Construct the GraphRuntimeFactory. + * \brief Construct the GraphExecutorFactory. * \param graph_json The execution graph. * \param params The params of graph. * \param module_name The module name of graph. */ - GraphRuntimeFactory(const std::string& graph_json, + GraphExecutorFactory(const std::string& graph_json, const std::unordered_map& params, const std::string& module_name = "default"); @@ -65,7 +65,7 @@ class TVM_DLL GraphRuntimeFactory : public runtime::ModuleNode { /*! * \return The type key of the executor. */ - const char* type_key() const override { return "GraphRuntimeFactory"; } + const char* type_key() const override { return "GraphExecutorFactory"; } /*! * \brief Save the module to binary stream. @@ -95,14 +95,14 @@ class TVM_DLL GraphRuntimeFactory : public runtime::ModuleNode { * executed on. * \return created cuda graph runtime module */ - Module CudaGraphRuntimeCreate(const std::vector& devs); + Module CudaGraphExecutorCreate(const std::vector& devs); /*! * \brief Set params. * \param graph_runtime The graph runtime we want to set the params into. 
* \param params The graph params value we want to set. */ - void SetParams(GraphRuntime* graph_runtime, + void SetParams(GraphExecutor* graph_runtime, const std::unordered_map& params) const { std::unordered_map value = params; // upload big arrays first to avoid memory issue in rpc mode diff --git a/src/runtime/micro/standalone/utvm_graph_runtime.cc b/src/runtime/micro/standalone/utvm_graph_runtime.cc index 897a4f4b5c0c..f945fd9f82f2 100644 --- a/src/runtime/micro/standalone/utvm_graph_runtime.cc +++ b/src/runtime/micro/standalone/utvm_graph_runtime.cc @@ -226,7 +226,7 @@ void* DSOModule::GetSymbol(const char* name) const { return f; } -MicroGraphRuntime::MicroGraphRuntime(const std::string& graph_json, DSOModule* module) { +MicroGraphExecutor::MicroGraphExecutor(const std::string& graph_json, DSOModule* module) { assert(module); module_ = module; picojson::value v; @@ -240,28 +240,28 @@ MicroGraphRuntime::MicroGraphRuntime(const std::string& graph_json, DSOModule* m SetupOpExecs(); } -MicroGraphRuntime::~MicroGraphRuntime() {} +MicroGraphExecutor::~MicroGraphExecutor() {} -void MicroGraphRuntime::Run() { +void MicroGraphExecutor::Run() { for (size_t i = 0; i < op_execs_.size(); ++i) { if (op_execs_[i]) op_execs_[i](); } } -void MicroGraphRuntime::SetInput(int index, DLTensor* data_in) { +void MicroGraphExecutor::SetInput(int index, DLTensor* data_in) { assert(static_cast(index) < input_nodes_.size()); uint32_t eid = this->entry_id(input_nodes_[index], 0); data_entry_[eid].CopyFrom(data_in); } -void MicroGraphRuntime::CopyOutputTo(int index, DLTensor* data_out) { +void MicroGraphExecutor::CopyOutputTo(int index, DLTensor* data_out) { assert(static_cast(index) < outputs_.size()); uint32_t eid = this->entry_id(outputs_[index]); const NDArray& data = data_entry_[eid]; data.CopyTo(data_out); } -void MicroGraphRuntime::SetupStorage() { +void MicroGraphExecutor::SetupStorage() { // Grab saved optimization plan from graph. DynArray vtype(attrs_.dltype.size()); for (size_t i = 0; i < attrs_.dltype.size(); ++i) { @@ -373,7 +373,7 @@ std::function CreateTVMOp(const DSOModule& module, const TVMOpParam& par return fexec; } -void MicroGraphRuntime::SetupOpExecs() { +void MicroGraphExecutor::SetupOpExecs() { op_execs_.resize(nodes_.size()); // setup the array and requirements. for (uint32_t nid = 0; nid < nodes_.size(); ++nid) { diff --git a/src/runtime/micro/standalone/utvm_graph_runtime.h b/src/runtime/micro/standalone/utvm_graph_runtime.h index 3a2519c98fd4..5e15d5d0a7e7 100644 --- a/src/runtime/micro/standalone/utvm_graph_runtime.h +++ b/src/runtime/micro/standalone/utvm_graph_runtime.h @@ -116,12 +116,12 @@ class NDArray { DLDevice device_; }; -// Minimal GraphRuntime implementation -class MicroGraphRuntime { +// Minimal GraphExecutor implementation +class MicroGraphExecutor { public: - // Construct a GraphRuntime with the given graph and DSOModule. - MicroGraphRuntime(const std::string& graph_json, DSOModule* module); - ~MicroGraphRuntime(); + // Construct a GraphExecutor with the given graph and DSOModule. 
+ MicroGraphExecutor(const std::string& graph_json, DSOModule* module); + ~MicroGraphExecutor(); // Run the graph void Run(); // Set the input at `index` to a copy of the tensor `data_in` diff --git a/src/runtime/micro/standalone/utvm_runtime.cc b/src/runtime/micro/standalone/utvm_runtime.cc index 73d616b6d482..c6b73aad6717 100644 --- a/src/runtime/micro/standalone/utvm_runtime.cc +++ b/src/runtime/micro/standalone/utvm_runtime.cc @@ -23,25 +23,25 @@ #include "utvm_graph_runtime.h" void* UTVMRuntimeCreate(const char* json, size_t json_len, void* module) { - return new tvm::micro::MicroGraphRuntime(std::string(json, json + json_len), + return new tvm::micro::MicroGraphExecutor(std::string(json, json + json_len), reinterpret_cast(module)); } void UTVMRuntimeDestroy(void* handle) { - delete reinterpret_cast(handle); + delete reinterpret_cast(handle); } void UTVMRuntimeSetInput(void* handle, int index, void* tensor) { - reinterpret_cast(handle)->SetInput( + reinterpret_cast(handle)->SetInput( index, reinterpret_cast(tensor)); } void UTVMRuntimeRun(void* handle) { - reinterpret_cast(handle)->Run(); + reinterpret_cast(handle)->Run(); } void UTVMRuntimeGetOutput(void* handle, int index, void* tensor) { - reinterpret_cast(handle)->CopyOutputTo( + reinterpret_cast(handle)->CopyOutputTo( index, reinterpret_cast(tensor)); } void* UTVMRuntimeDSOModuleCreate(const char* so, size_t so_len) { diff --git a/tests/python/unittest/test_link_params.py b/tests/python/unittest/test_link_params.py index 9fde832d08ce..4a24687a265b 100644 --- a/tests/python/unittest/test_link_params.py +++ b/tests/python/unittest/test_link_params.py @@ -124,7 +124,7 @@ def _verify_linked_param(dtype, lib, mod, graph, name): """Directly read memory from the linked library to verify the linked parameter is correct.""" sid = _lookup_sid(graph, name) # NOTE: query_imports=True because when loading a module from disk (i.e. for C backend), - # a GraphRuntimeFactory module is created instead of the module itself. + # a GraphExecutorFactory module is created instead of the module itself. param_ptr = mod.get_function("_lookup_linked_param", True)(sid) gen_param = lib.params[name] arr_data = (_get_ctypes_dtype(dtype) * np.prod(gen_param.shape)).from_address(param_ptr.value) @@ -154,7 +154,7 @@ def _add_decl(name, dtype): f"def @main(%rand_input : Tensor[{INPUT_SHAPE}, {dtype}], { ', '.join(param_decls.values()) } ) {{", # This program ensures that GraphPlanMemory alternates between the same two storage IDs for a # while. In doing this, it ensures that param %{dtype}_b will be placed into the graph at an - # index unequal to its storage_id. This ensures that GraphRuntimeCodegen encodes the storage_id + # index unequal to its storage_id. This ensures that GraphExecutorCodegen encodes the storage_id # and not the parameter index into the graph. 
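The test comments above explain why `GraphExecutorCodegen` must key linked parameters by storage id rather than by parameter index. A hedged sketch of the user-facing flow this test exercises, assuming the `--link-params` target flag used elsewhere in the test file:

```python
import tvm
from tvm import relay

# mod and params are placeholders for a Relay module and its weights.
# With --link-params (an assumption about the exact flag spelling), the
# weights are compiled into the library itself; at load time the executor
# fills its storage pool through _lookup_linked_param, keyed by storage id,
# so no load_params call or second copy of the weights is needed.
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target="llvm --link-params", params=params)
```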
( f' %0 = nn.conv2d(%rand_input, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", ' diff --git a/tests/python/unittest/test_micro_model_library_format.py b/tests/python/unittest/test_micro_model_library_format.py index c999091cc3cc..510cc5a98a64 100644 --- a/tests/python/unittest/test_micro_model_library_format.py +++ b/tests/python/unittest/test_micro_model_library_format.py @@ -170,7 +170,7 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[ @tvm.testing.requires_micro def test_export_model(): module = tvm.support.FrontendTestModule() - factory = graph_runtime_factory.GraphRuntimeFactoryModule( + factory = graph_runtime_factory.GraphExecutorFactoryModule( None, tvm.target.target.micro("host"), '"graph_json"', module, "test_module", {} ) diff --git a/web/src/runtime.ts b/web/src/runtime.ts index 968dd9acbb5b..4b3e96c75457 100644 --- a/web/src/runtime.ts +++ b/web/src/runtime.ts @@ -576,7 +576,7 @@ export class Module implements Disposable { * you can also directly call set_input, run, and get_output * of underlying module functions */ -class GraphRuntime implements Disposable { +class GraphExecutor implements Disposable { module: Module; private packedSetInput: PackedFunc; private packedRun: PackedFunc; @@ -992,18 +992,18 @@ export class Instance implements Disposable { * @param lib The underlying library. * @param dev The execution device of the graph. */ - createGraphRuntime( + createGraphExecutor( graphJson: string, lib: Module, dev: DLDevice - ): GraphRuntime { + ): GraphExecutor { const fcreate = this.getGlobalFunc("tvm.graph_runtime.create"); const module = fcreate( graphJson, lib, this.scalar(dev.deviceType, "int32"), this.scalar(dev.deviceId, "int32")) as Module; - return new GraphRuntime(module); + return new GraphExecutor(module); } From d81ab548ed6c3e1731284522ebb15cc591848230 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 12 Mar 2021 10:04:14 -0800 Subject: [PATCH 02/16] rename files --- .../tvm/contrib/{GraphRuntime.java => GraphExecutor.java} | 0 .../{GraphRuntimeTest.java => GraphExecutorTest.java} | 0 .../{cuda_graph_runtime.py => cuda_graph_executor.py} | 0 .../debugger/{debug_runtime.py => debug_executor.py} | 0 .../tvm/contrib/{graph_runtime.py => graph_executor.py} | 0 ...graph_runtime_codegen.py => graph_executor_codegen.py} | 0 ...graph_runtime_factory.py => graph_executor_factory.py} | 0 ...graph_runtime_codegen.cc => graph_executor_codegen.cc} | 0 .../graph_runtime.c => graph_executor/graph_executor.c} | 0 .../crt/{graph_runtime => graph_executor}/load_json.c | 0 .../graph_executor_module.c} | 0 .../graph_runtime.h => graph_executor/graph_executor.h} | 0 .../{graph_runtime => graph_executor}/load_json.h | 0 .../cuda_graph/graph_runtime_cuda_graph.cc | 0 .../debug/graph_executor_debug.cc} | 0 .../graph_runtime.cc => graph_executor/graph_executor.cc} | 0 .../graph_runtime.h => graph_executor/graph_executor.h} | 8 ++++---- .../graph_executor_factory.cc} | 0 .../graph_executor_factory.h} | 8 ++++---- .../{utvm_graph_runtime.cc => utvm_graph_executor.cc} | 0 .../{utvm_graph_runtime.h => utvm_graph_executor.h} | 0 ...nd_graph_runtime.py => test_backend_graph_executor.py} | 0 22 files changed, 8 insertions(+), 8 deletions(-) rename jvm/core/src/main/java/org/apache/tvm/contrib/{GraphRuntime.java => GraphExecutor.java} (100%) rename jvm/core/src/test/java/org/apache/tvm/contrib/{GraphRuntimeTest.java => GraphExecutorTest.java} (100%) rename python/tvm/contrib/cuda_graph/{cuda_graph_runtime.py => cuda_graph_executor.py} (100%) 
rename python/tvm/contrib/debugger/{debug_runtime.py => debug_executor.py} (100%) rename python/tvm/contrib/{graph_runtime.py => graph_executor.py} (100%) rename python/tvm/relay/backend/{graph_runtime_codegen.py => graph_executor_codegen.py} (100%) rename python/tvm/relay/backend/{graph_runtime_factory.py => graph_executor_factory.py} (100%) rename src/relay/backend/{graph_runtime_codegen.cc => graph_executor_codegen.cc} (100%) rename src/runtime/crt/{graph_runtime/graph_runtime.c => graph_executor/graph_executor.c} (100%) rename src/runtime/crt/{graph_runtime => graph_executor}/load_json.c (100%) rename src/runtime/crt/{graph_runtime_module/graph_runtime_module.c => graph_executor_module/graph_executor_module.c} (100%) rename src/runtime/crt/include/tvm/runtime/crt/internal/{graph_runtime/graph_runtime.h => graph_executor/graph_executor.h} (100%) rename src/runtime/crt/include/tvm/runtime/crt/internal/{graph_runtime => graph_executor}/load_json.h (100%) rename src/runtime/{graph => graph_executor}/cuda_graph/graph_runtime_cuda_graph.cc (100%) rename src/runtime/{graph/debug/graph_runtime_debug.cc => graph_executor/debug/graph_executor_debug.cc} (100%) rename src/runtime/{graph/graph_runtime.cc => graph_executor/graph_executor.cc} (100%) rename src/runtime/{graph/graph_runtime.h => graph_executor/graph_executor.h} (98%) rename src/runtime/{graph/graph_runtime_factory.cc => graph_executor/graph_executor_factory.cc} (100%) rename src/runtime/{graph/graph_runtime_factory.h => graph_executor/graph_executor_factory.h} (94%) rename src/runtime/micro/standalone/{utvm_graph_runtime.cc => utvm_graph_executor.cc} (100%) rename src/runtime/micro/standalone/{utvm_graph_runtime.h => utvm_graph_executor.h} (100%) rename tests/python/relay/{test_backend_graph_runtime.py => test_backend_graph_executor.py} (100%) diff --git a/jvm/core/src/main/java/org/apache/tvm/contrib/GraphRuntime.java b/jvm/core/src/main/java/org/apache/tvm/contrib/GraphExecutor.java similarity index 100% rename from jvm/core/src/main/java/org/apache/tvm/contrib/GraphRuntime.java rename to jvm/core/src/main/java/org/apache/tvm/contrib/GraphExecutor.java diff --git a/jvm/core/src/test/java/org/apache/tvm/contrib/GraphRuntimeTest.java b/jvm/core/src/test/java/org/apache/tvm/contrib/GraphExecutorTest.java similarity index 100% rename from jvm/core/src/test/java/org/apache/tvm/contrib/GraphRuntimeTest.java rename to jvm/core/src/test/java/org/apache/tvm/contrib/GraphExecutorTest.java diff --git a/python/tvm/contrib/cuda_graph/cuda_graph_runtime.py b/python/tvm/contrib/cuda_graph/cuda_graph_executor.py similarity index 100% rename from python/tvm/contrib/cuda_graph/cuda_graph_runtime.py rename to python/tvm/contrib/cuda_graph/cuda_graph_executor.py diff --git a/python/tvm/contrib/debugger/debug_runtime.py b/python/tvm/contrib/debugger/debug_executor.py similarity index 100% rename from python/tvm/contrib/debugger/debug_runtime.py rename to python/tvm/contrib/debugger/debug_executor.py diff --git a/python/tvm/contrib/graph_runtime.py b/python/tvm/contrib/graph_executor.py similarity index 100% rename from python/tvm/contrib/graph_runtime.py rename to python/tvm/contrib/graph_executor.py diff --git a/python/tvm/relay/backend/graph_runtime_codegen.py b/python/tvm/relay/backend/graph_executor_codegen.py similarity index 100% rename from python/tvm/relay/backend/graph_runtime_codegen.py rename to python/tvm/relay/backend/graph_executor_codegen.py diff --git a/python/tvm/relay/backend/graph_runtime_factory.py 
b/python/tvm/relay/backend/graph_executor_factory.py similarity index 100% rename from python/tvm/relay/backend/graph_runtime_factory.py rename to python/tvm/relay/backend/graph_executor_factory.py diff --git a/src/relay/backend/graph_runtime_codegen.cc b/src/relay/backend/graph_executor_codegen.cc similarity index 100% rename from src/relay/backend/graph_runtime_codegen.cc rename to src/relay/backend/graph_executor_codegen.cc diff --git a/src/runtime/crt/graph_runtime/graph_runtime.c b/src/runtime/crt/graph_executor/graph_executor.c similarity index 100% rename from src/runtime/crt/graph_runtime/graph_runtime.c rename to src/runtime/crt/graph_executor/graph_executor.c diff --git a/src/runtime/crt/graph_runtime/load_json.c b/src/runtime/crt/graph_executor/load_json.c similarity index 100% rename from src/runtime/crt/graph_runtime/load_json.c rename to src/runtime/crt/graph_executor/load_json.c diff --git a/src/runtime/crt/graph_runtime_module/graph_runtime_module.c b/src/runtime/crt/graph_executor_module/graph_executor_module.c similarity index 100% rename from src/runtime/crt/graph_runtime_module/graph_runtime_module.c rename to src/runtime/crt/graph_executor_module/graph_executor_module.c diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h similarity index 100% rename from src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h rename to src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/load_json.h b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h similarity index 100% rename from src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/load_json.h rename to src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h diff --git a/src/runtime/graph/cuda_graph/graph_runtime_cuda_graph.cc b/src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc similarity index 100% rename from src/runtime/graph/cuda_graph/graph_runtime_cuda_graph.cc rename to src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc diff --git a/src/runtime/graph/debug/graph_runtime_debug.cc b/src/runtime/graph_executor/debug/graph_executor_debug.cc similarity index 100% rename from src/runtime/graph/debug/graph_runtime_debug.cc rename to src/runtime/graph_executor/debug/graph_executor_debug.cc diff --git a/src/runtime/graph/graph_runtime.cc b/src/runtime/graph_executor/graph_executor.cc similarity index 100% rename from src/runtime/graph/graph_runtime.cc rename to src/runtime/graph_executor/graph_executor.cc diff --git a/src/runtime/graph/graph_runtime.h b/src/runtime/graph_executor/graph_executor.h similarity index 98% rename from src/runtime/graph/graph_runtime.h rename to src/runtime/graph_executor/graph_executor.h index 5877ce0231e8..fc5d2a522a91 100644 --- a/src/runtime/graph/graph_runtime.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -20,10 +20,10 @@ /*! * \brief Tiny graph runtime that can run graph * containing only tvm PackedFunc. 
- * \file graph_runtime.h + * \file graph_executor.h */ -#ifndef TVM_RUNTIME_GRAPH_GRAPH_RUNTIME_H_ -#define TVM_RUNTIME_GRAPH_GRAPH_RUNTIME_H_ +#ifndef TVM_RUNTIME_GRAPH_EXECUTOR_GRAPH_EXECUTOR_H_ +#define TVM_RUNTIME_GRAPH_EXECUTOR_GRAPH_EXECUTOR_H_ #include #include @@ -430,4 +430,4 @@ std::vector GetAllDevice(const TVMArgs& args, int dev_start_arg); } // namespace runtime } // namespace tvm -#endif // TVM_RUNTIME_GRAPH_GRAPH_RUNTIME_H_ +#endif // TVM_RUNTIME_GRAPH_EXECUTOR_GRAPH_EXECUTOR_H_ diff --git a/src/runtime/graph/graph_runtime_factory.cc b/src/runtime/graph_executor/graph_executor_factory.cc similarity index 100% rename from src/runtime/graph/graph_runtime_factory.cc rename to src/runtime/graph_executor/graph_executor_factory.cc diff --git a/src/runtime/graph/graph_runtime_factory.h b/src/runtime/graph_executor/graph_executor_factory.h similarity index 94% rename from src/runtime/graph/graph_runtime_factory.h rename to src/runtime/graph_executor/graph_executor_factory.h index b1e4d230da7f..3881b6b7f0b8 100644 --- a/src/runtime/graph/graph_runtime_factory.h +++ b/src/runtime/graph_executor/graph_executor_factory.h @@ -18,12 +18,12 @@ */ /*! - * \file tvm/runtime/graph_runtime_factory.h + * \file tvm/runtime/graph_executor/graph_executor_factory.h * \brief Graph runtime factory creating graph runtime. */ -#ifndef TVM_RUNTIME_GRAPH_GRAPH_RUNTIME_FACTORY_H_ -#define TVM_RUNTIME_GRAPH_GRAPH_RUNTIME_FACTORY_H_ +#ifndef TVM_RUNTIME_GRAPH_EXECUTOR_GRAPH_EXECUTOR_FACTORY_H_ +#define TVM_RUNTIME_GRAPH_EXECUTOR_GRAPH_EXECUTOR_FACTORY_H_ #include #include @@ -136,4 +136,4 @@ class TVM_DLL GraphExecutorFactory : public runtime::ModuleNode { } // namespace runtime } // namespace tvm -#endif // TVM_RUNTIME_GRAPH_GRAPH_RUNTIME_FACTORY_H_ +#endif // TVM_RUNTIME_GRAPH_EXECUTOR_GRAPH_EXECUTOR_FACTORY_H_ diff --git a/src/runtime/micro/standalone/utvm_graph_runtime.cc b/src/runtime/micro/standalone/utvm_graph_executor.cc similarity index 100% rename from src/runtime/micro/standalone/utvm_graph_runtime.cc rename to src/runtime/micro/standalone/utvm_graph_executor.cc diff --git a/src/runtime/micro/standalone/utvm_graph_runtime.h b/src/runtime/micro/standalone/utvm_graph_executor.h similarity index 100% rename from src/runtime/micro/standalone/utvm_graph_runtime.h rename to src/runtime/micro/standalone/utvm_graph_executor.h diff --git a/tests/python/relay/test_backend_graph_runtime.py b/tests/python/relay/test_backend_graph_executor.py similarity index 100% rename from tests/python/relay/test_backend_graph_runtime.py rename to tests/python/relay/test_backend_graph_executor.py From dfe2cbc890f3a35c326ebe1cd59af639c35404f5 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Thu, 18 Mar 2021 12:07:34 -0700 Subject: [PATCH 03/16] Fix paths associated with rename --- CMakeLists.txt | 10 +++++----- apps/android_camera/app/src/main/jni/tvm_runtime.h | 2 +- apps/android_deploy/app/src/main/jni/tvm_runtime.h | 2 +- apps/android_rpc/app/src/main/jni/tvm_runtime.h | 4 ++-- apps/bundle_deploy/runtime.cc | 2 +- apps/howto_deploy/tvm_runtime_pack.cc | 4 ++-- apps/ios_rpc/tvmrpc/TVMRuntime.mm | 2 +- cmake/modules/CUDA.cmake | 2 +- golang/src/tvm_runtime_pack.cc | 2 +- web/emcc/wasm_runtime.cc | 2 +- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d37bd4e6e44..ecda307f585d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -309,8 +309,8 @@ endif(USE_STACKVM_RUNTIME) if(USE_GRAPH_RUNTIME) message(STATUS "Build with Graph runtime support...") - file(GLOB 
RUNTIME_GRAPH_SRCS src/runtime/graph/*.cc) - list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_SRCS}) + file(GLOB RUNTIME_GRAPH_EXECUTOR_SRCS src/runtime/graph_executor/*.cc) + list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_EXECUTOR_SRCS}) endif(USE_GRAPH_RUNTIME) @@ -327,9 +327,9 @@ endif() if(USE_PROFILER) message(STATUS "Build with profiler...") - file(GLOB RUNTIME_GRAPH_DEBUG_SRCS src/runtime/graph/debug/*.cc) - list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_DEBUG_SRCS}) - set_source_files_properties(${RUNTIME_GRAPH_SRCS} + file(GLOB RUNTIME_GRAPH_EXECUTOR_DEBUG_SRCS src/runtime/graph_executor/debug/*.cc) + list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_EXECUTOR_DEBUG_SRCS}) + set_source_files_properties(${RUNTIME_GRAPH_EXECUTOR_SRCS} PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_RUNTIME_DEBUG") file(GLOB RUNTIME_VM_PROFILER_SRCS src/runtime/vm/profiler/*.cc) diff --git a/apps/android_camera/app/src/main/jni/tvm_runtime.h b/apps/android_camera/app/src/main/jni/tvm_runtime.h index 1fd19b29e20b..91f21ff02ff6 100644 --- a/apps/android_camera/app/src/main/jni/tvm_runtime.h +++ b/apps/android_camera/app/src/main/jni/tvm_runtime.h @@ -37,7 +37,7 @@ #include "../src/runtime/cpu_device_api.cc" #include "../src/runtime/dso_library.cc" #include "../src/runtime/file_utils.cc" -#include "../src/runtime/graph/graph_runtime.cc" +#include "../src/runtime/graph_executor/graph_runtime.cc" #include "../src/runtime/library_module.cc" #include "../src/runtime/module.cc" #include "../src/runtime/ndarray.cc" diff --git a/apps/android_deploy/app/src/main/jni/tvm_runtime.h b/apps/android_deploy/app/src/main/jni/tvm_runtime.h index 6fe8c98756bb..2f7964ebfc09 100644 --- a/apps/android_deploy/app/src/main/jni/tvm_runtime.h +++ b/apps/android_deploy/app/src/main/jni/tvm_runtime.h @@ -32,7 +32,7 @@ #include "../src/runtime/cpu_device_api.cc" #include "../src/runtime/dso_library.cc" #include "../src/runtime/file_utils.cc" -#include "../src/runtime/graph/graph_runtime.cc" +#include "../src/runtime/graph_executor/graph_runtime.cc" #include "../src/runtime/library_module.cc" #include "../src/runtime/module.cc" #include "../src/runtime/ndarray.cc" diff --git a/apps/android_rpc/app/src/main/jni/tvm_runtime.h b/apps/android_rpc/app/src/main/jni/tvm_runtime.h index 0745c30ffb1d..5b046cbc70b4 100644 --- a/apps/android_rpc/app/src/main/jni/tvm_runtime.h +++ b/apps/android_rpc/app/src/main/jni/tvm_runtime.h @@ -37,8 +37,8 @@ #include "../src/runtime/cpu_device_api.cc" #include "../src/runtime/dso_library.cc" #include "../src/runtime/file_utils.cc" -#include "../src/runtime/graph/graph_runtime.cc" -#include "../src/runtime/graph/graph_runtime_factory.cc" +#include "../src/runtime/graph_executor/graph_runtime.cc" +#include "../src/runtime/graph_executor/graph_runtime_factory.cc" #include "../src/runtime/library_module.cc" #include "../src/runtime/module.cc" #include "../src/runtime/ndarray.cc" diff --git a/apps/bundle_deploy/runtime.cc b/apps/bundle_deploy/runtime.cc index 2f7e3848b4bf..94fba2b75ce9 100644 --- a/apps/bundle_deploy/runtime.cc +++ b/apps/bundle_deploy/runtime.cc @@ -26,7 +26,7 @@ #include "../../src/runtime/container.cc" #include "../../src/runtime/cpu_device_api.cc" #include "../../src/runtime/file_utils.cc" -#include "../../src/runtime/graph/graph_runtime.cc" +#include "../../src/runtime/graph_executor/graph_runtime.cc" #include "../../src/runtime/library_module.cc" #include "../../src/runtime/module.cc" #include "../../src/runtime/ndarray.cc" diff --git a/apps/howto_deploy/tvm_runtime_pack.cc b/apps/howto_deploy/tvm_runtime_pack.cc index 
d6dd5876a994..e4c54214e4ca 100644 --- a/apps/howto_deploy/tvm_runtime_pack.cc +++ b/apps/howto_deploy/tvm_runtime_pack.cc @@ -59,8 +59,8 @@ #include "../../src/runtime/system_library.cc" // Graph runtime -#include "../../src/runtime/graph/graph_runtime.cc" -#include "../../src/runtime/graph/graph_runtime_factory.cc" +#include "../../src/runtime/graph_executor/graph_runtime.cc" +#include "../../src/runtime/graph_executor/graph_runtime_factory.cc" // Uncomment the following lines to enable RPC // #include "../../src/runtime/rpc/rpc_session.cc" diff --git a/apps/ios_rpc/tvmrpc/TVMRuntime.mm b/apps/ios_rpc/tvmrpc/TVMRuntime.mm index 87cb6f9b4c69..8d7b6c0c2ff2 100644 --- a/apps/ios_rpc/tvmrpc/TVMRuntime.mm +++ b/apps/ios_rpc/tvmrpc/TVMRuntime.mm @@ -46,7 +46,7 @@ #include "../../../src/runtime/rpc/rpc_session.cc" #include "../../../src/runtime/rpc/rpc_socket_impl.cc" // Graph runtime -#include "../../../src/runtime/graph/graph_runtime.cc" +#include "../../../src/runtime/graph_executor/graph_runtime.cc" // Metal #include "../../../src/runtime/metal/metal_device_api.mm" #include "../../../src/runtime/metal/metal_module.mm" diff --git a/cmake/modules/CUDA.cmake b/cmake/modules/CUDA.cmake index 262a4e6e7123..a0e05987f92a 100644 --- a/cmake/modules/CUDA.cmake +++ b/cmake/modules/CUDA.cmake @@ -73,7 +73,7 @@ if(USE_CUDA) message(FATAL_ERROR "CUDA Graph requires CUDA 10 or above, got=" ${CUDAToolkit_VERSION}) endif() message(STATUS "Build with Graph runtime with CUDA Graph support...") - file(GLOB RUNTIME_CUDA_GRAPH_SRCS src/runtime/graph/cuda_graph/*.cc) + file(GLOB RUNTIME_CUDA_GRAPH_SRCS src/runtime/graph_executor/cuda_graph/*.cc) list(APPEND RUNTIME_SRCS ${RUNTIME_CUDA_GRAPH_SRCS}) endif() else(USE_CUDA) diff --git a/golang/src/tvm_runtime_pack.cc b/golang/src/tvm_runtime_pack.cc index 7dd6dd5e94c5..951ff1b459b0 100644 --- a/golang/src/tvm_runtime_pack.cc +++ b/golang/src/tvm_runtime_pack.cc @@ -43,7 +43,7 @@ #include "src/runtime/system_library.cc" // Graph runtime -#include "src/runtime/graph/graph_runtime.cc" +#include "src/runtime/graph_executor/graph_runtime.cc" // Uncomment the following lines to enable RPC // #include "../../src/runtime/rpc/rpc_session.cc" diff --git a/web/emcc/wasm_runtime.cc b/web/emcc/wasm_runtime.cc index ba880e7b5c0a..21773a81a970 100644 --- a/web/emcc/wasm_runtime.cc +++ b/web/emcc/wasm_runtime.cc @@ -34,7 +34,7 @@ #include "src/runtime/c_runtime_api.cc" #include "src/runtime/cpu_device_api.cc" #include "src/runtime/file_utils.cc" -#include "src/runtime/graph/graph_runtime.cc" +#include "src/runtime/graph_executor/graph_runtime.cc" #include "src/runtime/library_module.cc" #include "src/runtime/logging.cc" #include "src/runtime/module.cc" From 774fc0af02453354ecfbb45507ca3de230676337 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 12 Mar 2021 10:05:05 -0800 Subject: [PATCH 04/16] graph_runtime -> graph_executor --- .../Camera2BasicFragment.java | 2 +- .../app/src/main/jni/tvm_runtime.h | 2 +- apps/android_camera/models/prepare_model.py | 2 +- .../apache/tvm/android/demo/MainActivity.java | 2 +- .../app/src/main/jni/tvm_runtime.h | 2 +- .../app/src/main/jni/tvm_runtime.h | 4 +- apps/benchmark/arm_cpu_imagenet_bench.py | 2 +- apps/benchmark/gpu_imagenet_bench.py | 2 +- apps/benchmark/mobile_gpu_imagenet_bench.py | 2 +- apps/bundle_deploy/Makefile | 12 ++-- apps/bundle_deploy/bundle.c | 22 +++---- apps/bundle_deploy/bundle.cc | 2 +- apps/bundle_deploy/bundle_static.c | 26 ++++----- apps/bundle_deploy/runtime.cc | 2 +- apps/howto_deploy/tvm_runtime_pack.cc | 4 +- 
apps/ios_rpc/tests/ios_rpc_mobilenet.py | 4 +- apps/ios_rpc/tvmrpc/TVMRuntime.mm | 2 +- cmake/modules/StandaloneCrt.cmake | 6 +- docs/api/python/graph_runtime.rst | 4 +- docs/api/python/index.rst | 2 +- docs/api/python/relay/backend.rst | 2 +- docs/deploy/arm_compute_lib.rst | 2 +- docs/deploy/bnns.rst | 4 +- docs/deploy/tensorrt.rst | 2 +- docs/deploy/vitis_ai.rst | 14 ++--- docs/dev/debugger.rst | 8 +-- golang/sample/complex.go | 6 +- golang/src/function_test.go | 2 +- golang/src/tvm_runtime_pack.cc | 2 +- include/tvm/runtime/crt/graph_runtime.h | 2 +- .../tvm/runtime/crt/graph_runtime_module.h | 4 +- .../org/apache/tvm/contrib/GraphExecutor.java | 4 +- .../src/test/scripts/test_graph_runtime.py | 2 +- .../tvm/auto_scheduler/relay_integration.py | 4 +- python/tvm/autotvm/task/relay_integration.py | 6 +- .../contrib/cuda_graph/cuda_graph_executor.py | 16 ++--- python/tvm/contrib/debugger/debug_executor.py | 12 ++-- python/tvm/contrib/graph_executor.py | 12 ++-- python/tvm/driver/tvmc/runner.py | 2 +- python/tvm/micro/__init__.py | 2 +- python/tvm/micro/model_library_format.py | 6 +- python/tvm/micro/session.py | 12 ++-- .../relay/backend/graph_executor_codegen.py | 2 +- .../relay/backend/graph_executor_factory.py | 4 +- python/tvm/relay/build_module.py | 8 +-- python/tvm/relay/frontend/common.py | 4 +- python/tvm/relay/quantize/_calibrate.py | 4 +- rust/tvm-graph-rt/src/graph.rs | 2 +- rust/tvm/examples/resnet/src/build_resnet.py | 4 +- rust/tvm/src/runtime/graph_rt.rs | 6 +- src/runtime/crt/Makefile | 4 +- src/runtime/crt/common/crt_runtime_api.c | 2 +- .../crt/graph_executor/graph_executor.c | 4 +- src/runtime/crt/graph_executor/load_json.c | 2 +- .../graph_executor_module.c | 58 +++++++++---------- src/runtime/crt/host/main.cc | 2 +- .../internal/graph_executor/graph_executor.h | 6 +- .../crt/internal/graph_executor/load_json.h | 2 +- .../cuda_graph/graph_runtime_cuda_graph.cc | 8 +-- .../debug/graph_executor_debug.cc | 8 +-- src/runtime/graph_executor/graph_executor.cc | 8 +-- .../graph_executor/graph_executor_factory.cc | 16 ++--- .../graph_executor/graph_executor_factory.h | 10 ++-- .../micro/standalone/utvm_graph_executor.cc | 2 +- src/runtime/micro/standalone/utvm_runtime.cc | 2 +- tests/cpp/build_module_test.cc | 6 +- tests/cpp/relay_build_module_test.cc | 2 +- tests/micro/test_runtime_micro_on_arm.py | 8 +-- tests/micro/zephyr/test_zephyr.py | 6 +- .../test_arm_compute_lib/infrastructure.py | 4 +- .../contrib/test_bnns/infrastructure.py | 4 +- .../contrib/test_bnns/test_onnx_topologies.py | 4 +- tests/python/contrib/test_coreml_codegen.py | 2 +- .../contrib/test_ethosn/infrastructure.py | 4 +- tests/python/contrib/test_tensorrt.py | 6 +- .../contrib/test_vitis_ai/infrastructure.py | 4 +- tests/python/frontend/caffe/test_forward.py | 4 +- tests/python/frontend/caffe2/test_forward.py | 4 +- tests/python/frontend/coreml/test_forward.py | 8 +-- tests/python/frontend/darknet/test_forward.py | 4 +- tests/python/frontend/keras/test_forward.py | 4 +- tests/python/frontend/mxnet/test_forward.py | 4 +- .../frontend/mxnet/test_qnn_ops_utils.py | 6 +- tests/python/frontend/onnx/test_forward.py | 4 +- tests/python/frontend/pytorch/qnn_test.py | 2 +- tests/python/frontend/pytorch/test_forward.py | 6 +- .../frontend/tensorflow/test_bn_dynamic.py | 4 +- .../frontend/tensorflow/test_forward.py | 16 ++--- tests/python/frontend/tflite/test_forward.py | 8 +-- .../test_quantization_accuracy.py | 2 +- .../python/relay/benchmarking/benchmark_vm.py | 8 +-- ..._auto_scheduler_layout_rewrite_networks.py 
| 4 +- .../relay/test_auto_scheduler_tuning.py | 4 +- .../relay/test_backend_graph_executor.py | 8 +-- tests/python/relay/test_cpp_build_module.py | 6 +- tests/python/relay/test_external_codegen.py | 8 +-- tests/python/relay/test_json_runtime.py | 8 +-- tests/python/relay/test_op_fast_math.py | 4 +- tests/python/relay/test_op_level2.py | 4 +- tests/python/relay/test_op_qnn_concatenate.py | 2 +- .../relay/test_op_qnn_conv2_transpose.py | 4 +- tests/python/relay/test_op_qnn_conv2d.py | 10 ++-- tests/python/relay/test_op_qnn_dense.py | 4 +- tests/python/relay/test_op_qnn_dequantize.py | 6 +- tests/python/relay/test_op_qnn_mul.py | 2 +- tests/python/relay/test_op_qnn_quantize.py | 6 +- tests/python/relay/test_op_qnn_requantize.py | 4 +- .../relay/test_op_qnn_simulated_dequantize.py | 4 +- .../relay/test_op_qnn_simulated_quantize.py | 4 +- tests/python/relay/test_param_dict.py | 8 +-- .../python/relay/test_pass_annotate_target.py | 6 +- tests/python/relay/test_pass_annotation.py | 16 ++--- tests/python/relay/test_pass_legalize.py | 2 +- .../relay/test_pass_legalize_tensorcore.py | 2 +- .../python/relay/test_pass_partition_graph.py | 6 +- tests/python/relay/test_pass_qnn_legalize.py | 2 +- .../relay/test_simplify_fc_transpose.py | 4 +- .../python/relay/test_sparse_dense_convert.py | 4 +- tests/python/topi/python/test_topi_qnn.py | 6 +- tests/python/unittest/test_crt.py | 4 +- tests/python/unittest/test_link_params.py | 14 ++--- .../test_micro_model_library_format.py | 4 +- tests/python/unittest/test_runtime_graph.py | 14 ++--- .../unittest/test_runtime_graph_cuda_graph.py | 6 +- .../unittest/test_runtime_heterogeneous.py | 8 +-- .../test_runtime_module_based_interface.py | 38 ++++++------ .../unittest/test_target_codegen_blob.py | 6 +- .../unittest/test_tir_transform_hoist_if.py | 4 +- tests/scripts/task_java_unittest.sh | 2 +- tutorials/auto_scheduler/tune_network_arm.py | 4 +- tutorials/auto_scheduler/tune_network_cuda.py | 4 +- tutorials/auto_scheduler/tune_network_mali.py | 4 +- tutorials/auto_scheduler/tune_network_x86.py | 4 +- tutorials/autotvm/tune_relay_arm.py | 2 +- tutorials/autotvm/tune_relay_cuda.py | 2 +- tutorials/autotvm/tune_relay_mobile_gpu.py | 2 +- tutorials/autotvm/tune_relay_x86.py | 2 +- tutorials/frontend/build_gcn.py | 4 +- tutorials/frontend/deploy_model_on_android.py | 2 +- tutorials/frontend/deploy_model_on_rasp.py | 2 +- tutorials/frontend/deploy_prequantized.py | 2 +- .../frontend/deploy_prequantized_tflite.py | 4 +- tutorials/frontend/deploy_sparse.py | 4 +- tutorials/frontend/deploy_ssd_gluoncv.py | 4 +- tutorials/frontend/from_caffe2.py | 4 +- tutorials/frontend/from_coreml.py | 4 +- tutorials/frontend/from_darknet.py | 4 +- tutorials/frontend/from_mxnet.py | 4 +- tutorials/frontend/from_pytorch.py | 4 +- tutorials/frontend/from_tensorflow.py | 4 +- tutorials/frontend/from_tflite.py | 2 +- tutorials/frontend/using_external_lib.py | 2 +- tutorials/get_started/relay_quick_start.py | 6 +- tutorials/micro/micro_tflite.py | 4 +- vta/scripts/tune_resnet.py | 4 +- vta/tutorials/autotvm/tune_relay_vta.py | 4 +- .../frontend/deploy_classification.py | 4 +- .../frontend/legacy/deploy_detection.py | 4 +- web/emcc/wasm_runtime.cc | 2 +- web/src/runtime.ts | 2 +- 160 files changed, 443 insertions(+), 443 deletions(-) diff --git a/apps/android_camera/app/src/main/java/org/apache/tvm/android/androidcamerademo/Camera2BasicFragment.java b/apps/android_camera/app/src/main/java/org/apache/tvm/android/androidcamerademo/Camera2BasicFragment.java index 53913ef306dc..3e4a39a72693 
100644 --- a/apps/android_camera/app/src/main/java/org/apache/tvm/android/androidcamerademo/Camera2BasicFragment.java +++ b/apps/android_camera/app/src/main/java/org/apache/tvm/android/androidcamerademo/Camera2BasicFragment.java @@ -584,7 +584,7 @@ protected Integer doInBackground(Void... args) { // get global function module for graph runtime Log.i(TAG, "getting graph runtime create handle..."); - Function runtimeCreFun = Function.getFunction("tvm.graph_runtime.create"); + Function runtimeCreFun = Function.getFunction("tvm.graph_executor.create"); Log.i(TAG, "creating graph runtime..."); Log.i(TAG, "device type: " + tvmDev.deviceType); diff --git a/apps/android_camera/app/src/main/jni/tvm_runtime.h b/apps/android_camera/app/src/main/jni/tvm_runtime.h index 91f21ff02ff6..3455262346f2 100644 --- a/apps/android_camera/app/src/main/jni/tvm_runtime.h +++ b/apps/android_camera/app/src/main/jni/tvm_runtime.h @@ -37,7 +37,7 @@ #include "../src/runtime/cpu_device_api.cc" #include "../src/runtime/dso_library.cc" #include "../src/runtime/file_utils.cc" -#include "../src/runtime/graph_executor/graph_runtime.cc" +#include "../src/runtime/graph_executor/graph_executor.cc" #include "../src/runtime/library_module.cc" #include "../src/runtime/module.cc" #include "../src/runtime/ndarray.cc" diff --git a/apps/android_camera/models/prepare_model.py b/apps/android_camera/models/prepare_model.py index f155d46c31a4..d767b2ef88fc 100644 --- a/apps/android_camera/models/prepare_model.py +++ b/apps/android_camera/models/prepare_model.py @@ -25,7 +25,7 @@ import tvm import tvm.relay as relay -from tvm.contrib import utils, ndk, graph_runtime as runtime +from tvm.contrib import utils, ndk, graph_executor as runtime from tvm.contrib.download import download_testdata, download target = "llvm -mtriple=arm64-linux-android" diff --git a/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java b/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java index dd3ae565ca8e..79ed20cdc920 100644 --- a/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java +++ b/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java @@ -184,7 +184,7 @@ protected Integer doInBackground(Void... 
diff --git a/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java b/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java
index dd3ae565ca8e..79ed20cdc920 100644
--- a/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java
+++ b/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java
@@ -184,7 +184,7 @@ protected Integer doInBackground(Void... args) {
         Module modelLib = Module.load(libCacheFilePath);
 
         // get global function module for graph runtime
-        Function runtimeCreFun = Function.getFunction("tvm.graph_runtime.create");
+        Function runtimeCreFun = Function.getFunction("tvm.graph_executor.create");
         TVMValue runtimeCreFunRes = runtimeCreFun.pushArg(modelGraph)
                 .pushArg(modelLib)
                 .pushArg(tvmDev.deviceType)
diff --git a/apps/android_deploy/app/src/main/jni/tvm_runtime.h b/apps/android_deploy/app/src/main/jni/tvm_runtime.h
index 2f7964ebfc09..120cd0c22fb4 100644
--- a/apps/android_deploy/app/src/main/jni/tvm_runtime.h
+++ b/apps/android_deploy/app/src/main/jni/tvm_runtime.h
@@ -32,7 +32,7 @@
 #include "../src/runtime/cpu_device_api.cc"
 #include "../src/runtime/dso_library.cc"
 #include "../src/runtime/file_utils.cc"
-#include "../src/runtime/graph_executor/graph_runtime.cc"
+#include "../src/runtime/graph_executor/graph_executor.cc"
 #include "../src/runtime/library_module.cc"
 #include "../src/runtime/module.cc"
 #include "../src/runtime/ndarray.cc"
diff --git a/apps/android_rpc/app/src/main/jni/tvm_runtime.h b/apps/android_rpc/app/src/main/jni/tvm_runtime.h
index 5b046cbc70b4..a373081898d3 100644
--- a/apps/android_rpc/app/src/main/jni/tvm_runtime.h
+++ b/apps/android_rpc/app/src/main/jni/tvm_runtime.h
@@ -37,8 +37,8 @@
 #include "../src/runtime/cpu_device_api.cc"
 #include "../src/runtime/dso_library.cc"
 #include "../src/runtime/file_utils.cc"
-#include "../src/runtime/graph_executor/graph_runtime.cc"
-#include "../src/runtime/graph_executor/graph_runtime_factory.cc"
+#include "../src/runtime/graph_executor/graph_executor.cc"
+#include "../src/runtime/graph_executor/graph_executor_factory.cc"
 #include "../src/runtime/library_module.cc"
 #include "../src/runtime/module.cc"
 #include "../src/runtime/ndarray.cc"
diff --git a/apps/benchmark/arm_cpu_imagenet_bench.py b/apps/benchmark/arm_cpu_imagenet_bench.py
index 915f2303b9ed..656735ec6c05 100644
--- a/apps/benchmark/arm_cpu_imagenet_bench.py
+++ b/apps/benchmark/arm_cpu_imagenet_bench.py
@@ -24,7 +24,7 @@
 import tvm
 from tvm import te
 from tvm.contrib.utils import tempdir
-import tvm.contrib.graph_runtime as runtime
+import tvm.contrib.graph_executor as runtime
 from tvm import relay
 
 from util import get_network, print_progress
diff --git a/apps/benchmark/gpu_imagenet_bench.py b/apps/benchmark/gpu_imagenet_bench.py
index 6d91aff74fde..6407f766cb76 100644
--- a/apps/benchmark/gpu_imagenet_bench.py
+++ b/apps/benchmark/gpu_imagenet_bench.py
@@ -24,7 +24,7 @@
 import tvm
 from tvm import te
-import tvm.contrib.graph_runtime as runtime
+import tvm.contrib.graph_executor as runtime
 from tvm import relay
 
 from util import get_network
diff --git a/apps/benchmark/mobile_gpu_imagenet_bench.py b/apps/benchmark/mobile_gpu_imagenet_bench.py
index 3144aee080dc..4eff259875ca 100644
--- a/apps/benchmark/mobile_gpu_imagenet_bench.py
+++ b/apps/benchmark/mobile_gpu_imagenet_bench.py
@@ -24,7 +24,7 @@
 import tvm
 from tvm import te
 from tvm.contrib.utils import tempdir
-import tvm.contrib.graph_runtime as runtime
+import tvm.contrib.graph_executor as runtime
 from tvm import relay
 
 from util import get_network, print_progress
diff --git a/apps/bundle_deploy/Makefile b/apps/bundle_deploy/Makefile
index 8e23a92afa93..b533030c4b82 100644
--- a/apps/bundle_deploy/Makefile
+++ b/apps/bundle_deploy/Makefile
@@ -84,8 +84,8 @@ test_static: $(build_dir)/test_static $(build_dir)/test_data_c.bin $(build_dir)/
 $(build_dir)/crt/libcommon.a: $(CRT_SRCS)
 	$(QUIET)cd $(CRT_ROOT) && make QUIET= BUILD_DIR=$(abspath $(build_dir))/crt CRT_CONFIG=$(abspath crt_config/crt_config.h) "EXTRA_CFLAGS=$(PKG_COMPILE_OPTS)" common
 
-$(build_dir)/crt/libgraph_runtime.a: $(CRT_SRCS)
-	$(QUIET)cd $(CRT_ROOT) && make QUIET= BUILD_DIR=$(abspath $(build_dir))/crt CRT_CONFIG=$(abspath crt_config/crt_config.h) "EXTRA_CFLAGS=$(PKG_COMPILE_OPTS)" graph_runtime
+$(build_dir)/crt/libgraph_executor.a: $(CRT_SRCS)
+	$(QUIET)cd $(CRT_ROOT) && make QUIET= BUILD_DIR=$(abspath $(build_dir))/crt CRT_CONFIG=$(abspath crt_config/crt_config.h) "EXTRA_CFLAGS=$(PKG_COMPILE_OPTS)" graph_executor
 
 $(build_dir)/crt/libmemory.a: $(CRT_SRCS)
 	$(QUIET)cd $(CRT_ROOT) && make QUIET= BUILD_DIR=$(abspath $(build_dir))/crt CRT_CONFIG=$(abspath crt_config/crt_config.h) "EXTRA_CFLAGS=$(PKG_COMPILE_OPTS)" memory
@@ -98,11 +98,11 @@ $(build_dir)/test_dynamic: test.cc ${build_dir}/test_graph_c.json ${build_dir}/t
 	$(QUIET)mkdir -p $(@D)
 	$(QUIET)g++ $(PKG_CXXFLAGS) -o $@ test.cc $(BACKTRACE_OBJS) $(BACKTRACE_LDFLAGS)
 
-$(build_dir)/demo_static: demo_static.c ${build_dir}/bundle_static.o $(MODEL_OBJ) ${build_dir}/crt/libmemory.a ${build_dir}/crt/libgraph_runtime.a ${build_dir}/crt/libcommon.a ${build_dir}/graph_c.json.c ${build_dir}/params_c.bin.c $(BACKTRACE_OBJS)
+$(build_dir)/demo_static: demo_static.c ${build_dir}/bundle_static.o $(MODEL_OBJ) ${build_dir}/crt/libmemory.a ${build_dir}/crt/libgraph_executor.a ${build_dir}/crt/libcommon.a ${build_dir}/graph_c.json.c ${build_dir}/params_c.bin.c $(BACKTRACE_OBJS)
 	$(QUIET)mkdir -p $(@D)
 	$(QUIET)gcc $(PKG_CFLAGS) -o $@ $^ $(PKG_LDFLAGS) $(BACKTRACE_LDFLAGS) $(BACKTRACE_CFLAGS)
 
-$(build_dir)/test_static: test_static.c ${build_dir}/bundle_static.o $(TEST_MODEL_OBJ) ${build_dir}/crt/libmemory.a ${build_dir}/crt/libgraph_runtime.a ${build_dir}/crt/libcommon.a $(BACKTRACE_OBJS)
+$(build_dir)/test_static: test_static.c ${build_dir}/bundle_static.o $(TEST_MODEL_OBJ) ${build_dir}/crt/libmemory.a ${build_dir}/crt/libgraph_executor.a ${build_dir}/crt/libcommon.a $(BACKTRACE_OBJS)
 	$(QUIET)mkdir -p $(@D)
 	$(QUIET)gcc $(PKG_CFLAGS) -o $@ $^ $(BACKTRACE_LDFLAGS)
@@ -140,7 +140,7 @@ $(build_dir)/bundle.so: bundle.cc runtime.cc $(build_dir)/model_cpp.o
 	$(QUIET)mkdir -p $(@D)
 	$(QUIET)g++ -shared $(PKG_CXXFLAGS) -fvisibility=hidden -o $@ $^ $(PKG_LDFLAGS)
 
-$(build_dir)/bundle_c.so: bundle.c $(MODEL_OBJ) ${build_dir}/crt/libmemory.a ${build_dir}/crt/libgraph_runtime.a ${build_dir}/crt/libcommon.a $(BACKTRACE_OBJS)
+$(build_dir)/bundle_c.so: bundle.c $(MODEL_OBJ) ${build_dir}/crt/libmemory.a ${build_dir}/crt/libgraph_executor.a ${build_dir}/crt/libcommon.a $(BACKTRACE_OBJS)
 	$(QUIET)mkdir -p $(@D)
 	$(QUIET)gcc -shared $(PKG_CFLAGS) -fvisibility=hidden -o $@ $^ $(PKG_LDFLAGS) $(BACKTRACE_LDFLAGS) $(BACKTRACE_CFLAGS)
@@ -148,7 +148,7 @@ $(build_dir)/test_bundle.so: bundle.cc runtime.cc $(build_dir)/test_model_cpp.o
 	$(QUIET)mkdir -p $(@D)
 	$(QUIET)g++ -shared $(PKG_CXXFLAGS) -fvisibility=hidden -o $@ $^ $(PKG_LDFLAGS)
 
-$(build_dir)/test_bundle_c.so: bundle.c $(TEST_MODEL_OBJ) ${build_dir}/crt/libmemory.a ${build_dir}/crt/libgraph_runtime.a ${build_dir}/crt/libcommon.a $(BACKTRACE_OBJS)
+$(build_dir)/test_bundle_c.so: bundle.c $(TEST_MODEL_OBJ) ${build_dir}/crt/libmemory.a ${build_dir}/crt/libgraph_executor.a ${build_dir}/crt/libcommon.a $(BACKTRACE_OBJS)
 	$(QUIET)mkdir -p $(@D)
 	$(QUIET)gcc -shared $(PKG_CFLAGS) -fvisibility=hidden -o $@ $^ $(PKG_LDFLAGS) $(BACKTRACE_LDFLAGS) $(BACKTRACE_CFLAGS)
diff --git a/apps/bundle_deploy/bundle.c b/apps/bundle_deploy/bundle.c
index b7f7569bd2f9..55c226f11794 100644
--- a/apps/bundle_deploy/bundle.c
+++ b/apps/bundle_deploy/bundle.c
@@ -22,7 +22,7 @@
 #include
 #include
 #include
-#include <tvm/runtime/crt/graph_runtime.h>
+#include <tvm/runtime/crt/graph_executor.h>
 #include
 #include
@@ -75,11 +75,11 @@ TVM_DLL void* tvm_runtime_create(const char* json_data, const char* params_data,
   TVMModuleHandle mod_syslib = TVMArgs_AsModuleHandle(&pf.ret_value, 0);
 
   // run modules
-  TVMGraphExecutor* graph_runtime = NULL;
-  TVM_CCALL(TVMGraphExecutor_Create(json_data, mod_syslib, &dev, &graph_runtime));
-  TVM_CCALL(TVMGraphExecutor_LoadParams(graph_runtime, params.data, params.size));
+  TVMGraphExecutor* graph_executor = NULL;
+  TVM_CCALL(TVMGraphExecutor_Create(json_data, mod_syslib, &dev, &graph_executor));
+  TVM_CCALL(TVMGraphExecutor_LoadParams(graph_executor, params.data, params.size));
 
-  return graph_runtime;
+  return graph_executor;
 }
 
 TVM_DLL void tvm_runtime_destroy(void* runtime) {
@@ -87,18 +87,18 @@ TVM_DLL void tvm_runtime_destroy(void* runtime) {
 }
 
 TVM_DLL void tvm_runtime_set_input(void* runtime, const char* name, DLTensor* tensor) {
-  TVMGraphExecutor* graph_runtime = (TVMGraphExecutor*)runtime;
-  TVMGraphExecutor_SetInput(graph_runtime, name, tensor);
+  TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)runtime;
+  TVMGraphExecutor_SetInput(graph_executor, name, tensor);
 }
 
 TVM_DLL void tvm_runtime_run(void* runtime) {
-  TVMGraphExecutor* graph_runtime = (TVMGraphExecutor*)runtime;
-  TVMGraphExecutor_Run(graph_runtime);
+  TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)runtime;
+  TVMGraphExecutor_Run(graph_executor);
 }
 
 TVM_DLL void tvm_runtime_get_output(void* runtime, int32_t index, DLTensor* tensor) {
-  TVMGraphExecutor* graph_runtime = (TVMGraphExecutor*)runtime;
-  TVMGraphExecutor_GetOutput(graph_runtime, index, tensor);
+  TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)runtime;
+  TVMGraphExecutor_GetOutput(graph_executor, index, tensor);
 }
 
 void TVMLogf(const char* msg, ...) {
diff --git a/apps/bundle_deploy/bundle.cc b/apps/bundle_deploy/bundle.cc
index e3cc7d1730ce..435d0e41f3db 100644
--- a/apps/bundle_deploy/bundle.cc
+++ b/apps/bundle_deploy/bundle.cc
@@ -35,7 +35,7 @@ TVM_BUNDLE_FUNCTION void* tvm_runtime_create(const char* build_graph_json,
   int device_type = kDLCPU;
   int device_id = 0;
 
-  tvm::runtime::Module mod = (*tvm::runtime::Registry::Get("tvm.graph_runtime.create"))(
+  tvm::runtime::Module mod = (*tvm::runtime::Registry::Get("tvm.graph_executor.create"))(
       json_data, mod_syslib, device_type, device_id);
   TVMByteArray params;
   params.data = reinterpret_cast<const char*>(&build_params_bin[0]);
diff --git a/apps/bundle_deploy/bundle_static.c b/apps/bundle_deploy/bundle_static.c
index d69123f0f7d6..5f7825aec01b 100644
--- a/apps/bundle_deploy/bundle_static.c
+++ b/apps/bundle_deploy/bundle_static.c
@@ -21,7 +21,7 @@
 #include
 #include
 #include
-#include <tvm/runtime/crt/graph_runtime.h>
+#include <tvm/runtime/crt/graph_executor.h>
 #include
 #include
 #include
@@ -75,31 +75,31 @@ TVM_DLL void* tvm_runtime_create(const char* json_data, const char* params_data,
   TVMModuleHandle mod_syslib = TVMArgs_AsModuleHandle(&pf.ret_value, 0);
 
   // run modules
-  TVMGraphExecutor* graph_runtime = NULL;
-  TVM_CCALL(TVMGraphExecutor_Create(json_data, mod_syslib, &dev, &graph_runtime));
-  TVM_CCALL(TVMGraphExecutor_LoadParams(graph_runtime, params.data, params.size));
+  TVMGraphExecutor* graph_executor = NULL;
+  TVM_CCALL(TVMGraphExecutor_Create(json_data, mod_syslib, &dev, &graph_executor));
+  TVM_CCALL(TVMGraphExecutor_LoadParams(graph_executor, params.data, params.size));
 
-  return graph_runtime;
+  return graph_executor;
 }
 
 TVM_DLL void tvm_runtime_destroy(void* runtime) {
-  TVMGraphExecutor* graph_runtime = (TVMGraphExecutor*)runtime;
-  TVMGraphExecutor_Release(&graph_runtime);
+  TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)runtime;
+  TVMGraphExecutor_Release(&graph_executor);
 }
 
 TVM_DLL void tvm_runtime_set_input(void* runtime, const char* name, DLTensor* tensor) {
-  TVMGraphExecutor* graph_runtime = (TVMGraphExecutor*)runtime;
-  TVMGraphExecutor_SetInput(graph_runtime, name, tensor);
+  TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)runtime;
+  TVMGraphExecutor_SetInput(graph_executor, name, tensor);
 }
 
 TVM_DLL void tvm_runtime_run(void* runtime) {
-  TVMGraphExecutor* graph_runtime = (TVMGraphExecutor*)runtime;
-  TVMGraphExecutor_Run(graph_runtime);
+  TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)runtime;
+  TVMGraphExecutor_Run(graph_executor);
 }
 
 TVM_DLL void tvm_runtime_get_output(void* runtime, int32_t index, DLTensor* tensor) {
-  TVMGraphExecutor* graph_runtime = (TVMGraphExecutor*)runtime;
-  TVMGraphExecutor_GetOutput(graph_runtime, index, tensor);
+  TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)runtime;
+  TVMGraphExecutor_GetOutput(graph_executor, index, tensor);
 }
 
 void TVMLogf(const char* msg, ...) {
diff --git a/apps/bundle_deploy/runtime.cc b/apps/bundle_deploy/runtime.cc
index 94fba2b75ce9..7a2573b643f5 100644
--- a/apps/bundle_deploy/runtime.cc
+++ b/apps/bundle_deploy/runtime.cc
@@ -26,7 +26,7 @@
 #include "../../src/runtime/container.cc"
 #include "../../src/runtime/cpu_device_api.cc"
 #include "../../src/runtime/file_utils.cc"
-#include "../../src/runtime/graph_executor/graph_runtime.cc"
+#include "../../src/runtime/graph_executor/graph_executor.cc"
 #include "../../src/runtime/library_module.cc"
 #include "../../src/runtime/module.cc"
 #include "../../src/runtime/ndarray.cc"
diff --git a/apps/howto_deploy/tvm_runtime_pack.cc b/apps/howto_deploy/tvm_runtime_pack.cc
index e4c54214e4ca..9f1f2fda1e5a 100644
--- a/apps/howto_deploy/tvm_runtime_pack.cc
+++ b/apps/howto_deploy/tvm_runtime_pack.cc
@@ -59,8 +59,8 @@
 #include "../../src/runtime/system_library.cc"
 
 // Graph runtime
-#include "../../src/runtime/graph_executor/graph_runtime.cc"
-#include "../../src/runtime/graph_executor/graph_runtime_factory.cc"
+#include "../../src/runtime/graph_executor/graph_executor.cc"
+#include "../../src/runtime/graph_executor/graph_executor_factory.cc"
 
 // Uncomment the following lines to enable RPC
 // #include "../../src/runtime/rpc/rpc_session.cc"
diff --git a/apps/ios_rpc/tests/ios_rpc_mobilenet.py b/apps/ios_rpc/tests/ios_rpc_mobilenet.py
index 50468d301134..ee6ab5fd8363 100644
--- a/apps/ios_rpc/tests/ios_rpc_mobilenet.py
+++ b/apps/ios_rpc/tests/ios_rpc_mobilenet.py
@@ -22,7 +22,7 @@
 from tvm.relay import transform
 from tvm.relay.op.annotation import compiler_begin, compiler_end
 from tvm.relay.quantize.quantize import prerequisite_optimize
-from tvm.contrib import utils, xcode, graph_runtime, coreml_runtime
+from tvm.contrib import utils, xcode, graph_executor, coreml_runtime
 from tvm.contrib.target import coreml as _coreml
 
 import os
@@ -120,7 +120,7 @@ def run(mod, target):
         else:
             dev = remote.cpu(0)
         lib = remote.load_module("deploy.dylib")
-        m = graph_runtime.GraphModule(lib["default"](dev))
+        m = graph_executor.GraphModule(lib["default"](dev))
 
         m.set_input("data", tvm.nd.array(image, dev))
         m.run()
diff --git a/apps/ios_rpc/tvmrpc/TVMRuntime.mm b/apps/ios_rpc/tvmrpc/TVMRuntime.mm
index 8d7b6c0c2ff2..3429e3d85e56 100644
--- a/apps/ios_rpc/tvmrpc/TVMRuntime.mm
+++ b/apps/ios_rpc/tvmrpc/TVMRuntime.mm
@@ -46,7 +46,7 @@
 #include "../../../src/runtime/rpc/rpc_session.cc"
 #include "../../../src/runtime/rpc/rpc_socket_impl.cc"
 // Graph runtime
-#include "../../../src/runtime/graph_executor/graph_runtime.cc"
+#include "../../../src/runtime/graph_executor/graph_executor.cc"
 // Metal
 #include "../../../src/runtime/metal/metal_device_api.mm"
 #include "../../../src/runtime/metal/metal_module.mm"
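The documentation hunks that follow all switch to tvm.contrib.graph_executor; the run loop itself is unchanged by the rename. A minimal Python sketch, assuming a previously exported "compiled_lib.so" with an input named "data":

    import numpy as np
    import tvm
    from tvm.contrib import graph_executor

    dev = tvm.cpu(0)
    loaded_lib = tvm.runtime.load_module("compiled_lib.so")
    # Wrap the default factory function in a GraphModule, as the deploy docs below do.
    gen_module = graph_executor.GraphModule(loaded_lib["default"](dev))
    gen_module.set_input("data", tvm.nd.array(np.zeros((1, 3, 224, 224), dtype="float32")))
    gen_module.run()
    out = gen_module.get_output(0).asnumpy()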
"src/runtime/crt/include *.h -> include" "src/runtime/crt/common *.c -> src/runtime/crt/common" - "src/runtime/crt/graph_runtime *.c -> src/runtime/crt/graph_runtime" - "src/runtime/crt/graph_runtime_module *.c -> src/runtime/crt/graph_runtime_module" + "src/runtime/crt/graph_executor *.c -> src/runtime/crt/graph_executor" + "src/runtime/crt/graph_executor_module *.c -> src/runtime/crt/graph_executor_module" "src/runtime/crt/host crt_config.h -> template/host" "src/runtime/crt/host *.cc -> template/host" "src/runtime/crt/memory *.c -> src/runtime/crt/memory" @@ -97,7 +97,7 @@ if(USE_MICRO) set(make_quiet ) endif(${VERBOSE}) - list(APPEND crt_libraries memory graph_runtime utvm_rpc_server utvm_rpc_common common) # NOTE: listed in link order. + list(APPEND crt_libraries memory graph_executor utvm_rpc_server utvm_rpc_common common) # NOTE: listed in link order. foreach(crt_lib_name IN LISTS crt_libraries) list(APPEND crt_library_paths "host_standalone_crt/lib${crt_lib_name}.a") endforeach() diff --git a/docs/api/python/graph_runtime.rst b/docs/api/python/graph_runtime.rst index d82c7ce00e2e..3f8811553ba4 100644 --- a/docs/api/python/graph_runtime.rst +++ b/docs/api/python/graph_runtime.rst @@ -15,7 +15,7 @@ specific language governing permissions and limitations under the License. -tvm.contrib.graph_runtime +tvm.contrib.graph_executor ------------------------- -.. automodule:: tvm.contrib.graph_runtime +.. automodule:: tvm.contrib.graph_executor :members: diff --git a/docs/api/python/index.rst b/docs/api/python/index.rst index a6179684413d..76322a1acfe2 100644 --- a/docs/api/python/index.rst +++ b/docs/api/python/index.rst @@ -44,6 +44,6 @@ Python API rpc micro contrib - graph_runtime + graph_executor topi vta/index diff --git a/docs/api/python/relay/backend.rst b/docs/api/python/relay/backend.rst index c30f226e8437..ffe8a9a8ce79 100644 --- a/docs/api/python/relay/backend.rst +++ b/docs/api/python/relay/backend.rst @@ -26,7 +26,7 @@ tvm.relay.backend .. automodule:: tvm.relay.backend.compile_engine :members: -.. automodule:: tvm.relay.backend.graph_runtime_codegen +.. automodule:: tvm.relay.backend.graph_executor_codegen :members: .. 
diff --git a/docs/deploy/arm_compute_lib.rst b/docs/deploy/arm_compute_lib.rst
index 10a0d51e4f91..a736fee4e430 100644
--- a/docs/deploy/arm_compute_lib.rst
+++ b/docs/deploy/arm_compute_lib.rst
@@ -150,7 +150,7 @@ https://tvm.apache.org/docs/tutorials/get_started/cross_compilation_and_rpc.html
 
     dev = tvm.cpu(0)
     loaded_lib = tvm.runtime.load_module('lib_acl.so')
-    gen_module = tvm.contrib.graph_runtime.GraphModule(loaded_lib['default'](dev))
+    gen_module = tvm.contrib.graph_executor.GraphModule(loaded_lib['default'](dev))
     d_data = np.random.uniform(0, 1, data_shape).astype(data_type)
     map_inputs = {'data': d_data}
     gen_module.set_input(**map_inputs)
diff --git a/docs/deploy/bnns.rst b/docs/deploy/bnns.rst
index 6e20f3c2cdbb..7b62fb15a617 100644
--- a/docs/deploy/bnns.rst
+++ b/docs/deploy/bnns.rst
@@ -145,11 +145,11 @@ Load module and run inference on the target machine with TVM built with ``USE_B
 
     import tvm
     import numpy as np
-    from tvm.contrib import graph_runtime
+    from tvm.contrib import graph_executor
 
     dev = tvm.cpu(0)
     loaded_lib = tvm.runtime.load_module('compiled.dylib')
-    gen_module = tvm.contrib.graph_runtime.GraphModule(loaded_lib['default'](dev))
+    gen_module = tvm.contrib.graph_executor.GraphModule(loaded_lib['default'](dev))
 
     dtype = "float32"
     input_shape = (1, 3, 224, 224)
diff --git a/docs/deploy/tensorrt.rst b/docs/deploy/tensorrt.rst
index 0732a32c01bf..308db4933ae8 100644
--- a/docs/deploy/tensorrt.rst
+++ b/docs/deploy/tensorrt.rst
@@ -126,7 +126,7 @@ have to be built.
 
     dev = tvm.gpu(0)
     loaded_lib = tvm.runtime.load_module('compiled.so')
-    gen_module = tvm.contrib.graph_runtime.GraphModule(loaded_lib['default'](dev))
+    gen_module = tvm.contrib.graph_executor.GraphModule(loaded_lib['default'](dev))
     input_data = np.random.uniform(0, 1, input_shape).astype(dtype)
     gen_module.run(data=input_data)
diff --git a/docs/deploy/vitis_ai.rst b/docs/deploy/vitis_ai.rst
index fc887dae968c..1ce89ebed9c2 100755
--- a/docs/deploy/vitis_ai.rst
+++ b/docs/deploy/vitis_ai.rst
@@ -449,7 +449,7 @@ TVM.
 
    import tvm
    import tvm.relay as relay
    from tvm.contrib.target import vitis_ai
-   from tvm.contrib import utils, graph_runtime
+   from tvm.contrib import utils, graph_executor
    from tvm.relay.build_module import bind_params_by_name
    from tvm.relay.op.contrib.vitis_ai import annotation
@@ -490,7 +490,7 @@ will take a substantial amount of time.
 
 .. code:: python
 
-   module = graph_runtime.GraphModule(lib["default"](tvm.cpu()))
+   module = graph_executor.GraphModule(lib["default"](tvm.cpu()))
 
    # First N (default = 128) inputs are used for quantization calibration and will
    # be executed on the CPU
@@ -520,7 +520,7 @@ Load the module from compiled files and run inference
 
    # load the module into memory
    loaded_lib = tvm.runtime.load_module(lib_path)
-   module = graph_runtime.GraphModule(lib["default"](tvm.cpu()))
+   module = graph_executor.GraphModule(lib["default"](tvm.cpu()))
 
    module.set_input(name, data)
    module.run()
@@ -551,7 +551,7 @@ TVM.
 
    import tvm
    import tvm.relay as relay
    from tvm.contrib.target import vitis_ai
-   from tvm.contrib import utils, graph_runtime
+   from tvm.contrib import utils, graph_executor
    from tvm.relay.build_module import bind_params_by_name
    from tvm.relay.op.contrib.vitis_ai import annotation
@@ -631,7 +631,7 @@ quantization on the host machine. This makes use of TVM inference calls
 
 .. code:: python
 
-   module = graph_runtime.GraphModule(lib["default"](tvm.cpu()))
+   module = graph_executor.GraphModule(lib["default"](tvm.cpu()))
 
    # First N (default = 128) inputs are used for quantization calibration and will
    # be executed on the CPU
@@ -694,7 +694,7 @@ as root (execute ``su`` in terminal to log into root).
 
    import pyxir
    import tvm
-   from tvm.contrib import graph_runtime
+   from tvm.contrib import graph_executor
 
    dev = tvm.cpu()
 
@@ -704,6 +704,6 @@ as root (execute ``su`` in terminal to log into root).
    # load the module into memory
    lib = tvm.runtime.load_module("tvm_dpu_arm.so")
-   module = graph_runtime.GraphModule(lib["default"](dev))
+   module = graph_executor.GraphModule(lib["default"](dev))
    module.set_input(input_name, input_data)
    module.run()
diff --git a/docs/dev/debugger.rst b/docs/dev/debugger.rst
index 509cfd306a4a..9cd138d73253 100644
--- a/docs/dev/debugger.rst
+++ b/docs/dev/debugger.rst
@@ -133,14 +133,14 @@ How to use Debugger?
 2. Do 'make' tvm, so that it will make the ``libtvm_runtime.so``
 
 3. In frontend script file instead of
-   ``from tvm.contrib import graph_runtime`` import the
+   ``from tvm.contrib import graph_executor`` import the
    ``debug_runtime``
-   ``from tvm.contrib.debugger import debug_runtime as graph_runtime``
+   ``from tvm.contrib.debugger import debug_runtime as graph_executor``
 
 ::
 
-    from tvm.contrib.debugger import debug_runtime as graph_runtime
-    m = graph_runtime.create(graph, lib, dev, dump_root="/tmp/tvmdbg")
+    from tvm.contrib.debugger import debug_runtime as graph_executor
+    m = graph_executor.create(graph, lib, dev, dump_root="/tmp/tvmdbg")
     # set inputs
     m.set_input('data', tvm.nd.array(data.astype(dtype)))
     m.set_input(**params)
diff --git a/golang/sample/complex.go b/golang/sample/complex.go
index bbe74dc85e09..a2e25824bb8f 100644
--- a/golang/sample/complex.go
+++ b/golang/sample/complex.go
@@ -70,13 +70,13 @@ func main() {
     }
     jsonStr := string(bytes)
 
-    // Load module on tvm runtime - call tvm.graph_runtime.create
-    funp, err := gotvm.GetGlobalFunction("tvm.graph_runtime.create")
+    // Load module on tvm runtime - call tvm.graph_executor.create
+    funp, err := gotvm.GetGlobalFunction("tvm.graph_executor.create")
     if err != nil {
         fmt.Print(err)
         return
     }
-    fmt.Printf("Calling tvm.graph_runtime.create\n")
+    fmt.Printf("Calling tvm.graph_executor.create\n")
     // Call function
     graphrt, err := funp.Invoke(jsonStr, modp, (int64)(gotvm.KDLCPU), (int64)(0))
     if err != nil {
diff --git a/golang/src/function_test.go b/golang/src/function_test.go
index 17b1c9a6e1c0..0830d16419a2 100644
--- a/golang/src/function_test.go
+++ b/golang/src/function_test.go
@@ -46,7 +46,7 @@ func TestFunctionGlobals(t *testing.T) {
 
 // Check GetFunction API
 func TestFunctionGlobalGet(t *testing.T) {
-    funp, err := GetGlobalFunction("tvm.graph_runtime.create")
+    funp, err := GetGlobalFunction("tvm.graph_executor.create")
     if err != nil {
         t.Error(err.Error())
         return
diff --git a/golang/src/tvm_runtime_pack.cc b/golang/src/tvm_runtime_pack.cc
index 951ff1b459b0..6a4bd872970b 100644
--- a/golang/src/tvm_runtime_pack.cc
+++ b/golang/src/tvm_runtime_pack.cc
@@ -43,7 +43,7 @@
 #include "src/runtime/system_library.cc"
 
 // Graph runtime
-#include "src/runtime/graph_executor/graph_runtime.cc"
+#include "src/runtime/graph_executor/graph_executor.cc"
 
 // Uncomment the following lines to enable RPC
 // #include "../../src/runtime/rpc/rpc_session.cc"
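The debugger docs above keep the debug_runtime module name for now and only re-alias it. A minimal Python sketch of the aliased usage, taken from the rst hunk; `graph`, `lib`, `dev`, `data`, and `params` are assumed to come from an earlier build step:

    import tvm
    from tvm.contrib.debugger import debug_runtime as graph_executor

    # Same create() signature as the ordinary executor, plus a dump_root
    # directory for the per-node debug output.
    m = graph_executor.create(graph, lib, dev, dump_root="/tmp/tvmdbg")
    m.set_input("data", tvm.nd.array(data.astype("float32")))
    m.set_input(**params)
    m.run()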
diff --git a/include/tvm/runtime/crt/graph_runtime.h b/include/tvm/runtime/crt/graph_runtime.h
index 3c18a9f6e295..62166ffe5e0a 100644
--- a/include/tvm/runtime/crt/graph_runtime.h
+++ b/include/tvm/runtime/crt/graph_runtime.h
@@ -18,7 +18,7 @@
  */
 
 /*!
- * \file graph_runtime.h
+ * \file graph_executor.h
  * \brief Tiny graph runtime that can run graph containing only tvm PackedFunc.
  */
 #ifndef TVM_RUNTIME_CRT_GRAPH_RUNTIME_H_
diff --git a/include/tvm/runtime/crt/graph_runtime_module.h b/include/tvm/runtime/crt/graph_runtime_module.h
index 51aea1276ba1..24d5b59be1fe 100644
--- a/include/tvm/runtime/crt/graph_runtime_module.h
+++ b/include/tvm/runtime/crt/graph_runtime_module.h
@@ -18,7 +18,7 @@
  */
 
 /*!
- * \file graph_runtime.h
+ * \file graph_executor.h
  * \brief Tiny graph runtime that can run graph containing only tvm PackedFunc.
  */
 #ifndef TVM_RUNTIME_CRT_GRAPH_RUNTIME_MODULE_H_
@@ -31,7 +31,7 @@ extern "C" {
 #include
 
 /*!
- * \brief Register the "tvm.graph_runtime.create" constructor PackedFunc.
+ * \brief Register the "tvm.graph_executor.create" constructor PackedFunc.
  */
 tvm_crt_error_t TVMGraphExecutorModule_Register();
diff --git a/jvm/core/src/main/java/org/apache/tvm/contrib/GraphExecutor.java b/jvm/core/src/main/java/org/apache/tvm/contrib/GraphExecutor.java
index d692f9a2cf08..30b2fb1acafb 100644
--- a/jvm/core/src/main/java/org/apache/tvm/contrib/GraphExecutor.java
+++ b/jvm/core/src/main/java/org/apache/tvm/contrib/GraphExecutor.java
@@ -38,9 +38,9 @@ public class GraphExecutor {
    * @return Runtime graph module that can be used to execute the graph.
    */
   public static GraphModule create(String graphJson, Module libmod, Device dev) {
-    Function fcreate = Function.getFunction("tvm.graph_runtime.create");
+    Function fcreate = Function.getFunction("tvm.graph_executor.create");
     if (fcreate == null) {
-      throw new RuntimeException("Cannot find global function tvm.graph_runtime.create."
+      throw new RuntimeException("Cannot find global function tvm.graph_executor.create."
+ "Did you compile tvm_runtime with correct version?"); } Module graphModule = fcreate.pushArg(graphJson) diff --git a/jvm/core/src/test/scripts/test_graph_runtime.py b/jvm/core/src/test/scripts/test_graph_runtime.py index 07a19fe50c1b..676b008205ca 100644 --- a/jvm/core/src/test/scripts/test_graph_runtime.py +++ b/jvm/core/src/test/scripts/test_graph_runtime.py @@ -19,7 +19,7 @@ import tvm from tvm import te import json -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor def dump_graph_lib(target_dir): diff --git a/python/tvm/auto_scheduler/relay_integration.py b/python/tvm/auto_scheduler/relay_integration.py index 72582a21c515..72e3e06f2047 100644 --- a/python/tvm/auto_scheduler/relay_integration.py +++ b/python/tvm/auto_scheduler/relay_integration.py @@ -47,7 +47,7 @@ def call_all_topi_funcs(mod, params, target): """Call all TOPI compute to extract auto_scheduler tasks in a Relay program""" # pylint: disable=import-outside-toplevel from tvm import relay - from tvm.relay.backend import graph_runtime_codegen + from tvm.relay.backend import graph_executor_codegen # Turn off AutoTVM config not found warnings old_autotvm_silent = autotvm.GLOBAL_SCOPE.silent @@ -63,7 +63,7 @@ def call_all_topi_funcs(mod, params, target): ): try: opt_mod, _ = relay.optimize(mod, target, params) - grc = graph_runtime_codegen.GraphExecutorCodegen(None, target) + grc = graph_executor_codegen.GraphExecutorCodegen(None, target) grc.codegen(opt_mod["main"]) except tvm.TVMError: print( diff --git a/python/tvm/autotvm/task/relay_integration.py b/python/tvm/autotvm/task/relay_integration.py index ddb5205e7f46..12e057e01da6 100644 --- a/python/tvm/autotvm/task/relay_integration.py +++ b/python/tvm/autotvm/task/relay_integration.py @@ -36,14 +36,14 @@ def _lower(mod, target, params): """Helper to lower VTA properly.""" # pylint: disable=import-outside-toplevel from tvm import relay - from tvm.relay.backend import graph_runtime_codegen + from tvm.relay.backend import graph_executor_codegen if hasattr(target, "device_name") and target.device_name == "vta": import vta with vta.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}): mod, _ = relay.optimize(mod, target, params) - grc = graph_runtime_codegen.GraphExecutorCodegen(None, target) + grc = graph_executor_codegen.GraphExecutorCodegen(None, target) grc.codegen(mod["main"]) return @@ -53,7 +53,7 @@ def _lower(mod, target, params): # TODO: Currently VM compiler is likely to stack overflow for large models. 
     try:
         opt_mod, _ = relay.optimize(mod, target, params)
-        grc = graph_runtime_codegen.GraphExecutorCodegen(None, target)
+        grc = graph_executor_codegen.GraphExecutorCodegen(None, target)
         grc.codegen(opt_mod["main"])
     except tvm.TVMError as e:
         print(
diff --git a/python/tvm/contrib/cuda_graph/cuda_graph_executor.py b/python/tvm/contrib/cuda_graph/cuda_graph_executor.py
index c2f4455d881b..757ce7483e7f 100644
--- a/python/tvm/contrib/cuda_graph/cuda_graph_executor.py
+++ b/python/tvm/contrib/cuda_graph/cuda_graph_executor.py
@@ -18,7 +18,7 @@
 import tvm._ffi
 
 from tvm._ffi.base import string_types
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 
 
 def create(graph_json_str, libmod, device):
@@ -44,17 +44,17 @@ def create(graph_json_str, libmod, device):
 
     Note
     ----
-    See also :py:class:`tvm.contrib.cuda_graph.cuda_graph_runtime.GraphModuleCudaGraph`
+    See also :py:class:`tvm.contrib.cuda_graph.cuda_graph_executor.GraphModuleCudaGraph`
     for examples to directly construct a GraphModuleCudaGraph from an exported
     relay compiled library.
     """
     assert isinstance(graph_json_str, string_types)
    try:
-        dev, num_rpc_dev, device_type_id = graph_runtime.get_device(libmod, device)
+        dev, num_rpc_dev, device_type_id = graph_executor.get_device(libmod, device)
         if num_rpc_dev == len(dev):
-            fcreate = dev[0]._rpc_sess.get_function("tvm.graph_runtime_cuda_graph.create")
+            fcreate = dev[0]._rpc_sess.get_function("tvm.graph_executor_cuda_graph.create")
         else:
-            fcreate = tvm._ffi.get_global_func("tvm.graph_runtime_cuda_graph.create")
+            fcreate = tvm._ffi.get_global_func("tvm.graph_executor_cuda_graph.create")
     except ValueError:
         raise ValueError(
             "To enable CUDA graph support (experimental), please set "
@@ -64,7 +64,7 @@ def create(graph_json_str, libmod, device):
     return GraphModuleCudaGraph(fcreate(graph_json_str, libmod, *device_type_id))
 
 
-class GraphModuleCudaGraph(graph_runtime.GraphModule):
+class GraphModuleCudaGraph(graph_executor.GraphModule):
     """CUDA graph runtime module.
 
     This is a CUDA graph runtime wrapper over the TVM runtime.
@@ -81,7 +81,7 @@ def __init__(self, module):
         self._end_capture = module["end_capture"]
         self._run_cuda_graph = module["run_cuda_graph"]
         self._cuda_graph_captured = False
-        graph_runtime.GraphModule.__init__(self, module)
+        graph_executor.GraphModule.__init__(self, module)
 
     def capture_cuda_graph(self):
         """Capture a CUDA graph for tvm_op graph
@@ -131,4 +131,4 @@ def debug_get_output(self, node, out):
         out : NDArray
             The output array container
         """
-        raise NotImplementedError("Please use debugger.debug_runtime as graph_runtime instead.")
+        raise NotImplementedError("Please use debugger.debug_runtime as graph_executor instead.")
diff --git a/python/tvm/contrib/debugger/debug_executor.py b/python/tvm/contrib/debugger/debug_executor.py
index f0e299728fbc..b7de4390c81d 100644
--- a/python/tvm/contrib/debugger/debug_executor.py
+++ b/python/tvm/contrib/debugger/debug_executor.py
@@ -22,7 +22,7 @@
 import tvm._ffi
 
 from tvm._ffi.base import string_types
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 from tvm.runtime.ndarray import array
 from . import debug_result
@@ -57,11 +57,11 @@ def create(graph_json_str, libmod, device, dump_root=None):
     assert isinstance(graph_json_str, string_types)
     try:
-        dev, num_rpc_dev, device_type_id = graph_runtime.get_device(libmod, device)
+        dev, num_rpc_dev, device_type_id = graph_executor.get_device(libmod, device)
         if num_rpc_dev == len(dev):
-            fcreate = dev[0]._rpc_sess.get_function("tvm.graph_runtime_debug.create")
+            fcreate = dev[0]._rpc_sess.get_function("tvm.graph_executor_debug.create")
         else:
-            fcreate = tvm._ffi.get_global_func("tvm.graph_runtime_debug.create")
+            fcreate = tvm._ffi.get_global_func("tvm.graph_executor_debug.create")
     except ValueError:
         raise ValueError(
             "Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in "
@@ -71,7 +71,7 @@ def create(graph_json_str, libmod, device, dump_root=None):
     return GraphModuleDebug(func_obj, dev, graph_json_str, dump_root)
 
 
-class GraphModuleDebug(graph_runtime.GraphModule):
+class GraphModuleDebug(graph_executor.GraphModule):
     """Graph debug runtime module.
 
     This is a debug wrapper over the TVM runtime.
@@ -100,7 +100,7 @@ def __init__(self, module, device, graph_json_str, dump_root):
         self._dump_path = None
         self._get_output_by_layer = module["get_output_by_layer"]
         self._run_individual = module["run_individual"]
-        graph_runtime.GraphModule.__init__(self, module)
+        graph_executor.GraphModule.__init__(self, module)
         self._create_debug_env(graph_json_str, device)
 
     def _format_device(self, device):
diff --git a/python/tvm/contrib/graph_executor.py b/python/tvm/contrib/graph_executor.py
index 63bd1ebade56..eeb0347e3efd 100644
--- a/python/tvm/contrib/graph_executor.py
+++ b/python/tvm/contrib/graph_executor.py
@@ -50,7 +50,7 @@ def create(graph_json_str, libmod, device):
 
     Note
     ----
-    See also :py:class:`tvm.contrib.graph_runtime.GraphModule`
+    See also :py:class:`tvm.contrib.graph_executor.GraphModule`
     for examples to directly construct a GraphModule from an exported
     relay compiled library.
     """
@@ -59,9 +59,9 @@ def create(graph_json_str, libmod, device):
     dev, num_rpc_dev, device_type_id = get_device(libmod, device)
     if num_rpc_dev == len(dev):
-        fcreate = dev[0]._rpc_sess.get_function("tvm.graph_runtime.create")
+        fcreate = dev[0]._rpc_sess.get_function("tvm.graph_executor.create")
     else:
-        fcreate = tvm._ffi.get_global_func("tvm.graph_runtime.create")
+        fcreate = tvm._ffi.get_global_func("tvm.graph_executor.create")
 
     return GraphModule(fcreate(graph_json_str, libmod, *device_type_id))
 
@@ -135,7 +135,7 @@ class GraphModule(object):
 
         import tvm
         from tvm import relay
-        from tvm.contrib import graph_runtime
+        from tvm.contrib import graph_executor
 
         # build the library using graph runtime
         lib = relay.build(...)
@@ -144,7 +144,7 @@ class GraphModule(object):
         lib: tvm.runtime.Module = tvm.runtime.load_module("compiled_lib.so")
         # Call the library factory function for default and create
         # a new runtime.Module, wrap with graph module.
-        gmod = graph_runtime.GraphModule(lib["default"](dev))
+        gmod = graph_executor.GraphModule(lib["default"](dev))
         # use the graph module.
         gmod.set_input("x", data)
         gmod.run()
@@ -270,7 +270,7 @@ def debug_get_output(self, node, out):
         out : NDArray
             The output array container
         """
-        raise NotImplementedError("Please use debugger.debug_runtime as graph_runtime instead.")
+        raise NotImplementedError("Please use debugger.debug_runtime as graph_executor instead.")
 
     def load_params(self, params_bytes):
         """Load parameters from serialized byte array of parameter dict.
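The graph_executor.py hunks above cover both construction paths. A short Python sketch contrasting them; `graph_json_str`, `lib`, `params_bytes`, and `dev` are assumed from an earlier build:

    from tvm.contrib import graph_executor

    # Legacy path: explicit graph JSON + module + device, then load params.
    m = graph_executor.create(graph_json_str, lib, dev)
    m.load_params(params_bytes)

    # Recommended path: let the factory module produced by relay.build
    # instantiate the executor with its parameters already bound.
    gmod = graph_executor.GraphModule(lib["default"](dev))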
diff --git a/python/tvm/driver/tvmc/runner.py b/python/tvm/driver/tvmc/runner.py
index 252647557ab5..9f2688fbb335 100644
--- a/python/tvm/driver/tvmc/runner.py
+++ b/python/tvm/driver/tvmc/runner.py
@@ -26,7 +26,7 @@
 import numpy as np
 
 from tvm import rpc
 from tvm.autotvm.measure import request_remote
-from tvm.contrib import graph_runtime as runtime
+from tvm.contrib import graph_executor as runtime
 from tvm.contrib.debugger import debug_runtime
 from tvm.relay import load_param_dict
diff --git a/python/tvm/micro/__init__.py b/python/tvm/micro/__init__.py
index ade63f2da9e4..a75df5683966 100644
--- a/python/tvm/micro/__init__.py
+++ b/python/tvm/micro/__init__.py
@@ -25,7 +25,7 @@
 from .micro_binary import MicroBinary
 from .model_library_format import export_model_library_format, UnsupportedInModelLibraryFormatError
 from .session import (
-    create_local_graph_runtime,
+    create_local_graph_executor,
     create_local_debug_runtime,
     Session,
     SessionTerminatedError,
diff --git a/python/tvm/micro/model_library_format.py b/python/tvm/micro/model_library_format.py
index 06d0a135735f..6768e03f4473 100644
--- a/python/tvm/micro/model_library_format.py
+++ b/python/tvm/micro/model_library_format.py
@@ -24,7 +24,7 @@
 import tarfile
 
 from ..contrib import utils
-from ..relay.backend import graph_runtime_factory
+from ..relay.backend import graph_executor_factory
 from ..relay import param_dict
 
 
@@ -117,7 +117,7 @@ def _build_memory_map(graph_json):
     return memory_map
 
 
-def export_model_library_format(mod: graph_runtime_factory.GraphExecutorFactoryModule, file_name):
+def export_model_library_format(mod: graph_executor_factory.GraphExecutorFactoryModule, file_name):
     """Export the build artifact in Model Library Format.
 
     This function creates a .tar archive containing the build artifacts in a standardized
@@ -126,7 +126,7 @@ def export_model_library_format(mod: graph_runtime_factory.GraphExecutorFactoryM
 
     Parameters
     ----------
-    mod : tvm.relay.backend.graph_runtime_factory.GraphExecutorFactoryModule
+    mod : tvm.relay.backend.graph_executor_factory.GraphExecutorFactoryModule
         The return value of tvm.relay.build, which will be exported into Model Library Format.
     file_name : str
         Path to the .tar archive to generate.
diff --git a/python/tvm/micro/session.py b/python/tvm/micro/session.py
index b260bd3bee36..ea8f4af9c878 100644
--- a/python/tvm/micro/session.py
+++ b/python/tvm/micro/session.py
@@ -22,7 +22,7 @@
 from ..error import register_error
 from .._ffi import get_global_func
-from ..contrib import graph_runtime
+from ..contrib import graph_executor
 from ..contrib.debugger import debug_runtime
 from ..rpc import RPCSession
 from .transport import IoTimeoutError
@@ -92,7 +92,7 @@ def __init__(
         self.timeout_override = timeout_override
 
         self._rpc = None
-        self._graph_runtime = None
+        self._graph_executor = None
 
     def get_system_lib(self):
         return self._rpc.get_function("runtime.SystemLib")()
@@ -192,7 +192,7 @@ def lookup_remote_linked_param(mod, storage_id, template_tensor, device):
     )
 
 
-def create_local_graph_runtime(graph_json_str, mod, device):
+def create_local_graph_executor(graph_json_str, mod, device):
     """Create a local graph runtime driving execution on the remote CPU device given.
 
     Parameters
@@ -212,8 +212,8 @@ def create_local_graph_runtime(graph_json_str, mod, device):
         A local graph runtime instance that executes on the remote device.
""" device_type_id = [device.device_type, device.device_id] - fcreate = get_global_func("tvm.graph_runtime.create") - return graph_runtime.GraphModule( + fcreate = get_global_func("tvm.graph_executor.create") + return graph_executor.GraphModule( fcreate(graph_json_str, mod, lookup_remote_linked_param, *device_type_id) ) @@ -241,7 +241,7 @@ def create_local_debug_runtime(graph_json_str, mod, device, dump_root=None): A local graph runtime instance that executes on the remote device. """ device_type_id = [device.device_type, device.device_id] - fcreate = get_global_func("tvm.graph_runtime_debug.create") + fcreate = get_global_func("tvm.graph_executor_debug.create") return debug_runtime.GraphModuleDebug( fcreate(graph_json_str, mod, lookup_remote_linked_param, *device_type_id), [device], diff --git a/python/tvm/relay/backend/graph_executor_codegen.py b/python/tvm/relay/backend/graph_executor_codegen.py index de3de70ae987..9ec33a4807f5 100644 --- a/python/tvm/relay/backend/graph_executor_codegen.py +++ b/python/tvm/relay/backend/graph_executor_codegen.py @@ -31,7 +31,7 @@ To connect to the graph runtime, we use a printer that converts our graph format into TVM's JSON format. The resulting string can be loaded by -contrib.graph_runtime or any other TVM runtime compatible systems. +contrib.graph_executor or any other TVM runtime compatible systems. """ from tvm.runtime.ndarray import empty from tvm.relay import _build_module diff --git a/python/tvm/relay/backend/graph_executor_factory.py b/python/tvm/relay/backend/graph_executor_factory.py index a6404e2a0bd0..5356af946de4 100644 --- a/python/tvm/relay/backend/graph_executor_factory.py +++ b/python/tvm/relay/backend/graph_executor_factory.py @@ -43,7 +43,7 @@ class GraphExecutorFactoryModule: def __init__(self, ir_mod, target, graph_json_str, libmod, libmod_name, params): assert isinstance(graph_json_str, string_types) - fcreate = get_global_func("tvm.graph_runtime_factory.create") + fcreate = get_global_func("tvm.graph_executor_factory.create") args = [] for k, v in params.items(): args.append(k) @@ -79,7 +79,7 @@ def __iter__(self): warnings.warn( "legacy graph runtime behavior of producing json / lib / params will be " "removed in the next release." - " Please see documents of tvm.contrib.graph_runtime.GraphModule for the " + " Please see documents of tvm.contrib.graph_executor.GraphModule for the " " new recommended usage.", DeprecationWarning, 2, diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py index ac50c56264a8..9e81240b2ffe 100644 --- a/python/tvm/relay/build_module.py +++ b/python/tvm/relay/build_module.py @@ -27,13 +27,13 @@ from tvm.tir import expr as tvm_expr from .. import nd as _nd, autotvm, register_func from ..target import Target -from ..contrib import graph_runtime as _graph_rt +from ..contrib import graph_executor as _graph_rt from . import _build_module from . import ty as _ty from . import expr as _expr from . 
diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py
index ac50c56264a8..9e81240b2ffe 100644
--- a/python/tvm/relay/build_module.py
+++ b/python/tvm/relay/build_module.py
@@ -27,13 +27,13 @@
 from tvm.tir import expr as tvm_expr
 from .. import nd as _nd, autotvm, register_func
 from ..target import Target
-from ..contrib import graph_runtime as _graph_rt
+from ..contrib import graph_executor as _graph_rt
 from . import _build_module
 from . import ty as _ty
 from . import expr as _expr
 from . import function as _function
 from .transform import InferType
-from .backend import graph_runtime_factory as _graph_runtime_factory
+from .backend import graph_executor_factory as _graph_executor_factory
 from .backend import interpreter as _interpreter
 from .backend.vm import VMExecutor
 
@@ -110,7 +110,7 @@ def build(self, mod, target=None, target_host=None, params=None):
 
         Returns
         -------
-        factory_module : tvm.relay.backend.graph_runtime_factory.GraphExecutorFactoryModule
+        factory_module : tvm.relay.backend.graph_executor_factory.GraphExecutorFactoryModule
             The runtime factory for the TVM graph runtime.
         """
         target = _update_target(target)
@@ -281,7 +281,7 @@ def build(ir_mod, target=None, target_host=None, params=None, mod_name="default"
     with tophub_context:
         bld_mod = BuildModule()
         graph_json, runtime_mod, params = bld_mod.build(ir_mod, target, target_host, params)
-        runtime_mod = _graph_runtime_factory.GraphExecutorFactoryModule(
+        runtime_mod = _graph_executor_factory.GraphExecutorFactoryModule(
             ir_mod, target, graph_json, runtime_mod, mod_name, params
         )
 
     return runtime_mod
diff --git a/python/tvm/relay/frontend/common.py b/python/tvm/relay/frontend/common.py
index 7bb70cec5ee5..c2546205c571 100644
--- a/python/tvm/relay/frontend/common.py
+++ b/python/tvm/relay/frontend/common.py
@@ -530,13 +530,13 @@ def infer_value(input_val, params, mod=None):
     try:
         # TODO(kevinthesun): Use VM for all cases.
         # pylint: disable=import-outside-toplevel
-        from tvm.contrib import graph_runtime
+        from tvm.contrib import graph_executor
 
         func = _function.Function(analysis.free_vars(input_val), input_val)
         with tvm.transform.PassContext(opt_level=0):
             lib = tvm.relay.build(func, target="llvm", params=params)
         dev = tvm.cpu(0)
-        m = graph_runtime.GraphModule(lib["default"](dev))
+        m = graph_executor.GraphModule(lib["default"](dev))
         m.run()
         return m.get_output(0)
     except Exception:
diff --git a/python/tvm/relay/quantize/_calibrate.py b/python/tvm/relay/quantize/_calibrate.py
index 20afa1909ac9..a906a98dccd4 100644
--- a/python/tvm/relay/quantize/_calibrate.py
+++ b/python/tvm/relay/quantize/_calibrate.py
@@ -29,7 +29,7 @@
 from .. import expr as _expr
 from .. import analysis as _analysis
 from .. import build_module as _build_module
-from ...contrib import graph_runtime
+from ...contrib import graph_executor
 from .kl_divergence import _find_scale_by_kl
 
 
@@ -46,7 +46,7 @@ def _get_profile_runtime(mod):
 
     with tvm.transform.PassContext(opt_level=3):
         lib = _build_module.build(func, target=target)
-    runtime = graph_runtime.GraphModule(lib["default"](dev))
+    runtime = graph_executor.GraphModule(lib["default"](dev))
 
     return runtime
diff --git a/rust/tvm-graph-rt/src/graph.rs b/rust/tvm-graph-rt/src/graph.rs
index 0174b0cacf3e..de2e7dddff5c 100644
--- a/rust/tvm-graph-rt/src/graph.rs
+++ b/rust/tvm-graph-rt/src/graph.rs
@@ -39,7 +39,7 @@
 use crate::{errors::*, Module, Storage, Tensor};
 
 // @see `kTVMNDArrayMagic` in `ndarray.h`
 const _NDARRAY_MAGIC: u64 = 0xDD5E_40F0_96B4_A13F;
 
-// @see `kTVMNDArrayListMagic` in `graph_runtime.h`
+// @see `kTVMNDArrayListMagic` in `graph_executor.h`
 const _NDARRAY_LIST_MAGIC: u64 = 0xF7E5_8D4F_0504_9CB7;
 
 /// A TVM computation graph.
diff --git a/rust/tvm/examples/resnet/src/build_resnet.py b/rust/tvm/examples/resnet/src/build_resnet.py
index 6c99dee22bf5..28b5807e3ea1 100644
--- a/rust/tvm/examples/resnet/src/build_resnet.py
+++ b/rust/tvm/examples/resnet/src/build_resnet.py
@@ -29,7 +29,7 @@
 from tvm import te
 from tvm import relay, runtime
 from tvm.relay import testing
-from tvm.contrib import graph_runtime, cc
+from tvm.contrib import graph_executor, cc
 from PIL import Image
 from tvm.contrib.download import download_testdata
 from mxnet.gluon.model_zoo.vision import get_model
@@ -141,7 +141,7 @@ def test_build(build_dir):
     params = bytearray(open(osp.join(build_dir, "deploy_param.params"), "rb").read())
     input_data = get_cat_image()
     dev = tvm.cpu()
-    module = graph_runtime.create(graph, lib, dev)
+    module = graph_executor.create(graph, lib, dev)
     module.load_params(params)
     module.run(data=input_data)
     out = module.get_output(0).asnumpy()
diff --git a/rust/tvm/src/runtime/graph_rt.rs b/rust/tvm/src/runtime/graph_rt.rs
index cba2875c9952..7e1e6ed2fa8b 100644
--- a/rust/tvm/src/runtime/graph_rt.rs
+++ b/rust/tvm/src/runtime/graph_rt.rs
@@ -46,7 +46,7 @@ impl GraphRt {
     /// Create a graph runtime from the deprecated graph, lib, dev triple.
     pub fn create_from_parts(graph: &str, lib: Module, dev: Device) -> Result<Self> {
-        let runtime_create_fn = Function::get("tvm.graph_runtime.create").unwrap();
+        let runtime_create_fn = Function::get("tvm.graph_executor.create").unwrap();
 
         let runtime_create_fn_ret = runtime_create_fn.invoke(vec![
             graph.into(),
@@ -55,9 +55,9 @@ impl GraphRt {
             // NOTE you must pass the device id in as i32 because that's what TVM expects
             (dev.device_id as i32).into(),
         ]);
-        let graph_runtime_module: Module = runtime_create_fn_ret?.try_into()?;
+        let graph_executor_module: Module = runtime_create_fn_ret?.try_into()?;
         Ok(Self {
-            module: graph_runtime_module,
+            module: graph_executor_module,
         })
     }
diff --git a/src/runtime/crt/Makefile b/src/runtime/crt/Makefile
index d707d0c63b81..8d3acab1858b 100644
--- a/src/runtime/crt/Makefile
+++ b/src/runtime/crt/Makefile
@@ -67,8 +67,8 @@ endef
 
 LIBS = \
 	src/runtime/crt/common \
-	src/runtime/crt/graph_runtime \
-	src/runtime/crt/graph_runtime_module \
+	src/runtime/crt/graph_executor \
+	src/runtime/crt/graph_executor_module \
 	src/runtime/crt/memory \
 	src/runtime/crt/utvm_rpc_common \
 	src/runtime/crt/utvm_rpc_server
diff --git a/src/runtime/crt/common/crt_runtime_api.c b/src/runtime/crt/common/crt_runtime_api.c
index 6634cfa7db89..e7fa7bcb5d5e 100644
--- a/src/runtime/crt/common/crt_runtime_api.c
+++ b/src/runtime/crt/common/crt_runtime_api.c
@@ -30,7 +30,7 @@
 #include
 #include
 #include
-#include <tvm/runtime/crt/graph_runtime.h>
+#include <tvm/runtime/crt/graph_executor.h>
 #include
 #include
 #include
diff --git a/src/runtime/crt/graph_executor/graph_executor.c b/src/runtime/crt/graph_executor/graph_executor.c
index 9431970c21cd..7ddaa7737feb 100644
--- a/src/runtime/crt/graph_executor/graph_executor.c
+++ b/src/runtime/crt/graph_executor/graph_executor.c
@@ -20,12 +20,12 @@
 // LINT_C_FILE
 
 /*!
- * \file graph_runtime.c
+ * \file graph_executor.c
  * \brief implement graph runtime in pure C
  */
 #include
-#include <tvm/runtime/crt/graph_runtime.h>
+#include <tvm/runtime/crt/graph_executor.h>
 #include
 #include
 #include
diff --git a/src/runtime/crt/graph_executor/load_json.c b/src/runtime/crt/graph_executor/load_json.c
index 65dcb3eccb44..dd2faecdc538 100644
--- a/src/runtime/crt/graph_executor/load_json.c
+++ b/src/runtime/crt/graph_executor/load_json.c
@@ -25,7 +25,7 @@
 */
 #include
 #include
-#include <tvm/runtime/crt/internal/graph_runtime/load_json.h>
+#include <tvm/runtime/crt/internal/graph_executor/load_json.h>
 #include
 #include
diff --git a/src/runtime/crt/graph_executor_module/graph_executor_module.c b/src/runtime/crt/graph_executor_module/graph_executor_module.c
index fa741329e5f6..f4a7d8cee7cb 100644
--- a/src/runtime/crt/graph_executor_module/graph_executor_module.c
+++ b/src/runtime/crt/graph_executor_module/graph_executor_module.c
@@ -20,27 +20,27 @@
 // LINT_C_FILE
 
 /*!
- * \file graph_runtime_module.c
- * \brief wrap graph_runtime into a TVMModule for use with RPC.
+ * \file graph_executor_module.c
+ * \brief wrap graph_executor into a TVMModule for use with RPC.
  */
 
 #include
-#include <tvm/runtime/crt/graph_runtime.h>
-#include <tvm/runtime/crt/graph_runtime_module.h>
+#include <tvm/runtime/crt/graph_executor.h>
+#include <tvm/runtime/crt/graph_executor_module.h>
 #include
 
-#include "tvm/runtime/crt/internal/graph_runtime/graph_runtime.h"
+#include "tvm/runtime/crt/internal/graph_executor/graph_executor.h"
 
 typedef struct {
   TVMModule mod;
   TVMGraphExecutor* runtime;
 } GraphExecutorModule;
 
-static GraphExecutorModule graph_runtime;
+static GraphExecutorModule graph_executor;
 
 int32_t TVMGraphExecutorModule_Create(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values,
                                       int* ret_tcodes, void* resource_handle) {
-  if (graph_runtime.runtime != NULL) {
+  if (graph_executor.runtime != NULL) {
     return kTvmErrorGraphModuleAlreadyCreated;
   }
 
@@ -59,16 +59,16 @@ int32_t TVMGraphExecutorModule_Create(TVMValue* args, int* tcodes, int nargs, TV
   DLDevice dev = {(DLDeviceType)args[2].v_int64, (int)args[3].v_int64};
 
   int ret_value =
-      TVMGraphExecutor_Create(args[0].v_str, args[1].v_handle, &dev, &graph_runtime.runtime);
+      TVMGraphExecutor_Create(args[0].v_str, args[1].v_handle, &dev, &graph_executor.runtime);
   if (ret_value != 0) {
     return ret_value;
   }
 
   TVMModuleHandle out;
-  ret_value = TVMModCreateFromCModule(&graph_runtime.mod, &out);
+  ret_value = TVMModCreateFromCModule(&graph_executor.mod, &out);
   if (ret_value != 0) {
     ret_tcodes[0] = kTVMNullptr;
-    TVMGraphExecutor_Release(&graph_runtime.runtime);
+    TVMGraphExecutor_Release(&graph_executor.runtime);
     return ret_value;
   }
 
@@ -87,14 +87,14 @@ int32_t TVMGraphExecutorModule_GetInput(TVMValue* args, int* tcodes, int nargs,
     return kTvmErrorFunctionCallWrongArgType;
   }
 
-  int index = TVMGraphExecutor_GetInputIndex(graph_runtime.runtime, args[0].v_str);
+  int index = TVMGraphExecutor_GetInputIndex(graph_executor.runtime, args[0].v_str);
   if (index < 0) {
     return kTvmErrorGraphModuleNoSuchInput;
   }
 
-  uint32_t eid = TVMGraphExecutor_GetEntryId(graph_runtime.runtime,
-                                             graph_runtime.runtime->input_nodes[index], 0);
-  ret_values[0].v_handle = (void*)&graph_runtime.runtime->data_entry[eid].dl_tensor;
+  uint32_t eid = TVMGraphExecutor_GetEntryId(graph_executor.runtime,
+                                             graph_executor.runtime->input_nodes[index], 0);
+  ret_values[0].v_handle = (void*)&graph_executor.runtime->data_entry[eid].dl_tensor;
   ret_tcodes[0] = kTVMNDArrayHandle;
   return 0;
 }
@@ -118,7 +118,7 @@ int32_t TVMGraphExecutorModule_GetNumOutputs(TVMValue* args, int* tcodes, int na
     return kTvmErrorFunctionCallNumArguments;
   }
 
-  ret_values[0].v_int64 = TVMGraphExecutor_GetNumOutputs(graph_runtime.runtime);
+  ret_values[0].v_int64 = TVMGraphExecutor_GetNumOutputs(graph_executor.runtime);
   ret_tcodes[0] = kTVMArgInt;
   return 0;
 }
 
@@ -135,15 +135,15 @@ int32_t TVMGraphExecutorModule_GetOutput(TVMValue* args, int* tcodes, int nargs,
   }
 
   int output_index = args[0].v_int64;
-  if (output_index < 0 || output_index > TVMGraphExecutor_GetNumOutputs(graph_runtime.runtime)) {
+  if (output_index < 0 || output_index > TVMGraphExecutor_GetNumOutputs(graph_executor.runtime)) {
     return kTvmErrorGraphModuleNoSuchInput;
   }
 
-  uint32_t nid = graph_runtime.runtime->outputs[output_index].node_id;
-  uint32_t index = graph_runtime.runtime->outputs[output_index].index;
-  uint32_t eid = TVMGraphExecutor_GetEntryId(graph_runtime.runtime, nid, index);
+  uint32_t nid = graph_executor.runtime->outputs[output_index].node_id;
+  uint32_t index = graph_executor.runtime->outputs[output_index].index;
+  uint32_t eid = TVMGraphExecutor_GetEntryId(graph_executor.runtime, nid, index);
 
-  ret_values[0].v_handle = (void*)&(graph_runtime.runtime->data_entry[eid].dl_tensor);
+  ret_values[0].v_handle = (void*)&(graph_executor.runtime->data_entry[eid].dl_tensor);
   ret_tcodes[0] = kTVMNDArrayHandle;
   return 0;
 }
@@ -162,7 +162,7 @@ int32_t TVMGraphExecutorModule_LoadParams(TVMValue* args, int* tcodes, int nargs
   ret_tcodes[0] = kTVMNullptr;
 
   TVMByteArray* arr = (TVMByteArray*)args[0].v_handle;
-  return TVMGraphExecutor_LoadParams(graph_runtime.runtime, arr->data, arr->size);
+  return TVMGraphExecutor_LoadParams(graph_executor.runtime, arr->data, arr->size);
 }
 
 int32_t TVMGraphExecutorModule_Run(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values,
@@ -171,7 +171,7 @@ int32_t TVMGraphExecutorModule_Run(TVMValue* args, int* tcodes, int nargs, TVMVa
     return kTvmErrorFunctionCallNumArguments;
  }
 
-  TVMGraphExecutor_Run(graph_runtime.runtime);
+  TVMGraphExecutor_Run(graph_executor.runtime);
 
   ret_tcodes[0] = kTVMNullptr;
   return 0;
@@ -187,7 +187,7 @@ int32_t TVMGraphExecutorModule_SetInput(TVMValue* args, int* tcodes, int nargs,
     return kTvmErrorFunctionCallWrongArgType;
   }
 
-  TVMGraphExecutor_SetInput(graph_runtime.runtime, args[0].v_str, (DLTensor*)args[1].v_handle);
+  TVMGraphExecutor_SetInput(graph_executor.runtime, args[0].v_str, (DLTensor*)args[1].v_handle);
 
   ret_tcodes[0] = kTVMNullptr;
   return 0;
@@ -199,14 +199,14 @@ int32_t TVMGraphExecutorModule_NotImplemented(TVMValue* args, int* tcodes, int n
   return kTvmErrorFunctionCallNotImplemented;
 }
 
-static const TVMBackendPackedCFunc graph_runtime_registry_funcs[] = {
+static const TVMBackendPackedCFunc graph_executor_registry_funcs[] = {
     &TVMGraphExecutorModule_GetInput,      &TVMGraphExecutorModule_GetNumInputs,
     &TVMGraphExecutorModule_GetNumOutputs, &TVMGraphExecutorModule_GetOutput,
     &TVMGraphExecutorModule_LoadParams,    &TVMGraphExecutorModule_Run,
     &TVMGraphExecutorModule_SetInput,      &TVMGraphExecutorModule_NotImplemented,
 };
 
-static const TVMFuncRegistry graph_runtime_registry = {
+static const TVMFuncRegistry graph_executor_registry = {
     "\x08get_input\0"
     "get_num_inputs\0"
     "get_num_outputs\0"
@@ -215,11 +215,11 @@ static const TVMFuncRegistry graph_runtime_registry = {
     "run\0"
     "set_input\0"
     "share_params\0",
-    graph_runtime_registry_funcs};
+    graph_executor_registry_funcs};
 
 tvm_crt_error_t TVMGraphExecutorModule_Register() {
-  graph_runtime.mod.registry = &graph_runtime_registry;
-  graph_runtime.runtime = NULL;
+  graph_executor.mod.registry = &graph_executor_registry;
+  graph_executor.runtime = NULL;
 
-  return TVMFuncRegisterGlobal("tvm.graph_runtime.create", &TVMGraphExecutorModule_Create, 0);
+  return TVMFuncRegisterGlobal("tvm.graph_executor.create", &TVMGraphExecutorModule_Create, 0);
 }
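The CRT module above registers its constructor under the same "tvm.graph_executor.create" name the C++ runtime uses, so an RPC client can fetch it uniformly. A hedged Python sketch over an RPC session; `remote`, `rlib`, and `graph_json` are assumed to have been set up beforehand:

    from tvm.contrib import graph_executor

    dev = remote.cpu(0)
    # The remote registry exposes the renamed constructor regardless of
    # whether the C++ runtime or the CRT module registered it.
    fcreate = remote.get_function("tvm.graph_executor.create")
    gmod = graph_executor.GraphModule(
        fcreate(graph_json, rlib, dev.device_type, dev.device_id)
    )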
diff --git a/src/runtime/crt/host/main.cc b/src/runtime/crt/host/main.cc
index f5548471cd9d..8976140bdf99 100644
--- a/src/runtime/crt/host/main.cc
+++ b/src/runtime/crt/host/main.cc
@@ -35,7 +35,7 @@
 #include "crt_config.h"
 
 #ifdef TVM_HOST_USE_GRAPH_RUNTIME_MODULE
-#include <tvm/runtime/crt/graph_runtime_module.h>
+#include <tvm/runtime/crt/graph_executor_module.h>
 #endif
 
 using namespace std::chrono;
diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h
index 656e7a77f84c..f7d1d0b27012 100644
--- a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h
+++ b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h
@@ -18,15 +18,15 @@
  */
 
 /*!
- * \file src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h
+ * \file src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h
  * \brief Tiny graph runtime that can run graph containing only tvm PackedFunc.
  */
 #ifndef TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_GRAPH_RUNTIME_H_
 #define TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_GRAPH_RUNTIME_H_
 
-#include <tvm/runtime/crt/graph_runtime.h>
+#include <tvm/runtime/crt/graph_executor.h>
 #include
-#include <tvm/runtime/crt/internal/graph_runtime/load_json.h>
+#include <tvm/runtime/crt/internal/graph_executor/load_json.h>
 #include
 
 // Memory pool entry.
diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h
index af69506b0634..0010c76a593f 100644
--- a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h
+++ b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h
@@ -18,7 +18,7 @@
 */
 
 /*!
- * \file src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/load_json.h
+ * \file src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h
  * \brief Lightweight JSON Reader that read save into C++ data structs.
  */
 #ifndef TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_LOAD_JSON_H_
diff --git a/src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc b/src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc
index e57a4446bb31..5ccfa834e9bb 100644
--- a/src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc
+++ b/src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc
@@ -18,13 +18,13 @@
 */
 
 /*!
- * \file graph_runtime_cuda_graph.cc
+ * \file graph_executor_cuda_graph.cc
  */
 #include
 
 #include "../../cuda/cuda_common.h"
-#include "../graph_runtime.h"
+#include "../graph_executor.h"
 
 namespace tvm {
 namespace runtime {
@@ -116,9 +116,9 @@ Module GraphExecutorCudaGraphCreate(const std::string& sym_json, const tvm::runt
   return Module(exec);
 }
 
-TVM_REGISTER_GLOBAL("tvm.graph_runtime_cuda_graph.create")
+TVM_REGISTER_GLOBAL("tvm.graph_executor_cuda_graph.create")
     .set_body([](TVMArgs args, TVMRetValue* rv) {
-      ICHECK_GE(args.num_args, 4) << "The expected number of arguments for graph_runtime.create is "
+      ICHECK_GE(args.num_args, 4) << "The expected number of arguments for graph_executor.create is "
                                      "at least 4, but it has "
                                   << args.num_args;
       PackedFunc lookup_linked_param_func;
diff --git a/src/runtime/graph_executor/debug/graph_executor_debug.cc b/src/runtime/graph_executor/debug/graph_executor_debug.cc
index aa9d0f8de5e4..e49f41cae6d9 100644
--- a/src/runtime/graph_executor/debug/graph_executor_debug.cc
+++ b/src/runtime/graph_executor/debug/graph_executor_debug.cc
@@ -18,7 +18,7 @@
 */
 
 /*!
diff --git a/src/runtime/graph_executor/debug/graph_executor_debug.cc b/src/runtime/graph_executor/debug/graph_executor_debug.cc index aa9d0f8de5e4..e49f41cae6d9 100644 --- a/src/runtime/graph_executor/debug/graph_executor_debug.cc +++ b/src/runtime/graph_executor/debug/graph_executor_debug.cc @@ -18,7 +18,7 @@ */ /*! - * \file graph_runtime_debug.cc + * \file graph_executor_debug.cc */ #include #include @@ -29,7 +29,7 @@ #include #include -#include "../graph_runtime.h" +#include "../graph_executor.h" namespace tvm { namespace runtime { @@ -279,8 +279,8 @@ Module GraphExecutorDebugCreate(const std::string& sym_json, const tvm::runtime: return Module(exec); } -TVM_REGISTER_GLOBAL("tvm.graph_runtime_debug.create").set_body([](TVMArgs args, TVMRetValue* rv) { - ICHECK_GE(args.num_args, 4) << "The expected number of arguments for graph_runtime.create is " +TVM_REGISTER_GLOBAL("tvm.graph_executor_debug.create").set_body([](TVMArgs args, TVMRetValue* rv) { + ICHECK_GE(args.num_args, 4) << "The expected number of arguments for graph_executor.create is " "at least 4, but it has " << args.num_args; PackedFunc lookup_linked_param_func; diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc index 84e0fa146693..570d55a69e5a 100644 --- a/src/runtime/graph_executor/graph_executor.cc +++ b/src/runtime/graph_executor/graph_executor.cc @@ -18,9 +18,9 @@ */ /*! - * \file graph_runtime.cc + * \file graph_executor.cc */ -#include "graph_runtime.h" +#include "graph_executor.h" #include #include @@ -531,8 +531,8 @@ std::vector<Device> GetAllDevice(const TVMArgs& args, int dev_start_arg) { // execution support yet. For heterogenenous execution, at least 5 arguments will // be passed in. The third one is the number of devices. // Eventually, we will only probably pass Device for all the languages. -TVM_REGISTER_GLOBAL("tvm.graph_runtime.create").set_body([](TVMArgs args, TVMRetValue* rv) { - ICHECK_GE(args.num_args, 4) << "The expected number of arguments for graph_runtime.create is " +TVM_REGISTER_GLOBAL("tvm.graph_executor.create").set_body([](TVMArgs args, TVMRetValue* rv) { + ICHECK_GE(args.num_args, 4) << "The expected number of arguments for graph_executor.create is " "at least 4, but it has " << args.num_args; PackedFunc lookup_linked_param_func;
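With the base creator now registered as "tvm.graph_executor.create", the Python helper tvm.contrib.graph_executor.create dispatches to it, and load_params accepts the serialized blob produced by runtime.save_param_dict, mirroring several of the test changes later in this patch. A sketch, assuming an LLVM-enabled build:

    import numpy as np
    import tvm
    from tvm import relay, runtime
    from tvm.contrib import graph_executor

    x = relay.var("x", shape=(1, 5), dtype="float32")
    y = relay.var("y", shape=(1, 5), dtype="float32")
    func = relay.Function([x, y], x + y)
    params = {"y": np.random.rand(1, 5).astype("float32")}
    graph, lib, params = relay.build(tvm.IRModule.from_expr(func), "llvm", params=params)

    # create() wraps the global registered in the hunk above; parameters can be
    # streamed in afterwards as one serialized blob.
    m = graph_executor.create(graph, lib, tvm.cpu(0))
    m.load_params(runtime.save_param_dict(params))
    m.set_input(x=np.ones((1, 5), dtype="float32"))
    m.run()
    print(m.get_output(0).asnumpy())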
diff --git a/src/runtime/graph_executor/graph_executor_factory.cc b/src/runtime/graph_executor/graph_executor_factory.cc index d74ef461dfb5..a7baed073e1c 100644 --- a/src/runtime/graph_executor/graph_executor_factory.cc +++ b/src/runtime/graph_executor/graph_executor_factory.cc @@ -18,11 +18,11 @@ */ /*! - * \file graph_runtime_factory.cc + * \file graph_executor_factory.cc * \brief Graph runtime factory implementations */ -#include "./graph_runtime_factory.h" +#include "./graph_executor_factory.h" #include #include @@ -112,8 +112,8 @@ Module GraphExecutorFactory::RuntimeCreate(const std::vector<Device>& devs) { } Module GraphExecutorFactory::DebugRuntimeCreate(const std::vector<Device>& devs) { - const PackedFunc* pf = tvm::runtime::Registry::Get("tvm.graph_runtime_debug.create"); - ICHECK(pf != nullptr) << "Cannot find function tvm.graph_runtime_debug.create in registry. " + const PackedFunc* pf = tvm::runtime::Registry::Get("tvm.graph_executor_debug.create"); + ICHECK(pf != nullptr) << "Cannot find function tvm.graph_executor_debug.create in registry. " "Do you enable debug graph runtime build?"; // Debug runtime create packed function will call GetAllContexs, so we unpack the devs. std::vector<Device> unpacked_devs; @@ -139,8 +139,8 @@ Module GraphExecutorFactory::DebugRuntimeCreate(const std::vector<Device>& devs) } Module GraphExecutorFactory::CudaGraphExecutorCreate(const std::vector<Device>& devs) { - const PackedFunc* pf = tvm::runtime::Registry::Get("tvm.graph_runtime_cuda_graph.create"); - ICHECK(pf != nullptr) << "Cannot find function tvm.graph_runtime_cuda_graph.create in registry. " + const PackedFunc* pf = tvm::runtime::Registry::Get("tvm.graph_executor_cuda_graph.create"); + ICHECK(pf != nullptr) << "Cannot find function tvm.graph_executor_cuda_graph.create in registry. " "Did you set(USE_GRAPH_RUNTIME_CUGRAPH=ON)?"; std::vector<Device> unpacked_devs; for (const auto& dev : devs) { @@ -184,9 +184,9 @@ Module GraphExecutorFactoryModuleLoadBinary(void* strm) { return Module(exec); } -TVM_REGISTER_GLOBAL("tvm.graph_runtime_factory.create").set_body([](TVMArgs args, TVMRetValue* rv) { +TVM_REGISTER_GLOBAL("tvm.graph_executor_factory.create").set_body([](TVMArgs args, TVMRetValue* rv) { ICHECK_GE(args.num_args, 3) << "The expected number of arguments for " - "graph_runtime_factory.create needs at least 3, " + "graph_executor_factory.create needs at least 3, " "but it has " << args.num_args; // The argument order is graph_json, module, module_name, param0_name, param0_tensor, diff --git a/src/runtime/graph_executor/graph_executor_factory.h b/src/runtime/graph_executor/graph_executor_factory.h index 3881b6b7f0b8..030f16ea1239 100644 --- a/src/runtime/graph_executor/graph_executor_factory.h +++ b/src/runtime/graph_executor/graph_executor_factory.h @@ -37,7 +37,7 @@ #include #include -#include "./graph_runtime.h" +#include "./graph_executor.h" namespace tvm { namespace runtime { @@ -99,10 +99,10 @@ class TVM_DLL GraphExecutorFactory : public runtime::ModuleNode { /*! * \brief Set params. - * \param graph_runtime The graph runtime we want to set the params into. + * \param graph_executor The graph runtime we want to set the params into. * \param params The graph params value we want to set. */ - void SetParams(GraphExecutor* graph_runtime, + void SetParams(GraphExecutor* graph_executor, const std::unordered_map<std::string, tvm::runtime::NDArray>& params) const { std::unordered_map<std::string, tvm::runtime::NDArray> value = params; // upload big arrays first to avoid memory issue in rpc mode @@ -117,9 +117,9 @@ class TVM_DLL GraphExecutorFactory : public runtime::ModuleNode { return lhs_size > rhs_size; }); for (const auto& key : keys) { - int in_idx = graph_runtime->GetInputIndex(key); + int in_idx = graph_executor->GetInputIndex(key); if (in_idx >= 0) { - graph_runtime->SetInput(in_idx, const_cast<DLTensor*>(value[key].operator->())); + graph_executor->SetInput(in_idx, const_cast<DLTensor*>(value[key].operator->())); } } }
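Two details of the factory above are worth noting: SetParams uploads the largest arrays first to avoid memory pressure in RPC mode, and relay.build returns this factory as a module whose "default" entry instantiates an executor with the stored parameters already applied. A sketch of that user-facing flow, under the same LLVM-build assumption as the earlier snippets:

    import numpy as np
    import tvm
    from tvm import relay
    from tvm.contrib import graph_executor

    x = relay.var("x", shape=(1, 8), dtype="float32")
    w = relay.var("w", shape=(8, 8), dtype="float32")
    mod = tvm.IRModule.from_expr(relay.Function([x, w], relay.nn.dense(x, w)))
    params = {"w": np.random.rand(8, 8).astype("float32")}

    # The "default" entry creates the executor on the given device; the factory
    # takes care of pushing params, so only real inputs remain to be set.
    lib = relay.build(mod, target="llvm", params=params)
    m = graph_executor.GraphModule(lib["default"](tvm.cpu(0)))
    m.set_input("x", np.ones((1, 8), dtype="float32"))
    m.run()
    print(m.get_output(0).asnumpy().shape)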
diff --git a/src/runtime/micro/standalone/utvm_graph_executor.cc b/src/runtime/micro/standalone/utvm_graph_executor.cc index f945fd9f82f2..920faa134cf5 100644 --- a/src/runtime/micro/standalone/utvm_graph_executor.cc +++ b/src/runtime/micro/standalone/utvm_graph_executor.cc @@ -17,7 +17,7 @@ * under the License. */ -#include "utvm_graph_runtime.h" +#include "utvm_graph_executor.h" #include diff --git a/src/runtime/micro/standalone/utvm_runtime.cc b/src/runtime/micro/standalone/utvm_runtime.cc index c6b73aad6717..eb96c1e79db7 100644 --- a/src/runtime/micro/standalone/utvm_runtime.cc +++ b/src/runtime/micro/standalone/utvm_runtime.cc @@ -20,7 +20,7 @@ #include -#include "utvm_graph_runtime.h" +#include "utvm_graph_executor.h" void* UTVMRuntimeCreate(const char* json, size_t json_len, void* module) { return new tvm::micro::MicroGraphExecutor(std::string(json, json + json_len), diff --git a/tests/cpp/build_module_test.cc b/tests/cpp/build_module_test.cc index ed50e3c86e85..75754dee9177 100644 --- a/tests/cpp/build_module_test.cc +++ b/tests/cpp/build_module_test.cc @@ -169,10 +169,10 @@ TEST(BuildModule, Heterogeneous) { int gpu_dev_ty = static_cast<int>(kDLGPU); int gpu_dev_id = 0; - const runtime::PackedFunc* graph_runtime = - tvm::runtime::Registry::Get("tvm.graph_runtime.create"); + const runtime::PackedFunc* graph_executor = + tvm::runtime::Registry::Get("tvm.graph_executor.create"); runtime::Module mod = - (*graph_runtime)(json, module, cpu_dev_ty, cpu_dev_id, gpu_dev_ty, gpu_dev_id); + (*graph_executor)(json, module, cpu_dev_ty, cpu_dev_id, gpu_dev_ty, gpu_dev_id); // test FFI for module. auto test_ffi = PackedFunc([](TVMArgs args, TVMRetValue* rv) { diff --git a/tests/cpp/relay_build_module_test.cc b/tests/cpp/relay_build_module_test.cc index 74e5289c5093..b7b5abfd697d 100644 --- a/tests/cpp/relay_build_module_test.cc +++ b/tests/cpp/relay_build_module_test.cc @@ -124,7 +124,7 @@ TEST(Relay, BuildModule) { tvm::runtime::Module mod = mod_f(); // run auto dev = A->device; - auto pfr = tvm::runtime::Registry::Get("tvm.graph_runtime.create"); + auto pfr = tvm::runtime::Registry::Get("tvm.graph_executor.create"); ICHECK(mod.defined()) << "Module must be defined"; tvm::runtime::Module run_mod = (*pfr)(json, mod, (int)dev.device_type, (int)dev.device_id); auto set_input_f = run_mod.GetFunction("set_input_zero_copy", false); diff --git a/tests/micro/test_runtime_micro_on_arm.py b/tests/micro/test_runtime_micro_on_arm.py index 45ca8e74323c..d9742e72c2e9 100644 --- a/tests/micro/test_runtime_micro_on_arm.py +++ b/tests/micro/test_runtime_micro_on_arm.py @@ -19,7 +19,7 @@ import numpy as np import tvm from tvm import te -from tvm.contrib import graph_runtime, utils +from tvm.contrib import graph_executor, utils from tvm import relay import tvm.micro as micro from tvm.micro import create_micro_mod @@ -60,7 +60,7 @@ def relay_micro_build(func, dev_config, params=None): graph, c_mod, params = relay.build(func, target=TARGET, params=params) micro_mod = micro.create_micro_mod(c_mod, dev_config) ctx = tvm.micro_dev(0) - mod = graph_runtime.create(graph, micro_mod, ctx) + mod = graph_executor.create(graph, micro_mod, ctx) mod.set_input(**params) return mod @@ -171,7 +171,7 @@ def test_workspace_add(): tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 2.0) -def test_graph_runtime(): +def test_graph_executor(): """Test a program which uses the graph runtime.""" if not tvm.runtime.enabled("micro_dev"): return @@ -347,7 +347,7 @@ def test_inactive_session_use(): print() print("finished workspace add test") input("[press enter to continue]") - test_graph_runtime() + test_graph_executor() print() print("finished graph runtime test") input("[press enter to continue]")
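The microTVM tests in this patch (test_runtime_micro_on_arm.py above, test_zephyr.py below) converge on the renamed helper tvm.micro.create_local_graph_executor. A sketch of that pattern: `session` and `factory` are assumed to already exist (an open tvm.micro.Session, e.g. from a helper like the tests' _make_session, and the result of tvm.relay.build), since bringing up real micro hardware is out of scope here:

    import numpy as np
    import tvm
    import tvm.micro

    def run_on_micro(session, factory):
        # Wire the compiled graph JSON to the device-side system library.
        graph_mod = tvm.micro.create_local_graph_executor(
            factory.get_json(), session.get_system_lib(), session.device
        )
        # "a" is a placeholder input name; use the names from your Relay module.
        a = tvm.nd.array(np.array([2, 3], dtype="uint8"), device=session.device)
        graph_mod.set_input("a", a)
        graph_mod.run()
        return graph_mod.get_output(0).asnumpy()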
diff --git a/tests/micro/zephyr/test_zephyr.py b/tests/micro/zephyr/test_zephyr.py index 003cd54bba90..b4731f16d99f 100644 --- a/tests/micro/zephyr/test_zephyr.py +++ b/tests/micro/zephyr/test_zephyr.py @@ -198,7 +198,7 @@ def test_relay(platform, west_cmd): graph, mod, params = tvm.relay.build(func, target=target) with _make_session(model, target, zephyr_board, west_cmd, mod) as session: - graph_mod = tvm.micro.create_local_graph_runtime( + graph_mod = tvm.micro.create_local_graph_executor( graph, session.get_system_lib(), session.device ) graph_mod.set_input(**params) @@ -239,7 +239,7 @@ def test_onnx(platform, west_cmd): graph = lowered.get_json() with _make_session(model, target, zephyr_board, west_cmd, lowered.lib) as session: - graph_mod = tvm.micro.create_local_graph_runtime( + graph_mod = tvm.micro.create_local_graph_executor( graph, session.get_system_lib(), session.device ) @@ -318,7 +318,7 @@ def check_result(relay_mod, model, zephyr_board, west_cmd, map_inputs, out_shape graph, mod, params = tvm.relay.build(relay_mod, target=target) with _make_session(model, target, zephyr_board, west_cmd, mod) as session: - rt_mod = tvm.micro.create_local_graph_runtime( + rt_mod = tvm.micro.create_local_graph_executor( graph, session.get_system_lib(), session.device ) rt_mod.set_input(**params) diff --git a/tests/python/contrib/test_arm_compute_lib/infrastructure.py b/tests/python/contrib/test_arm_compute_lib/infrastructure.py index 9a9bf69958f5..35f345cea78a 100644 --- a/tests/python/contrib/test_arm_compute_lib/infrastructure.py +++ b/tests/python/contrib/test_arm_compute_lib/infrastructure.py @@ -24,7 +24,7 @@ import tvm from tvm import relay from tvm import rpc -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.op.contrib import arm_compute_lib from tvm.contrib import utils from tvm.autotvm.measure import request_remote @@ -214,7 +214,7 @@ def build_and_run( raise Exception(err_msg) lib = update_lib(lib, device.device, device.cross_compile) - gen_module = graph_runtime.GraphModule(lib["default"](device.device.cpu(0))) + gen_module = graph_executor.GraphModule(lib["default"](device.device.cpu(0))) gen_module.set_input(**inputs) out = [] for _ in range(no_runs): diff --git a/tests/python/contrib/test_bnns/infrastructure.py b/tests/python/contrib/test_bnns/infrastructure.py index 0107de54a04f..d046ee9ad0dd 100644 --- a/tests/python/contrib/test_bnns/infrastructure.py +++ b/tests/python/contrib/test_bnns/infrastructure.py @@ -25,7 +25,7 @@ import tvm from tvm import relay from tvm import rpc -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.op.contrib.bnns import partition_for_bnns from tvm.contrib import utils from tvm.autotvm.measure import request_remote @@ -171,7 +171,7 @@ def build_and_run( raise Exception(err_msg) lib = update_lib(lib, device.device, device.cross_compile) - gen_module = graph_runtime.GraphModule(lib["default"](device.device.cpu(0))) + gen_module = graph_executor.GraphModule(lib["default"](device.device.cpu(0))) gen_module.set_input(**inputs) out = [] for _ in range(no_runs): diff --git a/tests/python/contrib/test_bnns/test_onnx_topologies.py b/tests/python/contrib/test_bnns/test_onnx_topologies.py index 7c8dfa6b5e9a..25c4bc483333 100644 --- a/tests/python/contrib/test_bnns/test_onnx_topologies.py +++ b/tests/python/contrib/test_bnns/test_onnx_topologies.py @@ -21,7 +21,7 @@ import tvm from tvm import relay from tvm.relay import transform -from tvm.contrib import utils, graph_runtime +from tvm.contrib import utils, graph_executor from tvm.contrib.download import download_testdata from tvm.relay.op.contrib.bnns
import partition_for_bnns @@ -118,7 +118,7 @@ def run(mod, target, simplify=True, with_bnns=False): dev = tvm.cpu(0) loaded_lib = tvm.runtime.load_module(path_dso) - module = graph_runtime.GraphModule(loaded_lib["default"](dev)) + module = graph_executor.GraphModule(loaded_lib["default"](dev)) module.run() return module.get_output(0).asnumpy() diff --git a/tests/python/contrib/test_coreml_codegen.py b/tests/python/contrib/test_coreml_codegen.py index f8baf9f3530d..b93c489fdac6 100644 --- a/tests/python/contrib/test_coreml_codegen.py +++ b/tests/python/contrib/test_coreml_codegen.py @@ -105,7 +105,7 @@ def test_compile_and_run(): with relay.build_config(opt_level=3): lib = relay.build(_create_graph_annotated(), target=target) - m = tvm.contrib.graph_runtime.GraphModule(lib["default"](dev)) + m = tvm.contrib.graph_executor.GraphModule(lib["default"](dev)) shape = (10, 10) x_data = np.random.rand(*shape).astype("float32") diff --git a/tests/python/contrib/test_ethosn/infrastructure.py b/tests/python/contrib/test_ethosn/infrastructure.py index 791ef4a2a105..59021cf86211 100644 --- a/tests/python/contrib/test_ethosn/infrastructure.py +++ b/tests/python/contrib/test_ethosn/infrastructure.py @@ -20,7 +20,7 @@ from __future__ import absolute_import, print_function import tvm from tvm import relay -from tvm.contrib import utils, graph_runtime, download +from tvm.contrib import utils, graph_executor, download from hashlib import md5 from itertools import zip_longest, combinations import numpy as np @@ -211,7 +211,7 @@ def run(lib, inputs, outputs, npu=True): lib_path = temp.relpath(lib_name) lib.export_library(lib_path) lib = tvm.runtime.load_module(lib_path) - module = graph_runtime.GraphModule(lib["default"](tvm.cpu())) + module = graph_executor.GraphModule(lib["default"](tvm.cpu())) module.set_input(**inputs) module.run() out = [module.get_output(i) for i in range(outputs)] diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index 0e25ca24b2a6..c3ada5f0df47 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -24,7 +24,7 @@ from tvm import relay, runtime from tvm.relay.op.contrib import tensorrt -from tvm.contrib import graph_runtime, utils +from tvm.contrib import graph_executor, utils from tvm.runtime.vm import VirtualMachine from tvm.relay import Any, GlobalVar, transform from tvm.relay.expr_functor import ExprVisitor @@ -252,7 +252,7 @@ def test_tensorrt_not_compatible(): results = exec.evaluate()(x_data) -def test_tensorrt_serialize_graph_runtime(): +def test_tensorrt_serialize_graph_executor(): if skip_codegen_test(): return import mxnet as mx @@ -273,7 +273,7 @@ def compile_graph(mod, params): return graph, lib, params def run_graph(graph, lib, params): - mod_ = graph_runtime.create(graph, lib, device=tvm.gpu(0)) + mod_ = graph_executor.create(graph, lib, device=tvm.gpu(0)) mod_.load_params(params) mod_.run(data=i_data) res = mod_.get_output(0) diff --git a/tests/python/contrib/test_vitis_ai/infrastructure.py b/tests/python/contrib/test_vitis_ai/infrastructure.py index acc538822c3c..501ee255c143 100644 --- a/tests/python/contrib/test_vitis_ai/infrastructure.py +++ b/tests/python/contrib/test_vitis_ai/infrastructure.py @@ -34,7 +34,7 @@ from tvm.relay.op.contrib.vitis_ai import annotation from tvm.relay.build_module import bind_params_by_name from tvm.contrib.target import vitis_ai -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.contrib import utils @@ -154,7 +154,7 @@ 
def verify_result( lib = build_module(mod, target, params=params, dpu_target=dpu_target, tvm_ops=tvm_ops) lib = update_lib(lib) - rt_mod = graph_runtime.GraphModule(lib["default"](tvm.cpu())) + rt_mod = graph_executor.GraphModule(lib["default"](tvm.cpu())) for name, data in map_inputs.items(): rt_mod.set_input(name, data) diff --git a/tests/python/frontend/caffe/test_forward.py b/tests/python/frontend/caffe/test_forward.py index 2e8807564239..d0f87fcc21c7 100644 --- a/tests/python/frontend/caffe/test_forward.py +++ b/tests/python/frontend/caffe/test_forward.py @@ -36,7 +36,7 @@ import tvm from tvm import relay -from tvm.contrib import utils, graph_runtime +from tvm.contrib import utils, graph_executor from tvm.contrib.download import download_testdata CURRENT_DIR = os.path.join(os.path.expanduser("~"), ".tvm_test_data", "caffe_test") @@ -205,7 +205,7 @@ def _run_tvm(data, proto_file, blob_file): with tvm.transform.PassContext(opt_level=3): lib = relay.build(mod, target=target, target_host=target_host, params=params) dtype = "float32" - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) if isinstance(data, (tuple, list)): for idx, d in enumerate(data): m.set_input("data" + str(idx), tvm.nd.array(d.astype(dtype))) diff --git a/tests/python/frontend/caffe2/test_forward.py b/tests/python/frontend/caffe2/test_forward.py index 879613f6bd08..1081b087c468 100644 --- a/tests/python/frontend/caffe2/test_forward.py +++ b/tests/python/frontend/caffe2/test_forward.py @@ -17,7 +17,7 @@ import numpy as np import tvm from tvm import te -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm import relay from model_zoo import c2_squeezenet, c2_resnet50, c2_vgg19 from caffe2.python import workspace, core @@ -42,7 +42,7 @@ def get_tvm_output(model, input_data, target, device, output_shape, output_dtype with tvm.transform.PassContext(opt_level=3): lib = relay.build(mod, target, params=params) - m = graph_runtime.GraphModule(lib["default"](device)) + m = graph_executor.GraphModule(lib["default"](device)) # set inputs m.set_input(input_names, tvm.nd.array(input_data.astype(input_data.dtype))) diff --git a/tests/python/frontend/coreml/test_forward.py b/tests/python/frontend/coreml/test_forward.py index 2c1295b28381..c227c3955c5b 100644 --- a/tests/python/frontend/coreml/test_forward.py +++ b/tests/python/frontend/coreml/test_forward.py @@ -21,7 +21,7 @@ import tvm from tvm import te -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm import topi import tvm.topi.testing from tvm import relay @@ -37,7 +37,7 @@ def get_tvm_output( ): with tvm.transform.PassContext(opt_level=3): lib = relay.build(func, target, params=params) - m = graph_runtime.GraphModule(lib["default"](device)) + m = graph_executor.GraphModule(lib["default"](device)) # set inputs m.set_input(input_name, tvm.nd.array(x.astype(dtype))) m.run() @@ -88,9 +88,9 @@ def run_tvm_graph( with tvm.transform.PassContext(opt_level=3): lib = relay.build(mod, target, params=params) - from tvm.contrib import graph_runtime + from tvm.contrib import graph_executor - m = graph_runtime.GraphModule(lib["default"](device)) + m = graph_executor.GraphModule(lib["default"](device)) # set inputs if isinstance(input_data, list): for i, e in enumerate(input_name): diff --git a/tests/python/frontend/darknet/test_forward.py b/tests/python/frontend/darknet/test_forward.py index 72457bf5cd46..3bb8e93d3d22 100644 --- 
a/tests/python/frontend/darknet/test_forward.py +++ b/tests/python/frontend/darknet/test_forward.py @@ -24,7 +24,7 @@ import numpy as np import tvm from tvm import te -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.contrib.download import download_testdata download_testdata.__test__ = False @@ -80,7 +80,7 @@ def _get_tvm_output(net, data, build_dtype="float32", states=None): # Execute on TVM dev = tvm.cpu(0) - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) # set inputs m.set_input("data", tvm.nd.array(data.astype(dtype))) if states: diff --git a/tests/python/frontend/keras/test_forward.py b/tests/python/frontend/keras/test_forward.py index bdd7d7b73d53..c7f734b891dd 100644 --- a/tests/python/frontend/keras/test_forward.py +++ b/tests/python/frontend/keras/test_forward.py @@ -18,7 +18,7 @@ import tvm from tvm import te from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import keras import tvm.testing @@ -89,7 +89,7 @@ def get_tvm_output(xs, target, dev, dtype="float32"): mod, params = relay.frontend.from_keras(keras_model, shape_dict, layout=layout) with tvm.transform.PassContext(opt_level=2): lib = relay.build(mod, target, params=params) - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) for name, x in zip(keras_model.input_names, xs): m.set_input(name, tvm.nd.array(x.astype(dtype))) m.run() diff --git a/tests/python/frontend/mxnet/test_forward.py b/tests/python/frontend/mxnet/test_forward.py index 19b31d6fd3ed..c58f5dc99fff 100644 --- a/tests/python/frontend/mxnet/test_forward.py +++ b/tests/python/frontend/mxnet/test_forward.py @@ -19,7 +19,7 @@ import tvm from tvm import te -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm import relay import mxnet as mx @@ -78,7 +78,7 @@ def get_tvm_output(symbol, x, args, auxs, target, dev, dtype="float32"): ) with tvm.transform.PassContext(opt_level=3): lib = relay.build(mod, target, params=params) - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) # set inputs m.set_input("data", tvm.nd.array(x.astype(dtype))) m.run() diff --git a/tests/python/frontend/mxnet/test_qnn_ops_utils.py b/tests/python/frontend/mxnet/test_qnn_ops_utils.py index fa5c5d9715cc..a200e06ed2d0 100644 --- a/tests/python/frontend/mxnet/test_qnn_ops_utils.py +++ b/tests/python/frontend/mxnet/test_qnn_ops_utils.py @@ -18,7 +18,7 @@ import numpy as np import tvm from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.frontend.mxnet_qnn_op_utils import ( dequantize_mxnet_min_max, quantize_mxnet_min_max, @@ -41,7 +41,7 @@ def dequantize_test_driver(in_dtype, quant_args, in_data, verify_output_data): mod = tvm.IRModule.from_expr(mod) with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(mod, "llvm", params=None) - rt_mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) rt_mod.set_input(input_data=in_data) rt_mod.set_input(**params) rt_mod.run() @@ -120,7 +120,7 @@ def quantize_test_driver(out_dtype, quant_args, in_data, verify_output_data): mod = tvm.IRModule.from_expr(mod) with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(mod, "llvm", params=None) - rt_mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + 
rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) rt_mod.set_input(input_data=in_data) rt_mod.set_input(**params) rt_mod.run() diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index 9116b0976409..b72f475b26f8 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -23,7 +23,7 @@ import tvm.topi.testing import tvm from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import scipy import tvm.testing @@ -78,7 +78,7 @@ def get_tvm_output( with tvm.transform.PassContext(opt_level=1): graph, lib, params = relay.build(mod, target, params=params) - m = graph_runtime.create(graph, lib, device) + m = graph_executor.create(graph, lib, device) # set inputs if isinstance(input_data, list): for i, e in enumerate(input_names): diff --git a/tests/python/frontend/pytorch/qnn_test.py b/tests/python/frontend/pytorch/qnn_test.py index 29c69abba542..5b0b65f7b128 100644 --- a/tests/python/frontend/pytorch/qnn_test.py +++ b/tests/python/frontend/pytorch/qnn_test.py @@ -49,7 +49,7 @@ def get_tvm_runtime(script_module, input_name, ishape): # also not to make CI too slow lib = relay.build(mod, target="llvm", params=params) - runtime = tvm.contrib.graph_runtime.GraphModule(lib["default"](tvm.cpu(0))) + runtime = tvm.contrib.graph_executor.GraphModule(lib["default"](tvm.cpu(0))) return runtime diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py index 6416043068da..9ec52987c354 100644 --- a/tests/python/frontend/pytorch/test_forward.py +++ b/tests/python/frontend/pytorch/test_forward.py @@ -27,7 +27,7 @@ from torch.nn import functional as F import tvm from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.contrib.nvcc import have_fp16 import tvm.testing from packaging import version as package_version @@ -208,7 +208,7 @@ def verify_model(model_name, input_data=[], custom_convert_map={}, rtol=1e-5, at with tvm.transform.PassContext(opt_level=3): for target, dev in tvm.testing.enabled_targets(): relay_graph, relay_lib, relay_params = relay.build(mod, target=target, params=params) - relay_model = graph_runtime.create(relay_graph, relay_lib, dev) + relay_model = graph_executor.create(relay_graph, relay_lib, dev) relay_model.set_input(**relay_params) for name, inp in compiled_input.items(): relay_model.set_input(name, inp) @@ -3590,7 +3590,7 @@ def test_forward_pretrained_bert_base_uncased(): # -------------- dev = tvm.device(target, 0) - relay_model = graph_runtime.create(relay_graph, relay_lib, dev) + relay_model = graph_executor.create(relay_graph, relay_lib, dev) relay_model.set_input(**relay_params) relay_model.set_input(input_1, tokens_tensor) relay_model.set_input(input_2, segments_tensors) diff --git a/tests/python/frontend/tensorflow/test_bn_dynamic.py b/tests/python/frontend/tensorflow/test_bn_dynamic.py index 81a2db4704e6..4eb0d01ef102 100644 --- a/tests/python/frontend/tensorflow/test_bn_dynamic.py +++ b/tests/python/frontend/tensorflow/test_bn_dynamic.py @@ -66,9 +66,9 @@ def verify_fused_batch_norm(shape): mod, params = relay.frontend.from_tensorflow(constant_graph, outputs=["output"]) with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(mod, target=device, params=params) - from tvm.contrib import graph_runtime + from tvm.contrib import graph_executor - m = graph_runtime.create(graph, lib, dev) + m = 
graph_executor.create(graph, lib, dev) m.set_input(**params) m.set_input("input", data) m.run() diff --git a/tests/python/frontend/tensorflow/test_forward.py b/tests/python/frontend/tensorflow/test_forward.py index 4accd16a2567..53f424b922e2 100644 --- a/tests/python/frontend/tensorflow/test_forward.py +++ b/tests/python/frontend/tensorflow/test_forward.py @@ -110,7 +110,7 @@ def run_tvm_graph( target="llvm", out_names=None, opt_level=3, - mode="graph_runtime", + mode="graph_executor", cuda_layout="NCHW", layout=None, disabled_pass=None, @@ -165,9 +165,9 @@ def run_tvm_graph( else: with tvm.transform.PassContext(opt_level=opt_level, disabled_pass=disabled_pass): graph, lib, params = relay.build(mod, target, target_host, params) - from tvm.contrib import graph_runtime + from tvm.contrib import graph_executor - m = graph_runtime.create(graph, lib, dev) + m = graph_executor.create(graph, lib, dev) # set inputs for e, i in zip(input_node, input_data): if e != "": @@ -207,7 +207,7 @@ def compare_tf_with_tvm( init_global_variables=False, no_gpu=False, opt_level=3, - mode="graph_runtime", + mode="graph_executor", cuda_layout="NCHW", add_shapes_to_graph_def=True, targets=None, @@ -3856,10 +3856,10 @@ def _get_tvm_graph_module(graph_def): target = "llvm" with tvm.transform.PassContext(opt_level=0): graph, lib, params = relay.build(mod, target, params=params) - from tvm.contrib import graph_runtime + from tvm.contrib import graph_executor dev = tvm.cpu(0) - return params, graph_runtime.create(graph, lib, dev) + return params, graph_executor.create(graph, lib, dev) def _do_tvm_sample(model, data, in_states, params, num_samples): """Sampled from the model""" @@ -4073,7 +4073,7 @@ def test_forward_floor(): def test_forward_relu(): ishape = (1, 3, 10, 10) inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32) - for mode in ["graph_runtime", "vm"]: + for mode in ["graph_executor", "vm"]: with tf.Graph().as_default(): in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) tf.nn.relu(in1) @@ -4083,7 +4083,7 @@ def test_forward_relu(): def test_forward_leaky_relu(): ishape = (1, 3, 10, 10) inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32) - for mode in ["graph_runtime", "vm"]: + for mode in ["graph_executor", "vm"]: with tf.Graph().as_default(): in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) tf.nn.leaky_relu(in1, alpha=0.4) diff --git a/tests/python/frontend/tflite/test_forward.py b/tests/python/frontend/tflite/test_forward.py index 05e0f076e079..b02d246d1ae5 100644 --- a/tests/python/frontend/tflite/test_forward.py +++ b/tests/python/frontend/tflite/test_forward.py @@ -160,7 +160,7 @@ def run_tvm_graph( num_output=1, target="llvm", out_names=None, - mode="graph_runtime", + mode="graph_executor", ): """ Generic function to compile on relay and execute on tvm """ # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1 @@ -208,9 +208,9 @@ def run_tvm_graph( lib = relay.build(mod, target, params=params) dev = tvm.device(target, 0) - from tvm.contrib import graph_runtime + from tvm.contrib import graph_executor - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) # set inputs for i, e in enumerate(input_node): m.set_input(e, tvm.nd.array(input_data[i].astype(input_data[i].dtype))) @@ -264,7 +264,7 @@ def compare_tflite_with_tvm( out_names=None, quantized=False, input_range=None, - mode="graph_runtime", + mode="graph_executor", experimental_new_converter=False, ): """Generic function 
to generate and compare TFLite and TVM output""" diff --git a/tests/python/nightly/quantization/test_quantization_accuracy.py b/tests/python/nightly/quantization/test_quantization_accuracy.py index 4ecd5dce4649..57fa49e93a04 100644 --- a/tests/python/nightly/quantization/test_quantization_accuracy.py +++ b/tests/python/nightly/quantization/test_quantization_accuracy.py @@ -98,7 +98,7 @@ def eval_acc( with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(model, target) # create runtime module - m = tvm.contrib.graph_runtime.create(graph, lib, device) + m = tvm.contrib.graph_executor.create(graph, lib, device) m.set_input(**params) # setup evaluaiton metric diff --git a/tests/python/relay/benchmarking/benchmark_vm.py b/tests/python/relay/benchmarking/benchmark_vm.py index c4695f971e97..5136e52a1213 100644 --- a/tests/python/relay/benchmarking/benchmark_vm.py +++ b/tests/python/relay/benchmarking/benchmark_vm.py @@ -19,7 +19,7 @@ import tvm from tvm import te -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm import relay from tvm.runtime import container from tvm.runtime import vm as vm_rt @@ -36,13 +36,13 @@ def benchmark_execution( dtype="float32", model="unknown", ): - def get_graph_runtime_output( + def get_graph_executor_output( mod, data, params, target, dev, dtype="float32", number=2, repeat=20 ): with tvm.transform.PassContext(opt_level=3): lib = relay.build(mod, target, params=params) - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) # set inputs m.set_input("data", data) m.run() @@ -82,7 +82,7 @@ def get_vm_output(mod, data, params, target, dev, dtype="float32", number=2, rep data = np.random.uniform(size=data_shape).astype(dtype) for target, dev in testing.enabled_targets(): - tvm_out = get_graph_runtime_output( + tvm_out = get_graph_executor_output( mod, tvm.nd.array(data.astype(dtype)), params, target, dev, dtype ) vm_out = get_vm_output(mod, tvm.nd.array(data.astype(dtype)), params, target, dev, dtype) diff --git a/tests/python/relay/test_auto_scheduler_layout_rewrite_networks.py b/tests/python/relay/test_auto_scheduler_layout_rewrite_networks.py index 95f1177da024..8466fc1700b0 100644 --- a/tests/python/relay/test_auto_scheduler_layout_rewrite_networks.py +++ b/tests/python/relay/test_auto_scheduler_layout_rewrite_networks.py @@ -21,7 +21,7 @@ import tvm from tvm import relay, auto_scheduler -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import tvm.testing @@ -169,7 +169,7 @@ def tune_and_check(mod, data, weight): def get_output(data, lib): dev = tvm.cpu() - module = graph_runtime.GraphModule(lib["default"](dev)) + module = graph_executor.GraphModule(lib["default"](dev)) module.set_input("data", data) module.run() diff --git a/tests/python/relay/test_auto_scheduler_tuning.py b/tests/python/relay/test_auto_scheduler_tuning.py index af930e7f7f19..1250543a13ae 100644 --- a/tests/python/relay/test_auto_scheduler_tuning.py +++ b/tests/python/relay/test_auto_scheduler_tuning.py @@ -20,7 +20,7 @@ import numpy as np from tvm import auto_scheduler, relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import tvm.testing from test_auto_scheduler_task_extraction import get_network @@ -70,7 +70,7 @@ def tune_network(network, target): # Check the correctness def get_output(data, lib): dev = tvm.gpu() - module = graph_runtime.GraphModule(lib["default"](dev)) + module = 
graph_executor.GraphModule(lib["default"](dev)) module.set_input("data", data) module.run() return module.get_output(0).asnumpy() diff --git a/tests/python/relay/test_backend_graph_executor.py b/tests/python/relay/test_backend_graph_executor.py index 6f75c43579a7..b9553d79c3b6 100644 --- a/tests/python/relay/test_backend_graph_executor.py +++ b/tests/python/relay/test_backend_graph_executor.py @@ -18,7 +18,7 @@ import tvm from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.op import add import tvm.testing @@ -102,7 +102,7 @@ def test_with_params(): y_data = np.random.rand(1, 5).astype("float32") params = {"y": y_data} graph, lib, params = relay.build(tvm.IRModule.from_expr(func), "llvm", params=params) - mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) mod.set_input(**params) mod.set_input(x=x_data) mod.run() @@ -174,7 +174,7 @@ def unit_numpy(X, W): for target, dev in tvm.testing.enabled_targets(): with tvm.transform.PassContext(opt_level=2): graph, lib, params = relay.build(tvm.IRModule.from_expr(z), target) - m = graph_runtime.create(graph, lib, dev) + m = graph_executor.create(graph, lib, dev) m.set_input("X", tvm.nd.array(x.astype(dtype))) m.set_input("y", tvm.nd.array(y.astype(dtype))) m.set_input(**params) @@ -194,7 +194,7 @@ def test_compile_nested_tuples(): func = relay.Function([x], out) graph, lib, _ = relay.build(tvm.IRModule.from_expr(func), "llvm") - mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) x_data = np.random.uniform(size=(10,)).astype(np.float32) mod.set_input(x=x_data) diff --git a/tests/python/relay/test_cpp_build_module.py b/tests/python/relay/test_cpp_build_module.py index 6f13533f9318..7d2209a34835 100644 --- a/tests/python/relay/test_cpp_build_module.py +++ b/tests/python/relay/test_cpp_build_module.py @@ -48,7 +48,7 @@ def test_basic_build(): assert mod["main"] == func_in_mod, "relay.build changed module in-place" # test - rt = tvm.contrib.graph_runtime.GraphModule(lib["default"](dev)) + rt = tvm.contrib.graph_executor.GraphModule(lib["default"](dev)) rt.set_input("a", A) rt.run() out = rt.get_output(0) @@ -85,7 +85,7 @@ def test_fp16_build(): g_json, mmod, params = relay.build(func, "cuda", params=params) # test - rt = tvm.contrib.graph_runtime.create(g_json, mmod, dev) + rt = tvm.contrib.graph_executor.create(g_json, mmod, dev) rt.load_params(runtime.save_param_dict(params)) rt.run() out = rt.get_output(0) @@ -114,7 +114,7 @@ def test_fp16_conversion(target, dev): g_json, mmod, params = relay.build(tvm.IRModule.from_expr(func), target) # test - rt = tvm.contrib.graph_runtime.create(g_json, mmod, dev) + rt = tvm.contrib.graph_executor.create(g_json, mmod, dev) rt.set_input("x", X) rt.run() out = rt.get_output(0) diff --git a/tests/python/relay/test_external_codegen.py b/tests/python/relay/test_external_codegen.py index 9241f8e82745..2bc12a3a0547 100644 --- a/tests/python/relay/test_external_codegen.py +++ b/tests/python/relay/test_external_codegen.py @@ -63,11 +63,11 @@ def check_vm_result(): out = vm.run(**map_inputs) tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol) - def check_graph_runtime_result(): + def check_graph_executor_result(): with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]): json, lib, _ = relay.build(mod, target=target) lib = update_lib(lib) - rt_mod = tvm.contrib.graph_runtime.create(json, lib, 
device) + rt_mod = tvm.contrib.graph_executor.create(json, lib, device) for name, data in map_inputs.items(): rt_mod.set_input(name, data) @@ -78,7 +78,7 @@ def check_graph_runtime_result(): tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol) check_vm_result() - check_graph_runtime_result() + check_graph_executor_result() def set_external_func_attr(func, compiler, ext_symbol): @@ -353,7 +353,7 @@ def test_load_params_with_constants_in_ext_codegen(): graph_module = relay.build(mod, target="llvm", params=params) lib = update_lib(graph_module.get_lib()) - rt_mod = tvm.contrib.graph_runtime.create(graph_module.get_json(), lib, tvm.cpu(0)) + rt_mod = tvm.contrib.graph_executor.create(graph_module.get_json(), lib, tvm.cpu(0)) rt_mod.load_params(runtime.save_param_dict(graph_module.get_params())) diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py index 79d1f710cf41..bf5676d096f1 100644 --- a/tests/python/relay/test_json_runtime.py +++ b/tests/python/relay/test_json_runtime.py @@ -50,7 +50,7 @@ def check_result( compile_engine.get().clear() with tvm.transform.PassContext(opt_level=3): json, lib, param = relay.build(ref_mod, target=target, params=params) - rt_mod = tvm.contrib.graph_runtime.create(json, lib, device) + rt_mod = tvm.contrib.graph_executor.create(json, lib, device) for name, data in map_inputs.items(): rt_mod.set_input(name, data) @@ -70,11 +70,11 @@ def check_vm_result(): out = vm.run(**map_inputs) tvm.testing.assert_allclose(out.asnumpy(), ref_result, rtol=tol, atol=tol) - def check_graph_runtime_result(): + def check_graph_executor_result(): compile_engine.get().clear() with relay.build_config(opt_level=3): json, lib, param = relay.build(mod, target=target, params=params) - rt_mod = tvm.contrib.graph_runtime.create(json, lib, device) + rt_mod = tvm.contrib.graph_executor.create(json, lib, device) for name, data in map_inputs.items(): rt_mod.set_input(name, data) @@ -85,7 +85,7 @@ def check_graph_runtime_result(): tvm.testing.assert_allclose(out.asnumpy(), ref_result, rtol=tol, atol=tol) check_vm_result() - check_graph_runtime_result() + check_graph_executor_result() def test_conv2d(): diff --git a/tests/python/relay/test_op_fast_math.py b/tests/python/relay/test_op_fast_math.py index a82efb7ad2ad..7bcbc6839c4f 100644 --- a/tests/python/relay/test_op_fast_math.py +++ b/tests/python/relay/test_op_fast_math.py @@ -22,7 +22,7 @@ import tvm.relay as relay from tvm import topi from tvm import te -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor def test_fastmath(): @@ -43,7 +43,7 @@ def test_apply(relay_op, name, f_numpy, low, high, step, dtype="float32"): assert lib.get_function(func_name) dev = tvm.cpu(0) - m = graph_runtime.create(graph, lib, dev) + m = graph_executor.create(graph, lib, dev) # Set inputs m.set_input("x", tvm.nd.array(a_np, dev)) m.set_input(**params) diff --git a/tests/python/relay/test_op_level2.py b/tests/python/relay/test_op_level2.py index b9e7b075a998..c5843758c3d2 100644 --- a/tests/python/relay/test_op_level2.py +++ b/tests/python/relay/test_op_level2.py @@ -405,7 +405,7 @@ def run_test_conv2d_cuda( dev = tvm.device(target, 0) params = {"w": tvm.nd.array(kernel)} graph, lib, params = relay.build_module.build(mod, target=target, params=params) - module = tvm.contrib.graph_runtime.create(graph, lib, dev) + module = tvm.contrib.graph_executor.create(graph, lib, dev) module.set_input("x", tvm.nd.array(data)) module.set_input(**params) module.run() @@ -668,7 +668,7 @@ def 
run_test_conv3d_cuda( dev = tvm.device(target, 0) params = {"w": tvm.nd.array(kernel)} graph, lib, params = relay.build_module.build(mod, target=target, params=params) - module = tvm.contrib.graph_runtime.create(graph, lib, dev) + module = tvm.contrib.graph_executor.create(graph, lib, dev) module.set_input("x", tvm.nd.array(data)) module.set_input(**params) module.run() diff --git a/tests/python/relay/test_op_qnn_concatenate.py b/tests/python/relay/test_op_qnn_concatenate.py index a9207f67fc68..453875301af9 100644 --- a/tests/python/relay/test_op_qnn_concatenate.py +++ b/tests/python/relay/test_op_qnn_concatenate.py @@ -19,7 +19,7 @@ from tvm import te import numpy as np from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import tvm.topi.testing diff --git a/tests/python/relay/test_op_qnn_conv2_transpose.py b/tests/python/relay/test_op_qnn_conv2_transpose.py index 93776d265a49..e4e02279efd6 100644 --- a/tests/python/relay/test_op_qnn_conv2_transpose.py +++ b/tests/python/relay/test_op_qnn_conv2_transpose.py @@ -21,7 +21,7 @@ from tvm import relay from tvm.relay import transform from tvm.relay.testing import run_infer_type -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.testing.temp_op_attr import TempOpAttr @@ -191,7 +191,7 @@ def get_output(func, golden_inputs): golden_data, golden_weight = golden_inputs params = {"kernel": golden_weight} graph, lib, params = relay.build(func, "llvm", params=params) - mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) mod.set_input("data", golden_data) mod.set_input(**params) mod.run() diff --git a/tests/python/relay/test_op_qnn_conv2d.py b/tests/python/relay/test_op_qnn_conv2d.py index 87b1f732e2bd..928450312147 100644 --- a/tests/python/relay/test_op_qnn_conv2d.py +++ b/tests/python/relay/test_op_qnn_conv2d.py @@ -21,7 +21,7 @@ from tvm import relay from tvm.relay import transform from tvm.relay.testing import run_infer_type -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.testing.temp_op_attr import TempOpAttr # We use llvm target for testing functionality. 
`llvm` points to an older Intel @@ -198,7 +198,7 @@ def get_output(func, golden_inputs): golden_data, golden_weight = golden_inputs params = {"kernel": golden_weight} graph, lib, params = relay.build(func, "llvm", params=params) - mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) mod.set_input("data", golden_data) mod.set_input(**params) mod.run() @@ -722,7 +722,7 @@ def test_tflite_large_irregular(): with tvm.transform.PassContext(opt_level=2): params = {"kernel": golden_weight} graph, lib, params = relay.build(qnn_func, "llvm", params=params) - mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) mod.set_input("data", golden_data) mod.set_input(**params) mod.run() @@ -767,7 +767,7 @@ def test_tflite_output_multiplier_greater_than_one(): with tvm.transform.PassContext(opt_level=2): params = {"kernel": golden_weight} graph, lib, params = relay.build(qnn_func, "llvm", params=params) - mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) mod.set_input("data", golden_data) mod.set_input(**params) mod.run() @@ -830,7 +830,7 @@ def test_tflite_anistropic_strides(): with tvm.transform.PassContext(opt_level=2): params = {"kernel": golden_weight} graph, lib, params = relay.build(qnn_func, "llvm", params=params) - mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) mod.set_input("data", golden_data) mod.set_input(**params) mod.run() diff --git a/tests/python/relay/test_op_qnn_dense.py b/tests/python/relay/test_op_qnn_dense.py index ef74ce3edcf6..c47ac6b35ec7 100644 --- a/tests/python/relay/test_op_qnn_dense.py +++ b/tests/python/relay/test_op_qnn_dense.py @@ -19,7 +19,7 @@ from tvm import te import numpy as np from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.testing.temp_op_attr import TempOpAttr @@ -211,7 +211,7 @@ def qnn_dense_driver(test_configuration): mod = relay.qnn.transform.CanonicalizeOps()(mod) with tvm.transform.PassContext(opt_level=2): graph, lib, params = relay.build(mod, "llvm", params=None) - mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) mod.set_input(quantized_data_name, test_configuration[quantized_data_name]) mod.set_input(quantized_kernel_name, test_configuration[quantized_kernel_name]) if test_configuration[bias_name] is not None: diff --git a/tests/python/relay/test_op_qnn_dequantize.py b/tests/python/relay/test_op_qnn_dequantize.py index 85f97a9c073c..ab398bbc1316 100644 --- a/tests/python/relay/test_op_qnn_dequantize.py +++ b/tests/python/relay/test_op_qnn_dequantize.py @@ -19,7 +19,7 @@ from tvm import te import numpy as np from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.testing import run_infer_type @@ -35,7 +35,7 @@ def dequantize_test_driver(in_dtype, quant_args, in_data, verify_output_data, ax mod = tvm.IRModule.from_expr(mod) with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(mod, "llvm", params=None) - rt_mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) rt_mod.set_input(input_data=in_data) rt_mod.set_input(**params) rt_mod.run() @@ -140,7 +140,7 @@ def test_dynamic_dequantize(): with 
relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): lib = relay.build(mod, target=target) - module = graph_runtime.GraphModule(lib["default"](dev)) + module = graph_executor.GraphModule(lib["default"](dev)) module.set_input(**{"x": data, "scale": scale, "zp": zp}) module.run() diff --git a/tests/python/relay/test_op_qnn_mul.py b/tests/python/relay/test_op_qnn_mul.py index 10721b573234..8ff3ab5c3df2 100644 --- a/tests/python/relay/test_op_qnn_mul.py +++ b/tests/python/relay/test_op_qnn_mul.py @@ -19,7 +19,7 @@ from tvm import te import numpy as np from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import tvm.topi.testing # "unquantize" a quantized tensor diff --git a/tests/python/relay/test_op_qnn_quantize.py b/tests/python/relay/test_op_qnn_quantize.py index e92344f7dcfa..2ae688ef4784 100644 --- a/tests/python/relay/test_op_qnn_quantize.py +++ b/tests/python/relay/test_op_qnn_quantize.py @@ -19,7 +19,7 @@ from tvm import te import numpy as np from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.testing import run_infer_type @@ -39,7 +39,7 @@ def quantize_test_driver(in_dtype, quant_args, axis, out_dtype, in_data, verify_ mod = tvm.IRModule.from_expr(mod) with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(mod, "llvm", params=None) - rt_mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) rt_mod.set_input(input_data=in_data) rt_mod.set_input(**params) rt_mod.run() @@ -155,7 +155,7 @@ def test_dynamic_quantize(): with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): lib = relay.build(mod, target=target) - module = graph_runtime.GraphModule(lib["default"](dev)) + module = graph_executor.GraphModule(lib["default"](dev)) module.set_input(**{"x": data, "scale": scale, "zp": zp}) module.run() diff --git a/tests/python/relay/test_op_qnn_requantize.py b/tests/python/relay/test_op_qnn_requantize.py index 5aa3f7f7fdc8..5e61fad7676d 100644 --- a/tests/python/relay/test_op_qnn_requantize.py +++ b/tests/python/relay/test_op_qnn_requantize.py @@ -19,7 +19,7 @@ from tvm import te import numpy as np from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor roundings = ["UPWARD", "TONEAREST"] @@ -28,7 +28,7 @@ def verify(mod, goldens): with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(mod, "llvm", params=None) golden_data, golden_output = goldens - rt_mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) rt_mod.set_input("quantized_data", golden_data) rt_mod.set_input(**params) rt_mod.run() diff --git a/tests/python/relay/test_op_qnn_simulated_dequantize.py b/tests/python/relay/test_op_qnn_simulated_dequantize.py index 266e9d73b3cb..3aecd935b62b 100644 --- a/tests/python/relay/test_op_qnn_simulated_dequantize.py +++ b/tests/python/relay/test_op_qnn_simulated_dequantize.py @@ -19,7 +19,7 @@ from tvm import te import numpy as np from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.runtime.vm import VirtualMachine from tvm.topi.nn.qnn import SQNN_DTYPE_TO_CODE @@ -39,7 +39,7 @@ def dequantize_test_driver(in_dtype, quant_args, axis, in_data): mod = tvm.IRModule.from_expr(mod) with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(mod, "llvm", params=None) - 
rt_mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) rt_mod.set_input(input_data=in_data) rt_mod.set_input(**params) rt_mod.run() diff --git a/tests/python/relay/test_op_qnn_simulated_quantize.py b/tests/python/relay/test_op_qnn_simulated_quantize.py index 8a15a037d8ba..fd9d13168e01 100644 --- a/tests/python/relay/test_op_qnn_simulated_quantize.py +++ b/tests/python/relay/test_op_qnn_simulated_quantize.py @@ -19,7 +19,7 @@ from tvm import te import numpy as np from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.runtime.vm import VirtualMachine from tvm.topi.nn.qnn import SQNN_DTYPE_TO_CODE @@ -47,7 +47,7 @@ def quantize_test_driver(in_dtype, quant_args, axis, out_dtype, in_data): mod = tvm.IRModule.from_expr(mod) with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(mod, "llvm", params=None) - rt_mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) rt_mod.set_input(input_data=in_data) rt_mod.set_input(**params) rt_mod.run() diff --git a/tests/python/relay/test_param_dict.py b/tests/python/relay/test_param_dict.py index 7bc4ab422a64..2272883fc39c 100644 --- a/tests/python/relay/test_param_dict.py +++ b/tests/python/relay/test_param_dict.py @@ -24,7 +24,7 @@ from tvm.relay.op import add from tvm import relay from tvm import rpc -from tvm.contrib import utils, graph_runtime +from tvm.contrib import utils, graph_executor def test_save_load(): @@ -60,7 +60,7 @@ def test_bigendian_rpc_param(): if host is None: return - def verify_graph_runtime(remote, target, shape, dtype): + def verify_graph_executor(remote, target, shape, dtype): x = relay.var("x") y = relay.const(1) z = relay.add(x, y) @@ -76,7 +76,7 @@ def verify_graph_runtime(remote, target, shape, dtype): remote.upload(path_dso) lib = remote.load_module("dev_lib.o") dev = remote.cpu(0) - mod = graph_runtime.create(graph, lib, dev) + mod = graph_executor.create(graph, lib, dev) mod.load_params(runtime.save_param_dict(params)) mod.run() out = mod.get_output(0, tvm.nd.empty(shape, dtype=dtype, device=dev)) @@ -86,7 +86,7 @@ def verify_graph_runtime(remote, target, shape, dtype): remote = rpc.connect(host, port) target = "llvm -mtriple=powerpc-linux-gnu" for dtype in ["float32", "float64", "int32", "int8"]: - verify_graph_runtime(remote, target, (10,), dtype) + verify_graph_executor(remote, target, (10,), dtype) if __name__ == "__main__": diff --git a/tests/python/relay/test_pass_annotate_target.py b/tests/python/relay/test_pass_annotate_target.py index 52deb4e89ef9..c756d74ff0be 100644 --- a/tests/python/relay/test_pass_annotate_target.py +++ b/tests/python/relay/test_pass_annotate_target.py @@ -60,11 +60,11 @@ def check_vm_result(): out = vm.run(**map_inputs) tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol) - def check_graph_runtime_result(): + def check_graph_executor_result(): with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]): json, lib, param = relay.build(mod, target=target, params=params) lib = update_lib(lib) - rt_mod = tvm.contrib.graph_runtime.create(json, lib, device) + rt_mod = tvm.contrib.graph_executor.create(json, lib, device) for name, data in map_inputs.items(): rt_mod.set_input(name, data) @@ -76,7 +76,7 @@ def check_graph_runtime_result(): tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol) check_vm_result() - check_graph_runtime_result() 
diff --git a/tests/python/relay/test_param_dict.py b/tests/python/relay/test_param_dict.py
index 7bc4ab422a64..2272883fc39c 100644
--- a/tests/python/relay/test_param_dict.py
+++ b/tests/python/relay/test_param_dict.py
@@ -24,7 +24,7 @@
 from tvm.relay.op import add
 from tvm import relay
 from tvm import rpc
-from tvm.contrib import utils, graph_runtime
+from tvm.contrib import utils, graph_executor


 def test_save_load():
@@ -60,7 +60,7 @@ def test_bigendian_rpc_param():
     if host is None:
         return

-    def verify_graph_runtime(remote, target, shape, dtype):
+    def verify_graph_executor(remote, target, shape, dtype):
         x = relay.var("x")
         y = relay.const(1)
         z = relay.add(x, y)
@@ -76,7 +76,7 @@ def verify_graph_runtime(remote, target, shape, dtype):
         remote.upload(path_dso)
         lib = remote.load_module("dev_lib.o")
         dev = remote.cpu(0)
-        mod = graph_runtime.create(graph, lib, dev)
+        mod = graph_executor.create(graph, lib, dev)
         mod.load_params(runtime.save_param_dict(params))
         mod.run()
         out = mod.get_output(0, tvm.nd.empty(shape, dtype=dtype, device=dev))
@@ -86,7 +86,7 @@ def verify_graph_runtime(remote, target, shape, dtype):
     remote = rpc.connect(host, port)
     target = "llvm -mtriple=powerpc-linux-gnu"
     for dtype in ["float32", "float64", "int32", "int8"]:
-        verify_graph_runtime(remote, target, (10,), dtype)
+        verify_graph_executor(remote, target, (10,), dtype)


 if __name__ == "__main__":
diff --git a/tests/python/relay/test_pass_annotate_target.py b/tests/python/relay/test_pass_annotate_target.py
index 52deb4e89ef9..c756d74ff0be 100644
--- a/tests/python/relay/test_pass_annotate_target.py
+++ b/tests/python/relay/test_pass_annotate_target.py
@@ -60,11 +60,11 @@ def check_vm_result():
             out = vm.run(**map_inputs)
             tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol)

-    def check_graph_runtime_result():
+    def check_graph_executor_result():
         with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
             json, lib, param = relay.build(mod, target=target, params=params)
         lib = update_lib(lib)
-        rt_mod = tvm.contrib.graph_runtime.create(json, lib, device)
+        rt_mod = tvm.contrib.graph_executor.create(json, lib, device)

         for name, data in map_inputs.items():
             rt_mod.set_input(name, data)
@@ -76,7 +76,7 @@ def check_graph_runtime_result():
             tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol)

     check_vm_result()
-    check_graph_runtime_result()
+    check_graph_executor_result()


 def test_extern_dnnl():
diff --git a/tests/python/relay/test_pass_annotation.py b/tests/python/relay/test_pass_annotation.py
index 70f1a0aa52e5..a9c31f5ccedd 100644
--- a/tests/python/relay/test_pass_annotation.py
+++ b/tests/python/relay/test_pass_annotation.py
@@ -20,7 +20,7 @@
 import tvm
 from tvm import relay
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 from tvm.relay.expr_functor import ExprMutator
 from tvm.relay import transform
 import tvm.testing
@@ -31,7 +31,7 @@ def _trace(module, metadata, _):
     pass  # import pdb; pdb.set_trace()


-def check_graph_runtime(
+def check_graph_executor(
     target, ref_res, device, func, params, config, opt_level, expected_index=None
 ):
     with tvm.transform.PassContext(opt_level=opt_level, config=config):
@@ -41,7 +41,7 @@ def check_graph_runtime(
         if "device_index" in graph_json["attrs"]:
             device_index = graph_json["attrs"]["device_index"][1]
             assert device_index == expected_index
-    mod = graph_runtime.create(graph, lib, contexts)
+    mod = graph_executor.create(graph, lib, contexts)
     mod.set_input(**new_params)
     mod.run()
     res = mod.get_output(0).asnumpy()
@@ -429,7 +429,7 @@ def expected():
     check_annotated_graph(annotated_func, expected_func)
     opt_level = 1
     config = {"relay.fallback_device_type": fallback_device.device_type}
-    check_graph_runtime(
+    check_graph_executor(
         target, ref_res, device, annotated_func, params, config, opt_level, expected_index
     )
     opt_level = 2
@@ -465,7 +465,7 @@ def annotated():
     check_annotated_graph(annotated_func, expected_func)
     opt_level = 1
     config = {"relay.fallback_device_type": fallback_device.device_type}
-    check_graph_runtime(target, ref_res, device, annotated_func, params, config, opt_level)
+    check_graph_executor(target, ref_res, device, annotated_func, params, config, opt_level)
     opt_level = 2
     check_vm_runtime(target, ref_res, device, annotated_func, params, config, opt_level)
@@ -506,7 +506,7 @@ def expected():
     opt_level = 1
     config = {"relay.fallback_device_type": fallback_device.device_type}
     check_annotated_graph(annotated_func, expected_func)
-    check_graph_runtime(
+    check_graph_executor(
         target, ref_res, device, annotated_func, params, config, opt_level, expected_index
     )
     opt_level = 2
@@ -520,7 +520,7 @@ def test_fallback_all_operators(device, tgt):
     expected_func = get_func()
     check_annotated_graph(annotated_func, expected_func)
     opt_level = 2
-    check_graph_runtime(target, ref_res, device, annotated_func, params, {}, opt_level)
+    check_graph_executor(target, ref_res, device, annotated_func, params, {}, opt_level)
     check_vm_runtime(target, ref_res, device, annotated_func, params, {}, opt_level)

     test_fuse_log_add(dev, tgt)
@@ -582,7 +582,7 @@ def expected():

     opt_level = 0
     config = {"relay.fallback_device_type": fallback_device.device_type}
-    check_graph_runtime(
+    check_graph_executor(
         target, ref_res, dev, annotated_func, params, config, opt_level, expected_index
     )

diff --git a/tests/python/relay/test_pass_legalize.py b/tests/python/relay/test_pass_legalize.py
index 0d14f6611db9..8a37da33a10f 100644
--- a/tests/python/relay/test_pass_legalize.py
+++ b/tests/python/relay/test_pass_legalize.py
@@ -20,7 +20,7 @@
 from tvm import te

 from tvm import relay
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 from tvm.relay import transform, analysis
 from tvm.relay.testing.temp_op_attr import TempOpAttr
diff --git a/tests/python/relay/test_pass_legalize_tensorcore.py b/tests/python/relay/test_pass_legalize_tensorcore.py
index 5ecda4ba07a8..f45e39047238 100644
--- a/tests/python/relay/test_pass_legalize_tensorcore.py
+++ b/tests/python/relay/test_pass_legalize_tensorcore.py
@@ -20,7 +20,7 @@
 from tvm import te
 from tvm import topi
 from tvm import relay
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 from tvm.relay import transform, analysis
 from tvm.relay.testing.temp_op_attr import TempOpAttr
diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py
index a5bc3ff2cead..01a1e48f832a 100644
--- a/tests/python/relay/test_pass_partition_graph.py
+++ b/tests/python/relay/test_pass_partition_graph.py
@@ -208,12 +208,12 @@ def check_vm_result():
         for out, ref in zip(outs, results):
             tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=tol, atol=tol)

-    def check_graph_runtime_result():
+    def check_graph_executor_result():
         compile_engine.get().clear()
         with tvm.transform.PassContext(opt_level=3):
             json, lib, param = relay.build(mod, target=target, params=params)
         lib = update_lib(lib)
-        rt_mod = tvm.contrib.graph_runtime.create(json, lib, device)
+        rt_mod = tvm.contrib.graph_executor.create(json, lib, device)

         for name, data in map_inputs.items():
             rt_mod.set_input(name, data)
@@ -229,7 +229,7 @@ def check_graph_runtime_result():
             tvm.testing.assert_allclose(out.asnumpy(), results[idx], rtol=tol, atol=tol)

     check_vm_result()
-    check_graph_runtime_result()
+    check_graph_executor_result()


 def test_multi_node_compiler():
diff --git a/tests/python/relay/test_pass_qnn_legalize.py b/tests/python/relay/test_pass_qnn_legalize.py
index 6a5c8f7cd647..a30cd1e73e3f 100644
--- a/tests/python/relay/test_pass_qnn_legalize.py
+++ b/tests/python/relay/test_pass_qnn_legalize.py
@@ -20,7 +20,7 @@
 from tvm import te
 from tvm import relay
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 from tvm.relay import transform, analysis
 from tvm.relay.testing.temp_op_attr import TempOpAttr
diff --git a/tests/python/relay/test_simplify_fc_transpose.py b/tests/python/relay/test_simplify_fc_transpose.py
index ce93a68c7321..fa5f332e6cd5 100644
--- a/tests/python/relay/test_simplify_fc_transpose.py
+++ b/tests/python/relay/test_simplify_fc_transpose.py
@@ -31,11 +31,11 @@ def run_func(func, params, x):
     with tvm.transform.PassContext(opt_level=3):
         lib = relay.build(func, "llvm", params=params)

-    from tvm.contrib import graph_runtime
+    from tvm.contrib import graph_executor

     dev = tvm.cpu(0)
     dtype = "float32"
-    m = graph_runtime.GraphModule(lib["default"](dev))
+    m = graph_executor.GraphModule(lib["default"](dev))
     # set inputs
     m.set_input("data", tvm.nd.array(x.astype(dtype)))
     # execute
diff --git a/tests/python/relay/test_sparse_dense_convert.py b/tests/python/relay/test_sparse_dense_convert.py
index 4eaaf769ff11..1efa813ebfb0 100644
--- a/tests/python/relay/test_sparse_dense_convert.py
+++ b/tests/python/relay/test_sparse_dense_convert.py
@@ -52,11 +52,11 @@ def run_func(func, params, x):
     with tvm.transform.PassContext(opt_level=3):
         graph, lib, new_params = relay.build(func, "llvm", params=params)

-    from tvm.contrib import graph_runtime
+    from tvm.contrib import graph_executor

     dev = tvm.cpu(0)
     dtype = "float32"
-    m = graph_runtime.create(graph, lib, dev)
+    m = graph_executor.create(graph, lib, dev)
     # set inputs
     m.set_input("data", tvm.nd.array(x.astype(dtype)))
     m.set_input(**new_params)
diff --git a/tests/python/topi/python/test_topi_qnn.py b/tests/python/topi/python/test_topi_qnn.py
index 1f49a68aa519..995cfd2df666 100644
--- a/tests/python/topi/python/test_topi_qnn.py
+++ b/tests/python/topi/python/test_topi_qnn.py
@@ -18,7 +18,7 @@
 import numpy as np
 import tvm
 from tvm import topi, relay, te
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 import tvm.topi.testing
@@ -59,7 +59,7 @@ def check_target(target, dev):
             lib = relay.build(tvm.IRModule.from_expr(real_q_op), target=target)

         # Get real qnn quantize output.
-        m = graph_runtime.GraphModule(lib["default"](dev))
+        m = graph_executor.GraphModule(lib["default"](dev))
         m.set_input("a", a_np)

         m.run()
@@ -126,7 +126,7 @@ def check_target(target, dev):
             lib = relay.build(tvm.IRModule.from_expr(real_dq_op), target=target)

         # Get real qnn quantize output.
-        m = graph_runtime.GraphModule(lib["default"](dev))
+        m = graph_executor.GraphModule(lib["default"](dev))
         m.set_input("a", a_np)

         m.run()
diff --git a/tests/python/unittest/test_crt.py b/tests/python/unittest/test_crt.py
index c9dfef4654ed..cd932ff91db1 100644
--- a/tests/python/unittest/test_crt.py
+++ b/tests/python/unittest/test_crt.py
@@ -138,7 +138,7 @@ def test_reset():


 @tvm.testing.requires_micro
-def test_graph_runtime():
+def test_graph_executor():
     """Test use of the graph runtime with microTVM."""
     import tvm.micro
@@ -156,7 +156,7 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8]) {
     factory = tvm.relay.build(relay_mod, target=TARGET)

     with _make_session(workspace, factory.get_lib()) as sess:
-        graph_mod = tvm.micro.create_local_graph_runtime(
+        graph_mod = tvm.micro.create_local_graph_executor(
             factory.get_json(), sess.get_system_lib(), sess.device
         )
         A_data = tvm.nd.array(np.array([2, 3], dtype="uint8"), device=sess.device)
diff --git a/tests/python/unittest/test_link_params.py b/tests/python/unittest/test_link_params.py
index 4a24687a265b..739c363a5c53 100644
--- a/tests/python/unittest/test_link_params.py
+++ b/tests/python/unittest/test_link_params.py
@@ -206,7 +206,7 @@ def test_llvm_link_params():
     # Wrap in function to explicitly deallocate the runtime.
     def _run_linked(lib, mod):
         graph_json, _, _ = lib
-        graph_rt = tvm.contrib.graph_runtime.create(graph_json, mod, tvm.cpu(0))
+        graph_rt = tvm.contrib.graph_executor.create(graph_json, mod, tvm.cpu(0))
         graph_rt.set_input("rand_input", rand_input)  # NOTE: params not required.
         graph_rt.run()
         return graph_rt.get_output(0)
@@ -218,7 +218,7 @@ def _run_linked(lib, mod):

     def _run_unlinked(lib):
         graph_json, mod, lowered_params = lib
-        graph_rt = tvm.contrib.graph_runtime.create(graph_json, mod, tvm.cpu(0))
+        graph_rt = tvm.contrib.graph_executor.create(graph_json, mod, tvm.cpu(0))
         graph_rt.set_input("rand_input", rand_input, **lowered_params)
         graph_rt.run()
         return graph_rt.get_output(0)
@@ -316,7 +316,7 @@ def test_c_link_params():

     # Wrap in function to explicitly deallocate the runtime.
     def _run_linked(lib_mod):
-        graph_rt = tvm.contrib.graph_runtime.GraphModule(lib_mod["default"](tvm.cpu(0)))
+        graph_rt = tvm.contrib.graph_executor.GraphModule(lib_mod["default"](tvm.cpu(0)))
         graph_rt.set_input("rand_input", rand_input)  # NOTE: params not required.
         graph_rt.run()
@@ -334,7 +334,7 @@ def _run_linked(lib_mod):
     lib_mod = tvm.runtime.load_module(lib_path)

     def _run_unlinked(lib_mod):
-        graph_rt = tvm.contrib.graph_runtime.GraphModule(lib_mod["default"](tvm.cpu(0)))
+        graph_rt = tvm.contrib.graph_executor.GraphModule(lib_mod["default"](tvm.cpu(0)))
         graph_rt.set_input("rand_input", rand_input, **params)
         graph_rt.run()
         return graph_rt.get_output(0)
@@ -374,7 +374,7 @@ def test_crt_link_params():
             compiler_options=opts,
             extra_libs=[
                 tvm.micro.get_standalone_crt_lib(m)
-                for m in ("memory", "graph_runtime_module", "graph_runtime")
+                for m in ("memory", "graph_executor_module", "graph_executor")
             ],
         )
@@ -383,7 +383,7 @@
         }
         flasher = compiler.flasher(**flasher_kw)
         with tvm.micro.Session(binary=micro_binary, flasher=flasher) as sess:
-            graph_rt = tvm.micro.session.create_local_graph_runtime(
+            graph_rt = tvm.micro.session.create_local_graph_executor(
                 graph_json, sess.get_system_lib(), sess.device
             )
@@ -397,7 +397,7 @@

     def _run_unlinked(lib):
         graph_json, mod, lowered_params = lib
-        graph_rt = tvm.contrib.graph_runtime.create(graph_json, mod, tvm.cpu(0))
+        graph_rt = tvm.contrib.graph_executor.create(graph_json, mod, tvm.cpu(0))
         graph_rt.set_input("rand_input", rand_input, **lowered_params)
         graph_rt.run()
         return graph_rt.get_output(0).asnumpy()
diff --git a/tests/python/unittest/test_micro_model_library_format.py b/tests/python/unittest/test_micro_model_library_format.py
index 510cc5a98a64..db6c55bca12a 100644
--- a/tests/python/unittest/test_micro_model_library_format.py
+++ b/tests/python/unittest/test_micro_model_library_format.py
@@ -26,7 +26,7 @@
 import tvm
 import tvm.relay
-from tvm.relay.backend import graph_runtime_factory
+from tvm.relay.backend import graph_executor_factory
 import tvm.runtime.module
 import tvm.testing
 from tvm.contrib import utils
@@ -170,7 +170,7 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[
 @tvm.testing.requires_micro
 def test_export_model():
     module = tvm.support.FrontendTestModule()
-    factory = graph_runtime_factory.GraphExecutorFactoryModule(
+    factory = graph_executor_factory.GraphExecutorFactoryModule(
         None, tvm.target.target.micro("host"), '"graph_json"', module, "test_module", {}
     )
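[Editor's note: the `test_runtime_graph.py` hunks below also show that parameter handling survives the rename unchanged — parameters serialized with `runtime.save_param_dict` still round-trip through `load_params` and `share_params`. A short sketch under the same assumptions as the earlier example; the two-input multiply network is illustrative, not from the patch:]

```python
import numpy as np
import tvm
from tvm import relay, runtime
from tvm.contrib import graph_executor

x = relay.var("x", shape=(2,), dtype="float32")
w = relay.var("w", shape=(2,), dtype="float32")
mod = tvm.IRModule.from_expr(relay.Function([x, w], x * w))
params = {"w": np.array([2.0, 3.0], dtype="float32")}

graph, lib, params = relay.build(mod, target="llvm", params=params)
m = graph_executor.create(graph, lib, tvm.cpu(0))
# Serialized parameters round-trip through load_params.
m.load_params(runtime.save_param_dict(params))
m.set_input("x", np.array([1.0, 1.0], dtype="float32"))
m.run()
```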
diff --git a/tests/python/unittest/test_runtime_graph.py b/tests/python/unittest/test_runtime_graph.py
index f37d4089a8ee..5f0c7837d4f5 100644
--- a/tests/python/unittest/test_runtime_graph.py
+++ b/tests/python/unittest/test_runtime_graph.py
@@ -21,7 +21,7 @@
 import json
 from tvm import rpc
 from tvm import relay
-from tvm.contrib import utils, graph_runtime
+from tvm.contrib import utils, graph_executor


 @tvm.testing.requires_llvm
@@ -59,7 +59,7 @@ def test_graph_simple():

     def check_verify():
         mlib = tvm.build(s, [A, B], "llvm", name="myadd")
-        mod = graph_runtime.create(graph, mlib, tvm.cpu(0))
+        mod = graph_executor.create(graph, mlib, tvm.cpu(0))
         a = np.random.uniform(size=(n,)).astype(A.dtype)
         mod.run(x=a)
         out = mod.get_output(0, tvm.nd.empty((n,)))
@@ -75,7 +75,7 @@ def check_remote():
         mlib.export_library(path_dso)
         remote.upload(path_dso)
         mlib = remote.load_module("dev_lib.so")
-        mod = graph_runtime.create(graph, mlib, remote.cpu(0))
+        mod = graph_executor.create(graph, mlib, remote.cpu(0))
         a = np.random.uniform(size=(n,)).astype(A.dtype)
         mod.run(x=tvm.nd.array(a, dev))
         out = tvm.nd.empty((n,), device=dev)
@@ -92,10 +92,10 @@ def check_sharing():
         params = {"x": x_in}
         graph, lib, params = relay.build(func, target="llvm", params=params)

-        mod_shared = graph_runtime.create(graph, lib, tvm.cpu(0))
+        mod_shared = graph_executor.create(graph, lib, tvm.cpu(0))
         mod_shared.load_params(runtime.save_param_dict(params))
         num_mods = 10
-        mods = [graph_runtime.create(graph, lib, tvm.cpu(0)) for _ in range(num_mods)]
+        mods = [graph_executor.create(graph, lib, tvm.cpu(0)) for _ in range(num_mods)]

         for mod in mods:
             mod.share_params(mod_shared, runtime.save_param_dict(params))
@@ -120,7 +120,7 @@ def check_sharing():

 def test_load_unexpected_params():
-    # Test whether graph_runtime.load_params works if parameters
+    # Test whether graph_executor.load_params works if parameters
     # are provided that are not an expected input.
     mod = tvm.IRModule()
     params = {}
@@ -130,7 +130,7 @@
     mod["main"] = relay.Function([x, y], z)

     graph_module = relay.build(mod, target="llvm", params=params)
-    rt_mod = tvm.contrib.graph_runtime.create(
+    rt_mod = tvm.contrib.graph_executor.create(
         graph_module.get_json(), graph_module.get_lib(), tvm.cpu(0)
     )
diff --git a/tests/python/unittest/test_runtime_graph_cuda_graph.py b/tests/python/unittest/test_runtime_graph_cuda_graph.py
index 41c782a91d9b..ee7750e3e142 100644
--- a/tests/python/unittest/test_runtime_graph_cuda_graph.py
+++ b/tests/python/unittest/test_runtime_graph_cuda_graph.py
@@ -27,8 +27,8 @@
 from tvm import te
 import numpy as np

-from tvm.contrib import utils, graph_runtime
-from tvm.contrib.cuda_graph import cuda_graph_runtime
+from tvm.contrib import utils, graph_executor
+from tvm.contrib.cuda_graph import cuda_graph_executor


 bx = te.thread_axis("blockIdx.x")
@@ -75,7 +75,7 @@ def check_verify():
         mlib = tvm.build(s, [A, B], "cuda", name="myadd")
         dev = tvm.gpu(0)
         try:
-            mod = cuda_graph_runtime.create(graph, mlib, dev)
+            mod = cuda_graph_executor.create(graph, mlib, dev)
         except ValueError:
             return
diff --git a/tests/python/unittest/test_runtime_heterogeneous.py b/tests/python/unittest/test_runtime_heterogeneous.py
index b3fbc2fdf248..e97b349af36e 100644
--- a/tests/python/unittest/test_runtime_heterogeneous.py
+++ b/tests/python/unittest/test_runtime_heterogeneous.py
@@ -21,7 +21,7 @@
 import tvm
 from tvm import te
-from tvm.contrib import graph_runtime, utils
+from tvm.contrib import graph_executor, utils
 from tvm import topi
@@ -172,7 +172,7 @@ def check_device(device, target_device):
     target_flist = {target_device: lower_add, target_host: lower_sub}
     mhost = tvm.build(target_flist, target_host=target_host)
     dev = [host_dev, device_dev]
-    mod = graph_runtime.create(graph, mhost, dev)
+    mod = graph_executor.create(graph, mhost, dev)
     params = {}
     params["A"] = tensor_a = np.random.uniform(size=shape).astype(tensor_a.dtype)
     params["B"] = tensor_b = np.random.uniform(size=shape).astype(tensor_b.dtype)
@@ -408,7 +408,7 @@ def check_device(device, target_device):
     params["D"] = tensor_d = np.random.uniform(size=shape).astype(tensor_d.dtype)

     def check_verify():
-        mod = graph_runtime.create(graph, mhost, dev)
+        mod = graph_executor.create(graph, mhost, dev)
         mod.set_input(**params)
         mod.run()
         out = mod.get_output(0, tvm.nd.empty(shape))
@@ -422,7 +422,7 @@ def check_load_module():
             out_file.write(graph)
         loaded_lib = tvm.runtime.load_module(path_lib)
         loaded_graph = open(temp.relpath("deploy.json")).read()
-        mod = graph_runtime.create(loaded_graph, loaded_lib, dev)
+        mod = graph_executor.create(loaded_graph, loaded_lib, dev)
         mod.set_input(**params)
         mod.run()
         out = mod.get_output(0, tvm.nd.empty(shape))
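[Editor's note: the module-based interface exercised by the next test file wraps the factory module that `relay.build` returns; instantiating `lib["default"](dev)` binds the parameters into the executor, so no separate `set_input(**params)` call is needed. A sketch assuming the `tvm.relay.testing` MLP workload with its `(1, 1, 28, 28)` "data" input:]

```python
import numpy as np
import tvm
from tvm import relay
from tvm.relay import testing
from tvm.contrib import graph_executor

# Build a small reference network; relay.build returns a factory module.
mod, params = testing.mlp.get_workload(batch_size=1)
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, "llvm", params=params)

# "default" creates an executor with the parameters already bound.
dev = tvm.cpu()
gmod = graph_executor.GraphModule(lib["default"](dev))
gmod.set_input("data", np.random.uniform(size=(1, 1, 28, 28)).astype("float32"))
gmod.run()
out = gmod.get_output(0).asnumpy()
```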
diff --git a/tests/python/unittest/test_runtime_module_based_interface.py b/tests/python/unittest/test_runtime_module_based_interface.py
index a9f8f6352d35..f2ba91dcdf44 100644
--- a/tests/python/unittest/test_runtime_module_based_interface.py
+++ b/tests/python/unittest/test_runtime_module_based_interface.py
@@ -18,9 +18,9 @@
 from tvm import relay, runtime
 from tvm.relay import testing
 import tvm
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 from tvm.contrib.debugger import debug_runtime
-from tvm.contrib.cuda_graph import cuda_graph_runtime
+from tvm.contrib.cuda_graph import cuda_graph_executor
 import tvm.testing
@@ -37,7 +37,7 @@ def verify(data):
     graph, lib, graph_params = relay.build_module.build(mod, "llvm", params=params)

     dev = tvm.cpu()
-    module = graph_runtime.create(graph, lib, dev)
+    module = graph_executor.create(graph, lib, dev)
     module.set_input("data", data)
     module.set_input(**graph_params)
     module.run()
@@ -55,7 +55,7 @@ def test_legacy_compatibility():
         graph, lib, graph_params = relay.build_module.build(mod, "llvm", params=params)
     data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
     dev = tvm.cpu()
-    module = graph_runtime.create(graph, lib, dev)
+    module = graph_executor.create(graph, lib, dev)
     module.set_input("data", data)
     module.set_input(**graph_params)
     module.run()
@@ -83,7 +83,7 @@ def test_cpu():
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

     # graph runtime wrapper
-    gmod = graph_runtime.GraphModule(complied_graph_lib["default"](dev))
+    gmod = graph_executor.GraphModule(complied_graph_lib["default"](dev))
     gmod.set_input("data", data)
     gmod.run()
     out = gmod.get_output(0).asnumpy()
@@ -110,7 +110,7 @@ def test_gpu():
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

     # graph runtime wrapper
-    gmod = graph_runtime.GraphModule(complied_graph_lib["default"](dev))
+    gmod = graph_executor.GraphModule(complied_graph_lib["default"](dev))
     gmod.set_input("data", data)
     gmod.run()
     out = gmod.get_output(0).asnumpy()
@@ -152,7 +152,7 @@ def verify_cpu_export(obj_format):
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

         # graph runtime wrapper
-        gmod = graph_runtime.GraphModule(loaded_lib["default"](dev))
+        gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
         gmod.set_input("data", data)
         gmod.run()
         out = gmod.get_output(0).asnumpy()
@@ -191,7 +191,7 @@ def verify_gpu_export(obj_format):
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

         # graph runtime wrapper
-        gmod = graph_runtime.GraphModule(loaded_lib["default"](dev))
+        gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
         gmod.set_input("data", data)
         gmod.run()
         out = gmod.get_output(0).asnumpy()
@@ -235,7 +235,7 @@ def verify_rpc_cpu_export(obj_format):
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

         # graph runtime wrapper
-        gmod = graph_runtime.GraphModule(loaded_lib["default"](dev))
+        gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
         gmod.set_input("data", data)
         gmod.run()
         out = gmod.get_output(0).asnumpy()
@@ -280,7 +280,7 @@ def verify_rpc_gpu_export(obj_format):
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

         # graph runtime wrapper
-        gmod = graph_runtime.GraphModule(loaded_lib["default"](dev))
+        gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
         gmod.set_input("data", data)
         gmod.run()
         out = gmod.get_output(0).asnumpy()
@@ -334,7 +334,7 @@ def verify_cpu_remove_package_params(obj_format):
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

         # graph runtime wrapper
-        gmod = graph_runtime.GraphModule(loaded_lib["default"](dev))
+        gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
         loaded_params = bytearray(open(temp.relpath("deploy_param.params"), "rb").read())
         gmod.set_input("data", data)
         gmod.load_params(loaded_params)
@@ -381,7 +381,7 @@ def verify_gpu_remove_package_params(obj_format):
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

         # graph runtime wrapper
-        gmod = graph_runtime.GraphModule(loaded_lib["default"](dev))
+        gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
         loaded_params = bytearray(open(temp.relpath("deploy_param.params"), "rb").read())
         gmod.set_input("data", data)
         gmod.load_params(loaded_params)
@@ -434,7 +434,7 @@ def verify_rpc_cpu_remove_package_params(obj_format):
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

         # graph runtime wrapper
-        gmod = graph_runtime.GraphModule(loaded_lib["default"](dev))
+        gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
         loaded_params = bytearray(open(path_params, "rb").read())
         gmod.set_input("data", data)
         gmod.load_params(loaded_params)
@@ -487,7 +487,7 @@ def verify_rpc_gpu_remove_package_params(obj_format):
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

         # graph runtime wrapper
-        gmod = graph_runtime.GraphModule(loaded_lib["default"](dev))
+        gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
         loaded_params = bytearray(open(path_params, "rb").read())
         gmod.set_input("data", data)
         gmod.load_params(loaded_params)
@@ -502,7 +502,7 @@ def verify_rpc_gpu_remove_package_params(obj_format):
         verify_rpc_gpu_remove_package_params(obj_format)


-def test_debug_graph_runtime():
+def test_debug_graph_executor():
     if not tvm.testing.device_enabled("llvm"):
         print("Skip because llvm is not enabled")
         return
@@ -516,7 +516,7 @@
     try:
         gmod = complied_graph_lib["debug_create"]("default", dev)
     except:
-        print("Skip because debug graph_runtime not enabled")
+        print("Skip because debug graph_executor not enabled")
         return
     set_input = gmod["set_input"]
     run = gmod["run"]
@@ -540,7 +540,7 @@


 @tvm.testing.requires_cudagraph
-def test_cuda_graph_runtime():
+def test_cuda_graph_executor():
     mod, params = relay.testing.synthetic.get_workload()
     with tvm.transform.PassContext(opt_level=3):
         complied_graph_lib = relay.build_module.build(mod, "cuda", params=params)
@@ -561,7 +561,7 @@
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

     # cuda graph runtime wrapper
-    cu_gmod = cuda_graph_runtime.GraphModuleCudaGraph(gmod)
+    cu_gmod = cuda_graph_executor.GraphModuleCudaGraph(gmod)
     cu_gmod.set_input("data", data)
     cu_gmod.run()
     out = cu_gmod.get_output(0).asnumpy()
@@ -602,5 +602,5 @@ def make_module(mod):
     test_gpu()
     test_mod_export()
     test_remove_package_params()
-    test_debug_graph_runtime()
+    test_debug_graph_executor()
     test_multiple_imported_modules()
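[Editor's note: the debug variant renamed above is reached through the factory's `"debug_create"` entry, which exists only when TVM was built with the debug executor/profiler enabled. A hedged sketch of that access path — the input name and shape here are assumptions for illustration, not taken from the patch:]

```python
import numpy as np
import tvm
from tvm import relay
from tvm.relay import testing

mod, params = testing.synthetic.get_workload()
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build_module.build(mod, "llvm", params=params)

dev = tvm.cpu()
try:
    # Only available when the debug executor was compiled in.
    gmod = lib["debug_create"]("default", dev)
except Exception:
    gmod = None

if gmod is not None:
    set_input, run, get_output = gmod["set_input"], gmod["run"], gmod["get_output"]
    data = np.random.uniform(-1, 1, size=(1, 3, 224, 224)).astype("float32")  # hypothetical shape
    set_input("data", tvm.nd.array(data))
    run()
    out = get_output(0).asnumpy()
```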
diff --git a/tests/python/unittest/test_target_codegen_blob.py b/tests/python/unittest/test_target_codegen_blob.py
index 3429b39c6ac3..f1290ddd1e51 100644
--- a/tests/python/unittest/test_target_codegen_blob.py
+++ b/tests/python/unittest/test_target_codegen_blob.py
@@ -18,7 +18,7 @@
 import numpy as np
 from tvm import relay
 from tvm.relay import testing
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 import tvm
 from tvm import te
 import ctypes
@@ -39,7 +39,7 @@ def verify(data):
         with tvm.transform.PassContext(opt_level=3):
             lib = relay.build_module.build(mod, "llvm", params=params)
         dev = tvm.cpu()
-        module = graph_runtime.GraphModule(lib["default"](dev))
+        module = graph_executor.GraphModule(lib["default"](dev))
         module.set_input("data", data)
         module.run()
         out = module.get_output(0).asnumpy()
@@ -58,7 +58,7 @@ def verify(data):
     loaded_lib = tvm.runtime.load_module(path_lib)
     data = np.random.uniform(-1, 1, size=input_shape).astype("float32")
     dev = tvm.gpu()
-    module = graph_runtime.GraphModule(loaded_lib["default"](dev))
+    module = graph_executor.GraphModule(loaded_lib["default"](dev))
     module.set_input("data", data)
     module.run()
     out = module.get_output(0).asnumpy()
diff --git a/tests/python/unittest/test_tir_transform_hoist_if.py b/tests/python/unittest/test_tir_transform_hoist_if.py
index 748a33d977ee..7d02e4f12c1d 100644
--- a/tests/python/unittest/test_tir_transform_hoist_if.py
+++ b/tests/python/unittest/test_tir_transform_hoist_if.py
@@ -765,7 +765,7 @@ def test_hoisting_op_conv():
     for target, dev in enabled_targets():
         with tvm.transform.PassContext(opt_level=3):
             lib = relay.build_module.build(mod, target=target, params=params)
-            m = tvm.contrib.graph_runtime.GraphModule(lib["default"](dev))
+            m = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))
         x = np.random.uniform(size=dshape)
         data_tvm = tvm.nd.array(data)
         m.set_input("x", data_tvm)
@@ -779,7 +779,7 @@ def test_hoisting_op_conv():
             opt_level=3, config={"tir.HoistIfThenElse": {"support_block_scope_hosting": True}}
         ):
             lib = relay.build_module.build(mod, target=target, params=params)
-            m = tvm.contrib.graph_runtime.GraphModule(lib["default"](dev))
+            m = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))
         x = np.random.uniform(size=dshape)
         data_tvm = tvm.nd.array(data)
         m.set_input("x", data_tvm)
diff --git a/tests/scripts/task_java_unittest.sh b/tests/scripts/task_java_unittest.sh
index 7ab4afae3c2e..7818d7d458d6 100755
--- a/tests/scripts/task_java_unittest.sh
+++ b/tests/scripts/task_java_unittest.sh
@@ -32,7 +32,7 @@ TEMP_DIR=$(mktemp -d)

 python3 $SCRIPT_DIR/test_add_cpu.py $TEMP_DIR
 python3 $SCRIPT_DIR/test_add_gpu.py $TEMP_DIR
-python3 $SCRIPT_DIR/test_graph_runtime.py $TEMP_DIR
+python3 $SCRIPT_DIR/test_graph_executor.py $TEMP_DIR

 # start rpc proxy server
 PORT=$(( ( RANDOM % 1000 ) + 9000 ))
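[Editor's note: the tuning tutorials touched below all end in the same measurement step after creating the executor. The usual idiom, as those tutorials use it, is `time_evaluator` on the packed "run" function of the executor's underlying module. A sketch under the same MLP assumption as the earlier example:]

```python
import numpy as np
import tvm
from tvm import relay
from tvm.relay import testing
from tvm.contrib import graph_executor

mod, params = testing.mlp.get_workload(batch_size=1)
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, "llvm", params=params)

dev = tvm.cpu()
module = graph_executor.GraphModule(lib["default"](dev))
module.set_input("data", np.random.uniform(size=(1, 1, 28, 28)).astype("float32"))

# Time the packed "run" function on the executor's underlying module.
ftimer = module.module.time_evaluator("run", dev, repeat=3, min_repeat_ms=500)
prof_res = np.array(ftimer().results) * 1e3  # convert seconds to milliseconds
print("Mean inference time: %.2f ms (std %.2f ms)" % (np.mean(prof_res), np.std(prof_res)))
```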
diff --git a/tutorials/auto_scheduler/tune_network_arm.py b/tutorials/auto_scheduler/tune_network_arm.py
index 30bbce1e6b7e..7e2281a4b459 100644
--- a/tutorials/auto_scheduler/tune_network_arm.py
+++ b/tutorials/auto_scheduler/tune_network_arm.py
@@ -49,7 +49,7 @@
 import tvm
 from tvm import relay, auto_scheduler
 import tvm.relay.testing
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 from tvm.contrib.utils import tempdir

 #################################################################
 # Define network
@@ -321,7 +321,7 @@ def tune_and_evaluate():

     # Create graph runtime
     dev = remote.cpu()
-    module = graph_runtime.GraphModule(rlib["default"](dev))
+    module = graph_executor.GraphModule(rlib["default"](dev))
     data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
     module.set_input("data", data_tvm)
diff --git a/tutorials/auto_scheduler/tune_network_cuda.py b/tutorials/auto_scheduler/tune_network_cuda.py
index b5162fc85f14..e92ddaf40541 100644
--- a/tutorials/auto_scheduler/tune_network_cuda.py
+++ b/tutorials/auto_scheduler/tune_network_cuda.py
@@ -49,7 +49,7 @@
 import tvm
 from tvm import relay, auto_scheduler
 import tvm.relay.testing
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor

 #################################################################
 # Define a Network
@@ -282,7 +282,7 @@ def run_tuning():

 # Create graph runtime
 dev = tvm.device(str(target), 0)
-module = graph_runtime.GraphModule(lib["default"](dev))
+module = graph_executor.GraphModule(lib["default"](dev))
 data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
 module.set_input("data", data_tvm)
diff --git a/tutorials/auto_scheduler/tune_network_mali.py b/tutorials/auto_scheduler/tune_network_mali.py
index 3d3861263e4b..a96ce922d4ae 100644
--- a/tutorials/auto_scheduler/tune_network_mali.py
+++ b/tutorials/auto_scheduler/tune_network_mali.py
@@ -49,7 +49,7 @@
 import tvm
 from tvm import relay, auto_scheduler
 import tvm.relay.testing
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 import os

 #################################################################
@@ -256,7 +256,7 @@ def tune_and_evaluate():
         lib.export_library(path_lib, ndk.create_shared)
         remote.upload(path_lib)
         loaded_lib = remote.load_module(filename)
-        module = graph_runtime.GraphModule(loaded_lib["default"](dev))
+        module = graph_executor.GraphModule(loaded_lib["default"](dev))
         data = (np.random.uniform(size=input_shape)).astype(dtype)
         data_tvm = tvm.nd.array(data)
         module.set_input("data", data_tvm)
diff --git a/tutorials/auto_scheduler/tune_network_x86.py b/tutorials/auto_scheduler/tune_network_x86.py
index ca2b8c197778..253878f70374 100644
--- a/tutorials/auto_scheduler/tune_network_x86.py
+++ b/tutorials/auto_scheduler/tune_network_x86.py
@@ -49,7 +49,7 @@
 import tvm
 from tvm import relay, auto_scheduler
 import tvm.relay.testing
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor

 #################################################################
 # Define a Network
@@ -281,7 +281,7 @@ def run_tuning():

 # Create graph runtime
 dev = tvm.device(str(target), 0)
-module = graph_runtime.GraphModule(lib["default"](dev))
+module = graph_executor.GraphModule(lib["default"](dev))
 data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
 module.set_input("data", data_tvm)
diff --git a/tutorials/autotvm/tune_relay_arm.py b/tutorials/autotvm/tune_relay_arm.py
index 9ddcf817c168..9223eb30cd9d 100644
--- a/tutorials/autotvm/tune_relay_arm.py
+++ b/tutorials/autotvm/tune_relay_arm.py
@@ -70,7 +70,7 @@
 import tvm.relay.testing
 from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
 from tvm.contrib.utils import tempdir
-import tvm.contrib.graph_runtime as runtime
+import tvm.contrib.graph_executor as runtime

 #################################################################
 # Define network
diff --git a/tutorials/autotvm/tune_relay_cuda.py b/tutorials/autotvm/tune_relay_cuda.py
index 8ae5144eb31d..50485c4d7ff2 100644
--- a/tutorials/autotvm/tune_relay_cuda.py
+++ b/tutorials/autotvm/tune_relay_cuda.py
@@ -67,7 +67,7 @@
 from tvm import relay, autotvm
 import tvm.relay.testing
 from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
-import tvm.contrib.graph_runtime as runtime
+import tvm.contrib.graph_executor as runtime

 #################################################################
 # Define Network
diff --git a/tutorials/autotvm/tune_relay_mobile_gpu.py b/tutorials/autotvm/tune_relay_mobile_gpu.py
index 08fc87d4da1b..aefa600e3c3f 100644
--- a/tutorials/autotvm/tune_relay_mobile_gpu.py
+++ b/tutorials/autotvm/tune_relay_mobile_gpu.py
@@ -69,7 +69,7 @@
 import tvm.relay.testing
 from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
 from tvm.contrib.utils import tempdir
-import tvm.contrib.graph_runtime as runtime
+import tvm.contrib.graph_executor as runtime

 #################################################################
 # Define network
diff --git a/tutorials/autotvm/tune_relay_x86.py b/tutorials/autotvm/tune_relay_x86.py
index b072a3adfea3..dd5d4057c211 100644
--- a/tutorials/autotvm/tune_relay_x86.py
+++ b/tutorials/autotvm/tune_relay_x86.py
@@ -36,7 +36,7 @@
 from tvm.relay import testing
 from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
 from tvm.autotvm.graph_tuner import DPTuner, PBQPTuner
-import tvm.contrib.graph_runtime as runtime
+import tvm.contrib.graph_executor as runtime

 #################################################################
 # Define network
diff --git a/tutorials/frontend/build_gcn.py b/tutorials/frontend/build_gcn.py
index b21c09692a64..5ecea00b76b0 100644
--- a/tutorials/frontend/build_gcn.py
+++ b/tutorials/frontend/build_gcn.py
@@ -175,7 +175,7 @@ def evaluate(data, logits):
 #            = ((H * W)^t * A^t)^t
 #            = ((W^t * H^t) * A^t)^t
 from tvm import relay
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 import tvm
 from tvm import te
@@ -337,7 +337,7 @@ def prepare_params(g, data):
 # Generate graph runtime
 dev = tvm.device(target, 0)
-m = graph_runtime.GraphModule(lib["default"](dev))
+m = graph_executor.GraphModule(lib["default"](dev))

 ######################################################################
 # Run the TVM model, test for accuracy and verify with DGL
diff --git a/tutorials/frontend/deploy_model_on_android.py b/tutorials/frontend/deploy_model_on_android.py
index c69ac8d3f0e3..8321cc6128bc 100644
--- a/tutorials/frontend/deploy_model_on_android.py
+++ b/tutorials/frontend/deploy_model_on_android.py
@@ -34,7 +34,7 @@
 from tvm import te
 import tvm.relay as relay
 from tvm import rpc
-from tvm.contrib import utils, ndk, graph_runtime as runtime
+from tvm.contrib import utils, ndk, graph_executor as runtime
 from tvm.contrib.download import download_testdata
diff --git a/tutorials/frontend/deploy_model_on_rasp.py b/tutorials/frontend/deploy_model_on_rasp.py
index 75e142243a38..a59665f62f1c 100644
--- a/tutorials/frontend/deploy_model_on_rasp.py
+++ b/tutorials/frontend/deploy_model_on_rasp.py
@@ -30,7 +30,7 @@
 from tvm import te
 import tvm.relay as relay
 from tvm import rpc
-from tvm.contrib import utils, graph_runtime as runtime
+from tvm.contrib import utils, graph_executor as runtime
 from tvm.contrib.download import download_testdata

 ######################################################################
diff --git a/tutorials/frontend/deploy_prequantized.py b/tutorials/frontend/deploy_prequantized.py
index 681bc0741338..308027a4a193 100644
--- a/tutorials/frontend/deploy_prequantized.py
+++ b/tutorials/frontend/deploy_prequantized.py
@@ -90,7 +90,7 @@ def run_tvm_model(mod, params, input_name, inp, target="llvm"):
     with tvm.transform.PassContext(opt_level=3):
         lib = relay.build(mod, target=target, params=params)

-    runtime = tvm.contrib.graph_runtime.GraphModule(lib["default"](tvm.device(target, 0)))
+    runtime = tvm.contrib.graph_executor.GraphModule(lib["default"](tvm.device(target, 0)))

     runtime.set_input(input_name, inp)
     runtime.run()
diff --git a/tutorials/frontend/deploy_prequantized_tflite.py b/tutorials/frontend/deploy_prequantized_tflite.py
index 547704a72fc4..e0f9a6b2ebde 100644
--- a/tutorials/frontend/deploy_prequantized_tflite.py
+++ b/tutorials/frontend/deploy_prequantized_tflite.py
@@ -168,9 +168,9 @@ def run_tflite_model(tflite_model_buf, input_data):
 ###############################################################################
 # Lets run TVM compiled pre-quantized model inference and get the TVM prediction.
 def run_tvm(lib):
-    from tvm.contrib import graph_runtime
+    from tvm.contrib import graph_executor

-    rt_mod = graph_runtime.GraphModule(lib["default"](tvm.cpu(0)))
+    rt_mod = graph_executor.GraphModule(lib["default"](tvm.cpu(0)))
     rt_mod.set_input("input", data)
     rt_mod.run()
     tvm_res = rt_mod.get_output(0).asnumpy()
diff --git a/tutorials/frontend/deploy_sparse.py b/tutorials/frontend/deploy_sparse.py
index 4d96f4d5de9f..1fcb1b3246da 100644
--- a/tutorials/frontend/deploy_sparse.py
+++ b/tutorials/frontend/deploy_sparse.py
@@ -82,7 +82,7 @@
 import numpy as np
 import tensorflow as tf
 from tvm import relay, runtime
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 from tvm.relay import data_dep_optimization as ddo
 from tensorflow.python.framework.convert_to_constants import (
     convert_variables_to_constants_v2,
@@ -214,7 +214,7 @@ def run_relay_graph(mod, params, shape_dict, target, dev):
     input_shape = shape_dict["input_1"]
     dummy_data = np.random.uniform(size=input_shape, low=0, high=input_shape[1]).astype("int32")

-    m = graph_runtime.GraphModule(lib["default"](dev))
+    m = graph_executor.GraphModule(lib["default"](dev))
     m.set_input(0, dummy_data)
     m.run()
     tvm_output = m.get_output(0)
diff --git a/tutorials/frontend/deploy_ssd_gluoncv.py b/tutorials/frontend/deploy_ssd_gluoncv.py
index 9f31ab70731a..40b40ce1f441 100644
--- a/tutorials/frontend/deploy_ssd_gluoncv.py
+++ b/tutorials/frontend/deploy_ssd_gluoncv.py
@@ -28,7 +28,7 @@
 from matplotlib import pyplot as plt
 from tvm import relay
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 from tvm.contrib.download import download_testdata
 from gluoncv import model_zoo, data, utils
@@ -102,7 +102,7 @@ def build(target):

 def run(lib, dev):
     # Build TVM runtime
-    m = graph_runtime.GraphModule(lib["default"](dev))
+    m = graph_executor.GraphModule(lib["default"](dev))
     tvm_input = tvm.nd.array(x.asnumpy(), device=dev)
     m.set_input("data", tvm_input)
     # execute
diff --git a/tutorials/frontend/from_caffe2.py b/tutorials/frontend/from_caffe2.py
index 390fd2f32035..a3378de8b0e3 100644
--- a/tutorials/frontend/from_caffe2.py
+++ b/tutorials/frontend/from_caffe2.py
@@ -105,12 +105,12 @@ def transform_image(image):
 # The process is no different from other examples.
 import tvm
 from tvm import te
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor

 # context x86 CPU, use tvm.gpu(0) if you run on GPU
 dev = tvm.cpu(0)
 # create a runtime executor module
-m = graph_runtime.GraphModule(lib["default"](dev))
+m = graph_executor.GraphModule(lib["default"](dev))
 # set inputs
 m.set_input(input_name, tvm.nd.array(data.astype("float32")))
 # execute
diff --git a/tutorials/frontend/from_coreml.py b/tutorials/frontend/from_coreml.py
index 5703f5742d3d..ea8817d3a0a8 100644
--- a/tutorials/frontend/from_coreml.py
+++ b/tutorials/frontend/from_coreml.py
@@ -81,11 +81,11 @@
 # Execute on TVM
 # -------------------
 # The process is no different from other example
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor

 dev = tvm.cpu(0)
 dtype = "float32"
-m = graph_runtime.GraphModule(lib["default"](dev))
+m = graph_executor.GraphModule(lib["default"](dev))
 # set inputs
 m.set_input("image", tvm.nd.array(x.astype(dtype)))
 # execute
diff --git a/tutorials/frontend/from_darknet.py b/tutorials/frontend/from_darknet.py
index b76c32e83137..356dc16bedf0 100644
--- a/tutorials/frontend/from_darknet.py
+++ b/tutorials/frontend/from_darknet.py
@@ -117,9 +117,9 @@
 # Execute on TVM Runtime
 # ----------------------
 # The process is no different from other examples.
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor

-m = graph_runtime.GraphModule(lib["default"](dev))
+m = graph_executor.GraphModule(lib["default"](dev))

 # set inputs
 m.set_input("data", tvm.nd.array(data.astype(dtype)))
diff --git a/tutorials/frontend/from_mxnet.py b/tutorials/frontend/from_mxnet.py
index 696af6b4a3a5..bfaac2c6c98e 100644
--- a/tutorials/frontend/from_mxnet.py
+++ b/tutorials/frontend/from_mxnet.py
@@ -104,11 +104,11 @@ def transform_image(image):
 # Execute the portable graph on TVM
 # ---------------------------------
 # Now, we would like to reproduce the same forward computation using TVM.
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor

 dev = tvm.gpu(0)
 dtype = "float32"
-m = graph_runtime.GraphModule(lib["default"](dev))
+m = graph_executor.GraphModule(lib["default"](dev))
 # set inputs
 m.set_input("data", tvm.nd.array(x.astype(dtype)))
 # execute
diff --git a/tutorials/frontend/from_pytorch.py b/tutorials/frontend/from_pytorch.py
index ecc3e9aef40c..a0db518025e3 100644
--- a/tutorials/frontend/from_pytorch.py
+++ b/tutorials/frontend/from_pytorch.py
@@ -110,10 +110,10 @@
 # Execute the portable graph on TVM
 # ---------------------------------
 # Now we can try deploying the compiled model on target.
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor

 dtype = "float32"
-m = graph_runtime.GraphModule(lib["default"](dev))
+m = graph_executor.GraphModule(lib["default"](dev))
 # Set inputs
 m.set_input(input_name, tvm.nd.array(img.astype(dtype)))
 # Execute
diff --git a/tutorials/frontend/from_tensorflow.py b/tutorials/frontend/from_tensorflow.py
index 5a7c98105715..96c001e4fd41 100644
--- a/tutorials/frontend/from_tensorflow.py
+++ b/tutorials/frontend/from_tensorflow.py
@@ -152,10 +152,10 @@
 # ---------------------------------
 # Now we can try deploying the compiled model on target.
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor

 dtype = "uint8"
-m = graph_runtime.GraphModule(lib["default"](dev))
+m = graph_executor.GraphModule(lib["default"](dev))
 # set inputs
 m.set_input("DecodeJpeg/contents", tvm.nd.array(x.astype(dtype)))
 # execute
diff --git a/tutorials/frontend/from_tflite.py b/tutorials/frontend/from_tflite.py
index f7e8422c37b6..a85cfcea913c 100644
--- a/tutorials/frontend/from_tflite.py
+++ b/tutorials/frontend/from_tflite.py
@@ -148,7 +148,7 @@ def extract(path):
 # --------------
 import tvm
 from tvm import te
-from tvm.contrib import graph_runtime as runtime
+from tvm.contrib import graph_executor as runtime

 # Create a runtime executor module
 module = runtime.GraphModule(lib["default"](tvm.cpu()))
diff --git a/tutorials/frontend/using_external_lib.py b/tutorials/frontend/using_external_lib.py
index 667dc2de1b9b..232f618bb28a 100644
--- a/tutorials/frontend/using_external_lib.py
+++ b/tutorials/frontend/using_external_lib.py
@@ -34,7 +34,7 @@
 import tvm
 from tvm import te
 import numpy as np
-from tvm.contrib import graph_runtime as runtime
+from tvm.contrib import graph_executor as runtime
 from tvm import relay
 from tvm.relay import testing
 import tvm.testing
diff --git a/tutorials/get_started/relay_quick_start.py b/tutorials/get_started/relay_quick_start.py
index 86c4ac4399ed..d0fbe9f59bde 100644
--- a/tutorials/get_started/relay_quick_start.py
+++ b/tutorials/get_started/relay_quick_start.py
@@ -43,7 +43,7 @@
 from tvm.relay import testing
 import tvm
 from tvm import te
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 import tvm.testing

 ######################################################################
@@ -110,7 +110,7 @@
 dev = tvm.gpu()
 data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
 # create module
-module = graph_runtime.GraphModule(lib["default"](dev))
+module = graph_executor.GraphModule(lib["default"](dev))
 # set input and parameters
 module.set_input("data", data)
 # run
@@ -143,7 +143,7 @@
 loaded_lib = tvm.runtime.load_module(path_lib)
 input_data = tvm.nd.array(np.random.uniform(size=data_shape).astype("float32"))

-module = graph_runtime.GraphModule(loaded_lib["default"](dev))
+module = graph_executor.GraphModule(loaded_lib["default"](dev))
 module.run(data=input_data)
 out_deploy = module.get_output(0).asnumpy()
diff --git a/tutorials/micro/micro_tflite.py b/tutorials/micro/micro_tflite.py
index 53271b29e20d..f59b1c3723a8 100644
--- a/tutorials/micro/micro_tflite.py
+++ b/tutorials/micro/micro_tflite.py
@@ -127,7 +127,7 @@
 import tvm
 import tvm.micro as micro
 from tvm.contrib.download import download_testdata
-from tvm.contrib import graph_runtime, utils
+from tvm.contrib import graph_executor, utils
 from tvm import relay

 model_url = "https://people.linaro.org/~tom.gall/sine_model.tflite"
@@ -257,7 +257,7 @@
 flasher = compiler.flasher()

 with tvm.micro.Session(binary=micro_binary, flasher=flasher) as session:
-    graph_mod = tvm.micro.create_local_graph_runtime(
+    graph_mod = tvm.micro.create_local_graph_executor(
         graph, session.get_system_lib(), session.device
     )
diff --git a/vta/scripts/tune_resnet.py b/vta/scripts/tune_resnet.py
index a10d1de8c46b..f3246ebcd298 100644
--- a/vta/scripts/tune_resnet.py
+++ b/vta/scripts/tune_resnet.py
@@ -28,7 +28,7 @@
 from tvm import rpc, autotvm, relay
 from tvm.autotvm.measure.measure_methods import request_remote
 from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
-from tvm.contrib import graph_runtime, utils, download
+from tvm.contrib import graph_executor, utils, download
 from tvm.contrib.debugger import debug_runtime
 import vta
 from vta.testing import simulator
@@ -327,7 +327,7 @@ def tune_tasks(
     if opt.debug_profile:
         m = debug_runtime.create(graph, lib, ctx)
     else:
-        m = graph_runtime.create(graph, lib, ctx)
+        m = graph_executor.create(graph, lib, ctx)

     # Set the network parameters and synthetic input
     image = tvm.nd.array((np.random.uniform(size=(1, 3, 224, 224))).astype("float32"))
diff --git a/vta/tutorials/autotvm/tune_relay_vta.py b/vta/tutorials/autotvm/tune_relay_vta.py
index ed2671c75ae8..ba326912f423 100644
--- a/vta/tutorials/autotvm/tune_relay_vta.py
+++ b/vta/tutorials/autotvm/tune_relay_vta.py
@@ -62,7 +62,7 @@
 import tvm
 from tvm import te
 from tvm import rpc, autotvm, relay
-from tvm.contrib import graph_runtime, utils, download
+from tvm.contrib import graph_executor, utils, download
 from tvm.autotvm.measure.measure_methods import request_remote
 from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
@@ -433,7 +433,7 @@ def tune_and_evaluate(tuning_opt):

         # Generate the graph runtime
         ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)
-        m = graph_runtime.GraphModule(lib["default"](ctx))
+        m = graph_executor.GraphModule(lib["default"](ctx))

         # upload parameters to device
         image = tvm.nd.array((np.random.uniform(size=(1, 3, 224, 224))).astype("float32"))
diff --git a/vta/tutorials/frontend/deploy_classification.py b/vta/tutorials/frontend/deploy_classification.py
index 1bf4161a3340..91e999271b1e 100644
--- a/vta/tutorials/frontend/deploy_classification.py
+++ b/vta/tutorials/frontend/deploy_classification.py
@@ -52,7 +52,7 @@
 import tvm
 from tvm import te
 from tvm import rpc, autotvm, relay
-from tvm.contrib import graph_runtime, utils, download
+from tvm.contrib import graph_executor, utils, download
 from tvm.contrib.debugger import debug_runtime
 from tvm.relay import transform
@@ -210,7 +210,7 @@
     lib = remote.load_module("graphlib.tar")

     # Graph runtime
-    m = graph_runtime.GraphModule(lib["default"](ctx))
+    m = graph_executor.GraphModule(lib["default"](ctx))

 ######################################################################
 # Perform image classification inference
diff --git a/vta/tutorials/frontend/legacy/deploy_detection.py b/vta/tutorials/frontend/legacy/deploy_detection.py
index cbb320e98f13..9181a5e7b1b3 100644
--- a/vta/tutorials/frontend/legacy/deploy_detection.py
+++ b/vta/tutorials/frontend/legacy/deploy_detection.py
@@ -58,7 +58,7 @@
 from tvm import rpc, autotvm, relay
 from tvm.relay.testing import yolo_detection, darknet
 from tvm.relay.testing.darknet import __darknetffi__
-from tvm.contrib import graph_runtime, utils
+from tvm.contrib import graph_executor, utils
 from tvm.contrib.download import download_testdata
 from vta.testing import simulator
 from vta.top import graph_pack
@@ -247,7 +247,7 @@
     lib = remote.load_module("graphlib.tar")

     # Graph runtime
-    m = graph_runtime.GraphModule(lib["default"](ctx))
+    m = graph_executor.GraphModule(lib["default"](ctx))

 ####################################
 # Perform image detection inference.
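[Editor's note: the VTA and Android tutorials above share one deployment pattern — export the built library, ship it over RPC, and wrap the remotely loaded module. A sketch with a hypothetical RPC host and port; a real setup would cross-compile with the device's target triple instead of plain "llvm":]

```python
import numpy as np
import tvm
from tvm import relay, rpc
from tvm.contrib import graph_executor, utils
from tvm.relay import testing

mod, params = testing.mlp.get_workload(batch_size=1)
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target="llvm", params=params)

# Package the compiled library and upload it over RPC.
temp = utils.tempdir()
path_lib = temp.relpath("graphlib.tar")
lib.export_library(path_lib)

remote = rpc.connect("127.0.0.1", 9090)  # hypothetical host/port
remote.upload(path_lib)
rlib = remote.load_module("graphlib.tar")

# Instantiate the executor on the remote device.
ctx = remote.cpu(0)
m = graph_executor.GraphModule(rlib["default"](ctx))
m.set_input("data", np.random.uniform(size=(1, 1, 28, 28)).astype("float32"))
m.run()
```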
diff --git a/web/emcc/wasm_runtime.cc b/web/emcc/wasm_runtime.cc index 21773a81a970..bcad656678e5 100644 --- a/web/emcc/wasm_runtime.cc +++ b/web/emcc/wasm_runtime.cc @@ -34,7 +34,7 @@ #include "src/runtime/c_runtime_api.cc" #include "src/runtime/cpu_device_api.cc" #include "src/runtime/file_utils.cc" -#include "src/runtime/graph_executor/graph_runtime.cc" +#include "src/runtime/graph_executor/graph_executor.cc" #include "src/runtime/library_module.cc" #include "src/runtime/logging.cc" #include "src/runtime/module.cc" diff --git a/web/src/runtime.ts b/web/src/runtime.ts index 4b3e96c75457..532c45ba5ccf 100644 --- a/web/src/runtime.ts +++ b/web/src/runtime.ts @@ -997,7 +997,7 @@ export class Instance implements Disposable { lib: Module, dev: DLDevice ): GraphExecutor { - const fcreate = this.getGlobalFunc("tvm.graph_runtime.create"); + const fcreate = this.getGlobalFunc("tvm.graph_executor.create"); const module = fcreate( graphJson, lib, From 3f66176a0c7cb2ab0c6c7ce18c4bd225ae4ad80f Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 12 Mar 2021 10:06:30 -0800 Subject: [PATCH 05/16] more renames --- docs/api/python/{graph_runtime.rst => graph_executor.rst} | 0 include/tvm/runtime/crt/{graph_runtime.h => graph_executor.h} | 0 .../crt/{graph_runtime_module.h => graph_executor_module.h} | 0 .../scripts/{test_graph_runtime.py => test_graph_executor.py} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename docs/api/python/{graph_runtime.rst => graph_executor.rst} (100%) rename include/tvm/runtime/crt/{graph_runtime.h => graph_executor.h} (100%) rename include/tvm/runtime/crt/{graph_runtime_module.h => graph_executor_module.h} (100%) rename jvm/core/src/test/scripts/{test_graph_runtime.py => test_graph_executor.py} (100%) diff --git a/docs/api/python/graph_runtime.rst b/docs/api/python/graph_executor.rst similarity index 100% rename from docs/api/python/graph_runtime.rst rename to docs/api/python/graph_executor.rst diff --git a/include/tvm/runtime/crt/graph_runtime.h b/include/tvm/runtime/crt/graph_executor.h similarity index 100% rename from include/tvm/runtime/crt/graph_runtime.h rename to include/tvm/runtime/crt/graph_executor.h diff --git a/include/tvm/runtime/crt/graph_runtime_module.h b/include/tvm/runtime/crt/graph_executor_module.h similarity index 100% rename from include/tvm/runtime/crt/graph_runtime_module.h rename to include/tvm/runtime/crt/graph_executor_module.h diff --git a/jvm/core/src/test/scripts/test_graph_runtime.py b/jvm/core/src/test/scripts/test_graph_executor.py similarity index 100% rename from jvm/core/src/test/scripts/test_graph_runtime.py rename to jvm/core/src/test/scripts/test_graph_executor.py From 4ad96ee13f3927085643dd7bdc2be6ae0680cd7a Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 12 Mar 2021 10:13:26 -0800 Subject: [PATCH 06/16] "graph runtime" -> "graph executor" --- CMakeLists.txt | 10 ++++---- .../Camera2BasicFragment.java | 10 ++++---- .../apache/tvm/android/demo/MainActivity.java | 6 ++--- apps/bundle_deploy/README.md | 2 +- apps/howto_deploy/cpp_deploy.cc | 4 ++-- apps/howto_deploy/tvm_runtime_pack.cc | 2 +- apps/ios_rpc/tvmrpc/TVMRuntime.mm | 2 +- cmake/config.cmake | 6 ++--- cmake/modules/CUDA.cmake | 4 ++-- cmake/modules/contrib/ArmComputeLib.cmake | 4 ++-- docs/deploy/arm_compute_lib.rst | 2 +- docs/dev/index.rst | 2 +- docs/dev/microtvm_design.rst | 12 +++++----- docs/dev/virtual_machine.rst | 10 ++++---- docs/install/from_source.rst | 2 +- golang/sample/complex.go | 4 ++-- golang/src/tvm_runtime_pack.cc | 2 +- 
include/tvm/runtime/crt/error_codes.h | 2 +- include/tvm/runtime/crt/graph_executor.h | 14 +++++------ .../tvm/runtime/crt/graph_executor_module.h | 2 +- .../contrib/cuda_graph/cuda_graph_executor.py | 12 +++++----- python/tvm/contrib/graph_executor.py | 4 ++-- python/tvm/driver/tvmc/compiler.py | 2 +- python/tvm/driver/tvmc/runner.py | 4 ++-- python/tvm/micro/session.py | 6 ++--- python/tvm/relay/analysis/analysis.py | 2 +- .../relay/backend/graph_executor_codegen.py | 4 ++-- .../relay/backend/graph_executor_factory.py | 8 +++---- python/tvm/relay/build_module.py | 14 +++++------ .../tvm/relay/op/contrib/arm_compute_lib.py | 2 +- python/tvm/relay/op/contrib/tensorrt.py | 2 +- rust/tvm-graph-rt/Cargo.toml | 2 +- rust/tvm/README.md | 2 +- rust/tvm/examples/resnet/src/build_resnet.py | 2 +- rust/tvm/src/lib.rs | 2 +- rust/tvm/src/runtime/graph_rt.rs | 14 +++++------ src/relay/analysis/get_calibration_data.cc | 4 ++-- src/relay/backend/build_module.cc | 4 ++-- src/relay/backend/compile_engine.cc | 2 +- .../contrib/arm_compute_lib/codegen.cc | 4 ++-- src/relay/backend/contrib/tensorrt/codegen.cc | 2 +- src/relay/backend/graph_executor_codegen.cc | 4 ++-- src/relay/backend/graph_plan_memory.cc | 2 +- src/relay/transforms/partition_graph.cc | 2 +- .../crt/graph_executor/graph_executor.c | 20 ++++++++-------- .../internal/graph_executor/graph_executor.h | 2 +- .../cuda_graph/graph_runtime_cuda_graph.cc | 2 +- .../debug/graph_executor_debug.cc | 2 +- src/runtime/graph_executor/graph_executor.h | 4 ++-- .../graph_executor/graph_executor_factory.cc | 6 ++--- .../graph_executor/graph_executor_factory.h | 8 +++---- src/runtime/metadata_module.cc | 2 +- src/target/metadata_module.cc | 2 +- tests/cpp/build_module_test.cc | 2 +- tests/micro/test_runtime_micro_on_arm.py | 8 +++---- tests/python/contrib/test_tensorrt.py | 2 +- tests/python/frontend/mxnet/test_forward.py | 4 ++-- .../python/relay/benchmarking/benchmark_vm.py | 4 ++-- tests/python/relay/test_external_codegen.py | 2 +- tests/python/unittest/test_crt.py | 2 +- .../test_runtime_module_based_interface.py | 24 +++++++++---------- tests/scripts/task_rust.sh | 4 ++-- tutorials/auto_scheduler/tune_network_arm.py | 2 +- tutorials/auto_scheduler/tune_network_cuda.py | 2 +- tutorials/auto_scheduler/tune_network_mali.py | 2 +- tutorials/auto_scheduler/tune_network_x86.py | 2 +- tutorials/frontend/build_gcn.py | 2 +- tutorials/get_started/relay_quick_start.py | 2 +- vta/tutorials/autotvm/tune_relay_vta.py | 2 +- .../frontend/deploy_classification.py | 6 ++--- .../frontend/legacy/deploy_detection.py | 6 ++--- web/src/runtime.ts | 6 ++--- 72 files changed, 170 insertions(+), 170 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ecda307f585d..d6743a681291 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,9 +34,9 @@ tvm_option(USE_RPC "Build with RPC" ON) tvm_option(USE_THREADS "Build with thread support" ON) tvm_option(USE_LLVM "Build with LLVM, can be set to specific llvm-config path" OFF) tvm_option(USE_STACKVM_RUNTIME "Include stackvm into the runtime" OFF) -tvm_option(USE_GRAPH_RUNTIME "Build with tiny graph runtime" ON) -tvm_option(USE_GRAPH_RUNTIME_CUDA_GRAPH "Build with tiny graph runtime with CUDA Graph for GPUs" OFF) -tvm_option(USE_PROFILER "Build profiler for the VM and graph runtime" ON) +tvm_option(USE_GRAPH_RUNTIME "Build with tiny graph executor" ON) +tvm_option(USE_GRAPH_RUNTIME_CUDA_GRAPH "Build with tiny graph executor with CUDA Graph for GPUs" OFF) +tvm_option(USE_PROFILER "Build profiler for the VM and graph executor" 
ON) tvm_option(USE_OPENMP "Build with OpenMP thread pool implementation" OFF) tvm_option(USE_RELAY_DEBUG "Building Relay in debug mode..." OFF) tvm_option(USE_RTTI "Build with RTTI" ON) @@ -79,7 +79,7 @@ tvm_option(USE_COREML "Build with coreml support" OFF) tvm_option(USE_BNNS "Build with BNNS support" OFF) tvm_option(USE_TARGET_ONNX "Build with ONNX Codegen support" OFF) tvm_option(USE_ARM_COMPUTE_LIB "Build with Arm Compute Library" OFF) -tvm_option(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME "Build with Arm Compute Library graph runtime" OFF) +tvm_option(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME "Build with Arm Compute Library graph executor" OFF) tvm_option(USE_TENSORRT_CODEGEN "Build with TensorRT Codegen support" OFF) tvm_option(USE_TENSORRT_RUNTIME "Build with TensorRT runtime" OFF) tvm_option(USE_RUST_EXT "Build with Rust based compiler extensions, STATIC, DYNAMIC, or OFF" OFF) @@ -308,7 +308,7 @@ else() endif(USE_STACKVM_RUNTIME) if(USE_GRAPH_RUNTIME) - message(STATUS "Build with Graph runtime support...") + message(STATUS "Build with Graph executor support...") file(GLOB RUNTIME_GRAPH_EXECUTOR_SRCS src/runtime/graph_executor/*.cc) list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_EXECUTOR_SRCS}) diff --git a/apps/android_camera/app/src/main/java/org/apache/tvm/android/androidcamerademo/Camera2BasicFragment.java b/apps/android_camera/app/src/main/java/org/apache/tvm/android/androidcamerademo/Camera2BasicFragment.java index 3e4a39a72693..e02b703bd683 100644 --- a/apps/android_camera/app/src/main/java/org/apache/tvm/android/androidcamerademo/Camera2BasicFragment.java +++ b/apps/android_camera/app/src/main/java/org/apache/tvm/android/androidcamerademo/Camera2BasicFragment.java @@ -516,7 +516,7 @@ private void setInputName(String modelName) { } /* - Load precompiled model on TVM graph runtime and init the system. + Load precompiled model on TVM graph executor and init the system. */ private class LoadModelAsyncTask extends AsyncTask { @@ -581,11 +581,11 @@ protected Integer doInBackground(Void... args) { Module modelLib = Module.load(libCacheFilePath); - // get global function module for graph runtime - Log.i(TAG, "getting graph runtime create handle..."); + // get global function module for graph executor + Log.i(TAG, "getting graph executor create handle..."); Function runtimeCreFun = Function.getFunction("tvm.graph_executor.create"); - Log.i(TAG, "creating graph runtime..."); + Log.i(TAG, "creating graph executor..."); Log.i(TAG, "device type: " + tvmDev.deviceType); Log.i(TAG, "device id: " + tvmDev.deviceId); @@ -598,7 +598,7 @@ protected Integer doInBackground(Void... args) { Log.i(TAG, "as module..."); graphRuntimeModule = runtimeCreFunRes.asModule(); - Log.i(TAG, "getting graph runtime load params handle..."); + Log.i(TAG, "getting graph executor load params handle..."); // get the function from the module(load parameters) Function loadParamFunc = graphRuntimeModule.getFunction("load_params"); Log.i(TAG, "loading params..."); diff --git a/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java b/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java index 79ed20cdc920..6a559557e22e 100644 --- a/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java +++ b/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java @@ -119,7 +119,7 @@ public void onClick(View v) { } /* - Load precompiled model on TVM graph runtime and init the system. 
+ Load precompiled model on TVM graph executor and init the system. */ private class LoadModleAsyncTask extends AsyncTask { ProgressDialog dialog = new ProgressDialog(MainActivity.this); @@ -183,7 +183,7 @@ protected Integer doInBackground(Void... args) { // tvm module for compiled functions Module modelLib = Module.load(libCacheFilePath); - // get global function module for graph runtime + // get global function module for graph executor Function runtimeCreFun = Function.getFunction("tvm.graph_executor.create"); TVMValue runtimeCreFunRes = runtimeCreFun.pushArg(modelGraph) .pushArg(modelLib) @@ -224,7 +224,7 @@ protected void onPostExecute(Integer status) { } /* - Execute prediction for processed decode input bitmap image content on TVM graph runtime. + Execute prediction for processed decode input bitmap image content on TVM graph executor. */ private class ModelRunAsyncTask extends AsyncTask { ProgressDialog dialog = new ProgressDialog(MainActivity.this); diff --git a/apps/bundle_deploy/README.md b/apps/bundle_deploy/README.md index 96b3d0f4edc5..619a2d7d05cc 100644 --- a/apps/bundle_deploy/README.md +++ b/apps/bundle_deploy/README.md @@ -49,7 +49,7 @@ This will: - Build a `bundle.so` shared object containing the model specification and parameters - Build a `demo_dynamic` executable that `dlopen`'s `bundle.so` (or `bundle_c.so` in - terms of the MISRA-C runtime), instantiates the contained graph runtime, + terms of the MISRA-C runtime), instantiates the contained graph executor, and invokes the `GraphExecutor::Run` function on a cat image, then prints the output results. diff --git a/apps/howto_deploy/cpp_deploy.cc b/apps/howto_deploy/cpp_deploy.cc index 4c828c30ea03..8500ddb5fabe 100644 --- a/apps/howto_deploy/cpp_deploy.cc +++ b/apps/howto_deploy/cpp_deploy.cc @@ -84,11 +84,11 @@ void DeploySingleOp() { } void DeployGraphExecutor() { - LOG(INFO) << "Running graph runtime..."; + LOG(INFO) << "Running graph executor..."; // load in the library DLDevice dev{kDLCPU, 0}; tvm::runtime::Module mod_factory = tvm::runtime::Module::LoadFromFile("lib/test_relay_add.so"); - // create the graph runtime module + // create the graph executor module tvm::runtime::Module gmod = mod_factory.GetFunction("default")(dev); tvm::runtime::PackedFunc set_input = gmod.GetFunction("set_input"); tvm::runtime::PackedFunc get_output = gmod.GetFunction("get_output"); diff --git a/apps/howto_deploy/tvm_runtime_pack.cc b/apps/howto_deploy/tvm_runtime_pack.cc index 9f1f2fda1e5a..c8778a380233 100644 --- a/apps/howto_deploy/tvm_runtime_pack.cc +++ b/apps/howto_deploy/tvm_runtime_pack.cc @@ -58,7 +58,7 @@ #include "../../src/runtime/dso_library.cc" #include "../../src/runtime/system_library.cc" -// Graph runtime +// Graph executor #include "../../src/runtime/graph_executor/graph_executor.cc" #include "../../src/runtime/graph_executor/graph_executor_factory.cc" diff --git a/apps/ios_rpc/tvmrpc/TVMRuntime.mm b/apps/ios_rpc/tvmrpc/TVMRuntime.mm index 3429e3d85e56..7ab9a4d2d219 100644 --- a/apps/ios_rpc/tvmrpc/TVMRuntime.mm +++ b/apps/ios_rpc/tvmrpc/TVMRuntime.mm @@ -45,7 +45,7 @@ #include "../../../src/runtime/rpc/rpc_server_env.cc" #include "../../../src/runtime/rpc/rpc_session.cc" #include "../../../src/runtime/rpc/rpc_socket_impl.cc" -// Graph runtime +// Graph executor #include "../../../src/runtime/graph_executor/graph_executor.cc" // Metal #include "../../../src/runtime/metal/metal_device_api.mm" diff --git a/cmake/config.cmake b/cmake/config.cmake index 98d1d9780103..5b907a1d4d87 100644 --- a/cmake/config.cmake +++ 
b/cmake/config.cmake @@ -96,13 +96,13 @@ set(USE_CPP_RPC OFF) # Whether embed stackvm into the runtime set(USE_STACKVM_RUNTIME OFF) -# Whether enable tiny embedded graph runtime. +# Whether enable tiny embedded graph executor. set(USE_GRAPH_RUNTIME ON) -# Whether enable tiny graph runtime with CUDA Graph +# Whether enable tiny graph executor with CUDA Graph set(USE_GRAPH_RUNTIME_CUDA_GRAPH OFF) -# Whether to enable the profiler for the graph runtime and vm +# Whether to enable the profiler for the graph executor and vm set(USE_PROFILER ON) # Whether enable uTVM standalone runtime diff --git a/cmake/modules/CUDA.cmake b/cmake/modules/CUDA.cmake index a0e05987f92a..9b48bec510f4 100644 --- a/cmake/modules/CUDA.cmake +++ b/cmake/modules/CUDA.cmake @@ -67,12 +67,12 @@ if(USE_CUDA) if(USE_GRAPH_RUNTIME_CUDA_GRAPH) if(NOT USE_GRAPH_RUNTIME) - message(FATAL_ERROR "CUDA Graph is only supported by graph runtime, please set USE_GRAPH_RUNTIME=ON") + message(FATAL_ERROR "CUDA Graph is only supported by graph executor, please set USE_GRAPH_RUNTIME=ON") endif() if(CUDAToolkit_VERSION_MAJOR LESS "10") message(FATAL_ERROR "CUDA Graph requires CUDA 10 or above, got=" ${CUDAToolkit_VERSION}) endif() - message(STATUS "Build with Graph runtime with CUDA Graph support...") + message(STATUS "Build with Graph executor with CUDA Graph support...") file(GLOB RUNTIME_CUDA_GRAPH_SRCS src/runtime/graph_executor/cuda_graph/*.cc) list(APPEND RUNTIME_SRCS ${RUNTIME_CUDA_GRAPH_SRCS}) endif() diff --git a/cmake/modules/contrib/ArmComputeLib.cmake b/cmake/modules/contrib/ArmComputeLib.cmake index ba082505125b..3c9a50002c41 100644 --- a/cmake/modules/contrib/ArmComputeLib.cmake +++ b/cmake/modules/contrib/ArmComputeLib.cmake @@ -60,11 +60,11 @@ if(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME) list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_CORE_LIB}) list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_GRAPH_LIB}) list(APPEND RUNTIME_SRCS ${ACL_CONTRIB_SRC}) - message(STATUS "Build with Arm Compute Library graph runtime support: " + message(STATUS "Build with Arm Compute Library graph executor support: " ${EXTERN_ACL_COMPUTE_LIB} ", \n" ${EXTERN_ACL_COMPUTE_CORE_LIB} ", \n" ${EXTERN_ACL_COMPUTE_GRAPH_LIB}) - # Set flag to detect ACL graph runtime support. + # Set flag to detect ACL graph executor support. add_definitions(-DTVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB) endif() diff --git a/docs/deploy/arm_compute_lib.rst b/docs/deploy/arm_compute_lib.rst index a736fee4e430..1f9bc578f070 100644 --- a/docs/deploy/arm_compute_lib.rst +++ b/docs/deploy/arm_compute_lib.rst @@ -64,7 +64,7 @@ because ACL cannot be used on an x86 machine. However, we still want to be able runtime module on an x86 machine. * USE_ARM_COMPUTE_LIB=ON/OFF - Enabling this flag will add support for compiling an ACL runtime module. -* USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON/OFF/path-to-acl - Enabling this flag will allow the graph runtime to +* USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON/OFF/path-to-acl - Enabling this flag will allow the graph executor to compute the ACL offloaded functions. These flags can be used in different scenarios depending on your setup. For example, if you want diff --git a/docs/dev/index.rst b/docs/dev/index.rst index 7ceed646087f..c297d32923fe 100644 --- a/docs/dev/index.rst +++ b/docs/dev/index.rst @@ -94,7 +94,7 @@ This process helps us to divide the original problem into two sub-problems: We use the low-level tir phase to compile and optimize each sub-functions. 
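The renamed build flags above determine which executor variants get compiled into `libtvm`. A quick way to check a given build from Python is to probe the registered creator functions; a minimal sketch, assuming the post-rename registry names (the debug and CUDA Graph entries are inferred from this patch's naming scheme and may differ):

```python
import tvm

# Each creator function is registered only when the matching CMake flag was ON.
creators = {
    "USE_GRAPH_RUNTIME": "tvm.graph_executor.create",
    "USE_GRAPH_RUNTIME_DEBUG": "tvm.graph_executor_debug.create",
    "USE_GRAPH_RUNTIME_CUDA_GRAPH": "tvm.graph_executor_cuda_graph.create",
}
for flag, fname in creators.items():
    present = tvm.get_global_func(fname, allow_missing=True) is not None
    print("%-30s -> %s" % (flag, "ON" if present else "OFF"))
```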
For specific targets, we may also directly go to the target translation phase and use external code generators. -There are a few different ways(in relay/backend) to handle the calls into the overall execution problem. For simple models with known shapes and no control flow, we can lower to a graph runtime that stores the execution structure in a graph. We also support a virtual machine backend for dynamic executions. Finally, we plan to support ahead of time compilation that compiles the high-level execution structure into the executable and generated primitive functions. All of these execution modes are encapsulated by a unified **runtime.Module** interface, which we will discuss in the latter part of the guide. +There are a few different ways(in relay/backend) to handle the calls into the overall execution problem. For simple models with known shapes and no control flow, we can lower to a graph executor that stores the execution structure in a graph. We also support a virtual machine backend for dynamic executions. Finally, we plan to support ahead of time compilation that compiles the high-level execution structure into the executable and generated primitive functions. All of these execution modes are encapsulated by a unified **runtime.Module** interface, which we will discuss in the latter part of the guide. **tir/transform** contains transformation passes for TIR level functions. Many tir passes serve the purpose of lowering. For example, there are passes to flatten multi-dimensional access to one-dimensional pointer access, to expand the intrinsics into target-specific ones, and to decorate the function entry to meet the runtime calling convention. Of course, there are also optimizations passes, such as access index simplification and dead code elimination. diff --git a/docs/dev/microtvm_design.rst b/docs/dev/microtvm_design.rst index ecbb042f90cc..885ef2c8fc0d 100644 --- a/docs/dev/microtvm_design.rst +++ b/docs/dev/microtvm_design.rst @@ -68,7 +68,7 @@ The parts of this process are described below: #. **Deployment**. The project is built and the residual firmware binary is flashed onto the device. Model inference is driven either by TVM using an on-device RPC server, or on the device using the - on-device Graph Runtime. + on-device Graph Executor. Design Goals ============ @@ -189,14 +189,14 @@ The TVM compiler traditionally outputs three pieces: 2. A model execution graph, encoded as JSON; and 3. Simplified parameters. -To correctly execute the model, a Graph Runtime needs to reconstruct the graph in memory, load the +To correctly execute the model, a Graph Executor needs to reconstruct the graph in memory, load the parameters, and then invoke the operator implementations in the correct order. microTVM supports two ways to do this: -1. **Host-Driven**. The Graph Runtime can run on the host and carry out execution by issuing +1. **Host-Driven**. The Graph Executor can run on the host and carry out execution by issuing commands to the device using an RPC link with a UART-like transport. -2. **Standalone**. A C Graph Runtime is available to be compiled on-device, but it is not +2. **Standalone**. A C Graph Executor is available to be compiled on-device, but it is not particularly memory efficient. This way enables standalone execution without any attached host. 
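In the host-driven mode described above, the graph executor object lives on the host and each operator call crosses the RPC transport. A rough sketch of that flow, assuming an already-built `micro_binary` and `flasher`, and a session exposing `get_system_lib()` and a device handle (named `context` in older releases):

```python
import tvm.micro

# Host-driven execution: the graph executor runs on the host and issues
# operator calls to the device over the micro RPC link.
with tvm.micro.Session(binary=micro_binary, flasher=flasher) as session:
    graph_mod = tvm.micro.create_local_graph_executor(
        graph_json, session.get_system_lib(), session.device
    )
    graph_mod.set_input("data", input_data)
    graph_mod.run()
    result = graph_mod.get_output(0).asnumpy()
```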
Host-Driven is designed for experimenting with models on-device and, like AutoTVM, uses the RPC server to @@ -323,11 +323,11 @@ Future Work Ahead-of-Time Runtime ---------------------- -A limitation of the Graph Runtime is the amount of memory overhead required in parsing the JSON. +A limitation of the Graph Executor is the amount of memory overhead required in parsing the JSON. The current implementation contributes significantly to the dynamic memory usage of microTVM, limiting its utility. An ahead-of-time runtime can avoid the need for any Graph JSON parsing and improve inference speed by generating C code to call the generated operator implementations directly -rather than relying on a data-driven approach with the Graph Runtime. +rather than relying on a data-driven approach with the Graph Executor. Memory Planning ---------------- diff --git a/docs/dev/virtual_machine.rst b/docs/dev/virtual_machine.rst index 9081d50b92ef..7826f68b71dd 100644 --- a/docs/dev/virtual_machine.rst +++ b/docs/dev/virtual_machine.rst @@ -32,9 +32,9 @@ There are further challenges in compiling dynamic code, such as dynamic scheduli fully dynamic tensor shapes, and control flow. The interpreter offers simple solutions for these, but none is sufficiently compelling or optimized. -The second execution mechanism is the existing graph runtime. In order to target Relay +The second execution mechanism is the existing graph executor. In order to target Relay programs to this, we compile a small subset of them to the old graph format and execute -them on the runtime. Graph runtime provides a fast execution experience but only for a very limited +them on the runtime. Graph executor provides a fast execution experience but only for a very limited subset of Relay programs. An alternative but not-standard approach is Relay's ahead-of-time compiler, @@ -64,7 +64,7 @@ micro-optimizations present in scalar VMs are dramatically less important. TVM has provided strong support for vision models, but we want to grow to support a wider variety of models. -The graph runtime is able to utilize the fully static nature of the input graphs to perform +The graph executor is able to utilize the fully static nature of the input graphs to perform aggressive optimization such as fully static allocation, and optimal memory reuse. When we introduce models which make use of control flow, recursion, dynamic shapes, and dynamic allocation, we must change how execution works. A virtual machine for Relay is a natural choice. @@ -354,7 +354,7 @@ Serialization Serializing and deserializing the executable generated by the Relay VM compiler is a must as we may want to save the model to the disk and perform inference later. Previously, Relay has produced -a serialized form in a json file for the graph runtime. However, the same format is not directly +a serialized form in a json file for the graph executor. However, the same format is not directly applicable to the VM as it emits bytecode instead of graph-style programs. Serialization of an executable essentially needs to handle both model specific (i.e. weights and kernels) and VM related (i.e. bytecode and global function names) data. @@ -376,7 +376,7 @@ components in a binary format that is organized with the following sections in o - Code section. The VM functions, including bytecode, are sitting in this section. The dispatching loop iterates through this section to fetch instructions for execution. 
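The trade-off this document describes (graph executor for static graphs, VM for dynamic programs) is visible directly in `relay.create_executor`. A small sketch, assuming a static-shape `mod` and a matching `inputs` dict:

```python
import tvm
from tvm import relay

# The same module can be run by the interpreter ("debug"), the graph
# executor ("graph"), or the virtual machine ("vm"). The "graph" kind
# requires fully static shapes; "vm" also handles dynamic ones.
for kind in ["debug", "graph", "vm"]:
    ex = relay.create_executor(kind, mod=mod, device=tvm.cpu(0), target="llvm")
    print(kind, ex.evaluate()(**inputs))
```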
-Hence, unlike the graph runtime artifact that contains weight (.params), graph json (.json), +Hence, unlike the graph executor artifact that contains weight (.params), graph json (.json), and compiled kernel library (.so), the serialized executable artifact is composed of the Relay object file (.ro) and the compiled kernel library (.so). diff --git a/docs/install/from_source.rst b/docs/install/from_source.rst index f6be4e31af90..ef6ac9e84c74 100644 --- a/docs/install/from_source.rst +++ b/docs/install/from_source.rst @@ -88,7 +88,7 @@ The configuration of TVM can be modified by `config.cmake`. - On macOS, for some versions of Xcode, you need to add ``-lc++abi`` in the LDFLAGS or you'll get link errors. - Change ``set(USE_CUDA OFF)`` to ``set(USE_CUDA ON)`` to enable CUDA backend. Do the same for other backends and libraries you want to build for (OpenCL, RCOM, METAL, VULKAN, ...). - - To help with debugging, ensure the embedded graph runtime and debugging functions are enabled with ``set(USE_GRAPH_RUNTIME ON)`` and ``set(USE_GRAPH_RUNTIME_DEBUG ON)`` + - To help with debugging, ensure the embedded graph executor and debugging functions are enabled with ``set(USE_GRAPH_RUNTIME ON)`` and ``set(USE_GRAPH_RUNTIME_DEBUG ON)`` - TVM requires LLVM for for CPU codegen. We highly recommend you to build with the LLVM support on. diff --git a/golang/sample/complex.go b/golang/sample/complex.go index a2e25824bb8f..911d0a7a28c1 100644 --- a/golang/sample/complex.go +++ b/golang/sample/complex.go @@ -84,7 +84,7 @@ func main() { return } graphmod := graphrt.AsModule() - fmt.Printf("Graph runtime Created\n") + fmt.Printf("Graph executor Created\n") // Array allocation attributes tshapeIn := []int64{1, 224, 224, 3} @@ -105,7 +105,7 @@ func main() { } fmt.Printf("Input and Output Arrays allocated\n") - // Get module function from graph runtime : load_params + // Get module function from graph executor : load_params // Read params bytes, err = ioutil.ReadFile(modParams) if err != nil { diff --git a/golang/src/tvm_runtime_pack.cc b/golang/src/tvm_runtime_pack.cc index 6a4bd872970b..430e046e39a8 100644 --- a/golang/src/tvm_runtime_pack.cc +++ b/golang/src/tvm_runtime_pack.cc @@ -42,7 +42,7 @@ #include "src/runtime/dso_library.cc" #include "src/runtime/system_library.cc" -// Graph runtime +// Graph executor #include "src/runtime/graph_executor/graph_executor.cc" // Uncomment the following lines to enable RPC diff --git a/include/tvm/runtime/crt/error_codes.h b/include/tvm/runtime/crt/error_codes.h index de4c13514388..4cbfb0aab8e2 100644 --- a/include/tvm/runtime/crt/error_codes.h +++ b/include/tvm/runtime/crt/error_codes.h @@ -83,7 +83,7 @@ typedef enum { // Common error codes returned from generated functions. kTvmErrorGeneratedInvalidStorageId = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGenerated, 0), - // Graph runtime + // Graph executor kTvmErrorGraphModuleAlreadyCreated = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphExecutor, 0), kTvmErrorGraphModuleBadContext = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphExecutor, 1), kTvmErrorGraphModuleNoSuchInput = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphExecutor, 2), diff --git a/include/tvm/runtime/crt/graph_executor.h b/include/tvm/runtime/crt/graph_executor.h index 62166ffe5e0a..5bb3dd4e16a9 100644 --- a/include/tvm/runtime/crt/graph_executor.h +++ b/include/tvm/runtime/crt/graph_executor.h @@ -19,7 +19,7 @@ /*! * \file graph_executor.h - * \brief Tiny graph runtime that can run graph containing only tvm PackedFunc. 
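For reference, the three graph-executor artifacts contrasted above (.params, .json, .so) no longer need to be shipped separately: the factory module returned by `relay.build` bundles them into one exportable library. A hedged sketch, assuming `mod` and `params` are in scope:

```python
import tvm
from tvm import relay
from tvm.contrib import graph_executor

# relay.build returns a factory module bundling graph JSON, params, and
# compiled kernels into a single artifact.
lib = relay.build(mod, target="llvm", params=params)
lib.export_library("compiled_lib.so")

# Reload the bundle and instantiate the graph executor from it.
loaded = tvm.runtime.load_module("compiled_lib.so")
gmod = graph_executor.GraphModule(loaded["default"](tvm.cpu(0)))
gmod.run()
```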
+ * \brief Tiny graph executor that can run graph containing only tvm PackedFunc. */ #ifndef TVM_RUNTIME_CRT_GRAPH_RUNTIME_H_ #define TVM_RUNTIME_CRT_GRAPH_RUNTIME_H_ @@ -79,7 +79,7 @@ int TVMGraphExecutor_GetNumInputs(); /*! * \brief set input to the graph based on name. - * \param runtime The graph runtime. + * \param runtime The graph executor. * \param name The name of the input. * \param data_in The input data. */ @@ -93,7 +93,7 @@ int TVMGraphExecutor_GetNumOutputs(); /*! * \brief Return NDArray for given output index. - * \param runtime The graph runtime. + * \param runtime The graph executor. * \param index The output index. * \param out The DLTensor corresponding to given output node index. * \return The result of this function execution. @@ -102,7 +102,7 @@ int TVMGraphExecutor_GetOutput(TVMGraphExecutor* runtime, const int32_t index, D /*! * \brief Load parameters from parameter blob. - * \param runtime The graph runtime. + * \param runtime The graph executor. * \param param_blob A binary blob of parameter. * \param param_size The parameter size. * \return The result of this function execution. @@ -112,13 +112,13 @@ int TVMGraphExecutor_LoadParams(TVMGraphExecutor* runtime, const char* param_blo /*! * \brief Execute the graph. - * \param runtime The graph runtime. + * \param runtime The graph executor. */ void TVMGraphExecutor_Run(TVMGraphExecutor* runtime); /*! - * \brief Release memory associated with the graph runtime. - * \param runtime Pointer to graph runtime. + * \brief Release memory associated with the graph executor. + * \param runtime Pointer to graph executor. * \return 0 if successful */ int TVMGraphExecutor_Release(TVMGraphExecutor** runtime); diff --git a/include/tvm/runtime/crt/graph_executor_module.h b/include/tvm/runtime/crt/graph_executor_module.h index 24d5b59be1fe..7bc881354d80 100644 --- a/include/tvm/runtime/crt/graph_executor_module.h +++ b/include/tvm/runtime/crt/graph_executor_module.h @@ -19,7 +19,7 @@ /*! * \file graph_executor.h - * \brief Tiny graph runtime that can run graph containing only tvm PackedFunc. + * \brief Tiny graph executor that can run graph containing only tvm PackedFunc. */ #ifndef TVM_RUNTIME_CRT_GRAPH_RUNTIME_MODULE_H_ #define TVM_RUNTIME_CRT_GRAPH_RUNTIME_MODULE_H_ diff --git a/python/tvm/contrib/cuda_graph/cuda_graph_executor.py b/python/tvm/contrib/cuda_graph/cuda_graph_executor.py index 757ce7483e7f..712c9ea2e9ef 100644 --- a/python/tvm/contrib/cuda_graph/cuda_graph_executor.py +++ b/python/tvm/contrib/cuda_graph/cuda_graph_executor.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Graph runtime with CUDA Graph""" +"""Graph executor with CUDA Graph""" import tvm._ffi from tvm._ffi.base import string_types @@ -40,7 +40,7 @@ def create(graph_json_str, libmod, device): Returns ------- graph_module : GraphModuleCudaGraph - CUDA graph runtime module that can be used to execute the graph. + CUDA graph executor module that can be used to execute the graph. Note ---- @@ -65,9 +65,9 @@ def create(graph_json_str, libmod, device): class GraphModuleCudaGraph(graph_executor.GraphModule): - """CUDA graph runtime module. + """CUDA graph executor module. - This is a CUDA graph runtime wrapper over the TVM runtime. + This is a CUDA graph executor wrapper over the TVM runtime. Runtime interfaces are wrapped with CUDA graph functionalities. 
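As a usage sketch of the CUDA Graph wrapper documented above: `create` mirrors the plain executor's signature, and the first `run` captures a CUDA graph that subsequent runs replay. Device and input names here are illustrative, and the build needs USE_GRAPH_RUNTIME_CUDA_GRAPH=ON with CUDA 10 or above:

```python
import tvm
from tvm.contrib.cuda_graph import cuda_graph_executor

dev = tvm.gpu(0)
gmod = cuda_graph_executor.create(graph_json_str, lib, dev)
gmod.set_input("data", data)
gmod.run()   # first call captures the CUDA graph
gmod.run()   # later calls replay the captured graph
out = gmod.get_output(0).asnumpy()
```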
Parameters @@ -99,13 +99,13 @@ def run_cuda_graph(self): """Run the CUDA graph for tvm_op graph Run the captured CUDA graph instance instead of the - for-loop kernel launch of default graph runtime + for-loop kernel launch of default graph executor """ self._run_cuda_graph() def run(self, **input_dict): """A run wrapper for graph capture / launch, user can just - change default graph runtime to cuda graph runtime, and + change default graph executor to cuda graph executor, and the first call will capture a cuda graph for future launch Parameters diff --git a/python/tvm/contrib/graph_executor.py b/python/tvm/contrib/graph_executor.py index eeb0347e3efd..f9d54b883497 100644 --- a/python/tvm/contrib/graph_executor.py +++ b/python/tvm/contrib/graph_executor.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Minimum graph runtime that executes graph containing TVM PackedFunc.""" +"""Minimum graph executor that executes graph containing TVM PackedFunc.""" import numpy as np import tvm._ffi @@ -137,7 +137,7 @@ class GraphModule(object): from tvm import relay from tvm.contrib import graph_executor - # build the library using graph runtime + # build the library using graph executor lib = relay.build(...) lib.export_library("compiled_lib.so") # load it back as a runtime diff --git a/python/tvm/driver/tvmc/compiler.py b/python/tvm/driver/tvmc/compiler.py index 83791e50f6d5..5bdb578f2c16 100644 --- a/python/tvm/driver/tvmc/compiler.py +++ b/python/tvm/driver/tvmc/compiler.py @@ -143,7 +143,7 @@ def compile_model( This function takes a union of the arguments of both frontends.load_model and compiler.compile_relay. The resulting TVM module can be executed using - the graph runtime. + the graph executor. Parameters ---------- diff --git a/python/tvm/driver/tvmc/runner.py b/python/tvm/driver/tvmc/runner.py index 9f2688fbb335..9fcc49ac6471 100644 --- a/python/tvm/driver/tvmc/runner.py +++ b/python/tvm/driver/tvmc/runner.py @@ -77,7 +77,7 @@ def add_run_parser(subparsers): "--profile", action="store_true", help="generate profiling data from the runtime execution. " - "Using --profile requires the Graph Runtime Debug enabled on TVM. " + "Using --profile requires the Graph Executor Debug enabled on TVM. " "Profiling may also have an impact on inference time, " "making it take longer to be generated.", ) @@ -296,7 +296,7 @@ def run_module( repeat=1, profile=False, ): - """Run a compiled graph runtime module locally or remotely with + """Run a compiled graph executor module locally or remotely with optional input values. If input tensors are not specified explicitly, they can be filled diff --git a/python/tvm/micro/session.py b/python/tvm/micro/session.py index ea8f4af9c878..9573db2095bb 100644 --- a/python/tvm/micro/session.py +++ b/python/tvm/micro/session.py @@ -193,7 +193,7 @@ def lookup_remote_linked_param(mod, storage_id, template_tensor, device): def create_local_graph_executor(graph_json_str, mod, device): - """Create a local graph runtime driving execution on the remote CPU device given. + """Create a local graph executor driving execution on the remote CPU device given. Parameters ---------- @@ -209,7 +209,7 @@ def create_local_graph_executor(graph_json_str, mod, device): Returns ------- tvm.contrib.GraphExecutor : - A local graph runtime instance that executes on the remote device. + A local graph executor instance that executes on the remote device. 
""" device_type_id = [device.device_type, device.device_id] fcreate = get_global_func("tvm.graph_executor.create") @@ -238,7 +238,7 @@ def create_local_debug_runtime(graph_json_str, mod, device, dump_root=None): Returns ------- tvm.contrib.GraphExecutor : - A local graph runtime instance that executes on the remote device. + A local graph executor instance that executes on the remote device. """ device_type_id = [device.device_type, device.device_id] fcreate = get_global_func("tvm.graph_executor_debug.create") diff --git a/python/tvm/relay/analysis/analysis.py b/python/tvm/relay/analysis/analysis.py index 3928f77b2607..661d7523ad77 100644 --- a/python/tvm/relay/analysis/analysis.py +++ b/python/tvm/relay/analysis/analysis.py @@ -405,7 +405,7 @@ def search_fc_transpose(expr): def get_calibration_data(mod, data): """Get the calibration data of a given relay graph - This pass uses the graph runtime to get the calibration data of a module, which + This pass uses the graph executor to get the calibration data of a module, which includes the input and output values of each function. The returned data uses the GlobalVar of each function as a key. Users can further access the inputs and outputs by using `inputs` or `outputs` as the key. diff --git a/python/tvm/relay/backend/graph_executor_codegen.py b/python/tvm/relay/backend/graph_executor_codegen.py index 9ec33a4807f5..f24bf2c2b55b 100644 --- a/python/tvm/relay/backend/graph_executor_codegen.py +++ b/python/tvm/relay/backend/graph_executor_codegen.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. """ -A compiler from a Relay expression to TVM's graph runtime. +A compiler from a Relay expression to TVM's graph executor. The compiler is built from a few pieces. @@ -29,7 +29,7 @@ graph langauge is composed of Node, NodeRef, InputNode, OpNode. This "little language" represents programs in TVM's graph format. -To connect to the graph runtime, we use a printer that converts our graph format +To connect to the graph executor, we use a printer that converts our graph format into TVM's JSON format. The resulting string can be loaded by contrib.graph_executor or any other TVM runtime compatible systems. """ diff --git a/python/tvm/relay/backend/graph_executor_factory.py b/python/tvm/relay/backend/graph_executor_factory.py index 5356af946de4..d6959d22e5c8 100644 --- a/python/tvm/relay/backend/graph_executor_factory.py +++ b/python/tvm/relay/backend/graph_executor_factory.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Graph runtime factory.""" +"""Graph executor factory.""" import warnings from ..._ffi.base import string_types from ..._ffi.registry import get_global_func @@ -22,8 +22,8 @@ class GraphExecutorFactoryModule: - """Graph runtime factory module. - This is a module of graph runtime factory + """Graph executor factory module. + This is a module of graph executor factory Parameters ---------- @@ -77,7 +77,7 @@ def __getitem__(self, item): def __iter__(self): warnings.warn( - "legacy graph runtime behavior of producing json / lib / params will be " + "legacy graph executor behavior of producing json / lib / params will be " "removed in the next release." 
" Please see documents of tvm.contrib.graph_executor.GraphModule for the " " new recommended usage.", diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py index 9e81240b2ffe..3981a31900b5 100644 --- a/python/tvm/relay/build_module.py +++ b/python/tvm/relay/build_module.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. """ -Construct the necessary state for the TVM graph runtime +Construct the necessary state for the TVM graph executor from a Relay expression. """ import warnings @@ -70,7 +70,7 @@ def _convert_param_map(params): class BuildModule(object): - """Build an IR module to run on TVM graph runtime. This class is used + """Build an IR module to run on TVM graph executor. This class is used to expose the `RelayBuildModule` APIs implemented in C++. """ @@ -111,7 +111,7 @@ def build(self, mod, target=None, target_host=None, params=None): Returns ------- factory_module : tvm.relay.backend.graph_executor_factory.GraphExecutorFactoryModule - The runtime factory for the TVM graph runtime. + The runtime factory for the TVM graph executor. """ target = _update_target(target) @@ -211,7 +211,7 @@ def _build_module_no_factory(mod, target=None, target_host=None, params=None, mo def build(ir_mod, target=None, target_host=None, params=None, mod_name="default"): # fmt: off # pylint: disable=line-too-long - """Helper function that builds a Relay function to run on TVM graph runtime. + """Helper function that builds a Relay function to run on TVM graph executor. Parameters ---------- @@ -241,7 +241,7 @@ def build(ir_mod, target=None, target_host=None, params=None, mod_name="default" Returns ------- graph_json : str - The json string that can be accepted by graph runtime. + The json string that can be accepted by graph executor. mod : tvm.Module The module containing necessary libraries. @@ -392,7 +392,7 @@ def _make_executor(self, expr=None): self.mod = InferType()(self.mod) ret_type = self.mod["main"].checked_type.ret_type if _ty.is_dynamic(ret_type): - raise ValueError("Graph Runtime only supports static graphs, got output type", ret_type) + raise ValueError("Graph Executor only supports static graphs, got output type", ret_type) mod = build(self.mod, target=self.target) gmodule = _graph_rt.GraphModule(mod["default"](self.device)) @@ -444,7 +444,7 @@ def create_executor(kind="debug", mod=None, device=None, target="llvm"): ---------- kind : str The type of executor. Avaliable options are `debug` for the - interpreter, `graph` for the graph runtime, and `vm` for the virtual + interpreter, `graph` for the graph executor, and `vm` for the virtual machine. mod : :py:class:`~tvm.IRModule` diff --git a/python/tvm/relay/op/contrib/arm_compute_lib.py b/python/tvm/relay/op/contrib/arm_compute_lib.py index fabb639845b6..17fdbf941e08 100644 --- a/python/tvm/relay/op/contrib/arm_compute_lib.py +++ b/python/tvm/relay/op/contrib/arm_compute_lib.py @@ -30,7 +30,7 @@ def is_arm_compute_runtime_enabled(): - """Check if the ACL graph runtime is present. + """Check if the ACL graph executor is present. Returns ------- diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index afdea9712342..a36b66c8f0dd 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -29,7 +29,7 @@ def is_tensorrt_runtime_enabled(): - """Check if the TensorRT graph runtime is present. + """Check if the TensorRT graph executor is present. 
Returns ------- ret: bool diff --git a/rust/tvm-graph-rt/Cargo.toml b/rust/tvm-graph-rt/Cargo.toml index 13837f62695d..5c492393a75e 100644 --- a/rust/tvm-graph-rt/Cargo.toml +++ b/rust/tvm-graph-rt/Cargo.toml @@ -19,7 +19,7 @@ name = "tvm-graph-rt" version = "0.1.0" license = "Apache-2.0" -description = "A static graph runtime for TVM." +description = "A static graph executor for TVM." repository = "https://github.com/apache/tvm" readme = "README.md" keywords = ["tvm"] diff --git a/rust/tvm/README.md b/rust/tvm/README.md index 75fabe7d9a1b..b518f93195b7 100644 --- a/rust/tvm/README.md +++ b/rust/tvm/README.md @@ -37,7 +37,7 @@ The Rust bindings are composed of a few crates: - The [tvm_rt](https://tvm.apache.org/docs/api/rust/tvm_rt/index.html) crate which exposes Rust bindings to the TVM runtime APIs. - The [tvm_sys] crate which provides raw bindings and linkage to the TVM C++ library. -- The [tvm_graph_rt] crate which implements a version of the TVM graph runtime in Rust vs. C++. +- The [tvm_graph_rt] crate which implements a version of the TVM graph executor in Rust vs. C++. These crates have been recently refactored and reflect a much different philosophy than previous bindings, as well as much increased support for more of the TVM API including diff --git a/rust/tvm/examples/resnet/src/build_resnet.py b/rust/tvm/examples/resnet/src/build_resnet.py index 28b5807e3ea1..13c499b54deb 100644 --- a/rust/tvm/examples/resnet/src/build_resnet.py +++ b/rust/tvm/examples/resnet/src/build_resnet.py @@ -151,7 +151,7 @@ def test_build(build_dir): if __name__ == "__main__": - logger.info("Compiling the model to graph runtime.") + logger.info("Compiling the model to graph executor.") build(build_dir) logger.info("Testing the model's predication on test data.") test_build(build_dir) diff --git a/rust/tvm/src/lib.rs b/rust/tvm/src/lib.rs index 047b2cf220e0..81abe338bd1b 100644 --- a/rust/tvm/src/lib.rs +++ b/rust/tvm/src/lib.rs @@ -24,7 +24,7 @@ //! One particular use case is that given optimized deep learning model artifacts, //! (compiled with TVM) which include a shared library //! `lib.so`, `graph.json` and a byte-array `param.params`, one can load them -//! in Rust idiomatically to create a TVM Graph Runtime and +//! in Rust idiomatically to create a TVM Graph Executor and //! run the model for some inputs and get the //! desired predictions *all in Rust*. //! diff --git a/rust/tvm/src/runtime/graph_rt.rs b/rust/tvm/src/runtime/graph_rt.rs index 7e1e6ed2fa8b..421a00386cf5 100644 --- a/rust/tvm/src/runtime/graph_rt.rs +++ b/rust/tvm/src/runtime/graph_rt.rs @@ -22,19 +22,19 @@ use std::convert::TryInto; use crate::runtime::Function; use crate::{runtime::function::Result, runtime::ByteArray, Device, Module, NDArray}; -/// An instance of the C++ graph runtime. +/// An instance of the C++ graph executor. /// /// An efficient and light weight runtime for static deep learning models. pub struct GraphRt { - /// The backing graph runtime module which exposes a set of packed functions + /// The backing graph executor module which exposes a set of packed functions /// which can be invoked by a client. /// - /// In the graph runtime module, it exposes create, load_params, set_input, get_output, and run. + /// In the graph executor module, it exposes create, load_params, set_input, get_output, and run. module: Module, } impl GraphRt { - /// Create a graph runtime directly from a runtime module. + /// Create a graph executor directly from a runtime module. 
pub fn from_module(module: Module, dev: Device) -> Result { let default: Box Result> = module.get_function("default", false)?.into(); @@ -44,7 +44,7 @@ impl GraphRt { }) } - /// Create a graph runtime from the deprecated graph, lib, dev triple. + /// Create a graph executor from the deprecated graph, lib, dev triple. pub fn create_from_parts(graph: &str, lib: Module, dev: Device) -> Result { let runtime_create_fn = Function::get("tvm.graph_executor.create").unwrap(); @@ -92,13 +92,13 @@ impl GraphRt { Ok(()) } - /// Extract the ith output from the graph runtime and returns it. + /// Extract the ith output from the graph executor and returns it. pub fn get_output(&mut self, i: i64) -> Result { let get_output_fn = self.module.get_function("get_output", false)?; get_output_fn.invoke(vec![i.into()])?.try_into() } - /// Extract the ith output from the graph runtime and write the results into output. + /// Extract the ith output from the graph executor and write the results into output. pub fn get_output_into(&mut self, i: i64, output: NDArray) -> Result<()> { let get_output_fn = self.module.get_function("get_output", false)?; get_output_fn.invoke(vec![i.into(), output.into()])?; diff --git a/src/relay/analysis/get_calibration_data.cc b/src/relay/analysis/get_calibration_data.cc index 70fe2a68f21e..12bab1e38ddd 100644 --- a/src/relay/analysis/get_calibration_data.cc +++ b/src/relay/analysis/get_calibration_data.cc @@ -36,7 +36,7 @@ namespace relay { /*! * \brief This function returns a module that will be used by - * the relay graph runtime for collecting the calibration data. + * the relay graph executor for collecting the calibration data. * To do that, we first make all inputs and outputs of each * function into the final output (i.e., the final output is a * tuple of tensors). Then, we change the compiler attribute of @@ -106,7 +106,7 @@ IRModule GetCalibrateModule(IRModule module) { } } } - // reset the attribute of functions for running graph runtime + // reset the attribute of functions for running graph executor for (const auto& pair : glob_funcs) { if (auto* fn = pair.second.as()) { auto func = GetRef(fn); diff --git a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc index be6162e3d58e..3995d5ab3568 100644 --- a/src/relay/backend/build_module.cc +++ b/src/relay/backend/build_module.cc @@ -19,7 +19,7 @@ /*! * \file relay/backend/build_module.cc - * \brief Code generation for TVM's graph runtime. + * \brief Code generation for TVM's graph executor. */ #include #include @@ -228,7 +228,7 @@ class RelayBuildModule : public runtime::ModuleNode { const char* type_key() const final { return "RelayBuildModule"; } /*! - * \brief Build relay IRModule for graph runtime + * \brief Build relay IRModule for graph executor * * \param mod Relay IRModule * \param target Target device diff --git a/src/relay/backend/compile_engine.cc b/src/relay/backend/compile_engine.cc index f492b70565ac..0777b19ec557 100644 --- a/src/relay/backend/compile_engine.cc +++ b/src/relay/backend/compile_engine.cc @@ -262,7 +262,7 @@ class ScheduleGetter : public backend::MemoizedExprTranslator> ICHECK(tuple_type) << "Expect output to be a tuple type"; ICHECK_EQ(tuple_type->fields.size(), outputs.size()); } - // Set the name to `__copy`. It will be detected in graph runtime to perform + // Set the name to `__copy`. It will be detected in graph executor to perform // data copy across devices. 
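The calibration pass above is driven from Python through `relay.analysis.get_calibration_data`; a hedged sketch following the patched docstring (results are keyed by each function's GlobalVar, with per-function `inputs` and `outputs` entries):

```python
from tvm.relay.analysis import get_calibration_data

# `mod` is a partitioned module; `data` maps input names to sample tensors.
calib = get_calibration_data(mod, data)
for gvar, record in calib.items():
    print(gvar, len(record["inputs"]), len(record["outputs"]))
```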
if (op == device_copy_op_) { readable_name_stream_.str(std::string()); diff --git a/src/relay/backend/contrib/arm_compute_lib/codegen.cc b/src/relay/backend/contrib/arm_compute_lib/codegen.cc index e0669ae64bdb..1e3f1ce1c525 100644 --- a/src/relay/backend/contrib/arm_compute_lib/codegen.cc +++ b/src/relay/backend/contrib/arm_compute_lib/codegen.cc @@ -376,9 +376,9 @@ runtime::Module ACLCompiler(const ObjectRef& ref) { TVM_REGISTER_GLOBAL("relay.ext.arm_compute_lib").set_body_typed(ACLCompiler); /*! - * \brief Check whether ACL graph runtime is used. + * \brief Check whether ACL graph executor is used. * - * \return True if ACL graph runtime is enabled, False if not. + * \return True if ACL graph executor is enabled, False if not. */ inline constexpr bool IsACLRuntimeEnabled() { #if TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB diff --git a/src/relay/backend/contrib/tensorrt/codegen.cc b/src/relay/backend/contrib/tensorrt/codegen.cc index 059dbc192a04..c453ff625ed7 100644 --- a/src/relay/backend/contrib/tensorrt/codegen.cc +++ b/src/relay/backend/contrib/tensorrt/codegen.cc @@ -217,7 +217,7 @@ runtime::Module TensorRTCompiler(const ObjectRef& ref) { TVM_REGISTER_GLOBAL("relay.ext.tensorrt").set_body_typed(TensorRTCompiler); /*! - * \brief Check whether TensorRT graph runtime is enabled. + * \brief Check whether TensorRT graph executor is enabled. * \return True if enabled, False if not. */ inline constexpr bool IsTensorRTRuntimeEnabled() { diff --git a/src/relay/backend/graph_executor_codegen.cc b/src/relay/backend/graph_executor_codegen.cc index f9b26e5cf291..54f7f85836b2 100644 --- a/src/relay/backend/graph_executor_codegen.cc +++ b/src/relay/backend/graph_executor_codegen.cc @@ -19,7 +19,7 @@ /*! * \file relay/backend/graph_codegen.cc - * \brief Graph runtime codegen + * \brief Graph executor codegen */ #include @@ -181,7 +181,7 @@ class GraphOpNode : public GraphNode { const std::string op_type_name_{"tvm_op"}; }; -/*! \brief Code generator for graph runtime */ +/*! \brief Code generator for graph executor */ class GraphExecutorCodegen : public backend::MemoizedExprTranslator> { public: GraphExecutorCodegen(runtime::Module* mod, const TargetsMap& targets) : mod_(mod) { diff --git a/src/relay/backend/graph_plan_memory.cc b/src/relay/backend/graph_plan_memory.cc index 26bc77aa6ec5..4260f052d2c0 100644 --- a/src/relay/backend/graph_plan_memory.cc +++ b/src/relay/backend/graph_plan_memory.cc @@ -20,7 +20,7 @@ /*! * \file relay/backend/graph_plan_memory.cc * \brief Memory index assignment pass for executing - * the program in the graph runtime. + * the program in the graph executor. */ #include #include diff --git a/src/relay/transforms/partition_graph.cc b/src/relay/transforms/partition_graph.cc index 404c7efb10b0..94891c3c98ea 100644 --- a/src/relay/transforms/partition_graph.cc +++ b/src/relay/transforms/partition_graph.cc @@ -428,7 +428,7 @@ IRModule RemoveDefaultAnnotations(IRModule module) { * could be a tuple output. Such tuple outputs needs to be flattened * otherwise the function would create tuples of tuples. Moreover, tuple * of tuples are valid relay, however they are not currently supported by - * graph runtime or relay VM. + * graph executor or relay VM. 
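Both external codegens above are reached by partitioning the graph before the graph executor runs the result. A hedged sketch for ACL, where the target triple is illustrative and the build needs USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON:

```python
from tvm import relay
from tvm.relay.op.contrib.arm_compute_lib import partition_for_arm_compute_lib

# Carve out ACL-supported subgraphs; offloaded functions are then executed
# through the ACL runtime module when the graph executor runs.
mod = partition_for_arm_compute_lib(mod, params)
lib = relay.build(mod, target="llvm -mtriple=aarch64-linux-gnu -mattr=+neon",
                  params=params)
```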
*/ // New annotations would be required to be added for each flattened output diff --git a/src/runtime/crt/graph_executor/graph_executor.c b/src/runtime/crt/graph_executor/graph_executor.c index 7ddaa7737feb..ecd2006b8ea9 100644 --- a/src/runtime/crt/graph_executor/graph_executor.c +++ b/src/runtime/crt/graph_executor/graph_executor.c @@ -21,7 +21,7 @@ /*! * \file graph_executor.c - * \brief implement graph runtime in pure C + * \brief implement graph executor in pure C */ #include @@ -599,7 +599,7 @@ int TVMGraphExecutor_Load(TVMGraphExecutor* runtime, JSONReader* reader) { TVMGraphExecutorNode* node = runtime->nodes + runtime->nodes_count; status = TVMGraphExecutorNode_Load(node, reader); if (status != 0) { - fprintf(stderr, "failed to load an element in `nodes` field in graph runtime node.\n"); + fprintf(stderr, "failed to load an element in `nodes` field in graph executor node.\n"); break; #if TVM_CRT_DEBUG } else { @@ -689,7 +689,7 @@ int TVMGraphExecutor_Load(TVMGraphExecutor* runtime, JSONReader* reader) { TVMGraphExecutorNodeEntry* entry = runtime->outputs + runtime->outputs_count; status = NodeEntry_Load(entry, reader); if (status != 0) { - fprintf(stderr, "Fail to load an element in `heads` field in graph runtime node.\n"); + fprintf(stderr, "Fail to load an element in `heads` field in graph executor node.\n"); break; } runtime->outputs_count++; @@ -698,7 +698,7 @@ int TVMGraphExecutor_Load(TVMGraphExecutor* runtime, JSONReader* reader) { } else if (!strcmp(key, "attrs")) { status = TVMGraphExecutorGraphAttr_Load(&(runtime->attrs), reader); if (status != 0) { - fprintf(stderr, "Fail to load an element in `heads` field in graph runtime node.\n"); + fprintf(stderr, "Fail to load an element in `heads` field in graph executor node.\n"); break; } bitmask |= 16; @@ -725,14 +725,14 @@ uint32_t TVMGraphExecutor_GetEntryId(TVMGraphExecutor* runtime, uint32_t nid, ui /*! * \brief Get the number of input tensors allocated. - * \param runtime The graph runtime. + * \param runtime The graph executor. * \return the number of input tensors allocated. */ int TVMGraphExecutor_GetNumInputs(TVMGraphExecutor* runtime) { return runtime->input_nodes_count; } /*! * \brief Get the input index given the name of input. - * \param runtime The graph runtime. + * \param runtime The graph executor. * \param name The name of the input. * \return The index of input. */ @@ -752,7 +752,7 @@ int TVMGraphExecutor_GetInputIndex(TVMGraphExecutor* runtime, const char* name) /*! * \brief set input to the graph based on name. - * \param runtime The graph runtime. + * \param runtime The graph executor. * \param name The name of the input. * \param data_in The input data. */ @@ -767,7 +767,7 @@ void TVMGraphExecutor_SetInput(TVMGraphExecutor* runtime, const char* name, DLTe /*! * \brief Load parameters from parameter blob. - * \param runtime The graph runtime. + * \param runtime The graph executor. * \param param_blob A binary blob of parameter. * \param param_size The parameter size. * \return The result of this function execution. @@ -869,7 +869,7 @@ int TVMGraphExecutor_LoadParams(TVMGraphExecutor* runtime, const char* param_blo /*! * \brief Run all the operations one by one. - * \param runtime The graph runtime. + * \param runtime The graph executor. */ void TVMGraphExecutor_Run(TVMGraphExecutor* runtime) { // setup the array and requirements. @@ -886,7 +886,7 @@ void TVMGraphExecutor_Run(TVMGraphExecutor* runtime) { /*! * \brief Get the number of output tensors allocated. - * \param runtime The graph runtime. 
+ * \param runtime The graph executor. * \return the number of output tensors allocated. */ int TVMGraphExecutor_GetNumOutputs(TVMGraphExecutor* runtime) { return runtime->outputs_count; } diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h index f7d1d0b27012..69811b5a9348 100644 --- a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h +++ b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h @@ -19,7 +19,7 @@ /*! * \file src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h - * \brief Tiny graph runtime that can run graph containing only tvm PackedFunc. + * \brief Tiny graph executor that can run graph containing only tvm PackedFunc. */ #ifndef TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_GRAPH_RUNTIME_H_ #define TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_GRAPH_RUNTIME_H_ diff --git a/src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc b/src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc index 5ccfa834e9bb..280d22743659 100644 --- a/src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc +++ b/src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc @@ -30,7 +30,7 @@ namespace tvm { namespace runtime { /*! - * \brief Graph runtime with CUDA Graph Support. + * \brief Graph executor with CUDA Graph Support. * * This is the extension of GraphExecutor class used for CUDA graph launch * instead of CUDA kernel launch. CUDA graph launch requires CUDA 10.0 or diff --git a/src/runtime/graph_executor/debug/graph_executor_debug.cc b/src/runtime/graph_executor/debug/graph_executor_debug.cc index e49f41cae6d9..87cdfe20963c 100644 --- a/src/runtime/graph_executor/debug/graph_executor_debug.cc +++ b/src/runtime/graph_executor/debug/graph_executor_debug.cc @@ -35,7 +35,7 @@ namespace tvm { namespace runtime { /*! - * \brief Graph runtime with debug . + * \brief Graph executor with debug . * * This is the extension of GraphExecutor class used for debugging * TVM runtime PackedFunc API. diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h index fc5d2a522a91..37a47f6971e6 100644 --- a/src/runtime/graph_executor/graph_executor.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -18,7 +18,7 @@ */ /*! - * \brief Tiny graph runtime that can run graph + * \brief Tiny graph executor that can run graph * containing only tvm PackedFunc. * \file graph_executor.h */ @@ -56,7 +56,7 @@ struct TVMOpParam { }; /*! - * \brief Tiny graph runtime. + * \brief Tiny graph executor. * * This runtime can be acccesibly in various language via * TVM runtime PackedFunc API. diff --git a/src/runtime/graph_executor/graph_executor_factory.cc b/src/runtime/graph_executor/graph_executor_factory.cc index a7baed073e1c..7110cf695888 100644 --- a/src/runtime/graph_executor/graph_executor_factory.cc +++ b/src/runtime/graph_executor/graph_executor_factory.cc @@ -19,7 +19,7 @@ /*! 
* \file graph_executor_factory.cc - * \brief Graph runtime factory implementations + * \brief Graph executor factory implementations */ #include "./graph_executor_factory.h" @@ -114,7 +114,7 @@ Module GraphExecutorFactory::RuntimeCreate(const std::vector& devs) { Module GraphExecutorFactory::DebugRuntimeCreate(const std::vector& devs) { const PackedFunc* pf = tvm::runtime::Registry::Get("tvm.graph_executor_debug.create"); ICHECK(pf != nullptr) << "Cannot find function tvm.graph_executor_debug.create in registry. " - "Do you enable debug graph runtime build?"; + "Do you enable debug graph executor build?"; // Debug runtime create packed function will call GetAllContexs, so we unpack the devs. std::vector unpacked_devs; for (const auto& dev : devs) { @@ -133,7 +133,7 @@ Module GraphExecutorFactory::DebugRuntimeCreate(const std::vector& devs) TVMRetValue rv; pf->CallPacked(TVMArgs(values.data(), codes.data(), args_size), &rv); Module mod = rv.operator Module(); - // debug graph runtime is one child class of graph runtime. + // debug graph executor is one child class of graph executor. SetParams(const_cast(mod.as()), this->params_); return mod; } diff --git a/src/runtime/graph_executor/graph_executor_factory.h b/src/runtime/graph_executor/graph_executor_factory.h index 030f16ea1239..6aa443d1aafa 100644 --- a/src/runtime/graph_executor/graph_executor_factory.h +++ b/src/runtime/graph_executor/graph_executor_factory.h @@ -19,7 +19,7 @@ /*! * \file tvm/runtime/graph_executor/graph_executor_factory.h - * \brief Graph runtime factory creating graph runtime. + * \brief Graph executor factory creating graph executor. */ #ifndef TVM_RUNTIME_GRAPH_EXECUTOR_GRAPH_EXECUTOR_FACTORY_H_ @@ -90,16 +90,16 @@ class TVM_DLL GraphExecutorFactory : public runtime::ModuleNode { Module DebugRuntimeCreate(const std::vector& devs); /*! - * \brief Create a specific cuda graph runtime module + * \brief Create a specific cuda graph executor module * \param devs The device of the host and devices where graph nodes will be * executed on. - * \return created cuda graph runtime module + * \return created cuda graph executor module */ Module CudaGraphExecutorCreate(const std::vector& devs); /*! * \brief Set params. - * \param graph_executor The graph runtime we want to set the params into. + * \param graph_executor The graph executor we want to set the params into. * \param params The graph params value we want to set. */ void SetParams(GraphExecutor* graph_executor, diff --git a/src/runtime/metadata_module.cc b/src/runtime/metadata_module.cc index 665c72cc5e0d..4a1d89ce1a1f 100644 --- a/src/runtime/metadata_module.cc +++ b/src/runtime/metadata_module.cc @@ -21,7 +21,7 @@ * \file src/runtime/metadata_module.cc * \brief A wrapper for initializing imported modules using metadata. This * module is intended to be used by various runtime in the TVM stack, i.e. - * graph runtime, relay VM, AOT runtime, and various user defined runtimes. It + * graph executor, relay VM, AOT runtime, and various user defined runtimes. It * paves the way to separate the code and metedata, which makes compilation * and/or interpretation more convenient. In addition, the clear separation of * code and metadata significantly reduces the efforts for handling external diff --git a/src/target/metadata_module.cc b/src/target/metadata_module.cc index 0b30d42c876c..8184e9189c4b 100644 --- a/src/target/metadata_module.cc +++ b/src/target/metadata_module.cc @@ -35,7 +35,7 @@ namespace codegen { /*! * \brief Create a metadata module wrapper. 
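On the Python side, these factory entry points surface as functions retrievable from the factory module by name; a sketch assuming a built factory `lib` and the matching build flags (the `cuda_graph_create` name is inferred from this patch and may differ):

```python
import tvm

dev = tvm.cpu(0)
gmod = lib["default"](dev)                  # plain graph executor
dbg = lib["debug_create"]("default", dev)   # needs USE_GRAPH_RUNTIME_DEBUG
cug = lib["cuda_graph_create"]("default", tvm.gpu(0))  # needs USE_GRAPH_RUNTIME_CUDA_GRAPH
```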
The helper is used by different - * codegens, such as graph runtime codegen and the vm compiler. + * codegens, such as graph executor codegen and the vm compiler. * * \param params The metadata for initialization of all modules. * \param target_module the internal module that is compiled by tvm. diff --git a/tests/cpp/build_module_test.cc b/tests/cpp/build_module_test.cc index 75754dee9177..e9373936e0d4 100644 --- a/tests/cpp/build_module_test.cc +++ b/tests/cpp/build_module_test.cc @@ -163,7 +163,7 @@ TEST(BuildModule, Heterogeneous) { pc[i] = i - 1.0; } - // Initialize graph runtime. + // Initialize graph executor. int cpu_dev_ty = static_cast(kDLCPU); int cpu_dev_id = 0; int gpu_dev_ty = static_cast(kDLGPU); diff --git a/tests/micro/test_runtime_micro_on_arm.py b/tests/micro/test_runtime_micro_on_arm.py index d9742e72c2e9..7d19d9510062 100644 --- a/tests/micro/test_runtime_micro_on_arm.py +++ b/tests/micro/test_runtime_micro_on_arm.py @@ -36,7 +36,7 @@ def relay_micro_build(func, dev_config, params=None): - """Create a graph runtime module with a micro device context from a Relay function. + """Create a graph executor module with a micro device context from a Relay function. Parameters ---------- @@ -52,7 +52,7 @@ def relay_micro_build(func, dev_config, params=None): Return ------ mod : tvm.runtime.Module - graph runtime module for the target device + graph executor module for the target device """ with tvm.transform.PassContext( disabled_pass={"FuseOps"}, config={"tir.disable_vectorize": True} @@ -172,7 +172,7 @@ def test_workspace_add(): def test_graph_executor(): - """Test a program which uses the graph runtime.""" + """Test a program which uses the graph executor.""" if not tvm.runtime.enabled("micro_dev"): return shape = (1024,) @@ -349,7 +349,7 @@ def test_inactive_session_use(): input("[press enter to continue]") test_graph_executor() print() - print("finished graph runtime test") + print("finished graph executor test") input("[press enter to continue]") test_conv2d() print() diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index c3ada5f0df47..2bef7be65938 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -296,7 +296,7 @@ def load_graph(): lib = tvm.runtime.load_module(tmpdir.relpath("compiled.so")) return graph, lib, params - # Test serialization with graph runtime + # Test serialization with graph executor graph, lib, graph_params = compile_graph(mod, params) save_graph(graph, lib, graph_params) loaded_graph, loaded_lib, loaded_params = load_graph() diff --git a/tests/python/frontend/mxnet/test_forward.py b/tests/python/frontend/mxnet/test_forward.py index c58f5dc99fff..c4e8e804b15a 100644 --- a/tests/python/frontend/mxnet/test_forward.py +++ b/tests/python/frontend/mxnet/test_forward.py @@ -802,7 +802,7 @@ def verify(val, shape, dtype): mx_sym = mx.sym.full(shape, val, dtype=dtype) mod, _ = relay.frontend.from_mxnet(mx_sym, {}) for target, dev in tvm.testing.enabled_targets(): - # Skip testing graph runtime because this op will be optimized out + # Skip testing graph executor because this op will be optimized out # by constant folding. 
for kind in ["debug"]: intrp = relay.create_executor(kind, mod=mod, device=dev, target=target) @@ -994,7 +994,7 @@ def verify( mod, params = relay.frontend.from_mxnet(mx_sym, shape=shape_dict, arg_params=mx_params) for target, dev in tvm.testing.enabled_targets(): - # only test graph runtime because debug runtime is too slow + # only test graph executor because debug runtime is too slow for kind in ["graph"]: intrp = relay.create_executor(kind, mod=mod, device=dev, target=target) op_res = intrp.evaluate()(**inputs, **params) diff --git a/tests/python/relay/benchmarking/benchmark_vm.py b/tests/python/relay/benchmarking/benchmark_vm.py index 5136e52a1213..44ce9be766d2 100644 --- a/tests/python/relay/benchmarking/benchmark_vm.py +++ b/tests/python/relay/benchmarking/benchmark_vm.py @@ -49,12 +49,12 @@ def get_graph_executor_output( out = m.get_output(0, tvm.nd.empty(out_shape, dtype)) if measure: - print("Evaluate graph runtime inference cost of {} on " "{}".format(model, repr(dev))) + print("Evaluate graph executor inference cost of {} on " "{}".format(model, repr(dev))) ftimer = m.module.time_evaluator("run", dev, number=1, repeat=20) # Measure in millisecond. prof_res = np.array(ftimer().results) * 1000 print( - "Mean graph runtime inference time (std dev): %.2f ms (%.2f ms)" + "Mean graph executor inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)) ) diff --git a/tests/python/relay/test_external_codegen.py b/tests/python/relay/test_external_codegen.py index 2bc12a3a0547..9f6d88e47f0b 100644 --- a/tests/python/relay/test_external_codegen.py +++ b/tests/python/relay/test_external_codegen.py @@ -336,7 +336,7 @@ def test_extern_dnnl_const(): def test_load_params_with_constants_in_ext_codegen(): # After binding params and partitioning graph_module.get_params() - # might contain parameters that are not an graph runtime input but + # might contain parameters that are not an graph executor input but # for example constants in external function. 
y_in = np.ones((1,)).astype("float32") params = {"y": y_in} diff --git a/tests/python/unittest/test_crt.py b/tests/python/unittest/test_crt.py index cd932ff91db1..a0524353f3b3 100644 --- a/tests/python/unittest/test_crt.py +++ b/tests/python/unittest/test_crt.py @@ -139,7 +139,7 @@ def test_reset(): @tvm.testing.requires_micro def test_graph_executor(): - """Test use of the graph runtime with microTVM.""" + """Test use of the graph executor with microTVM.""" import tvm.micro workspace = tvm.micro.Workspace(debug=True) diff --git a/tests/python/unittest/test_runtime_module_based_interface.py b/tests/python/unittest/test_runtime_module_based_interface.py index f2ba91dcdf44..bf062afdb701 100644 --- a/tests/python/unittest/test_runtime_module_based_interface.py +++ b/tests/python/unittest/test_runtime_module_based_interface.py @@ -82,7 +82,7 @@ def test_cpu(): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper + # graph executor wrapper gmod = graph_executor.GraphModule(complied_graph_lib["default"](dev)) gmod.set_input("data", data) gmod.run() @@ -109,7 +109,7 @@ def test_gpu(): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper + # graph executor wrapper gmod = graph_executor.GraphModule(complied_graph_lib["default"](dev)) gmod.set_input("data", data) gmod.run() @@ -151,7 +151,7 @@ def verify_cpu_export(obj_format): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper + # graph executor wrapper gmod = graph_executor.GraphModule(loaded_lib["default"](dev)) gmod.set_input("data", data) gmod.run() @@ -190,7 +190,7 @@ def verify_gpu_export(obj_format): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper + # graph executor wrapper gmod = graph_executor.GraphModule(loaded_lib["default"](dev)) gmod.set_input("data", data) gmod.run() @@ -234,7 +234,7 @@ def verify_rpc_cpu_export(obj_format): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper + # graph executor wrapper gmod = graph_executor.GraphModule(loaded_lib["default"](dev)) gmod.set_input("data", data) gmod.run() @@ -279,7 +279,7 @@ def verify_rpc_gpu_export(obj_format): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper + # graph executor wrapper gmod = graph_executor.GraphModule(loaded_lib["default"](dev)) gmod.set_input("data", data) gmod.run() @@ -333,7 +333,7 @@ def verify_cpu_remove_package_params(obj_format): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper + # graph executor wrapper gmod = graph_executor.GraphModule(loaded_lib["default"](dev)) loaded_params = bytearray(open(temp.relpath("deploy_param.params"), "rb").read()) gmod.set_input("data", data) @@ -380,7 +380,7 @@ def verify_gpu_remove_package_params(obj_format): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper + # graph executor wrapper gmod = graph_executor.GraphModule(loaded_lib["default"](dev)) loaded_params = bytearray(open(temp.relpath("deploy_param.params"), "rb").read()) gmod.set_input("data", data) @@ -433,7 +433,7 @@ def verify_rpc_cpu_remove_package_params(obj_format): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime 
wrapper + # graph executor wrapper gmod = graph_executor.GraphModule(loaded_lib["default"](dev)) loaded_params = bytearray(open(path_params, "rb").read()) gmod.set_input("data", data) @@ -486,7 +486,7 @@ def verify_rpc_gpu_remove_package_params(obj_format): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper + # graph executor wrapper gmod = graph_executor.GraphModule(loaded_lib["default"](dev)) loaded_params = bytearray(open(path_params, "rb").read()) gmod.set_input("data", data) @@ -526,7 +526,7 @@ def test_debug_graph_executor(): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # debug graph runtime wrapper + # debug graph executor wrapper debug_g_mod = debug_runtime.GraphModuleDebug( complied_graph_lib["debug_create"]("default", dev), [dev], @@ -560,7 +560,7 @@ def test_cuda_graph_executor(): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # cuda graph runtime wrapper + # cuda graph executor wrapper cu_gmod = cuda_graph_executor.GraphModuleCudaGraph(gmod) cu_gmod.set_input("data", data) cu_gmod.run() diff --git a/tests/scripts/task_rust.sh b/tests/scripts/task_rust.sh index 2c87cceec8bb..c40585b62b47 100755 --- a/tests/scripts/task_rust.sh +++ b/tests/scripts/task_rust.sh @@ -58,14 +58,14 @@ cd $RUST_DIR/tvm-rt cargo build cargo test --tests -# Next we test the graph runtime crate. +# Next we test the graph executor crate. cd $RUST_DIR/tvm-graph-rt # We first we compile a model using the Python bindings then run the tests. python3 tests/build_model.py cargo test --tests -# Run some more tests involving the graph runtime API. +# Run some more tests involving the graph executor API. cd tests/test_tvm_basic cargo run cd - diff --git a/tutorials/auto_scheduler/tune_network_arm.py b/tutorials/auto_scheduler/tune_network_arm.py index 7e2281a4b459..153143dd4e94 100644 --- a/tutorials/auto_scheduler/tune_network_arm.py +++ b/tutorials/auto_scheduler/tune_network_arm.py @@ -319,7 +319,7 @@ def tune_and_evaluate(): remote.upload(tmp.relpath(filename)) rlib = remote.load_module(filename) - # Create graph runtime + # Create graph executor dev = remote.cpu() module = graph_executor.GraphModule(rlib["default"](dev)) data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype)) diff --git a/tutorials/auto_scheduler/tune_network_cuda.py b/tutorials/auto_scheduler/tune_network_cuda.py index e92ddaf40541..7b5619c671be 100644 --- a/tutorials/auto_scheduler/tune_network_cuda.py +++ b/tutorials/auto_scheduler/tune_network_cuda.py @@ -280,7 +280,7 @@ def run_tuning(): with tvm.transform.PassContext(opt_level=3, config={"relay.backend.use_auto_scheduler": True}): lib = relay.build(mod, target=target, params=params) -# Create graph runtime +# Create graph executor dev = tvm.device(str(target), 0) module = graph_executor.GraphModule(lib["default"](dev)) data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype)) diff --git a/tutorials/auto_scheduler/tune_network_mali.py b/tutorials/auto_scheduler/tune_network_mali.py index a96ce922d4ae..13d1e4793ffa 100644 --- a/tutorials/auto_scheduler/tune_network_mali.py +++ b/tutorials/auto_scheduler/tune_network_mali.py @@ -242,7 +242,7 @@ def tune_and_evaluate(): ): lib = relay.build(mod, target=target, target_host=target_host, params=params) - # Create graph runtime + # Create graph executor print("=============== Request Remote ===============") from tvm.auto_scheduler.utils import 
request_remote diff --git a/tutorials/auto_scheduler/tune_network_x86.py b/tutorials/auto_scheduler/tune_network_x86.py index 253878f70374..91dc64eec20e 100644 --- a/tutorials/auto_scheduler/tune_network_x86.py +++ b/tutorials/auto_scheduler/tune_network_x86.py @@ -279,7 +279,7 @@ def run_tuning(): with tvm.transform.PassContext(opt_level=3, config={"relay.backend.use_auto_scheduler": True}): lib = relay.build(mod, target=target, params=params) -# Create graph runtime +# Create graph executor dev = tvm.device(str(target), 0) module = graph_executor.GraphModule(lib["default"](dev)) data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype)) diff --git a/tutorials/frontend/build_gcn.py b/tutorials/frontend/build_gcn.py index 5ecea00b76b0..e73dc2dca287 100644 --- a/tutorials/frontend/build_gcn.py +++ b/tutorials/frontend/build_gcn.py @@ -335,7 +335,7 @@ def prepare_params(g, data): with tvm.transform.PassContext(opt_level=0): # Currently only support opt_level=0 lib = relay.build(mod, target, params=params) -# Generate graph runtime +# Generate graph executor dev = tvm.device(target, 0) m = graph_executor.GraphModule(lib["default"](dev)) diff --git a/tutorials/get_started/relay_quick_start.py b/tutorials/get_started/relay_quick_start.py index d0fbe9f59bde..fa9207604bac 100644 --- a/tutorials/get_started/relay_quick_start.py +++ b/tutorials/get_started/relay_quick_start.py @@ -104,7 +104,7 @@ ##################################################################### # Run the generate library # ------------------------ -# Now we can create graph runtime and run the module on Nvidia GPU. +# Now we can create graph executor and run the module on Nvidia GPU. # create random input dev = tvm.gpu() diff --git a/vta/tutorials/autotvm/tune_relay_vta.py b/vta/tutorials/autotvm/tune_relay_vta.py index ba326912f423..7deb7408479a 100644 --- a/vta/tutorials/autotvm/tune_relay_vta.py +++ b/vta/tutorials/autotvm/tune_relay_vta.py @@ -431,7 +431,7 @@ def tune_and_evaluate(tuning_opt): remote.upload(temp.relpath("graphlib.tar")) lib = remote.load_module("graphlib.tar") - # Generate the graph runtime + # Generate the graph executor ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0) m = graph_executor.GraphModule(lib["default"](ctx)) diff --git a/vta/tutorials/frontend/deploy_classification.py b/vta/tutorials/frontend/deploy_classification.py index 91e999271b1e..808972e97979 100644 --- a/vta/tutorials/frontend/deploy_classification.py +++ b/vta/tutorials/frontend/deploy_classification.py @@ -135,7 +135,7 @@ ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0) ###################################################################### -# Build the inference graph runtime +# Build the inference graph executor # --------------------------------- # Grab vision model from Gluon model zoo and compile with Relay. # The compilation steps are: @@ -147,7 +147,7 @@ # 4. Perform constant folding to reduce number of operators (e.g. eliminate batch norm multiply). # 5. Perform relay build to object file. # 6. Load the object file onto remote (FPGA device). -# 7. Generate graph runtime, `m`. +# 7. Generate graph executor, `m`. 
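The compile steps above end at "Generate graph executor, `m`"; apart from the `graph_executor` module path, the user-facing flow is unchanged by this rename. As a companion to those steps, a minimal runnable sketch of that flow; the toy one-op network, input name, and shapes are illustrative stand-ins, not part of this patch:

```python
import numpy as np
import tvm
from tvm import relay
from tvm.contrib import graph_executor  # formerly tvm.contrib.graph_runtime

# Toy stand-in network (out = data + 1), just enough to exercise the executor API.
data = relay.var("data", shape=(1, 4), dtype="float32")
mod = tvm.IRModule.from_expr(relay.Function([data], data + relay.const(1.0)))

lib = relay.build(mod, target="llvm")                # step 5: relay build
dev = tvm.cpu(0)
m = graph_executor.GraphModule(lib["default"](dev))  # step 7: generate graph executor, `m`

m.set_input("data", np.ones((1, 4), dtype="float32"))
m.run()
print(m.get_output(0).asnumpy())                     # [[2. 2. 2. 2.]]
```

The same `set_input`/`run`/`get_output` sequence appears in the test and tutorial hunks throughout this series; only the import path and creation call are renamed.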
# # Load pre-configured AutoTVM schedules @@ -209,7 +209,7 @@ remote.upload(temp.relpath("graphlib.tar")) lib = remote.load_module("graphlib.tar") - # Graph runtime + # Graph executor m = graph_executor.GraphModule(lib["default"](ctx)) ###################################################################### diff --git a/vta/tutorials/frontend/legacy/deploy_detection.py b/vta/tutorials/frontend/legacy/deploy_detection.py index 9181a5e7b1b3..696d0508b956 100644 --- a/vta/tutorials/frontend/legacy/deploy_detection.py +++ b/vta/tutorials/frontend/legacy/deploy_detection.py @@ -178,7 +178,7 @@ ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0) #################################### -# Build the inference graph runtime. +# Build the inference graph executor. # ---------------------------------- # Using Darknet library load downloaded vision model and compile with Relay. # The compilation steps are: @@ -190,7 +190,7 @@ # 4. Perform constant folding to reduce number of operators (e.g. eliminate batch norm multiply). # 5. Perform relay build to object file. # 6. Load the object file onto remote (FPGA device). -# 7. Generate graph runtime, `m`. +# 7. Generate graph executor, `m`. # # Load pre-configured AutoTVM schedules @@ -246,7 +246,7 @@ remote.upload(temp.relpath("graphlib.tar")) lib = remote.load_module("graphlib.tar") - # Graph runtime + # Graph executor m = graph_executor.GraphModule(lib["default"](ctx)) #################################### diff --git a/web/src/runtime.ts b/web/src/runtime.ts index 532c45ba5ccf..6b1840a0deed 100644 --- a/web/src/runtime.ts +++ b/web/src/runtime.ts @@ -570,7 +570,7 @@ export class Module implements Disposable { } /** - * Graph runtime. + * Graph executor. * * This is a thin wrapper of the underlying TVM module. * you can also directly call set_input, run, and get_output @@ -986,9 +986,9 @@ export class Instance implements Disposable { } /** - * Create a new graph runtime. + * Create a new graph executor. * - * @param graphJson The graph runtime json file. + * @param graphJson The graph executor json file. * @param lib The underlying library. * @param dev The execution device of the graph. 
*/ From 32b3adb0652e16f4aa3f7274b8f532835dcd6ddb Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 12 Mar 2021 10:15:08 -0800 Subject: [PATCH 07/16] GRAPH_RUNTIME -> GRAPH_EXECUTOR --- CMakeLists.txt | 18 +++++++++--------- .../reference-vm/zephyr/rebuild-tvm.sh | 2 +- cmake/config.cmake | 8 ++++---- cmake/modules/CUDA.cmake | 6 +++--- cmake/modules/LibInfo.cmake | 6 +++--- cmake/modules/contrib/ArmComputeLib.cmake | 10 +++++----- cmake/modules/contrib/TensorRT.cmake | 2 +- conda/recipe/bld.bat | 2 +- conda/recipe/build.sh | 2 +- docker/Dockerfile.demo_android | 2 +- docker/install/install_tvm_cpu.sh | 2 +- docs/deploy/arm_compute_lib.rst | 10 +++++----- docs/dev/debugger.rst | 4 ++-- docs/install/from_source.rst | 2 +- include/tvm/runtime/crt/graph_executor.h | 6 +++--- .../tvm/runtime/crt/graph_executor_module.h | 6 +++--- .../org/apache/tvm/contrib/GraphModule.java | 4 ++-- .../contrib/cuda_graph/cuda_graph_executor.py | 2 +- python/tvm/contrib/debugger/debug_executor.py | 2 +- .../backend/contrib/arm_compute_lib/codegen.cc | 2 +- src/relay/backend/contrib/tensorrt/codegen.cc | 10 +++++----- .../contrib/arm_compute_lib/acl_runtime.cc | 8 ++++---- .../contrib/tensorrt/tensorrt_runtime.cc | 4 ++-- src/runtime/crt/host/main.cc | 4 ++-- .../internal/graph_executor/graph_executor.h | 6 +++--- .../crt/internal/graph_executor/load_json.h | 12 ++++++------ .../graph_executor/graph_executor_factory.cc | 2 +- .../micro/standalone/utvm_graph_executor.h | 6 +++--- src/support/libinfo.cc | 18 +++++++++--------- tests/azure-pipelines/main.yml | 6 +++--- tests/python/unittest/test_link_params.py | 2 +- tests/scripts/task_config_build_gpu.sh | 2 +- tutorials/frontend/deploy_model_on_android.py | 2 +- 33 files changed, 90 insertions(+), 90 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d6743a681291..74b6bb996419 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,8 +34,8 @@ tvm_option(USE_RPC "Build with RPC" ON) tvm_option(USE_THREADS "Build with thread support" ON) tvm_option(USE_LLVM "Build with LLVM, can be set to specific llvm-config path" OFF) tvm_option(USE_STACKVM_RUNTIME "Include stackvm into the runtime" OFF) -tvm_option(USE_GRAPH_RUNTIME "Build with tiny graph executor" ON) -tvm_option(USE_GRAPH_RUNTIME_CUDA_GRAPH "Build with tiny graph executor with CUDA Graph for GPUs" OFF) +tvm_option(USE_GRAPH_EXECUTOR "Build with tiny graph executor" ON) +tvm_option(USE_GRAPH_EXECUTOR_CUDA_GRAPH "Build with tiny graph executor with CUDA Graph for GPUs" OFF) tvm_option(USE_PROFILER "Build profiler for the VM and graph executor" ON) tvm_option(USE_OPENMP "Build with OpenMP thread pool implementation" OFF) tvm_option(USE_RELAY_DEBUG "Building Relay in debug mode..." 
OFF) @@ -79,7 +79,7 @@ tvm_option(USE_COREML "Build with coreml support" OFF) tvm_option(USE_BNNS "Build with BNNS support" OFF) tvm_option(USE_TARGET_ONNX "Build with ONNX Codegen support" OFF) tvm_option(USE_ARM_COMPUTE_LIB "Build with Arm Compute Library" OFF) -tvm_option(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME "Build with Arm Compute Library graph executor" OFF) +tvm_option(USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR "Build with Arm Compute Library graph executor" OFF) tvm_option(USE_TENSORRT_CODEGEN "Build with TensorRT Codegen support" OFF) tvm_option(USE_TENSORRT_RUNTIME "Build with TensorRT runtime" OFF) tvm_option(USE_RUST_EXT "Build with Rust based compiler extensions, STATIC, DYNAMIC, or OFF" OFF) @@ -307,16 +307,16 @@ else() list(APPEND COMPILER_SRCS ${STACKVM_RUNTIME_SRCS}) endif(USE_STACKVM_RUNTIME) -if(USE_GRAPH_RUNTIME) - message(STATUS "Build with Graph executor support...") +if(USE_GRAPH_EXECUTOR) + message(STATUS "Build with Graph Executor support...") file(GLOB RUNTIME_GRAPH_EXECUTOR_SRCS src/runtime/graph_executor/*.cc) list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_EXECUTOR_SRCS}) -endif(USE_GRAPH_RUNTIME) +endif(USE_GRAPH_EXECUTOR) # convert old options for profiler -if(USE_GRAPH_RUNTIME_DEBUG) - unset(USE_GRAPH_RUNTIME_DEBUG CACHE) +if(USE_GRAPH_EXECUTOR_DEBUG) + unset(USE_GRAPH_EXECUTOR_DEBUG CACHE) set(USE_PROFILER ON) endif() if(USE_VM_PROFILER) @@ -330,7 +330,7 @@ if(USE_PROFILER) file(GLOB RUNTIME_GRAPH_EXECUTOR_DEBUG_SRCS src/runtime/graph_executor/debug/*.cc) list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_EXECUTOR_DEBUG_SRCS}) set_source_files_properties(${RUNTIME_GRAPH_EXECUTOR_SRCS} - PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_RUNTIME_DEBUG") + PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_EXECUTOR_DEBUG") file(GLOB RUNTIME_VM_PROFILER_SRCS src/runtime/vm/profiler/*.cc) list(APPEND RUNTIME_SRCS ${RUNTIME_VM_PROFILER_SRCS}) diff --git a/apps/microtvm/reference-vm/zephyr/rebuild-tvm.sh b/apps/microtvm/reference-vm/zephyr/rebuild-tvm.sh index 4672012e73f2..2eb55e385520 100755 --- a/apps/microtvm/reference-vm/zephyr/rebuild-tvm.sh +++ b/apps/microtvm/reference-vm/zephyr/rebuild-tvm.sh @@ -28,7 +28,7 @@ fi cp cmake/config.cmake "${BUILD_DIR}" cd "${BUILD_DIR}" sed -i 's/USE_MICRO OFF/USE_MICRO ON/' config.cmake -sed -i 's/USE_GRAPH_RUNTIME_DEBUG OFF/USE_GRAPH_RUNTIME_DEBUG ON/' config.cmake +sed -i 's/USE_GRAPH_EXECUTOR_DEBUG OFF/USE_GRAPH_EXECUTOR_DEBUG ON/' config.cmake sed -i 's/USE_LLVM OFF/USE_LLVM ON/' config.cmake cmake .. rm -rf standalone_crt host_standalone_crt # remove stale generated files diff --git a/cmake/config.cmake b/cmake/config.cmake index 5b907a1d4d87..7b29df648ac7 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -97,10 +97,10 @@ set(USE_CPP_RPC OFF) set(USE_STACKVM_RUNTIME OFF) # Whether enable tiny embedded graph executor. -set(USE_GRAPH_RUNTIME ON) +set(USE_GRAPH_EXECUTOR ON) # Whether enable tiny graph executor with CUDA Graph -set(USE_GRAPH_RUNTIME_CUDA_GRAPH OFF) +set(USE_GRAPH_EXECUTOR_CUDA_GRAPH OFF) # Whether to enable the profiler for the graph executor and vm set(USE_PROFILER ON) @@ -207,10 +207,10 @@ set(USE_DNNL_CODEGEN OFF) # # USE_ARM_COMPUTE_LIB - Support for compiling a relay graph offloading supported # operators to Arm Compute Library. OFF/ON -# USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME - Run Arm Compute Library annotated functions via the ACL +# USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR - Run Arm Compute Library annotated functions via the ACL # runtime. 
OFF/ON/"path/to/ACL" set(USE_ARM_COMPUTE_LIB OFF) -set(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME OFF) +set(USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR OFF) # Whether to build with Arm Ethos-N support # Possible values: diff --git a/cmake/modules/CUDA.cmake b/cmake/modules/CUDA.cmake index 9b48bec510f4..1bdc5036f857 100644 --- a/cmake/modules/CUDA.cmake +++ b/cmake/modules/CUDA.cmake @@ -65,9 +65,9 @@ if(USE_CUDA) list(APPEND RUNTIME_SRCS ${CONTRIB_THRUST_SRC}) endif(USE_THRUST) - if(USE_GRAPH_RUNTIME_CUDA_GRAPH) - if(NOT USE_GRAPH_RUNTIME) - message(FATAL_ERROR "CUDA Graph is only supported by graph executor, please set USE_GRAPH_RUNTIME=ON") + if(USE_GRAPH_EXECUTOR_CUDA_GRAPH) + if(NOT USE_GRAPH_EXECUTOR) + message(FATAL_ERROR "CUDA Graph is only supported by graph executor, please set USE_GRAPH_EXECUTOR=ON") endif() if(CUDAToolkit_VERSION_MAJOR LESS "10") message(FATAL_ERROR "CUDA Graph requires CUDA 10 or above, got=" ${CUDAToolkit_VERSION}) diff --git a/cmake/modules/LibInfo.cmake b/cmake/modules/LibInfo.cmake index 131dceeb345d..2a69d06970a8 100644 --- a/cmake/modules/LibInfo.cmake +++ b/cmake/modules/LibInfo.cmake @@ -42,8 +42,8 @@ function(add_lib_info src_file) TVM_INFO_USE_LLVM="${USE_LLVM}" TVM_INFO_LLVM_VERSION="${TVM_INFO_LLVM_VERSION}" TVM_INFO_USE_STACKVM_RUNTIME="${USE_STACKVM_RUNTIME}" - TVM_INFO_USE_GRAPH_RUNTIME="${USE_GRAPH_RUNTIME}" - TVM_INFO_USE_GRAPH_RUNTIME_DEBUG="${USE_GRAPH_RUNTIME_DEBUG}" + TVM_INFO_USE_GRAPH_EXECUTOR="${USE_GRAPH_EXECUTOR}" + TVM_INFO_USE_GRAPH_EXECUTOR_DEBUG="${USE_GRAPH_EXECUTOR_DEBUG}" TVM_INFO_USE_OPENMP="${USE_OPENMP}" TVM_INFO_USE_RELAY_DEBUG="${USE_RELAY_DEBUG}" TVM_INFO_USE_RTTI="${USE_RTTI}" @@ -73,7 +73,7 @@ function(add_lib_info src_file) TVM_INFO_USE_COREML="${USE_COREML}" TVM_INFO_USE_TARGET_ONNX="${USE_TARGET_ONNX}" TVM_INFO_USE_ARM_COMPUTE_LIB="${USE_ARM_COMPUTE_LIB}" - TVM_INFO_USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME="${USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME}" + TVM_INFO_USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR="${USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR}" TVM_INFO_INDEX_DEFAULT_I64="${INDEX_DEFAULT_I64}" TVM_CXX_COMPILER_PATH="${CMAKE_CXX_COMPILER}" ) diff --git a/cmake/modules/contrib/ArmComputeLib.cmake b/cmake/modules/contrib/ArmComputeLib.cmake index 3c9a50002c41..1e47d087abdc 100644 --- a/cmake/modules/contrib/ArmComputeLib.cmake +++ b/cmake/modules/contrib/ArmComputeLib.cmake @@ -23,17 +23,17 @@ if(USE_ARM_COMPUTE_LIB) file(GLOB ACL_RELAY_CONTRIB_SRC src/relay/backend/contrib/arm_compute_lib/*.cc) file(GLOB ACL_RUNTIME_MODULE src/runtime/contrib/arm_compute_lib/acl_runtime.cc) list(APPEND COMPILER_SRCS ${ACL_RELAY_CONTRIB_SRC}) - if(NOT USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME) + if(NOT USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR) list(APPEND COMPILER_SRCS ${ACL_RUNTIME_MODULE}) endif() message(STATUS "Build with Arm Compute Library support...") endif() -if(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME) +if(USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR) set(ACL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/acl) # Detect custom ACL path. - if (NOT USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME STREQUAL "ON") - set(ACL_PATH ${USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME}) + if (NOT USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR STREQUAL "ON") + set(ACL_PATH ${USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR}) endif() file(GLOB ACL_CONTRIB_SRC src/runtime/contrib/arm_compute_lib/*) @@ -66,5 +66,5 @@ if(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME) ${EXTERN_ACL_COMPUTE_GRAPH_LIB}) # Set flag to detect ACL graph executor support. 
- add_definitions(-DTVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB) + add_definitions(-DTVM_GRAPH_EXECUTOR_ARM_COMPUTE_LIB) endif() diff --git a/cmake/modules/contrib/TensorRT.cmake b/cmake/modules/contrib/TensorRT.cmake index 0c7e43c0fcf8..218f0b2e20fe 100644 --- a/cmake/modules/contrib/TensorRT.cmake +++ b/cmake/modules/contrib/TensorRT.cmake @@ -55,5 +55,5 @@ if(USE_TENSORRT_RUNTIME) list(APPEND RUNTIME_SRCS ${RUNTIME_TENSORRT_SRCS}) # Set defines - add_definitions(-DTVM_GRAPH_RUNTIME_TENSORRT) + add_definitions(-DTVM_GRAPH_EXECUTOR_TENSORRT) endif() diff --git a/conda/recipe/bld.bat b/conda/recipe/bld.bat index 9fc0469febc6..e877b8fda1e1 100644 --- a/conda/recipe/bld.bat +++ b/conda/recipe/bld.bat @@ -28,7 +28,7 @@ cmake ^ -DUSE_CPP_RPC=ON ^ -DUSE_SORT=ON ^ -DUSE_RANDOM=ON ^ - -DUSE_GRAPH_RUNTIME_DEBUG=ON ^ + -DUSE_GRAPH_EXECUTOR_DEBUG=ON ^ -DINSTALL_DEV=ON ^ %SRC_DIR% diff --git a/conda/recipe/build.sh b/conda/recipe/build.sh index 828e3c39488a..a94b9df72440 100755 --- a/conda/recipe/build.sh +++ b/conda/recipe/build.sh @@ -49,7 +49,7 @@ cmake -DCMAKE_INSTALL_PREFIX="${PREFIX}" \ -DUSE_CPP_RPC=OFF \ -DUSE_SORT=ON \ -DUSE_RANDOM=ON \ - -DUSE_GRAPH_RUNTIME_DEBUG=ON \ + -DUSE_GRAPH_EXECUTOR_DEBUG=ON \ -DUSE_LLVM=ON \ -DINSTALL_DEV=ON \ -DUSE_LIBBACKTRACE=AUTO \ diff --git a/docker/Dockerfile.demo_android b/docker/Dockerfile.demo_android index 039439a937e9..f56f56728e70 100644 --- a/docker/Dockerfile.demo_android +++ b/docker/Dockerfile.demo_android @@ -61,7 +61,7 @@ RUN cd /usr && \ -DUSE_LLVM=llvm-config-8 \ -DUSE_RPC=ON \ -DUSE_SORT=ON \ - -DUSE_GRAPH_RUNTIME=ON \ + -DUSE_GRAPH_EXECUTOR=ON \ -DUSE_VULKAN=ON \ .. && \ make -j10 diff --git a/docker/install/install_tvm_cpu.sh b/docker/install/install_tvm_cpu.sh index c3a15fa26b6d..48e6df3597db 100755 --- a/docker/install/install_tvm_cpu.sh +++ b/docker/install/install_tvm_cpu.sh @@ -27,7 +27,7 @@ cd /usr/tvm git checkout 4b13bf668edc7099b38d463e5db94ebc96c80470 echo set\(USE_LLVM llvm-config-8\) >> config.cmake -echo set\(USE_GRAPH_RUNTIME ON\) >> config.cmake +echo set\(USE_GRAPH_EXECUTOR ON\) >> config.cmake echo set\(USE_BLAS openblas\) >> config.cmake mkdir -p build cd build diff --git a/docs/deploy/arm_compute_lib.rst b/docs/deploy/arm_compute_lib.rst index 1f9bc578f070..4e43682a240a 100644 --- a/docs/deploy/arm_compute_lib.rst +++ b/docs/deploy/arm_compute_lib.rst @@ -52,7 +52,7 @@ We recommend two different ways to build and install ACL: mv ./linux--neon/* . -In both cases you will need to set USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME to the path where the ACL package +In both cases you will need to set USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR to the path where the ACL package is located. Cmake will look in /path-to-acl/ along with /path-to-acl/lib and /path-to-acl/build for the required binaries. See the section below for more information on how to use these configuration options. @@ -64,15 +64,15 @@ because ACL cannot be used on an x86 machine. However, we still want to be able runtime module on an x86 machine. * USE_ARM_COMPUTE_LIB=ON/OFF - Enabling this flag will add support for compiling an ACL runtime module. -* USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON/OFF/path-to-acl - Enabling this flag will allow the graph executor to +* USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR=ON/OFF/path-to-acl - Enabling this flag will allow the graph executor to compute the ACL offloaded functions. These flags can be used in different scenarios depending on your setup. 
For example, if you want to compile an ACL module on an x86 machine and then run the module on a remote Arm device via RPC, you will -need to use USE_ARM_COMPUTE_LIB=ON on the x86 machine and USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON on the remote +need to use USE_ARM_COMPUTE_LIB=ON on the x86 machine and USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR=ON on the remote AArch64 device. -By default both options are set to OFF. Using USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON will mean that ACL +By default both options are set to OFF. Using USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR=ON will mean that ACL binaries are searched for by cmake in the default locations (see https://cmake.org/cmake/help/v3.4/command/find_library.html). In addition to this, /path-to-tvm-project/acl/ will also be searched. It is likely that you will need to set your own path to @@ -83,7 +83,7 @@ These flags should be set in your config.cmake file. For example: .. code:: cmake set(USE_ARM_COMPUTE_LIB ON) - set(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME /path/to/acl) + set(USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR /path/to/acl) Usage diff --git a/docs/dev/debugger.rst b/docs/dev/debugger.rst index 9cd138d73253..d3559feadb68 100644 --- a/docs/dev/debugger.rst +++ b/docs/dev/debugger.rst @@ -123,12 +123,12 @@ Example of loading the parameters How to use Debugger? *************************************** -1. In ``config.cmake`` set the ``USE_GRAPH_RUNTIME_DEBUG`` flag to ``ON`` +1. In ``config.cmake`` set the ``USE_GRAPH_EXECUTOR_DEBUG`` flag to ``ON`` :: # Whether enable additional graph debug functions - set(USE_GRAPH_RUNTIME_DEBUG ON) + set(USE_GRAPH_EXECUTOR_DEBUG ON) 2. Do 'make' tvm, so that it will make the ``libtvm_runtime.so`` diff --git a/docs/install/from_source.rst b/docs/install/from_source.rst index ef6ac9e84c74..f0ebad1d9edc 100644 --- a/docs/install/from_source.rst +++ b/docs/install/from_source.rst @@ -88,7 +88,7 @@ The configuration of TVM can be modified by `config.cmake`. - On macOS, for some versions of Xcode, you need to add ``-lc++abi`` in the LDFLAGS or you'll get link errors. - Change ``set(USE_CUDA OFF)`` to ``set(USE_CUDA ON)`` to enable CUDA backend. Do the same for other backends and libraries you want to build for (OpenCL, RCOM, METAL, VULKAN, ...). - - To help with debugging, ensure the embedded graph executor and debugging functions are enabled with ``set(USE_GRAPH_RUNTIME ON)`` and ``set(USE_GRAPH_RUNTIME_DEBUG ON)`` + - To help with debugging, ensure the embedded graph executor and debugging functions are enabled with ``set(USE_GRAPH_EXECUTOR ON)`` and ``set(USE_PROFILER ON)`` - TVM requires LLVM for for CPU codegen. We highly recommend you to build with the LLVM support on. diff --git a/include/tvm/runtime/crt/graph_executor.h b/include/tvm/runtime/crt/graph_executor.h index 5bb3dd4e16a9..9578013cdf02 100644 --- a/include/tvm/runtime/crt/graph_executor.h +++ b/include/tvm/runtime/crt/graph_executor.h @@ -21,8 +21,8 @@ * \file graph_executor.h * \brief Tiny graph executor that can run graph containing only tvm PackedFunc. 
*/ -#ifndef TVM_RUNTIME_CRT_GRAPH_RUNTIME_H_ -#define TVM_RUNTIME_CRT_GRAPH_RUNTIME_H_ +#ifndef TVM_RUNTIME_CRT_GRAPH_EXECUTOR_H_ +#define TVM_RUNTIME_CRT_GRAPH_EXECUTOR_H_ #ifdef __cplusplus extern "C" { @@ -127,4 +127,4 @@ int TVMGraphExecutor_Release(TVMGraphExecutor** runtime); } // extern "C" #endif -#endif // TVM_RUNTIME_CRT_GRAPH_RUNTIME_H_ +#endif // TVM_RUNTIME_CRT_GRAPH_EXECUTOR_H_ diff --git a/include/tvm/runtime/crt/graph_executor_module.h b/include/tvm/runtime/crt/graph_executor_module.h index 7bc881354d80..10a879e9ba30 100644 --- a/include/tvm/runtime/crt/graph_executor_module.h +++ b/include/tvm/runtime/crt/graph_executor_module.h @@ -21,8 +21,8 @@ * \file graph_executor.h * \brief Tiny graph executor that can run graph containing only tvm PackedFunc. */ -#ifndef TVM_RUNTIME_CRT_GRAPH_RUNTIME_MODULE_H_ -#define TVM_RUNTIME_CRT_GRAPH_RUNTIME_MODULE_H_ +#ifndef TVM_RUNTIME_CRT_GRAPH_EXECUTOR_MODULE_H_ +#define TVM_RUNTIME_CRT_GRAPH_EXECUTOR_MODULE_H_ #ifdef __cplusplus extern "C" { @@ -39,4 +39,4 @@ tvm_crt_error_t TVMGraphExecutorModule_Register(); } // extern "C" #endif -#endif // TVM_RUNTIME_CRT_GRAPH_RUNTIME_MODULE_H_ +#endif // TVM_RUNTIME_CRT_GRAPH_EXECUTOR_MODULE_H_ diff --git a/jvm/core/src/main/java/org/apache/tvm/contrib/GraphModule.java b/jvm/core/src/main/java/org/apache/tvm/contrib/GraphModule.java index 0e2583553ed3..a7a03d52740e 100644 --- a/jvm/core/src/main/java/org/apache/tvm/contrib/GraphModule.java +++ b/jvm/core/src/main/java/org/apache/tvm/contrib/GraphModule.java @@ -147,7 +147,7 @@ public NDArray debugGetOutput(String node, NDArray out) { if (fdebugGetOutput != null) { fdebugGetOutput.pushArg(node).pushArg(out).invoke(); } else { - throw new RuntimeException("Please compile runtime with USE_GRAPH_RUNTIME_DEBUG = 0"); + throw new RuntimeException("Please compile runtime with USE_GRAPH_EXECUTOR_DEBUG = ON"); } return out; } @@ -162,7 +162,7 @@ public NDArray debugGetOutput(int node, NDArray out) { if (fdebugGetOutput != null) { fdebugGetOutput.pushArg(node).pushArg(out).invoke(); } else { - throw new RuntimeException("Please compile runtime with USE_GRAPH_RUNTIME_DEBUG = 0"); + throw new RuntimeException("Please compile runtime with USE_GRAPH_EXECUTOR_DEBUG = ON"); } return out; } diff --git a/python/tvm/contrib/cuda_graph/cuda_graph_executor.py b/python/tvm/contrib/cuda_graph/cuda_graph_executor.py index 712c9ea2e9ef..53bb9b0cb37c 100644 --- a/python/tvm/contrib/cuda_graph/cuda_graph_executor.py +++ b/python/tvm/contrib/cuda_graph/cuda_graph_executor.py @@ -58,7 +58,7 @@ def create(graph_json_str, libmod, device): except ValueError: raise ValueError( "To enable CUDA graph support (experimental), please set " - "'(USE_GRAPH_RUNTIME_CUGRAPH ON)' in config.cmake and rebuild TVM" + "'(USE_GRAPH_EXECUTOR_CUDA_GRAPH ON)' in config.cmake and rebuild TVM" ) return GraphModuleCudaGraph(fcreate(graph_json_str, libmod, *device_type_id)) diff --git a/python/tvm/contrib/debugger/debug_executor.py b/python/tvm/contrib/debugger/debug_executor.py index b7de4390c81d..b27ae6533e38 100644 --- a/python/tvm/contrib/debugger/debug_executor.py +++ b/python/tvm/contrib/debugger/debug_executor.py @@ -64,7 +64,7 @@ def create(graph_json_str, libmod, device, dump_root=None): fcreate = tvm._ffi.get_global_func("tvm.graph_executor_debug.create") except ValueError: raise ValueError( - "Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in " + "Please set '(USE_GRAPH_EXECUTOR_DEBUG ON)' in " "config.cmake and rebuild TVM to enable debug mode" ) func_obj = fcreate(graph_json_str, libmod, 
*device_type_id) diff --git a/src/relay/backend/contrib/arm_compute_lib/codegen.cc b/src/relay/backend/contrib/arm_compute_lib/codegen.cc index 1e3f1ce1c525..8098c8d51274 100644 --- a/src/relay/backend/contrib/arm_compute_lib/codegen.cc +++ b/src/relay/backend/contrib/arm_compute_lib/codegen.cc @@ -381,7 +381,7 @@ TVM_REGISTER_GLOBAL("relay.ext.arm_compute_lib").set_body_typed(ACLCompiler); * \return True if ACL graph executor is enabled, False if not. */ inline constexpr bool IsACLRuntimeEnabled() { -#if TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB +#if TVM_GRAPH_EXECUTOR_ARM_COMPUTE_LIB return true; #else return false; diff --git a/src/relay/backend/contrib/tensorrt/codegen.cc b/src/relay/backend/contrib/tensorrt/codegen.cc index c453ff625ed7..e121b6010ad8 100644 --- a/src/relay/backend/contrib/tensorrt/codegen.cc +++ b/src/relay/backend/contrib/tensorrt/codegen.cc @@ -32,7 +32,7 @@ #include "../../utils.h" #include "../codegen_json/codegen_json.h" -#if TVM_GRAPH_RUNTIME_TENSORRT +#if TVM_GRAPH_EXECUTOR_TENSORRT #include "NvInfer.h" #endif @@ -221,11 +221,11 @@ TVM_REGISTER_GLOBAL("relay.ext.tensorrt").set_body_typed(TensorRTCompiler); * \return True if enabled, False if not. */ inline constexpr bool IsTensorRTRuntimeEnabled() { -#if TVM_GRAPH_RUNTIME_TENSORRT +#if TVM_GRAPH_EXECUTOR_TENSORRT return true; #else return false; -#endif // TVM_GRAPH_RUNTIME_TENSORRT +#endif // TVM_GRAPH_EXECUTOR_TENSORRT } /*! @@ -234,11 +234,11 @@ inline constexpr bool IsTensorRTRuntimeEnabled() { * runtime is not enabled. */ Array GetTensorRTVersion() { -#if TVM_GRAPH_RUNTIME_TENSORRT +#if TVM_GRAPH_EXECUTOR_TENSORRT return {Integer(NV_TENSORRT_MAJOR), Integer(NV_TENSORRT_MINOR), Integer(NV_TENSORRT_PATCH)}; #else return {}; -#endif // TVM_GRAPH_RUNTIME_TENSORRT +#endif // TVM_GRAPH_EXECUTOR_TENSORRT } TVM_REGISTER_GLOBAL("relay.op.is_tensorrt_runtime_enabled") diff --git a/src/runtime/contrib/arm_compute_lib/acl_runtime.cc b/src/runtime/contrib/arm_compute_lib/acl_runtime.cc index ed8f6adbd083..6562d1bfc62d 100644 --- a/src/runtime/contrib/arm_compute_lib/acl_runtime.cc +++ b/src/runtime/contrib/arm_compute_lib/acl_runtime.cc @@ -28,7 +28,7 @@ #include "../json/json_node.h" #include "../json/json_runtime.h" -#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB +#ifdef TVM_GRAPH_EXECUTOR_ARM_COMPUTE_LIB #include #include #include @@ -82,7 +82,7 @@ class ACLRuntime : public JSONRuntimeBase { BuildEngine(); } -#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB +#ifdef TVM_GRAPH_EXECUTOR_ARM_COMPUTE_LIB /*! * \brief Unpack inputs and outputs and run inference on a given layer. * @@ -518,12 +518,12 @@ class ACLRuntime : public JSONRuntimeBase { #else void Run() override { LOG(FATAL) << "Cannot call run on Arm Compute Library module without runtime enabled. " - << "Please build with USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME."; + << "Please build with USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR."; } void BuildEngine() { LOG(WARNING) << "Arm Compute Library engine is not initialized. 
" - << "Please build with USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME."; + << "Please build with USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR."; } #endif }; diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index e6eb28c10af6..21031c67863f 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -32,7 +32,7 @@ #include "../json/json_node.h" #include "../json/json_runtime.h" -#ifdef TVM_GRAPH_RUNTIME_TENSORRT +#ifdef TVM_GRAPH_EXECUTOR_TENSORRT #include "NvInfer.h" #include "tensorrt_builder.h" #endif @@ -108,7 +108,7 @@ class TensorRTRuntime : public JSONRuntimeBase { } } -#ifdef TVM_GRAPH_RUNTIME_TENSORRT +#ifdef TVM_GRAPH_EXECUTOR_TENSORRT /*! \brief Destroy engines and contexts. */ ~TensorRTRuntime() { for (auto& it : trt_engine_cache_) { diff --git a/src/runtime/crt/host/main.cc b/src/runtime/crt/host/main.cc index 8976140bdf99..e64455417928 100644 --- a/src/runtime/crt/host/main.cc +++ b/src/runtime/crt/host/main.cc @@ -34,7 +34,7 @@ #include "crt_config.h" -#ifdef TVM_HOST_USE_GRAPH_RUNTIME_MODULE +#ifdef TVM_HOST_USE_GRAPH_EXECUTOR_MODULE #include #endif @@ -131,7 +131,7 @@ int main(int argc, char** argv) { utvm_rpc_server_t rpc_server = UTvmRpcServerInit(&UTvmWriteFunc, nullptr); -#ifdef TVM_HOST_USE_GRAPH_RUNTIME_MODULE +#ifdef TVM_HOST_USE_GRAPH_EXECUTOR_MODULE CHECK_EQ(TVMGraphExecutorModule_Register(), kTvmErrorNoError, "failed to register GraphExecutor TVMModule"); #endif diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h index 69811b5a9348..79706c702d9a 100644 --- a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h +++ b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h @@ -21,8 +21,8 @@ * \file src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h * \brief Tiny graph executor that can run graph containing only tvm PackedFunc. */ -#ifndef TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_GRAPH_RUNTIME_H_ -#define TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_GRAPH_RUNTIME_H_ +#ifndef TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_GRAPH_EXECUTOR_H_ +#define TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_GRAPH_EXECUTOR_H_ #include #include @@ -118,4 +118,4 @@ int32_t TVMGraphExecutor_CreateTVMOp(TVMGraphExecutor* runtime, const TVMOpParam DLTensorPtr* args, const uint32_t args_count, uint32_t num_inputs, TVMPackedFunc* pf); -#endif // TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_GRAPH_RUNTIME_H_ +#endif // TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_GRAPH_EXECUTOR_H_ diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h index 0010c76a593f..ac5adc842b62 100644 --- a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h +++ b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h @@ -21,8 +21,8 @@ * \file src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h * \brief Lightweight JSON Reader that read save into C++ data structs. 
*/ -#ifndef TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_LOAD_JSON_H_ -#define TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_LOAD_JSON_H_ +#ifndef TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_LOAD_JSON_H_ +#define TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_LOAD_JSON_H_ #include #include @@ -38,9 +38,9 @@ enum { JSON_READ_TYPE_S32 = 6, JSON_READ_TYPE_F32 = 7, JSON_READ_TYPE_F64 = 8, - JSON_READ_TYPE_GRAPH_RUNTIME_NODE = 9, - JSON_READ_TYPE_GRAPH_RUNTIME_NODE_ENTRY = 10, - JSON_READ_TYPE_GRAPH_RUNTIME_GRAPH_ATTR = 11 + JSON_READ_TYPE_GRAPH_EXECUTOR_NODE = 9, + JSON_READ_TYPE_GRAPH_EXECUTOR_NODE_ENTRY = 10, + JSON_READ_TYPE_GRAPH_EXECUTOR_GRAPH_ATTR = 11 }; typedef struct Seq { @@ -100,4 +100,4 @@ tvm_crt_error_t JSONReader_Create(const char* is, JSONReader* reader); */ tvm_crt_error_t JSONReader_Release(JSONReader* reader); -#endif // TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_LOAD_JSON_H_ +#endif // TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_LOAD_JSON_H_ diff --git a/src/runtime/graph_executor/graph_executor_factory.cc b/src/runtime/graph_executor/graph_executor_factory.cc index 7110cf695888..bc12a4944b4a 100644 --- a/src/runtime/graph_executor/graph_executor_factory.cc +++ b/src/runtime/graph_executor/graph_executor_factory.cc @@ -141,7 +141,7 @@ Module GraphExecutorFactory::DebugRuntimeCreate(const std::vector& devs) Module GraphExecutorFactory::CudaGraphExecutorCreate(const std::vector& devs) { const PackedFunc* pf = tvm::runtime::Registry::Get("tvm.graph_executor_cuda_graph.create"); ICHECK(pf != nullptr) << "Cannot find function tvm.graph_executor_cuda_graph.create in registry. " - "Did you set(USE_GRAPH_RUNTIME_CUGRAPH=ON)?"; + "Did you set(USE_GRAPH_EXECUTOR_CUDA_GRAPH=ON)?"; std::vector unpacked_devs; for (const auto& dev : devs) { unpacked_devs.emplace_back(dev.device_type); diff --git a/src/runtime/micro/standalone/utvm_graph_executor.h b/src/runtime/micro/standalone/utvm_graph_executor.h index 5e15d5d0a7e7..afede6a7b30a 100644 --- a/src/runtime/micro/standalone/utvm_graph_executor.h +++ b/src/runtime/micro/standalone/utvm_graph_executor.h @@ -17,8 +17,8 @@ * under the License. 
*/ -#ifndef TVM_RUNTIME_MICRO_STANDALONE_UTVM_GRAPH_RUNTIME_H_ -#define TVM_RUNTIME_MICRO_STANDALONE_UTVM_GRAPH_RUNTIME_H_ +#ifndef TVM_RUNTIME_MICRO_STANDALONE_UTVM_GRAPH_EXECUTOR_H_ +#define TVM_RUNTIME_MICRO_STANDALONE_UTVM_GRAPH_EXECUTOR_H_ #include @@ -164,4 +164,4 @@ class MicroGraphExecutor { } // namespace micro } // namespace tvm -#endif // TVM_RUNTIME_MICRO_STANDALONE_UTVM_GRAPH_RUNTIME_H_ +#endif // TVM_RUNTIME_MICRO_STANDALONE_UTVM_GRAPH_EXECUTOR_H_ diff --git a/src/support/libinfo.cc b/src/support/libinfo.cc index d6c8f1799596..ea3a22e8ab01 100644 --- a/src/support/libinfo.cc +++ b/src/support/libinfo.cc @@ -76,12 +76,12 @@ #define TVM_INFO_USE_STACKVM_RUNTIME "NOT-FOUND" #endif -#ifndef TVM_INFO_USE_GRAPH_RUNTIME -#define TVM_INFO_USE_GRAPH_RUNTIME "NOT-FOUND" +#ifndef TVM_INFO_USE_GRAPH_EXECUTOR +#define TVM_INFO_USE_GRAPH_EXECUTOR "NOT-FOUND" #endif -#ifndef TVM_INFO_USE_GRAPH_RUNTIME_DEBUG -#define TVM_INFO_USE_GRAPH_RUNTIME_DEBUG "NOT-FOUND" +#ifndef TVM_INFO_USE_GRAPH_EXECUTOR_DEBUG +#define TVM_INFO_USE_GRAPH_EXECUTOR_DEBUG "NOT-FOUND" #endif #ifndef TVM_INFO_USE_OPENMP @@ -200,8 +200,8 @@ #define TVM_INFO_USE_ARM_COMPUTE_LIB "NOT-FOUND" #endif -#ifndef TVM_INFO_USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME -#define TVM_INFO_USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME "NOT-FOUND" +#ifndef TVM_INFO_USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR +#define TVM_INFO_USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR "NOT-FOUND" #endif #ifndef TVM_INFO_INDEX_DEFAULT_I64 @@ -234,8 +234,8 @@ TVM_DLL Map GetLibInfo() { {"USE_LLVM", TVM_INFO_USE_LLVM}, {"LLVM_VERSION", TVM_INFO_LLVM_VERSION}, {"USE_STACKVM_RUNTIME", TVM_INFO_USE_STACKVM_RUNTIME}, - {"USE_GRAPH_RUNTIME", TVM_INFO_USE_GRAPH_RUNTIME}, - {"USE_GRAPH_RUNTIME_DEBUG", TVM_INFO_USE_GRAPH_RUNTIME_DEBUG}, + {"USE_GRAPH_EXECUTOR", TVM_INFO_USE_GRAPH_EXECUTOR}, + {"USE_GRAPH_EXECUTOR_DEBUG", TVM_INFO_USE_GRAPH_EXECUTOR_DEBUG}, {"USE_OPENMP", TVM_INFO_USE_OPENMP}, {"USE_RELAY_DEBUG", TVM_INFO_USE_RELAY_DEBUG}, {"USE_RTTI", TVM_INFO_USE_RTTI}, @@ -265,7 +265,7 @@ TVM_DLL Map GetLibInfo() { {"USE_COREML", TVM_INFO_USE_COREML}, {"USE_TARGET_ONNX", TVM_INFO_USE_TARGET_ONNX}, {"USE_ARM_COMPUTE_LIB", TVM_INFO_USE_ARM_COMPUTE_LIB}, - {"USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME", TVM_INFO_USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME}, + {"USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR", TVM_INFO_USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR}, {"INDEX_DEFAULT_I64", TVM_INFO_INDEX_DEFAULT_I64}, {"TVM_CXX_COMPILER_PATH", TVM_CXX_COMPILER_PATH}}; return result; diff --git a/tests/azure-pipelines/main.yml b/tests/azure-pipelines/main.yml index 094c1df12739..49d488aba5fd 100644 --- a/tests/azure-pipelines/main.yml +++ b/tests/azure-pipelines/main.yml @@ -35,7 +35,7 @@ jobs: cmakeArgs: > -DUSE_SORT=ON -DUSE_RPC=ON - -DUSE_GRAPH_RUNTIME=ON + -DUSE_GRAPH_EXECUTOR=ON .. - task: MSBuild@1 inputs: @@ -56,7 +56,7 @@ jobs: cmakeArgs: > -DUSE_SORT=ON -DUSE_RPC=ON - -DUSE_GRAPH_RUNTIME=ON + -DUSE_GRAPH_EXECUTOR=ON .. - task: MSBuild@1 inputs: @@ -75,7 +75,7 @@ jobs: cmakeArgs: > -DUSE_SORT=ON -DUSE_RPC=ON - -DUSE_GRAPH_RUNTIME=ON + -DUSE_GRAPH_EXECUTOR=ON .. 
- script: cd build.common && make -j`sysctl -n hw.ncpu` displayName: Build the project diff --git a/tests/python/unittest/test_link_params.py b/tests/python/unittest/test_link_params.py index 739c363a5c53..3ad515604d0b 100644 --- a/tests/python/unittest/test_link_params.py +++ b/tests/python/unittest/test_link_params.py @@ -365,7 +365,7 @@ def test_crt_link_params(): opts = tvm.micro.default_options( os.path.join(tvm.micro.get_standalone_crt_dir(), "template", "host") ) - opts["bin_opts"]["ldflags"].append("-DTVM_HOST_USE_GRAPH_RUNTIME_MODULE") + opts["bin_opts"]["ldflags"].append("-DTVM_HOST_USE_GRAPH_EXECUTOR_MODULE") micro_binary = tvm.micro.build_static_runtime( workspace, diff --git a/tests/scripts/task_config_build_gpu.sh b/tests/scripts/task_config_build_gpu.sh index 7338555c4c94..609325c9962b 100755 --- a/tests/scripts/task_config_build_gpu.sh +++ b/tests/scripts/task_config_build_gpu.sh @@ -34,7 +34,7 @@ echo set\(USE_NNPACK ON\) >> config.cmake echo set\(NNPACK_PATH /NNPACK/build/\) >> config.cmake echo set\(USE_RPC ON\) >> config.cmake echo set\(USE_SORT ON\) >> config.cmake -echo set\(USE_GRAPH_RUNTIME ON\) >> config.cmake +echo set\(USE_GRAPH_EXECUTOR ON\) >> config.cmake echo set\(USE_STACKVM_RUNTIME ON\) >> config.cmake echo set\(USE_PROFILER ON\) >> config.cmake echo set\(USE_ANTLR ON\) >> config.cmake diff --git a/tutorials/frontend/deploy_model_on_android.py b/tutorials/frontend/deploy_model_on_android.py index 8321cc6128bc..8efcb706b380 100644 --- a/tutorials/frontend/deploy_model_on_android.py +++ b/tutorials/frontend/deploy_model_on_android.py @@ -71,7 +71,7 @@ # -DUSE_RPC=ON \ # -DUSE_SORT=ON \ # -DUSE_VULKAN=ON \ -# -DUSE_GRAPH_RUNTIME=ON \ +# -DUSE_GRAPH_EXECUTOR=ON \ # .. # make -j10 # From bd3271d830cdc3f4b86dbb70c779eadde256f151 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 12 Mar 2021 10:23:39 -0800 Subject: [PATCH 08/16] git-clang-format --- include/tvm/runtime/crt/graph_executor.h | 4 +-- src/relay/backend/graph_executor_codegen.cc | 4 +-- .../crt/graph_executor/graph_executor.c | 22 ++++++------ .../graph_executor_module.c | 36 ++++++++++--------- .../internal/graph_executor/graph_executor.h | 6 ++-- .../cuda_graph/graph_runtime_cuda_graph.cc | 15 ++++---- .../debug/graph_executor_debug.cc | 12 +++---- src/runtime/graph_executor/graph_executor.cc | 15 ++++---- .../graph_executor/graph_executor_factory.cc | 35 +++++++++--------- .../graph_executor/graph_executor_factory.h | 4 +-- src/runtime/micro/standalone/utvm_runtime.cc | 2 +- web/src/runtime.ts | 8 ++--- 12 files changed, 82 insertions(+), 81 deletions(-) diff --git a/include/tvm/runtime/crt/graph_executor.h b/include/tvm/runtime/crt/graph_executor.h index 9578013cdf02..6895d2e71e34 100644 --- a/include/tvm/runtime/crt/graph_executor.h +++ b/include/tvm/runtime/crt/graph_executor.h @@ -67,7 +67,7 @@ typedef struct TVMGraphExecutor TVMGraphExecutor; * \return 0 if successful. */ int TVMGraphExecutor_Create(const char* sym_json, TVMModuleHandle module_handle, - const DLDevice* devices, TVMGraphExecutor** runtime); + const DLDevice* devices, TVMGraphExecutor** runtime); int TVMGraphExecutor_GetInputIndex(TVMGraphExecutor* runtime, const char* name); @@ -108,7 +108,7 @@ int TVMGraphExecutor_GetOutput(TVMGraphExecutor* runtime, const int32_t index, D * \return The result of this function execution. */ int TVMGraphExecutor_LoadParams(TVMGraphExecutor* runtime, const char* param_blob, - const uint32_t param_size); + const uint32_t param_size); /*! * \brief Execute the graph. 
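Because the renamed options are baked into the `GetLibInfo()` table (see the `libinfo.cc` hunk above), a rebuilt runtime can be sanity-checked from Python. A small sketch, assuming the standard `tvm.support.libinfo()` wrapper over that table; the exact key set depends on how TVM was configured:

```python
import tvm

# tvm.support.libinfo() exposes the compile-time flag table assembled by
# GetLibInfo() in src/support/libinfo.cc; after this patch the keys carry
# the *_GRAPH_EXECUTOR names instead of *_GRAPH_RUNTIME.
info = tvm.support.libinfo()
for key in (
    "USE_GRAPH_EXECUTOR",
    "USE_GRAPH_EXECUTOR_DEBUG",
    "USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR",
):
    print(key, "=", info.get(key, "NOT-FOUND"))
```

A runtime built from a pre-rename `config.cmake` still reports the old keys, so this is a quick way to catch a stale build directory after pulling this change.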
diff --git a/src/relay/backend/graph_executor_codegen.cc b/src/relay/backend/graph_executor_codegen.cc index 54f7f85836b2..72989b5ba46a 100644 --- a/src/relay/backend/graph_executor_codegen.cc +++ b/src/relay/backend/graph_executor_codegen.cc @@ -573,8 +573,8 @@ class GraphExecutorCodegenModule : public runtime::ModuleNode { ICHECK(dev_type); targets[dev_type->value] = it.second; } - codegen_ = - std::make_shared(reinterpret_cast(mod), targets); + codegen_ = std::make_shared(reinterpret_cast(mod), + targets); }); } else if (name == "codegen") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { diff --git a/src/runtime/crt/graph_executor/graph_executor.c b/src/runtime/crt/graph_executor/graph_executor.c index ecd2006b8ea9..0fe4201e0e71 100644 --- a/src/runtime/crt/graph_executor/graph_executor.c +++ b/src/runtime/crt/graph_executor/graph_executor.c @@ -75,7 +75,7 @@ int NodeEntry_Load(TVMGraphExecutorNodeEntry* entry, JSONReader* reader) { } void TVMGraphExecutorNode_LoadAttrs(TVMGraphExecutorNode* node, JSONReader* reader, - TVMOpParam* param) { + TVMOpParam* param) { int bitmask = 0; char key[20], value[120]; memset(param, 0, sizeof(TVMOpParam)); @@ -138,8 +138,8 @@ int TVMGraphExecutorNode_Load(TVMGraphExecutorNode* node, JSONReader* reader) { break; } DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorNodeEntry) * num_inputs, - dev, (void**)&node->inputs); + tvm_crt_error_t err = TVMPlatformMemoryAllocate( + sizeof(TVMGraphExecutorNodeEntry) * num_inputs, dev, (void**)&node->inputs); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); return -1; @@ -773,7 +773,7 @@ void TVMGraphExecutor_SetInput(TVMGraphExecutor* runtime, const char* name, DLTe * \return The result of this function execution. */ int TVMGraphExecutor_LoadParams(TVMGraphExecutor* runtime, const char* param_blob, - const uint32_t param_size) { + const uint32_t param_size) { int status = 0; const char* bptr = param_blob; uint64_t header, reserved; @@ -967,8 +967,8 @@ int TVMGraphExecutor_SetupStorage(TVMGraphExecutor* runtime) { } // Allocate the space. - err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorStorageEntry) * pool_entry_count, alloc_dev, - (void**)&runtime->storage_pool); + err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorStorageEntry) * pool_entry_count, + alloc_dev, (void**)&runtime->storage_pool); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); return -1; @@ -1086,7 +1086,7 @@ int TVMGraphExecutor_SetupOpExecs(TVMGraphExecutor* runtime) { #endif // TVM_CRT_DEBUG TVMPackedFunc pf; TVMGraphExecutor_CreateTVMOp(runtime, &(inode->param), args, args_count, inode->inputs_count, - &pf); + &pf); runtime->op_execs[nid] = pf; } } @@ -1105,8 +1105,8 @@ typedef struct TVMOpArgs { } TVMOpArgs; int32_t TVMGraphExecutor_CreateTVMOp(TVMGraphExecutor* runtime, const TVMOpParam* param, - DLTensorPtr* args, const uint32_t args_count, - uint32_t num_inputs, TVMPackedFunc* pf) { + DLTensorPtr* args, const uint32_t args_count, + uint32_t num_inputs, TVMPackedFunc* pf) { int status = 0; uint32_t idx; TVMOpArgs arg_ptr; @@ -1152,7 +1152,7 @@ int32_t TVMGraphExecutor_CreateTVMOp(TVMGraphExecutor* runtime, const TVMOpParam * \return 0 on success. 
*/ int TVMGraphExecutor_Init(TVMGraphExecutor* runtime, const char* graph_json, - TVMModuleHandle module_handle, const DLDevice* devs) { + TVMModuleHandle module_handle, const DLDevice* devs) { JSONReader reader; tvm_crt_error_t err = JSONReader_Create(graph_json, &reader); if (err != kTvmErrorNoError) { @@ -1185,7 +1185,7 @@ int TVMGraphExecutor_Init(TVMGraphExecutor* runtime, const char* graph_json, } int TVMGraphExecutor_Create(const char* sym_json, TVMModuleHandle module_handle, - const DLDevice* devs, TVMGraphExecutor** runtime) { + const DLDevice* devs, TVMGraphExecutor** runtime) { DLDevice dev = {kDLCPU, 0}; tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutor), dev, (void**)runtime); if (err != kTvmErrorNoError) { diff --git a/src/runtime/crt/graph_executor_module/graph_executor_module.c b/src/runtime/crt/graph_executor_module/graph_executor_module.c index f4a7d8cee7cb..b5c7e742f5c0 100644 --- a/src/runtime/crt/graph_executor_module/graph_executor_module.c +++ b/src/runtime/crt/graph_executor_module/graph_executor_module.c @@ -39,7 +39,7 @@ typedef struct { static GraphExecutorModule graph_executor; int32_t TVMGraphExecutorModule_Create(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, - int* ret_tcodes, void* resource_handle) { + int* ret_tcodes, void* resource_handle) { if (graph_executor.runtime != NULL) { return kTvmErrorGraphModuleAlreadyCreated; } @@ -77,8 +77,9 @@ int32_t TVMGraphExecutorModule_Create(TVMValue* args, int* tcodes, int nargs, TV return kTvmErrorNoError; } -int32_t TVMGraphExecutorModule_GetInput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, - int* ret_tcodes, void* resource_handle) { +int32_t TVMGraphExecutorModule_GetInput(TVMValue* args, int* tcodes, int nargs, + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { if (nargs != 1) { return kTvmErrorFunctionCallNumArguments; } @@ -93,15 +94,15 @@ int32_t TVMGraphExecutorModule_GetInput(TVMValue* args, int* tcodes, int nargs, } uint32_t eid = TVMGraphExecutor_GetEntryId(graph_executor.runtime, - graph_executor.runtime->input_nodes[index], 0); + graph_executor.runtime->input_nodes[index], 0); ret_values[0].v_handle = (void*)&graph_executor.runtime->data_entry[eid].dl_tensor; ret_tcodes[0] = kTVMNDArrayHandle; return 0; } int32_t TVMGraphExecutorModule_GetNumInputs(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { if (nargs != 0) { return kTvmErrorFunctionCallNumArguments; } @@ -112,8 +113,8 @@ int32_t TVMGraphExecutorModule_GetNumInputs(TVMValue* args, int* tcodes, int nar } int32_t TVMGraphExecutorModule_GetNumOutputs(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { if (nargs != 0) { return kTvmErrorFunctionCallNumArguments; } @@ -124,8 +125,8 @@ int32_t TVMGraphExecutorModule_GetNumOutputs(TVMValue* args, int* tcodes, int na } int32_t TVMGraphExecutorModule_GetOutput(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { if (nargs != 1) { return kTvmErrorFunctionCallNumArguments; } @@ -149,8 +150,8 @@ int32_t TVMGraphExecutorModule_GetOutput(TVMValue* args, int* tcodes, int nargs, } int32_t TVMGraphExecutorModule_LoadParams(TVMValue* args, int* tcodes, int nargs, - TVMValue* 
ret_values, int* ret_tcodes, - void* resource_handle) { + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { if (nargs != 1) { return kTvmErrorFunctionCallNumArguments; } @@ -166,7 +167,7 @@ int32_t TVMGraphExecutorModule_LoadParams(TVMValue* args, int* tcodes, int nargs } int32_t TVMGraphExecutorModule_Run(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, - int* ret_tcodes, void* resource_handle) { + int* ret_tcodes, void* resource_handle) { if (nargs != 0) { return kTvmErrorFunctionCallNumArguments; } @@ -177,8 +178,9 @@ int32_t TVMGraphExecutorModule_Run(TVMValue* args, int* tcodes, int nargs, TVMVa return 0; } -int32_t TVMGraphExecutorModule_SetInput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, - int* ret_tcodes, void* resource_handle) { +int32_t TVMGraphExecutorModule_SetInput(TVMValue* args, int* tcodes, int nargs, + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { if (nargs != 2) { return kTvmErrorFunctionCallNumArguments; } @@ -194,8 +196,8 @@ int32_t TVMGraphExecutorModule_SetInput(TVMValue* args, int* tcodes, int nargs, } int32_t TVMGraphExecutorModule_NotImplemented(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { return kTvmErrorFunctionCallNotImplemented; } diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h index 79706c702d9a..2a7c63f3d85f 100644 --- a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h +++ b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h @@ -110,12 +110,12 @@ typedef DLTensor* DLTensorPtr; uint32_t TVMGraphExecutor_GetEntryId(TVMGraphExecutor* runtime, uint32_t nid, uint32_t index); void TVMGraphExecutor_SetInput(TVMGraphExecutor* runtime, const char* name, DLTensor* data_in); int TVMGraphExecutor_LoadParams(TVMGraphExecutor* runtime, const char* param_blob, - const uint32_t param_size); + const uint32_t param_size); void TVMGraphExecutor_Run(TVMGraphExecutor* runtime); int TVMGraphExecutor_GetOutput(TVMGraphExecutor* runtime, const int32_t idx, DLTensor* out); int32_t TVMGraphExecutor_CreateTVMOp(TVMGraphExecutor* runtime, const TVMOpParam* param, - DLTensorPtr* args, const uint32_t args_count, - uint32_t num_inputs, TVMPackedFunc* pf); + DLTensorPtr* args, const uint32_t args_count, + uint32_t num_inputs, TVMPackedFunc* pf); #endif // TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_GRAPH_EXECUTOR_H_ diff --git a/src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc b/src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc index 280d22743659..53f225403be6 100644 --- a/src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc +++ b/src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc @@ -94,7 +94,7 @@ class GraphExecutorCudaGraph : public GraphExecutor { }; PackedFunc GraphExecutorCudaGraph::GetFunction(const std::string& name, - const ObjectPtr& sptr_to_self) { + const ObjectPtr& sptr_to_self) { if (name == "run_cuda_graph") { return PackedFunc( [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { this->RunCudaGraph(); }); @@ -109,8 +109,8 @@ PackedFunc GraphExecutorCudaGraph::GetFunction(const std::string& name, } Module GraphExecutorCudaGraphCreate(const std::string& sym_json, const tvm::runtime::Module& m, - const std::vector& 
devs, - PackedFunc lookup_linked_param_func) { + const std::vector& devs, + PackedFunc lookup_linked_param_func) { auto exec = make_object(); exec->Init(sym_json, m, devs, lookup_linked_param_func); return Module(exec); @@ -118,9 +118,10 @@ Module GraphExecutorCudaGraphCreate(const std::string& sym_json, const tvm::runt TVM_REGISTER_GLOBAL("tvm.graph_executor_cuda_graph.create") .set_body([](TVMArgs args, TVMRetValue* rv) { - ICHECK_GE(args.num_args, 4) << "The expected number of arguments for graph_executor.create is " - "at least 4, but it has " - << args.num_args; + ICHECK_GE(args.num_args, 4) + << "The expected number of arguments for graph_executor.create is " + "at least 4, but it has " + << args.num_args; PackedFunc lookup_linked_param_func; int dev_start_arg = 2; if (args[2].type_code() == kTVMPackedFuncHandle) { @@ -129,7 +130,7 @@ TVM_REGISTER_GLOBAL("tvm.graph_executor_cuda_graph.create") } *rv = GraphExecutorCudaGraphCreate(args[0], args[1], GetAllDevice(args, dev_start_arg), - lookup_linked_param_func); + lookup_linked_param_func); }); } // namespace runtime } // namespace tvm diff --git a/src/runtime/graph_executor/debug/graph_executor_debug.cc b/src/runtime/graph_executor/debug/graph_executor_debug.cc index 87cdfe20963c..7c1e6960f9f5 100644 --- a/src/runtime/graph_executor/debug/graph_executor_debug.cc +++ b/src/runtime/graph_executor/debug/graph_executor_debug.cc @@ -128,8 +128,8 @@ class GraphExecutorDebug : public GraphExecutor { << "Don't know how to run op type " << nodes_[index].op_type << " remotely over RPC right now"; - // NOTE: GraphExecutorDebug expects graph nodes to have an "op" attribute of "tvm_op" or "null" - // and "null" is a placeholder node for a parameter or input. + // NOTE: GraphExecutorDebug expects graph nodes to have an "op" attribute of "tvm_op" or + // "null" and "null" is a placeholder node for a parameter or input. return 0; } @@ -236,7 +236,7 @@ class GraphExecutorDebug : public GraphExecutor { * \param sptr_to_self Packed function pointer. */ PackedFunc GraphExecutorDebug::GetFunction(const std::string& name, - const ObjectPtr& sptr_to_self) { + const ObjectPtr& sptr_to_self) { // return member functions during query. if (name == "get_output_by_layer") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { @@ -272,8 +272,8 @@ PackedFunc GraphExecutorDebug::GetFunction(const std::string& name, * \param devs All devices. */ Module GraphExecutorDebugCreate(const std::string& sym_json, const tvm::runtime::Module& m, - const std::vector& devs, - PackedFunc lookup_linked_param_func) { + const std::vector& devs, + PackedFunc lookup_linked_param_func) { auto exec = make_object(); exec->Init(sym_json, m, devs, lookup_linked_param_func); return Module(exec); @@ -291,7 +291,7 @@ TVM_REGISTER_GLOBAL("tvm.graph_executor_debug.create").set_body([](TVMArgs args, } *rv = GraphExecutorDebugCreate(args[0], args[1], GetAllDevice(args, dev_start_arg), - lookup_linked_param_func); + lookup_linked_param_func); }); } // namespace runtime } // namespace tvm diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc index 570d55a69e5a..c4d984fe9633 100644 --- a/src/runtime/graph_executor/graph_executor.cc +++ b/src/runtime/graph_executor/graph_executor.cc @@ -69,8 +69,8 @@ void GraphExecutor::Run() { * \param lookup_linked_param_func Linked parameter lookup function. Default is nullptr. 
*/ void GraphExecutor::Init(const std::string& graph_json, tvm::runtime::Module module, - const std::vector& devs, - const PackedFunc lookup_linked_param_func) { + const std::vector& devs, + const PackedFunc lookup_linked_param_func) { std::istringstream is(graph_json); dmlc::JSONReader reader(&is); this->Load(&reader); @@ -389,8 +389,9 @@ void GraphExecutor::SetupOpExecs() { } } -std::pair, std::shared_ptr > GraphExecutor::CreateTVMOp( - const TVMOpParam& param, const std::vector& args, size_t num_inputs) { +std::pair, std::shared_ptr > +GraphExecutor::CreateTVMOp(const TVMOpParam& param, const std::vector& args, + size_t num_inputs) { std::shared_ptr arg_ptr = std::make_shared(); // setup address. arg_ptr->args = args; @@ -439,7 +440,7 @@ std::pair, std::shared_ptr > GraphE } PackedFunc GraphExecutor::GetFunction(const std::string& name, - const ObjectPtr& sptr_to_self) { + const ObjectPtr& sptr_to_self) { // Return member functions during query. if (name == "set_input") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { @@ -505,8 +506,8 @@ PackedFunc GraphExecutor::GetFunction(const std::string& name, } Module GraphExecutorCreate(const std::string& sym_json, const tvm::runtime::Module& m, - const std::vector& devs, - const PackedFunc lookup_linked_param_func) { + const std::vector& devs, + const PackedFunc lookup_linked_param_func) { auto exec = make_object(); exec->Init(sym_json, m, devs, lookup_linked_param_func); return Module(exec); diff --git a/src/runtime/graph_executor/graph_executor_factory.cc b/src/runtime/graph_executor/graph_executor_factory.cc index bc12a4944b4a..4244f62df1d9 100644 --- a/src/runtime/graph_executor/graph_executor_factory.cc +++ b/src/runtime/graph_executor/graph_executor_factory.cc @@ -184,23 +184,24 @@ Module GraphExecutorFactoryModuleLoadBinary(void* strm) { return Module(exec); } -TVM_REGISTER_GLOBAL("tvm.graph_executor_factory.create").set_body([](TVMArgs args, TVMRetValue* rv) { - ICHECK_GE(args.num_args, 3) << "The expected number of arguments for " - "graph_executor_factory.create needs at least 3, " - "but it has " - << args.num_args; - // The argument order is graph_json, module, module_name, param0_name, param0_tensor, - // [param1_name, param1_tensor], ... - ICHECK_EQ((args.size() - 3) % 2, 0); - std::unordered_map params; - for (size_t i = 3; i < static_cast(args.size()); i += 2) { - std::string name = args[i].operator String(); - params[name] = args[i + 1].operator tvm::runtime::NDArray(); - } - auto exec = make_object(args[0], params, args[2]); - exec->Import(args[1]); - *rv = Module(exec); -}); +TVM_REGISTER_GLOBAL("tvm.graph_executor_factory.create") + .set_body([](TVMArgs args, TVMRetValue* rv) { + ICHECK_GE(args.num_args, 3) << "The expected number of arguments for " + "graph_executor_factory.create needs at least 3, " + "but it has " + << args.num_args; + // The argument order is graph_json, module, module_name, param0_name, param0_tensor, + // [param1_name, param1_tensor], ... 
+      ICHECK_EQ((args.size() - 3) % 2, 0);
+      std::unordered_map<std::string, tvm::runtime::NDArray> params;
+      for (size_t i = 3; i < static_cast<size_t>(args.size()); i += 2) {
+        std::string name = args[i].operator String();
+        params[name] = args[i + 1].operator tvm::runtime::NDArray();
+      }
+      auto exec = make_object<GraphExecutorFactory>(args[0], params, args[2]);
+      exec->Import(args[1]);
+      *rv = Module(exec);
+    });
 TVM_REGISTER_GLOBAL("runtime.module.loadbinary_GraphExecutorFactory")
    .set_body_typed(GraphExecutorFactoryModuleLoadBinary);
diff --git a/src/runtime/graph_executor/graph_executor_factory.h b/src/runtime/graph_executor/graph_executor_factory.h
index 6aa443d1aafa..582985824d8b 100644
--- a/src/runtime/graph_executor/graph_executor_factory.h
+++ b/src/runtime/graph_executor/graph_executor_factory.h
@@ -51,8 +51,8 @@ class TVM_DLL GraphExecutorFactory : public runtime::ModuleNode {
 * \param module_name The module name of graph.
 */
  GraphExecutorFactory(const std::string& graph_json,
-                       const std::unordered_map<std::string, tvm::runtime::NDArray>& params,
-                       const std::string& module_name = "default");
+                      const std::unordered_map<std::string, tvm::runtime::NDArray>& params,
+                      const std::string& module_name = "default");
  /*!
  * \brief Get member function to front-end
diff --git a/src/runtime/micro/standalone/utvm_runtime.cc b/src/runtime/micro/standalone/utvm_runtime.cc
index eb96c1e79db7..585da9300128 100644
--- a/src/runtime/micro/standalone/utvm_runtime.cc
+++ b/src/runtime/micro/standalone/utvm_runtime.cc
@@ -24,7 +24,7 @@ void* UTVMRuntimeCreate(const char* json, size_t json_len, void* module) {
  return new tvm::micro::MicroGraphExecutor(std::string(json, json + json_len),
-                                           reinterpret_cast<tvm::micro::DSOModule*>(module));
+                                            reinterpret_cast<tvm::micro::DSOModule*>(module));
}
void UTVMRuntimeDestroy(void* handle) {
diff --git a/web/src/runtime.ts b/web/src/runtime.ts
index 6b1840a0deed..a76096ebba4d 100644
--- a/web/src/runtime.ts
+++ b/web/src/runtime.ts
@@ -992,12 +992,8 @@ export class Instance implements Disposable {
 * @param lib The underlying library.
 * @param dev The execution device of the graph.
 */
-  createGraphExecutor(
-    graphJson: string,
-    lib: Module,
-    dev: DLDevice
-  ): GraphExecutor {
-    const fcreate = this.getGlobalFunc("tvm.graph_executor.create");
+  createGraphExecutor(graphJson: string, lib: Module, dev: DLDevice): GraphExecutor {
+    const fcreate = this.getGlobalFunc('tvm.graph_executor.create');
    const module = fcreate(
      graphJson,
      lib,

From bc010de04bb5a66d3612cdecce1c39271445bdef Mon Sep 17 00:00:00 2001
From: Andrew Reusch
Date: Fri, 12 Mar 2021 10:38:10 -0800
Subject: [PATCH 09/16] graphRuntime -> graphExecutor

---
 .../Camera2BasicFragment.java                 | 16 ++++++++--------
 .../apache/tvm/android/demo/MainActivity.java | 18 +++++++++---------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/apps/android_camera/app/src/main/java/org/apache/tvm/android/androidcamerademo/Camera2BasicFragment.java b/apps/android_camera/app/src/main/java/org/apache/tvm/android/androidcamerademo/Camera2BasicFragment.java
index e02b703bd683..8a5f54a3e399 100644
--- a/apps/android_camera/app/src/main/java/org/apache/tvm/android/androidcamerademo/Camera2BasicFragment.java
+++ b/apps/android_camera/app/src/main/java/org/apache/tvm/android/androidcamerademo/Camera2BasicFragment.java
@@ -111,7 +111,7 @@ public class Camera2BasicFragment extends Fragment {
    private AppCompatTextView mInfoView;
    private ListView mModelView;
    private AssetManager assetManager;
-    private Module graphRuntimeModule;
+    private Module graphExecutorModule;
    private JSONObject labels;
    private ListenableFuture<ProcessCameraProvider> cameraProviderFuture;
    private PreviewView previewView;
@@ -187,21 +187,21 @@ private String[] getModels() {
    private String[] inference(float[] chw) {
        NDArray inputNdArray = NDArray.empty(new long[]{1, IMG_CHANNEL, MODEL_INPUT_SIZE, MODEL_INPUT_SIZE}, new TVMType("float32"));
        inputNdArray.copyFrom(chw);
-        Function setInputFunc = graphRuntimeModule.getFunction("set_input");
+        Function setInputFunc = graphExecutorModule.getFunction("set_input");
        setInputFunc.pushArg(INPUT_NAME).pushArg(inputNdArray).invoke();
        // release tvm local variables
        inputNdArray.release();
        setInputFunc.release();
        // get the function from the module(run it)
-        Function runFunc = graphRuntimeModule.getFunction("run");
+        Function runFunc = graphExecutorModule.getFunction("run");
        runFunc.invoke();
        // release tvm local variables
        runFunc.release();
        // get the function from the module(get output data)
        NDArray outputNdArray = NDArray.empty(new long[]{1, 1000}, new TVMType("float32"));
-        Function getOutputFunc = graphRuntimeModule.getFunction("get_output");
+        Function getOutputFunc = graphExecutorModule.getFunction("get_output");
        getOutputFunc.pushArg(OUTPUT_INDEX).pushArg(outputNdArray).invoke();
        float[] output = outputNdArray.asFloatArray();
        // release tvm local variables
@@ -272,8 +272,8 @@ public void onActivityCreated(Bundle savedInstanceState) {
    @Override
    public void onDestroy() {
        // release tvm local variables
-        if (null != graphRuntimeModule)
-            graphRuntimeModule.release();
+        if (null != graphExecutorModule)
+            graphExecutorModule.release();
        super.onDestroy();
    }
@@ -597,10 +597,10 @@ protected Integer doInBackground(Void...
args) {
                .invoke();
        Log.i(TAG, "as module...");
-        graphRuntimeModule = runtimeCreFunRes.asModule();
+        graphExecutorModule = runtimeCreFunRes.asModule();
        Log.i(TAG, "getting graph executor load params handle...");
        // get the function from the module(load parameters)
-        Function loadParamFunc = graphRuntimeModule.getFunction("load_params");
+        Function loadParamFunc = graphExecutorModule.getFunction("load_params");
        Log.i(TAG, "loading params...");
        loadParamFunc.pushArg(modelParams).invoke();
        // release tvm local variables
diff --git a/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java b/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java
index 6a559557e22e..85cc7a277b4d 100644
--- a/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java
+++ b/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java
@@ -90,7 +90,7 @@ public class MainActivity extends AppCompatActivity {
    private ImageView mImageView;
    private TextView mResultView;
    private AssetManager assetManager;
-    private Module graphRuntimeModule;
+    private Module graphExecutorModule;
    private Vector<String> labels = new Vector<String>();
    @Override
@@ -190,10 +190,10 @@ protected Integer doInBackground(Void... args) {
                    .pushArg(tvmDev.deviceType)
                    .pushArg(tvmDev.deviceId)
                    .invoke();
-            graphRuntimeModule = runtimeCreFunRes.asModule();
+            graphExecutorModule = runtimeCreFunRes.asModule();
            // get the function from the module(load parameters)
-            Function loadParamFunc = graphRuntimeModule.getFunction("load_params");
+            Function loadParamFunc = graphExecutorModule.getFunction("load_params");
            loadParamFunc.pushArg(modelParams).invoke();
            // release tvm local variables
@@ -231,7 +231,7 @@ private class ModelRunAsyncTask extends AsyncTask<Bitmap, Void, Integer> {
    @Override
    protected Integer doInBackground(Bitmap... bitmaps) {
-        if (null != graphRuntimeModule) {
+        if (null != graphExecutorModule) {
            int count = bitmaps.length;
            for (int i = 0 ; i < count ; i++) {
                long processingTimeMs = SystemClock.uptimeMillis();
@@ -283,7 +283,7 @@ protected Integer doInBackground(Bitmap... bitmaps) {
                Log.i(TAG, "set input data");
                NDArray inputNdArray = NDArray.empty(new long[]{1, IMG_CHANNEL, MODEL_INPUT_SIZE, MODEL_INPUT_SIZE}, new TVMType("float32"));;
                inputNdArray.copyFrom(imgRgbTranValues);
-                Function setInputFunc = graphRuntimeModule.getFunction("set_input");
+                Function setInputFunc = graphExecutorModule.getFunction("set_input");
                setInputFunc.pushArg(INPUT_NAME).pushArg(inputNdArray).invoke();
                // release tvm local variables
                inputNdArray.release();
                setInputFunc.release();
                // get the function from the module(run it)
                Log.i(TAG, "run function on target");
-                Function runFunc = graphRuntimeModule.getFunction("run");
+                Function runFunc = graphExecutorModule.getFunction("run");
                runFunc.invoke();
                // release tvm local variables
                runFunc.release();
@@ -299,7 +299,7 @@ protected Integer doInBackground(Bitmap...
bitmaps) { // get the function from the module(get output data) Log.i(TAG, "get output data"); NDArray outputNdArray = NDArray.empty(new long[]{1, 1000}, new TVMType("float32")); - Function getOutputFunc = graphRuntimeModule.getFunction("get_output"); + Function getOutputFunc = graphExecutorModule.getFunction("get_output"); getOutputFunc.pushArg(OUTPUT_INDEX).pushArg(outputNdArray).invoke(); float[] output = outputNdArray.asFloatArray(); // release tvm local variables @@ -351,8 +351,8 @@ protected void onPostExecute(Integer status) { @Override protected void onDestroy() { // release tvm local variables - if (null != graphRuntimeModule) - graphRuntimeModule.release(); + if (null != graphExecutorModule) + graphExecutorModule.release(); super.onDestroy(); } From 383104329c437250a0a41564dbb835cd0b6d6b4c Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 12 Mar 2021 10:48:26 -0800 Subject: [PATCH 10/16] rename more variables --- apps/bundle_deploy/bundle.c | 16 +- apps/bundle_deploy/bundle_static.c | 16 +- include/tvm/runtime/crt/graph_executor.h | 26 +- python/tvm/relay/build_module.py | 4 +- .../crt/graph_executor/graph_executor.c | 247 +++++++++--------- .../graph_executor_module.c | 36 +-- .../internal/graph_executor/graph_executor.h | 12 +- .../graph_executor/graph_executor_factory.cc | 10 +- .../graph_executor/graph_executor_factory.h | 12 +- 9 files changed, 190 insertions(+), 189 deletions(-) diff --git a/apps/bundle_deploy/bundle.c b/apps/bundle_deploy/bundle.c index 55c226f11794..9083f7b5f48b 100644 --- a/apps/bundle_deploy/bundle.c +++ b/apps/bundle_deploy/bundle.c @@ -82,22 +82,22 @@ TVM_DLL void* tvm_runtime_create(const char* json_data, const char* params_data, return graph_executor; } -TVM_DLL void tvm_runtime_destroy(void* runtime) { - TVMGraphExecutor_Release((TVMGraphExecutor**)&runtime); +TVM_DLL void tvm_runtime_destroy(void* executor) { + TVMGraphExecutor_Release((TVMGraphExecutor**)&executor); } -TVM_DLL void tvm_runtime_set_input(void* runtime, const char* name, DLTensor* tensor) { - TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)runtime; +TVM_DLL void tvm_runtime_set_input(void* executor, const char* name, DLTensor* tensor) { + TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)executor; TVMGraphExecutor_SetInput(graph_executor, name, tensor); } -TVM_DLL void tvm_runtime_run(void* runtime) { - TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)runtime; +TVM_DLL void tvm_runtime_run(void* executor) { + TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)executor; TVMGraphExecutor_Run(graph_executor); } -TVM_DLL void tvm_runtime_get_output(void* runtime, int32_t index, DLTensor* tensor) { - TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)runtime; +TVM_DLL void tvm_runtime_get_output(void* executor, int32_t index, DLTensor* tensor) { + TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)executor; TVMGraphExecutor_GetOutput(graph_executor, index, tensor); } diff --git a/apps/bundle_deploy/bundle_static.c b/apps/bundle_deploy/bundle_static.c index 5f7825aec01b..62e63d6b4fe2 100644 --- a/apps/bundle_deploy/bundle_static.c +++ b/apps/bundle_deploy/bundle_static.c @@ -82,23 +82,23 @@ TVM_DLL void* tvm_runtime_create(const char* json_data, const char* params_data, return graph_executor; } -TVM_DLL void tvm_runtime_destroy(void* runtime) { - TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)runtime; +TVM_DLL void tvm_runtime_destroy(void* executor) { + TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)executor; 
TVMGraphExecutor_Release(&graph_executor); } -TVM_DLL void tvm_runtime_set_input(void* runtime, const char* name, DLTensor* tensor) { - TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)runtime; +TVM_DLL void tvm_runtime_set_input(void* executor, const char* name, DLTensor* tensor) { + TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)executor; TVMGraphExecutor_SetInput(graph_executor, name, tensor); } -TVM_DLL void tvm_runtime_run(void* runtime) { - TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)runtime; +TVM_DLL void tvm_runtime_run(void* executor) { + TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)executor; TVMGraphExecutor_Run(graph_executor); } -TVM_DLL void tvm_runtime_get_output(void* runtime, int32_t index, DLTensor* tensor) { - TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)runtime; +TVM_DLL void tvm_runtime_get_output(void* executor, int32_t index, DLTensor* tensor) { + TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)executor; TVMGraphExecutor_GetOutput(graph_executor, index, tensor); } diff --git a/include/tvm/runtime/crt/graph_executor.h b/include/tvm/runtime/crt/graph_executor.h index 6895d2e71e34..eb68ff56d230 100644 --- a/include/tvm/runtime/crt/graph_executor.h +++ b/include/tvm/runtime/crt/graph_executor.h @@ -63,13 +63,13 @@ typedef struct TVMGraphExecutor TVMGraphExecutor; * \param sym_json JSON-encoded graph. * \param module_handle TVM Module that exposes the functions to call. * \param devices runtime execution device. - * \param runtime Pointer which receives a pointer to the newly-created instance. + * \param executor Pointer which receives a pointer to the newly-created instance. * \return 0 if successful. */ int TVMGraphExecutor_Create(const char* sym_json, TVMModuleHandle module_handle, - const DLDevice* devices, TVMGraphExecutor** runtime); + const DLDevice* devices, TVMGraphExecutor** executor); -int TVMGraphExecutor_GetInputIndex(TVMGraphExecutor* runtime, const char* name); +int TVMGraphExecutor_GetInputIndex(TVMGraphExecutor* executor, const char* name); /*! * \brief get number of input tensors allocated. @@ -79,11 +79,11 @@ int TVMGraphExecutor_GetNumInputs(); /*! * \brief set input to the graph based on name. - * \param runtime The graph executor. + * \param executor The graph executor. * \param name The name of the input. * \param data_in The input data. */ -void TVMGraphExecutor_SetInput(TVMGraphExecutor* runtime, const char* name, DLTensor* data_in); +void TVMGraphExecutor_SetInput(TVMGraphExecutor* executor, const char* name, DLTensor* data_in); /*! * \brief get number of output tensors allocated. @@ -93,35 +93,35 @@ int TVMGraphExecutor_GetNumOutputs(); /*! * \brief Return NDArray for given output index. - * \param runtime The graph executor. + * \param executor The graph executor. * \param index The output index. * \param out The DLTensor corresponding to given output node index. * \return The result of this function execution. */ -int TVMGraphExecutor_GetOutput(TVMGraphExecutor* runtime, const int32_t index, DLTensor* out); +int TVMGraphExecutor_GetOutput(TVMGraphExecutor* executor, const int32_t index, DLTensor* out); /*! * \brief Load parameters from parameter blob. - * \param runtime The graph executor. + * \param executor The graph executor. * \param param_blob A binary blob of parameter. * \param param_size The parameter size. * \return The result of this function execution. 
*/ -int TVMGraphExecutor_LoadParams(TVMGraphExecutor* runtime, const char* param_blob, +int TVMGraphExecutor_LoadParams(TVMGraphExecutor* executor, const char* param_blob, const uint32_t param_size); /*! * \brief Execute the graph. - * \param runtime The graph executor. + * \param executor The graph executor. */ -void TVMGraphExecutor_Run(TVMGraphExecutor* runtime); +void TVMGraphExecutor_Run(TVMGraphExecutor* executor); /*! * \brief Release memory associated with the graph executor. - * \param runtime Pointer to graph executor. + * \param executor Pointer to graph executor. * \return 0 if successful */ -int TVMGraphExecutor_Release(TVMGraphExecutor** runtime); +int TVMGraphExecutor_Release(TVMGraphExecutor** executor); #ifdef __cplusplus } // extern "C" diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py index 3981a31900b5..e6c4a14ab04d 100644 --- a/python/tvm/relay/build_module.py +++ b/python/tvm/relay/build_module.py @@ -281,10 +281,10 @@ def build(ir_mod, target=None, target_host=None, params=None, mod_name="default" with tophub_context: bld_mod = BuildModule() graph_json, runtime_mod, params = bld_mod.build(ir_mod, target, target_host, params) - runtime_mod = _graph_executor_factory.GraphExecutorFactoryModule( + executor_factory = _graph_executor_factory.GraphExecutorFactoryModule( ir_mod, target, graph_json, runtime_mod, mod_name, params ) - return runtime_mod + return executor_factory def optimize(mod, target=None, params=None): diff --git a/src/runtime/crt/graph_executor/graph_executor.c b/src/runtime/crt/graph_executor/graph_executor.c index 0fe4201e0e71..2324c5650d7f 100644 --- a/src/runtime/crt/graph_executor/graph_executor.c +++ b/src/runtime/crt/graph_executor/graph_executor.c @@ -568,7 +568,7 @@ int TVMGraphExecutorGraphAttr_Release(TVMGraphExecutorGraphAttr* attr) { return 0; } -int TVMGraphExecutor_Load(TVMGraphExecutor* runtime, JSONReader* reader) { +int TVMGraphExecutor_Load(TVMGraphExecutor* executor, JSONReader* reader) { int status = 0; reader->BeginObject(reader); int bitmask = 0; @@ -584,29 +584,29 @@ int TVMGraphExecutor_Load(TVMGraphExecutor* runtime, JSONReader* reader) { } DLDevice dev = {kDLCPU, 0}; tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorNode) * num_items, dev, - (void**)&runtime->nodes); + (void**)&executor->nodes); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); status = -1; break; } while (reader->NextArrayItem(reader)) { - if (runtime->nodes_count == num_items) { + if (executor->nodes_count == num_items) { fprintf(stderr, "array too big\n"); status = -1; return status; } - TVMGraphExecutorNode* node = runtime->nodes + runtime->nodes_count; + TVMGraphExecutorNode* node = executor->nodes + executor->nodes_count; status = TVMGraphExecutorNode_Load(node, reader); if (status != 0) { fprintf(stderr, "failed to load an element in `nodes` field in graph executor node.\n"); break; #if TVM_CRT_DEBUG } else { - printf("loading: node (%u) %s loaded.\n", runtime->nodes_count, node->name); + printf("loading: node (%u) %s loaded.\n", executor->nodes_count, node->name); #endif // TVM_CRT_DEBUG } - runtime->nodes_count++; + executor->nodes_count++; } bitmask |= 1; } else if (!strcmp(key, "arg_nodes")) { @@ -619,21 +619,22 @@ int TVMGraphExecutor_Load(TVMGraphExecutor* runtime, JSONReader* reader) { } DLDevice dev = {kDLCPU, 0}; tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(uint32_t) * num_items, dev, - (void**)&runtime->input_nodes); + (void**)&executor->input_nodes); 
+ if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); status = -1; break; } while (reader->NextArrayItem(reader)) { - if (runtime->input_nodes_count == num_items) { + if (executor->input_nodes_count == num_items) { fprintf(stderr, "array too big\n"); status = -1; return status; } - uint32_t* node = runtime->input_nodes + runtime->input_nodes_count; + uint32_t* node = executor->input_nodes + executor->input_nodes_count; reader->ReadUnsignedInteger(reader, node); - runtime->input_nodes_count++; + executor->input_nodes_count++; } bitmask |= 2; } else if (!strcmp(key, "node_row_ptr")) { @@ -646,22 +647,22 @@ int TVMGraphExecutor_Load(TVMGraphExecutor* runtime, JSONReader* reader) { } DLDevice dev = {kDLCPU, 0}; tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(uint32_t) * num_items, dev, - (void**)&runtime->node_row_ptr); + (void**)&executor->node_row_ptr); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); status = -1; break; } while (reader->NextArrayItem(reader)) { - if (runtime->node_row_ptr_count == num_items) { + if (executor->node_row_ptr_count == num_items) { fprintf(stderr, "array too big\n"); status = -1; return status; } - uint32_t count = runtime->node_row_ptr_count; - uint32_t* node = runtime->node_row_ptr + count; + uint32_t count = executor->node_row_ptr_count; + uint32_t* node = executor->node_row_ptr + count; reader->ReadUnsignedInteger(reader, node); - runtime->node_row_ptr_count++; + executor->node_row_ptr_count++; } bitmask |= 4; } else if (!strcmp(key, "heads")) { @@ -674,29 +675,29 @@ int TVMGraphExecutor_Load(TVMGraphExecutor* runtime, JSONReader* reader) { } DLDevice dev = {kDLCPU, 0}; tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorNodeEntry) * num_items, - dev, (void**)&runtime->outputs); + dev, (void**)&executor->outputs); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); status = -1; break; } while (reader->NextArrayItem(reader)) { - if (runtime->outputs_count == num_items) { + if (executor->outputs_count == num_items) { fprintf(stderr, "array too big\n"); status = -1; return status; } - TVMGraphExecutorNodeEntry* entry = runtime->outputs + runtime->outputs_count; + TVMGraphExecutorNodeEntry* entry = executor->outputs + executor->outputs_count; status = NodeEntry_Load(entry, reader); if (status != 0) { fprintf(stderr, "Fail to load an element in `heads` field in graph executor node.\n"); break; } - runtime->outputs_count++; + executor->outputs_count++; } bitmask |= 8; } else if (!strcmp(key, "attrs")) { - status = TVMGraphExecutorGraphAttr_Load(&(runtime->attrs), reader); + status = TVMGraphExecutorGraphAttr_Load(&(executor->attrs), reader); if (status != 0) { fprintf(stderr, "Fail to load an element in `heads` field in graph executor node.\n"); break; @@ -719,29 +720,29 @@ int TVMGraphExecutor_Load(TVMGraphExecutor* runtime, JSONReader* reader) { return status; } -uint32_t TVMGraphExecutor_GetEntryId(TVMGraphExecutor* runtime, uint32_t nid, uint32_t index) { - return runtime->node_row_ptr[nid] + index; +uint32_t TVMGraphExecutor_GetEntryId(TVMGraphExecutor* executor, uint32_t nid, uint32_t index) { + return executor->node_row_ptr[nid] + index; } /*! * \brief Get the number of input tensors allocated. - * \param runtime The graph executor. + * \param executor The graph executor. * \return the number of input tensors allocated. 
*/ -int TVMGraphExecutor_GetNumInputs(TVMGraphExecutor* runtime) { return runtime->input_nodes_count; } +int TVMGraphExecutor_GetNumInputs(TVMGraphExecutor* executor) { return executor->input_nodes_count; } /*! * \brief Get the input index given the name of input. - * \param runtime The graph executor. + * \param executor The graph executor. * \param name The name of the input. * \return The index of input. */ -int TVMGraphExecutor_GetInputIndex(TVMGraphExecutor* runtime, const char* name) { +int TVMGraphExecutor_GetInputIndex(TVMGraphExecutor* executor, const char* name) { uint32_t i; int32_t rv = -1; - for (i = 0; i < runtime->input_nodes_count; ++i) { - uint32_t nid = runtime->input_nodes[i]; - if (!strcmp(runtime->nodes[nid].name, name)) { + for (i = 0; i < executor->input_nodes_count; ++i) { + uint32_t nid = executor->input_nodes[i]; + if (!strcmp(executor->nodes[nid].name, name)) { rv = i; break; } @@ -752,27 +753,27 @@ int TVMGraphExecutor_GetInputIndex(TVMGraphExecutor* runtime, const char* name) /*! * \brief set input to the graph based on name. - * \param runtime The graph executor. + * \param executor The graph executor. * \param name The name of the input. * \param data_in The input data. */ -void TVMGraphExecutor_SetInput(TVMGraphExecutor* runtime, const char* name, DLTensor* data_in) { - uint32_t index = TVMGraphExecutor_GetInputIndex(runtime, name); - if (index >= runtime->input_nodes_count) { +void TVMGraphExecutor_SetInput(TVMGraphExecutor* executor, const char* name, DLTensor* data_in) { + uint32_t index = TVMGraphExecutor_GetInputIndex(executor, name); + if (index >= executor->input_nodes_count) { fprintf(stderr, "given index is greater than num of input nodes.\n"); } - uint32_t eid = TVMGraphExecutor_GetEntryId(runtime, runtime->input_nodes[index], 0); - runtime->data_entry[eid].dl_tensor.data = data_in->data; + uint32_t eid = TVMGraphExecutor_GetEntryId(executor, executor->input_nodes[index], 0); + executor->data_entry[eid].dl_tensor.data = data_in->data; } /*! * \brief Load parameters from parameter blob. - * \param runtime The graph executor. + * \param executor The graph executor. * \param param_blob A binary blob of parameter. * \param param_size The parameter size. * \return The result of this function execution. 
*/ -int TVMGraphExecutor_LoadParams(TVMGraphExecutor* runtime, const char* param_blob, +int TVMGraphExecutor_LoadParams(TVMGraphExecutor* executor, const char* param_blob, const uint32_t param_size) { int status = 0; const char* bptr = param_blob; @@ -790,13 +791,13 @@ int TVMGraphExecutor_LoadParams(TVMGraphExecutor* runtime, const char* param_blo char* names = NULL; DLDevice dev = {kDLCPU, 0}; tvm_crt_error_t err = - TVMPlatformMemoryAllocate(TVM_CRT_STRLEN_NAME * runtime->nodes_count, dev, (void**)&names); + TVMPlatformMemoryAllocate(TVM_CRT_STRLEN_NAME * executor->nodes_count, dev, (void**)&names); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); status = -1; return status; } - memset(names, 0, TVM_CRT_STRLEN_NAME * runtime->nodes_count); + memset(names, 0, TVM_CRT_STRLEN_NAME * executor->nodes_count); uint64_t names_count; int idx; memcpy(&names_count, bptr, sizeof(names_count)); @@ -824,33 +825,33 @@ int TVMGraphExecutor_LoadParams(TVMGraphExecutor* runtime, const char* param_blo } for (idx = 0; idx < size; idx++) { - int32_t in_idx = TVMGraphExecutor_GetInputIndex(runtime, names + TVM_CRT_STRLEN_NAME * idx); + int32_t in_idx = TVMGraphExecutor_GetInputIndex(executor, names + TVM_CRT_STRLEN_NAME * idx); CHECK_GT(in_idx, 0, "Found param for non-existent input: %s\n", names + TVM_CRT_STRLEN_NAME * idx); - uint32_t eid = TVMGraphExecutor_GetEntryId(runtime, runtime->input_nodes[in_idx], 0); - if (!(eid < runtime->data_entry_count)) { + uint32_t eid = TVMGraphExecutor_GetEntryId(executor, executor->input_nodes[in_idx], 0); + if (!(eid < executor->data_entry_count)) { fprintf(stderr, "`entry_id`=%d is greater than expected(%d).\n", eid, - runtime->data_entry_count); + executor->data_entry_count); status = -1; } - if (runtime->data_entry[eid].dl_tensor.shape) { - err = TVMPlatformMemoryFree(runtime->data_entry[eid].dl_tensor.shape, dev); + if (executor->data_entry[eid].dl_tensor.shape) { + err = TVMPlatformMemoryFree(executor->data_entry[eid].dl_tensor.shape, dev); if (err != kTvmErrorNoError) { status = -1; } - runtime->data_entry[eid].dl_tensor.shape = 0; + executor->data_entry[eid].dl_tensor.shape = 0; } - if (runtime->data_entry[eid].dl_tensor.data) { - err = TVMPlatformMemoryFree(runtime->data_entry[eid].dl_tensor.data, dev); + if (executor->data_entry[eid].dl_tensor.data) { + err = TVMPlatformMemoryFree(executor->data_entry[eid].dl_tensor.data, dev); if (err != kTvmErrorNoError) { status = -1; } - runtime->data_entry[eid].dl_tensor.data = 0; + executor->data_entry[eid].dl_tensor.data = 0; } - status |= TVMNDArray_Load(&(runtime->data_entry[eid]), &bptr); + status |= TVMNDArray_Load(&(executor->data_entry[eid]), &bptr); #if TVM_CRT_DEBUG - TVMNDArray* entry = &(runtime->data_entry[eid]); + TVMNDArray* entry = &(executor->data_entry[eid]); printf("loading: param %s loaded, in_idx=%d, eid=%d, ndim=%d, data[0]=%f\n", names + TVM_CRT_STRLEN_NAME * idx, in_idx, eid, entry->dl_tensor.ndim, ((float*)entry->dl_tensor.data)[0]); // NOLINT(*) @@ -869,38 +870,38 @@ int TVMGraphExecutor_LoadParams(TVMGraphExecutor* runtime, const char* param_blo /*! * \brief Run all the operations one by one. - * \param runtime The graph executor. + * \param executor The graph executor. */ -void TVMGraphExecutor_Run(TVMGraphExecutor* runtime) { +void TVMGraphExecutor_Run(TVMGraphExecutor* executor) { // setup the array and requirements. 
uint32_t idx; - for (idx = 0; idx < runtime->op_execs_count; ++idx) { - if (runtime->op_execs[idx].fexec) { + for (idx = 0; idx < executor->op_execs_count; ++idx) { + if (executor->op_execs[idx].fexec) { #if TVM_CRT_DEBUG - printf("calling: %s (%d)\n", runtime->op_execs[idx].name, idx); + printf("calling: %s (%d)\n", executor->op_execs[idx].name, idx); #endif // TVM_CRT_DEBUG - runtime->op_execs[idx].Call(&(runtime->op_execs[idx])); + executor->op_execs[idx].Call(&(executor->op_execs[idx])); } } } /*! * \brief Get the number of output tensors allocated. - * \param runtime The graph executor. + * \param executor The graph executor. * \return the number of output tensors allocated. */ -int TVMGraphExecutor_GetNumOutputs(TVMGraphExecutor* runtime) { return runtime->outputs_count; } +int TVMGraphExecutor_GetNumOutputs(TVMGraphExecutor* executor) { return executor->outputs_count; } -int TVMGraphExecutor_GetOutput(TVMGraphExecutor* runtime, const int32_t idx, DLTensor* out) { +int TVMGraphExecutor_GetOutput(TVMGraphExecutor* executor, const int32_t idx, DLTensor* out) { int status = 0; - uint32_t nid = runtime->outputs[idx].node_id; - uint32_t index = runtime->outputs[idx].index; - uint32_t eid = TVMGraphExecutor_GetEntryId(runtime, nid, index); + uint32_t nid = executor->outputs[idx].node_id; + uint32_t index = executor->outputs[idx].index; + uint32_t eid = TVMGraphExecutor_GetEntryId(executor, nid, index); // copy data section to allocated output tensor int32_t elem_bytes = out->dtype.bits / 8; int64_t size = Shape_Accumulate(out->shape, out->ndim); - DLTensor* tensor = &(runtime->data_entry[eid].dl_tensor); + DLTensor* tensor = &(executor->data_entry[eid].dl_tensor); CHECK(out->ndim == tensor->ndim); CHECK(out->dtype.bits == tensor->dtype.bits); CHECK(Shape_Accumulate(out->shape, out->ndim) == Shape_Accumulate(tensor->shape, tensor->ndim)); @@ -908,7 +909,7 @@ int TVMGraphExecutor_GetOutput(TVMGraphExecutor* runtime, const int32_t idx, DLT return status; } -int TVMGraphExecutor_SetupStorage(TVMGraphExecutor* runtime) { +int TVMGraphExecutor_SetupStorage(TVMGraphExecutor* executor) { TVMPackedFunc lookup_linked_param; int lookup_linked_param_valid; uint32_t idx; @@ -919,12 +920,12 @@ int TVMGraphExecutor_SetupStorage(TVMGraphExecutor* runtime) { temp_args.tcodes[0] = kTVMArgInt; temp_args.values_count = 1; lookup_linked_param_valid = - (TVMPackedFunc_InitModuleFunc(&lookup_linked_param, runtime->module_handle, + (TVMPackedFunc_InitModuleFunc(&lookup_linked_param, executor->module_handle, "_lookup_linked_param", &temp_args) == 0); } // Grab saved optimization plan from graph. - TVMGraphExecutorGraphAttr* attrs = &(runtime->attrs); + TVMGraphExecutorGraphAttr* attrs = &(executor->attrs); DLDataType* vtype = NULL; DLDevice alloc_dev = {kDLCPU, 0}; tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(DLDataType) * attrs->dltype_count, @@ -939,19 +940,19 @@ int TVMGraphExecutor_SetupStorage(TVMGraphExecutor* runtime) { // Size and device type of each storage pool entry. 
TVMGraphExecutorPoolEntry* pool_entry = NULL; - err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorPoolEntry) * runtime->nodes_count, + err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorPoolEntry) * executor->nodes_count, alloc_dev, (void**)&pool_entry); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); return -1; } - memset(pool_entry, 0, sizeof(TVMGraphExecutorPoolEntry) * runtime->nodes_count); + memset(pool_entry, 0, sizeof(TVMGraphExecutorPoolEntry) * executor->nodes_count); uint32_t pool_entry_count = 0; // Find the maximum space size. for (idx = 0; idx < attrs->shape_count; idx++) { int storage_id = attrs->storage_id[idx]; // Use the fallback device if no device index is available. - int device_type = runtime->devices[0].device_type; + int device_type = executor->devices[0].device_type; uint32_t size = Shape_Accumulate(attrs->shape + idx * TVM_CRT_MAX_NDIM, attrs->ndim[idx]); DLDataType t = vtype[idx]; uint32_t bits = t.bits * t.lanes; @@ -968,14 +969,14 @@ int TVMGraphExecutor_SetupStorage(TVMGraphExecutor* runtime) { // Allocate the space. err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorStorageEntry) * pool_entry_count, - alloc_dev, (void**)&runtime->storage_pool); + alloc_dev, (void**)&executor->storage_pool); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); return -1; } for (idx = 0; idx < pool_entry_count; idx++) { TVMGraphExecutorPoolEntry pit = pool_entry[idx]; - DLDevice dev = runtime->devices[0]; + DLDevice dev = executor->devices[0]; uint8_t did_find_linked_param = 0; if (lookup_linked_param_valid) { lookup_linked_param.args.values[0].v_int64 = idx; @@ -983,8 +984,8 @@ int TVMGraphExecutor_SetupStorage(TVMGraphExecutor* runtime) { void* linked_param_data = lookup_linked_param.ret_value.values[0].v_handle; if (linked_param_data != NULL) { - runtime->storage_pool[runtime->storage_pool_count].is_linked_param = 1; - DLTensor* tensor = &runtime->storage_pool[runtime->storage_pool_count].array.dl_tensor; + executor->storage_pool[executor->storage_pool_count].is_linked_param = 1; + DLTensor* tensor = &executor->storage_pool[executor->storage_pool_count].array.dl_tensor; tensor->data = linked_param_data; tensor->device = dev; tensor->ndim = attrs->ndim[pit.entry_id]; @@ -1001,28 +1002,28 @@ int TVMGraphExecutor_SetupStorage(TVMGraphExecutor* runtime) { }; shape[0] = (pit.size + 3) / 4; int status = TVMNDArray_Empty(1, shape, dtype, dev, - &runtime->storage_pool[runtime->storage_pool_count].array); + &executor->storage_pool[executor->storage_pool_count].array); CHECK_EQ(status, 0, "fail to create storage_pool with idx=%d\n", idx); } - runtime->storage_pool_count++; + executor->storage_pool_count++; } // Assign the pooled entries. A unified memory pool is used to simplifiy // memory assignment for each node entry. The allocated memory on each device // is mapped to this pool. 
- runtime->data_entry_count = runtime->node_row_ptr[runtime->node_row_ptr_count - 1]; - err = TVMPlatformMemoryAllocate(sizeof(TVMNDArray) * runtime->data_entry_count, alloc_dev, - (void**)&runtime->data_entry); + executor->data_entry_count = executor->node_row_ptr[executor->node_row_ptr_count - 1]; + err = TVMPlatformMemoryAllocate(sizeof(TVMNDArray) * executor->data_entry_count, alloc_dev, + (void**)&executor->data_entry); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); return -1; } - for (idx = 0; idx < runtime->data_entry_count; ++idx) { + for (idx = 0; idx < executor->data_entry_count; ++idx) { uint32_t storage_id = attrs->storage_id[idx]; - CHECK(storage_id < runtime->storage_pool_count); - int status = TVMNDArray_CreateView(&(runtime->storage_pool[storage_id].array), + CHECK(storage_id < executor->storage_pool_count); + int status = TVMNDArray_CreateView(&(executor->storage_pool[storage_id].array), attrs->shape + idx * TVM_CRT_MAX_NDIM, attrs->ndim[idx], - vtype[idx], &runtime->data_entry[idx]); + vtype[idx], &executor->data_entry[idx]); CHECK_EQ(status, 0, "fail to create for node with idx=%d, storage_id=%u\n", idx, storage_id); } @@ -1042,32 +1043,32 @@ int TVMGraphExecutor_SetupStorage(TVMGraphExecutor* runtime) { return 0; } -int TVMGraphExecutor_SetupOpExecs(TVMGraphExecutor* runtime) { +int TVMGraphExecutor_SetupOpExecs(TVMGraphExecutor* executor) { int status = 0; uint32_t nid, idx; - runtime->op_execs_count = runtime->nodes_count; + executor->op_execs_count = executor->nodes_count; DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMPackedFunc) * runtime->op_execs_count, - dev, (void**)&runtime->op_execs); + tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMPackedFunc) * executor->op_execs_count, + dev, (void**)&executor->op_execs); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); status = -1; return status; } - for (nid = 0; nid < runtime->nodes_count; nid++) { - const TVMGraphExecutorNode* inode = runtime->nodes + nid; + for (nid = 0; nid < executor->nodes_count; nid++) { + const TVMGraphExecutorNode* inode = executor->nodes + nid; if (strcmp(inode->op_type, "null")) { DLTensorPtr args[TVM_CRT_MAX_ARGS]; uint32_t args_count = 0; for (idx = 0; idx < inode->inputs_count; idx++) { const TVMGraphExecutorNodeEntry* entry = inode->inputs + idx; - uint32_t eid = TVMGraphExecutor_GetEntryId(runtime, entry->node_id, entry->index); - args[idx] = &(runtime->data_entry[eid].dl_tensor); + uint32_t eid = TVMGraphExecutor_GetEntryId(executor, entry->node_id, entry->index); + args[idx] = &(executor->data_entry[eid].dl_tensor); args_count++; } for (idx = 0; idx < inode->param.num_outputs; idx++) { - uint32_t eid = TVMGraphExecutor_GetEntryId(runtime, nid, idx); - args[args_count] = &(runtime->data_entry[eid].dl_tensor); + uint32_t eid = TVMGraphExecutor_GetEntryId(executor, nid, idx); + args[args_count] = &(executor->data_entry[eid].dl_tensor); args_count++; } if (strcmp(inode->op_type, "tvm_op")) { @@ -1085,9 +1086,9 @@ int TVMGraphExecutor_SetupOpExecs(TVMGraphExecutor* runtime) { printf("tvm_op: creating %s with node_id=%d\n", inode->param.func_name, nid); #endif // TVM_CRT_DEBUG TVMPackedFunc pf; - TVMGraphExecutor_CreateTVMOp(runtime, &(inode->param), args, args_count, inode->inputs_count, + TVMGraphExecutor_CreateTVMOp(executor, &(inode->param), args, args_count, inode->inputs_count, &pf); - runtime->op_execs[nid] = pf; + executor->op_execs[nid] = pf; } } return status; 
@@ -1104,7 +1105,7 @@ typedef struct TVMOpArgs { uint32_t shape_data_count; } TVMOpArgs; -int32_t TVMGraphExecutor_CreateTVMOp(TVMGraphExecutor* runtime, const TVMOpParam* param, +int32_t TVMGraphExecutor_CreateTVMOp(TVMGraphExecutor* executor, const TVMOpParam* param, DLTensorPtr* args, const uint32_t args_count, uint32_t num_inputs, TVMPackedFunc* pf) { int status = 0; @@ -1137,7 +1138,7 @@ int32_t TVMGraphExecutor_CreateTVMOp(TVMGraphExecutor* runtime, const TVMOpParam } TVMArgs targs = TVMArgs_Create(arg_ptr.arg_values, arg_ptr.arg_tcodes, arg_ptr.arg_values_count); - status = TVMPackedFunc_InitModuleFunc(pf, runtime->module_handle, param->func_name, &targs); + status = TVMPackedFunc_InitModuleFunc(pf, executor->module_handle, param->func_name, &targs); return status; } @@ -1151,7 +1152,7 @@ int32_t TVMGraphExecutor_CreateTVMOp(TVMGraphExecutor* runtime, const TVMOpParam * executed on. * \return 0 on success. */ -int TVMGraphExecutor_Init(TVMGraphExecutor* runtime, const char* graph_json, +int TVMGraphExecutor_Init(TVMGraphExecutor* executor, const char* graph_json, TVMModuleHandle module_handle, const DLDevice* devs) { JSONReader reader; tvm_crt_error_t err = JSONReader_Create(graph_json, &reader); @@ -1159,20 +1160,20 @@ int TVMGraphExecutor_Init(TVMGraphExecutor* runtime, const char* graph_json, return -1; } - TVMGraphExecutor_Load(runtime, &reader); + TVMGraphExecutor_Load(executor, &reader); err = JSONReader_Release(&reader); if (err != kTvmErrorNoError) { return -1; } - runtime->module_handle = module_handle; - runtime->devices[0] = devs[0]; + executor->module_handle = module_handle; + executor->devices[0] = devs[0]; int status; - status = TVMGraphExecutor_SetupStorage(runtime); + status = TVMGraphExecutor_SetupStorage(executor); if (status != 0) { return status; } - status = TVMGraphExecutor_SetupOpExecs(runtime); + status = TVMGraphExecutor_SetupOpExecs(executor); if (status != 0) { if (status != 0) { return status; @@ -1185,73 +1186,73 @@ int TVMGraphExecutor_Init(TVMGraphExecutor* runtime, const char* graph_json, } int TVMGraphExecutor_Create(const char* sym_json, TVMModuleHandle module_handle, - const DLDevice* devs, TVMGraphExecutor** runtime) { + const DLDevice* devs, TVMGraphExecutor** executor) { DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutor), dev, (void**)runtime); + tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutor), dev, (void**)executor); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); return -1; } - memset(*runtime, 0, sizeof(TVMGraphExecutor)); + memset(*executor, 0, sizeof(TVMGraphExecutor)); // init - return TVMGraphExecutor_Init(*runtime, sym_json, module_handle, devs); + return TVMGraphExecutor_Init(*executor, sym_json, module_handle, devs); } int TVMGraphExecutor_Release(TVMGraphExecutor** pptr) { int status = 0; int32_t idx; - TVMGraphExecutor* runtime = (TVMGraphExecutor*)(*pptr); - for (idx = 0; idx < runtime->nodes_count; ++idx) { - status = TVMGraphExecutorNodeRelease(&(runtime->nodes[idx])); + TVMGraphExecutor* executor = (TVMGraphExecutor*)(*pptr); + for (idx = 0; idx < executor->nodes_count; ++idx) { + status = TVMGraphExecutorNodeRelease(&(executor->nodes[idx])); if (status != 0) { return status; } } DLDevice dev = {kDLCPU, 0}; - status = TVMPlatformMemoryFree(runtime->nodes, dev); + status = TVMPlatformMemoryFree(executor->nodes, dev); if (status != 0) { return status; } - status = TVMGraphExecutorGraphAttr_Release(&(runtime->attrs)); + 
status = TVMGraphExecutorGraphAttr_Release(&(executor->attrs)); if (status != 0) { return status; } - for (idx = 0; idx < runtime->storage_pool_count; ++idx) { - if (runtime->storage_pool[idx].is_linked_param == 0) { - status = TVMNDArray_Release(&(runtime->storage_pool[idx]).array); + for (idx = 0; idx < executor->storage_pool_count; ++idx) { + if (executor->storage_pool[idx].is_linked_param == 0) { + status = TVMNDArray_Release(&(executor->storage_pool[idx]).array); if (status != 0) { return status; } } } - for (idx = 0; idx < runtime->data_entry_count; ++idx) { - status = TVMPlatformMemoryFree(runtime->data_entry[idx].dl_tensor.shape, dev); + for (idx = 0; idx < executor->data_entry_count; ++idx) { + status = TVMPlatformMemoryFree(executor->data_entry[idx].dl_tensor.shape, dev); if (status != 0) { return status; } } - status = TVMPlatformMemoryFree(runtime->input_nodes, dev); + status = TVMPlatformMemoryFree(executor->input_nodes, dev); if (status != 0) { return status; } - status = TVMPlatformMemoryFree(runtime->node_row_ptr, dev); + status = TVMPlatformMemoryFree(executor->node_row_ptr, dev); if (status != 0) { return status; } - status = TVMPlatformMemoryFree(runtime->outputs, dev); + status = TVMPlatformMemoryFree(executor->outputs, dev); if (status != 0) { return status; } - status = TVMPlatformMemoryFree(runtime->storage_pool, dev); + status = TVMPlatformMemoryFree(executor->storage_pool, dev); if (status != 0) { return status; } - status = TVMPlatformMemoryFree(runtime->data_entry, dev); + status = TVMPlatformMemoryFree(executor->data_entry, dev); if (status != 0) { return status; } - status = TVMPlatformMemoryFree(runtime->op_execs, dev); + status = TVMPlatformMemoryFree(executor->op_execs, dev); if (status != 0) { return status; } diff --git a/src/runtime/crt/graph_executor_module/graph_executor_module.c b/src/runtime/crt/graph_executor_module/graph_executor_module.c index b5c7e742f5c0..7b2a25040d08 100644 --- a/src/runtime/crt/graph_executor_module/graph_executor_module.c +++ b/src/runtime/crt/graph_executor_module/graph_executor_module.c @@ -33,14 +33,14 @@ typedef struct { TVMModule mod; - TVMGraphExecutor* runtime; + TVMGraphExecutor* executor; } GraphExecutorModule; static GraphExecutorModule graph_executor; int32_t TVMGraphExecutorModule_Create(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { - if (graph_executor.runtime != NULL) { + if (graph_executor.executor != NULL) { return kTvmErrorGraphModuleAlreadyCreated; } @@ -59,7 +59,7 @@ int32_t TVMGraphExecutorModule_Create(TVMValue* args, int* tcodes, int nargs, TV DLDevice dev = {(DLDeviceType)args[2].v_int64, (int)args[3].v_int64}; int ret_value = - TVMGraphExecutor_Create(args[0].v_str, args[1].v_handle, &dev, &graph_executor.runtime); + TVMGraphExecutor_Create(args[0].v_str, args[1].v_handle, &dev, &graph_executor.executor); if (ret_value != 0) { return ret_value; } @@ -68,7 +68,7 @@ int32_t TVMGraphExecutorModule_Create(TVMValue* args, int* tcodes, int nargs, TV ret_value = TVMModCreateFromCModule(&graph_executor.mod, &out); if (ret_value != 0) { ret_tcodes[0] = kTVMNullptr; - TVMGraphExecutor_Release(&graph_executor.runtime); + TVMGraphExecutor_Release(&graph_executor.executor); return ret_value; } @@ -88,14 +88,14 @@ int32_t TVMGraphExecutorModule_GetInput(TVMValue* args, int* tcodes, int nargs, return kTvmErrorFunctionCallWrongArgType; } - int index = TVMGraphExecutor_GetInputIndex(graph_executor.runtime, args[0].v_str); + int index = 
TVMGraphExecutor_GetInputIndex(graph_executor.executor, args[0].v_str); if (index < 0) { return kTvmErrorGraphModuleNoSuchInput; } - uint32_t eid = TVMGraphExecutor_GetEntryId(graph_executor.runtime, - graph_executor.runtime->input_nodes[index], 0); - ret_values[0].v_handle = (void*)&graph_executor.runtime->data_entry[eid].dl_tensor; + uint32_t eid = TVMGraphExecutor_GetEntryId(graph_executor.executor, + graph_executor.executor->input_nodes[index], 0); + ret_values[0].v_handle = (void*)&graph_executor.executor->data_entry[eid].dl_tensor; ret_tcodes[0] = kTVMNDArrayHandle; return 0; } @@ -119,7 +119,7 @@ int32_t TVMGraphExecutorModule_GetNumOutputs(TVMValue* args, int* tcodes, int na return kTvmErrorFunctionCallNumArguments; } - ret_values[0].v_int64 = TVMGraphExecutor_GetNumOutputs(graph_executor.runtime); + ret_values[0].v_int64 = TVMGraphExecutor_GetNumOutputs(graph_executor.executor); ret_tcodes[0] = kTVMArgInt; return 0; } @@ -136,15 +136,15 @@ int32_t TVMGraphExecutorModule_GetOutput(TVMValue* args, int* tcodes, int nargs, } int output_index = args[0].v_int64; - if (output_index < 0 || output_index > TVMGraphExecutor_GetNumOutputs(graph_executor.runtime)) { + if (output_index < 0 || output_index > TVMGraphExecutor_GetNumOutputs(graph_executor.executor)) { return kTvmErrorGraphModuleNoSuchInput; } - uint32_t nid = graph_executor.runtime->outputs[output_index].node_id; - uint32_t index = graph_executor.runtime->outputs[output_index].index; - uint32_t eid = TVMGraphExecutor_GetEntryId(graph_executor.runtime, nid, index); + uint32_t nid = graph_executor.executor->outputs[output_index].node_id; + uint32_t index = graph_executor.executor->outputs[output_index].index; + uint32_t eid = TVMGraphExecutor_GetEntryId(graph_executor.executor, nid, index); - ret_values[0].v_handle = (void*)&(graph_executor.runtime->data_entry[eid].dl_tensor); + ret_values[0].v_handle = (void*)&(graph_executor.executor->data_entry[eid].dl_tensor); ret_tcodes[0] = kTVMNDArrayHandle; return 0; } @@ -163,7 +163,7 @@ int32_t TVMGraphExecutorModule_LoadParams(TVMValue* args, int* tcodes, int nargs ret_tcodes[0] = kTVMNullptr; TVMByteArray* arr = (TVMByteArray*)args[0].v_handle; - return TVMGraphExecutor_LoadParams(graph_executor.runtime, arr->data, arr->size); + return TVMGraphExecutor_LoadParams(graph_executor.executor, arr->data, arr->size); } int32_t TVMGraphExecutorModule_Run(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, @@ -172,7 +172,7 @@ int32_t TVMGraphExecutorModule_Run(TVMValue* args, int* tcodes, int nargs, TVMVa return kTvmErrorFunctionCallNumArguments; } - TVMGraphExecutor_Run(graph_executor.runtime); + TVMGraphExecutor_Run(graph_executor.executor); ret_tcodes[0] = kTVMNullptr; return 0; @@ -189,7 +189,7 @@ int32_t TVMGraphExecutorModule_SetInput(TVMValue* args, int* tcodes, int nargs, return kTvmErrorFunctionCallWrongArgType; } - TVMGraphExecutor_SetInput(graph_executor.runtime, args[0].v_str, (DLTensor*)args[1].v_handle); + TVMGraphExecutor_SetInput(graph_executor.executor, args[0].v_str, (DLTensor*)args[1].v_handle); ret_tcodes[0] = kTVMNullptr; return 0; @@ -221,7 +221,7 @@ static const TVMFuncRegistry graph_executor_registry = { tvm_crt_error_t TVMGraphExecutorModule_Register() { graph_executor.mod.registry = &graph_executor_registry; - graph_executor.runtime = NULL; + graph_executor.executor = NULL; return TVMFuncRegisterGlobal("tvm.graph_executor.create", &TVMGraphExecutorModule_Create, 0); } diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h 
b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h
index 2a7c63f3d85f..47ef474778e0 100644
--- a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h
+++ b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h
@@ -107,14 +107,14 @@ typedef struct TVMGraphExecutor {
 typedef DLTensor* DLTensorPtr;
 // private functions
-uint32_t TVMGraphExecutor_GetEntryId(TVMGraphExecutor* runtime, uint32_t nid, uint32_t index);
-void TVMGraphExecutor_SetInput(TVMGraphExecutor* runtime, const char* name, DLTensor* data_in);
-int TVMGraphExecutor_LoadParams(TVMGraphExecutor* runtime, const char* param_blob,
+uint32_t TVMGraphExecutor_GetEntryId(TVMGraphExecutor* executor, uint32_t nid, uint32_t index);
+void TVMGraphExecutor_SetInput(TVMGraphExecutor* executor, const char* name, DLTensor* data_in);
+int TVMGraphExecutor_LoadParams(TVMGraphExecutor* executor, const char* param_blob,
                                 const uint32_t param_size);
-void TVMGraphExecutor_Run(TVMGraphExecutor* runtime);
-int TVMGraphExecutor_GetOutput(TVMGraphExecutor* runtime, const int32_t idx, DLTensor* out);
+void TVMGraphExecutor_Run(TVMGraphExecutor* executor);
+int TVMGraphExecutor_GetOutput(TVMGraphExecutor* executor, const int32_t idx, DLTensor* out);
-int32_t TVMGraphExecutor_CreateTVMOp(TVMGraphExecutor* runtime, const TVMOpParam* param,
+int32_t TVMGraphExecutor_CreateTVMOp(TVMGraphExecutor* executor, const TVMOpParam* param,
                                      DLTensorPtr* args, const uint32_t args_count,
                                      uint32_t num_inputs, TVMPackedFunc* pf);
diff --git a/src/runtime/graph_executor/graph_executor_factory.cc b/src/runtime/graph_executor/graph_executor_factory.cc
index 4244f62df1d9..a6cef931421b 100644
--- a/src/runtime/graph_executor/graph_executor_factory.cc
+++ b/src/runtime/graph_executor/graph_executor_factory.cc
@@ -51,7 +51,7 @@ PackedFunc GraphExecutorFactory::GetFunction(
      for (int i = 0; i < args.num_args; ++i) {
        devices.emplace_back(args[i].operator Device());
      }
-      *rv = this->RuntimeCreate(devices);
+      *rv = this->ExecutorCreate(devices);
    });
  } else if (name == "debug_create") {
    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
@@ -62,7 +62,7 @@ PackedFunc GraphExecutorFactory::GetFunction(
      for (int i = 1; i < args.num_args; ++i) {
        devices.emplace_back(args[i].operator Device());
      }
-      *rv = this->DebugRuntimeCreate(devices);
+      *rv = this->DebugExecutorCreate(devices);
    });
  } else if (name == "remove_params") {
    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
@@ -103,7 +103,7 @@ void GraphExecutorFactory::SaveToBinary(dmlc::Stream* stream) {
  stream->Write(module_name_);
}
-Module GraphExecutorFactory::RuntimeCreate(const std::vector<Device>& devs) {
+Module GraphExecutorFactory::ExecutorCreate(const std::vector<Device>& devs) {
  auto exec = make_object<GraphExecutor>();
  exec->Init(this->graph_json_, this->imports_[0], devs, PackedFunc());
  // set params
@@ -111,11 +111,11 @@ Module GraphExecutorFactory::RuntimeCreate(const std::vector<Device>& devs) {
  return Module(exec);
}
-Module GraphExecutorFactory::DebugRuntimeCreate(const std::vector<Device>& devs) {
+Module GraphExecutorFactory::DebugExecutorCreate(const std::vector<Device>& devs) {
  const PackedFunc* pf = tvm::runtime::Registry::Get("tvm.graph_executor_debug.create");
  ICHECK(pf != nullptr) << "Cannot find function tvm.graph_executor_debug.create in registry. "
                           "Do you enable debug graph executor build?";
-  // Debug runtime create packed function will call GetAllContexs, so we unpack the devs.
+  // Debug executor create packed function will call GetAllDevice, so we unpack the devs.
  std::vector<int> unpacked_devs;
  for (const auto& dev : devs) {
    unpacked_devs.emplace_back(dev.device_type);
diff --git a/src/runtime/graph_executor/graph_executor_factory.h b/src/runtime/graph_executor/graph_executor_factory.h
index 582985824d8b..46346cbea002 100644
--- a/src/runtime/graph_executor/graph_executor_factory.h
+++ b/src/runtime/graph_executor/graph_executor_factory.h
@@ -74,20 +74,20 @@ class TVM_DLL GraphExecutorFactory : public runtime::ModuleNode {
  void SaveToBinary(dmlc::Stream* stream) override;
  /*!
-   * \brief Create a specific runtime module
+   * \brief Create a specific executor module
   * \param devs The device of the host and devices where graph nodes will be
   * executed on.
-   * \return created runtime module
+   * \return created executor module
   */
-  Module RuntimeCreate(const std::vector<Device>& devs);
+  Module ExecutorCreate(const std::vector<Device>& devs);
  /*!
-   * \brief Create a specific debug runtime module
+   * \brief Create a specific debug executor module
   * \param devs The device of the host and devices where graph nodes will be
   * executed on.
-   * \return created debug runtime module
+   * \return created debug executor module
   */
-  Module DebugRuntimeCreate(const std::vector<Device>& devs);
+  Module DebugExecutorCreate(const std::vector<Device>& devs);
  /*!
   * \brief Create a specific cuda graph executor module

From 854edd4cd2ebb87579700afa3e2781eaec536a6f Mon Sep 17 00:00:00 2001
From: Andrew Reusch
Date: Thu, 18 Mar 2021 14:34:06 -0700
Subject: [PATCH 11/16] backwards compat changes

---
 CMakeLists.txt                               | 14 ++++++++++
 cmake/modules/contrib/ArmComputeLib.cmake    |  8 ++++++
 python/tvm/contrib/debugger/debug_runtime.py | 27 ++++++++++++++++++++
 python/tvm/contrib/graph_runtime.py          | 27 ++++++++++++++++++++
 4 files changed, 76 insertions(+)
 create mode 100644 python/tvm/contrib/debugger/debug_runtime.py
 create mode 100644 python/tvm/contrib/graph_runtime.py

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 74b6bb996419..277fe4a9bfbc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -307,6 +307,20 @@ else()
  list(APPEND COMPILER_SRCS ${STACKVM_RUNTIME_SRCS})
endif(USE_STACKVM_RUNTIME)
+# NOTE(areusch): USE_GRAPH_RUNTIME will be deleted in a future release
+if(USE_GRAPH_RUNTIME AND NOT DEFINED USE_GRAPH_EXECUTOR)
+  message(WARNING "USE_GRAPH_RUNTIME renamed to USE_GRAPH_EXECUTOR. Please update your config.cmake")
+  set(USE_GRAPH_EXECUTOR ${USE_GRAPH_RUNTIME})
+  unset(USE_GRAPH_RUNTIME CACHE)
+endif(USE_GRAPH_RUNTIME AND NOT DEFINED USE_GRAPH_EXECUTOR)
+
+# NOTE(areusch): USE_GRAPH_RUNTIME_DEBUG will be deleted in a future release
+if(USE_GRAPH_RUNTIME_DEBUG AND NOT DEFINED USE_GRAPH_EXECUTOR_DEBUG)
+  message(WARNING "USE_GRAPH_RUNTIME_DEBUG renamed to USE_GRAPH_EXECUTOR_DEBUG.
Please update your config.cmake") + set(USE_GRAPH_EXECUTOR_DEBUG ${USE_GRAPH_RUNTIME_DEBUG}) + unset(USE_GRAPH_RUNTIME_DEBUG CACHE) +endif(USE_GRAPH_RUNTIME_DEBUG AND NOT DEFINED USE_GRAPH_EXECUTOR_DEBUG) + if(USE_GRAPH_EXECUTOR) message(STATUS "Build with Graph Executor support...") file(GLOB RUNTIME_GRAPH_EXECUTOR_SRCS src/runtime/graph_executor/*.cc) diff --git a/cmake/modules/contrib/ArmComputeLib.cmake b/cmake/modules/contrib/ArmComputeLib.cmake index 1e47d087abdc..54ce917dfb50 100644 --- a/cmake/modules/contrib/ArmComputeLib.cmake +++ b/cmake/modules/contrib/ArmComputeLib.cmake @@ -23,12 +23,20 @@ if(USE_ARM_COMPUTE_LIB) file(GLOB ACL_RELAY_CONTRIB_SRC src/relay/backend/contrib/arm_compute_lib/*.cc) file(GLOB ACL_RUNTIME_MODULE src/runtime/contrib/arm_compute_lib/acl_runtime.cc) list(APPEND COMPILER_SRCS ${ACL_RELAY_CONTRIB_SRC}) + if(NOT USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR) list(APPEND COMPILER_SRCS ${ACL_RUNTIME_MODULE}) endif() message(STATUS "Build with Arm Compute Library support...") endif() +if(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME AND NOT DEFINED USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR) + message(WARNING "USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME renamed to USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR. " + "Please update your config.cmake") + set(USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR ${USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME}) + unset(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME CACHE) +endif(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME AND NOT DEFINED USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR) + if(USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR) set(ACL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/acl) # Detect custom ACL path. diff --git a/python/tvm/contrib/debugger/debug_runtime.py b/python/tvm/contrib/debugger/debug_runtime.py new file mode 100644 index 000000000000..0265bb2ba534 --- /dev/null +++ b/python/tvm/contrib/debugger/debug_runtime.py @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Deprecated Python API for DebugExecutor.""" + +import warnings + +from . import graph_executor + + +def create(*args, **kwargs): + warnings.warn("This function has been moved to tvm.contrib.graph_executor and will be removed " + "in the next TVM release") + return graph_executor.create(*args, **kwargs) diff --git a/python/tvm/contrib/graph_runtime.py b/python/tvm/contrib/graph_runtime.py new file mode 100644 index 000000000000..8d646976f04e --- /dev/null +++ b/python/tvm/contrib/graph_runtime.py @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Deprecated Python API for GraphExecutor.""" + +import warnings + +from . import graph_executor + + +def create(*args, **kwargs): + warnings.warn("This function has been moved to tvm.contrib.graph_executor and will be removed " + "in the next TVM release") + return graph_executor.create(*args, **kwargs) From 605b56162ba1c560f3964853d0f86968e19b09cd Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 19 Mar 2021 08:30:42 -0700 Subject: [PATCH 12/16] missed one --- python/tvm/contrib/debugger/debug_runtime.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tvm/contrib/debugger/debug_runtime.py b/python/tvm/contrib/debugger/debug_runtime.py index 0265bb2ba534..57bb722afad7 100644 --- a/python/tvm/contrib/debugger/debug_runtime.py +++ b/python/tvm/contrib/debugger/debug_runtime.py @@ -18,10 +18,10 @@ import warnings -from . import graph_executor +from . import debug_executor def create(*args, **kwargs): warnings.warn("This function has been moved to tvm.contrib.graph_executor and will be removed " "in the next TVM release") - return graph_executor.create(*args, **kwargs) + return debug_executor.create(*args, **kwargs) From f4a6c3a79e481a4d6dff5152a92f9df06736c792 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Thu, 18 Mar 2021 16:07:21 -0700 Subject: [PATCH 13/16] git-clang-format --- src/runtime/crt/graph_executor/graph_executor.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/runtime/crt/graph_executor/graph_executor.c b/src/runtime/crt/graph_executor/graph_executor.c index 2324c5650d7f..2fe9e73aeddc 100644 --- a/src/runtime/crt/graph_executor/graph_executor.c +++ b/src/runtime/crt/graph_executor/graph_executor.c @@ -729,7 +729,9 @@ uint32_t TVMGraphExecutor_GetEntryId(TVMGraphExecutor* executor, uint32_t nid, u * \param executor The graph executor. * \return the number of input tensors allocated. */ -int TVMGraphExecutor_GetNumInputs(TVMGraphExecutor* executor) { return executor->input_nodes_count; } +int TVMGraphExecutor_GetNumInputs(TVMGraphExecutor* executor) { + return executor->input_nodes_count; +} /*! * \brief Get the input index given the name of input. 
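Note on the backwards-compat surface added in PATCH 11 and fixed up in PATCH 12: both spellings stay importable, with the old modules warning and then forwarding to the new ones. A minimal sketch of the intended downstream behavior, assuming a toy MLP workload and its "data" input name (the workload, target, and opt_level are illustrative only; the create() signature and the factory get_json()/get_lib() accessors are those used elsewhere in this series)::

    import warnings

    import numpy as np
    import tvm
    from tvm import relay
    from tvm.relay import testing

    # Deprecated import path kept alive by the shim; the preferred module
    # is tvm.contrib.graph_executor.
    from tvm.contrib import graph_runtime

    # Build a small graph to execute (workload choice is illustrative only).
    mod, params = testing.mlp.get_workload(batch_size=1)
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target="llvm", params=params)

    dev = tvm.cpu(0)
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # The shim's create() warns once, then delegates to graph_executor.create().
        m = graph_runtime.create(lib.get_json(), lib.get_lib(), dev)
    assert any("tvm.contrib.graph_executor" in str(w.message) for w in caught)

    m.set_input("data", tvm.nd.array(np.random.rand(1, 1, 28, 28).astype("float32")))
    m.run()
    out = m.get_output(0)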
From 9343129dcdf1e074af314abba309735faa91b301 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Thu, 18 Mar 2021 16:08:10 -0700 Subject: [PATCH 14/16] black format --- python/tvm/contrib/debugger/debug_runtime.py | 6 ++++-- python/tvm/contrib/graph_runtime.py | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/python/tvm/contrib/debugger/debug_runtime.py b/python/tvm/contrib/debugger/debug_runtime.py index 57bb722afad7..ebd903b47570 100644 --- a/python/tvm/contrib/debugger/debug_runtime.py +++ b/python/tvm/contrib/debugger/debug_runtime.py @@ -22,6 +22,8 @@ def create(*args, **kwargs): - warnings.warn("This function has been moved to tvm.contrib.graph_executor and will be removed " - "in the next TVM release") + warnings.warn( + "This function has been moved to tvm.contrib.graph_executor and will be removed " + "in the next TVM release" + ) return debug_executor.create(*args, **kwargs) diff --git a/python/tvm/contrib/graph_runtime.py b/python/tvm/contrib/graph_runtime.py index 8d646976f04e..f8ecfdd70a5b 100644 --- a/python/tvm/contrib/graph_runtime.py +++ b/python/tvm/contrib/graph_runtime.py @@ -22,6 +22,8 @@ def create(*args, **kwargs): - warnings.warn("This function has been moved to tvm.contrib.graph_executor and will be removed " - "in the next TVM release") + warnings.warn( + "This function has been moved to tvm.contrib.graph_executor and will be removed " + "in the next TVM release" + ) return graph_executor.create(*args, **kwargs) From d832c2d568441d0ee6c5938a9e02c85eb310e32a Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 19 Mar 2021 12:05:49 -0700 Subject: [PATCH 15/16] debug_runtime -> debug_executor --- docs/dev/debugger.rst | 6 +++--- python/tvm/contrib/cuda_graph/cuda_graph_executor.py | 2 +- python/tvm/contrib/graph_executor.py | 2 +- python/tvm/driver/tvmc/runner.py | 4 ++-- python/tvm/micro/__init__.py | 2 +- python/tvm/micro/session.py | 6 +++--- tests/python/unittest/test_runtime_graph_debug.py | 6 +++--- .../python/unittest/test_runtime_module_based_interface.py | 4 ++-- vta/scripts/tune_resnet.py | 4 ++-- vta/tutorials/frontend/deploy_classification.py | 2 +- 10 files changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/dev/debugger.rst b/docs/dev/debugger.rst index d3559feadb68..f1bd004717b4 100644 --- a/docs/dev/debugger.rst +++ b/docs/dev/debugger.rst @@ -134,12 +134,12 @@ How to use Debugger? 3. 
In frontend script file instead of ``from tvm.contrib import graph_executor`` import the - ``debug_runtime`` - ``from tvm.contrib.debugger import debug_runtime as graph_executor`` + ``debug_executor`` + ``from tvm.contrib.debugger import debug_executor as graph_executor`` :: - from tvm.contrib.debugger import debug_runtime as graph_executor + from tvm.contrib.debugger import debug_executor as graph_executor m = graph_executor.create(graph, lib, dev, dump_root="/tmp/tvmdbg") # set inputs m.set_input('data', tvm.nd.array(data.astype(dtype))) diff --git a/python/tvm/contrib/cuda_graph/cuda_graph_executor.py b/python/tvm/contrib/cuda_graph/cuda_graph_executor.py index 53bb9b0cb37c..d047316eb564 100644 --- a/python/tvm/contrib/cuda_graph/cuda_graph_executor.py +++ b/python/tvm/contrib/cuda_graph/cuda_graph_executor.py @@ -131,4 +131,4 @@ def debug_get_output(self, node, out): out : NDArray The output array container """ - raise NotImplementedError("Please use debugger.debug_runtime as graph_executor instead.") + raise NotImplementedError("Please use debugger.debug_executor as graph_executor instead.") diff --git a/python/tvm/contrib/graph_executor.py b/python/tvm/contrib/graph_executor.py index f9d54b883497..a4bc85905f5e 100644 --- a/python/tvm/contrib/graph_executor.py +++ b/python/tvm/contrib/graph_executor.py @@ -270,7 +270,7 @@ def debug_get_output(self, node, out): out : NDArray The output array container """ - raise NotImplementedError("Please use debugger.debug_runtime as graph_executor instead.") + raise NotImplementedError("Please use debugger.debug_executor as graph_executor instead.") def load_params(self, params_bytes): """Load parameters from serialized byte array of parameter dict. diff --git a/python/tvm/driver/tvmc/runner.py b/python/tvm/driver/tvmc/runner.py index 9fcc49ac6471..b4c4e75aa37a 100644 --- a/python/tvm/driver/tvmc/runner.py +++ b/python/tvm/driver/tvmc/runner.py @@ -27,7 +27,7 @@ from tvm import rpc from tvm.autotvm.measure import request_remote from tvm.contrib import graph_executor as runtime -from tvm.contrib.debugger import debug_runtime +from tvm.contrib.debugger import debug_executor from tvm.relay import load_param_dict from . 
import common @@ -370,7 +370,7 @@ def run_module( if profile: logger.debug("creating runtime with profiling enabled") - module = debug_runtime.create(graph, lib, dev, dump_root="./prof") + module = debug_executor.create(graph, lib, dev, dump_root="./prof") else: logger.debug("creating runtime with profiling disabled") module = runtime.create(graph, lib, dev) diff --git a/python/tvm/micro/__init__.py b/python/tvm/micro/__init__.py index a75df5683966..a70cb96d9b13 100644 --- a/python/tvm/micro/__init__.py +++ b/python/tvm/micro/__init__.py @@ -26,7 +26,7 @@ from .model_library_format import export_model_library_format, UnsupportedInModelLibraryFormatError from .session import ( create_local_graph_executor, - create_local_debug_runtime, + create_local_debug_executor, Session, SessionTerminatedError, ) diff --git a/python/tvm/micro/session.py b/python/tvm/micro/session.py index 9573db2095bb..78bf03379939 100644 --- a/python/tvm/micro/session.py +++ b/python/tvm/micro/session.py @@ -23,7 +23,7 @@ from ..error import register_error from .._ffi import get_global_func from ..contrib import graph_executor -from ..contrib.debugger import debug_runtime +from ..contrib.debugger import debug_executor from ..rpc import RPCSession from .transport import IoTimeoutError from .transport import TransportLogger @@ -218,7 +218,7 @@ def create_local_graph_executor(graph_json_str, mod, device): ) -def create_local_debug_runtime(graph_json_str, mod, device, dump_root=None): +def create_local_debug_executor(graph_json_str, mod, device, dump_root=None): """Create a local debug runtime driving execution on the remote CPU device given. Parameters @@ -242,7 +242,7 @@ def create_local_debug_runtime(graph_json_str, mod, device, dump_root=None): """ device_type_id = [device.device_type, device.device_id] fcreate = get_global_func("tvm.graph_executor_debug.create") - return debug_runtime.GraphModuleDebug( + return debug_executor.GraphModuleDebug( fcreate(graph_json_str, mod, lookup_remote_linked_param, *device_type_id), [device], graph_json_str, diff --git a/tests/python/unittest/test_runtime_graph_debug.py b/tests/python/unittest/test_runtime_graph_debug.py index b0173d1e7b7f..6cab75d9b9fc 100644 --- a/tests/python/unittest/test_runtime_graph_debug.py +++ b/tests/python/unittest/test_runtime_graph_debug.py @@ -28,7 +28,7 @@ import numpy as np from tvm import rpc from tvm.contrib import utils -from tvm.contrib.debugger import debug_runtime +from tvm.contrib.debugger import debug_executor @tvm.testing.requires_llvm @@ -75,7 +75,7 @@ def myadd(*args): mlib_proxy = tvm.support.FrontendTestModule() mlib_proxy["myadd"] = myadd try: - mod = debug_runtime.create(graph, mlib_proxy, tvm.cpu(0)) + mod = debug_executor.create(graph, mlib_proxy, tvm.cpu(0)) except ValueError: return @@ -171,7 +171,7 @@ def check_remote(): remote.upload(path_dso) mlib = remote.load_module("dev_lib.so") try: - mod = debug_runtime.create(graph, mlib, remote.cpu(0)) + mod = debug_executor.create(graph, mlib, remote.cpu(0)) except ValueError: print("Skip because debug runtime not enabled") return diff --git a/tests/python/unittest/test_runtime_module_based_interface.py b/tests/python/unittest/test_runtime_module_based_interface.py index bf062afdb701..766338de3558 100644 --- a/tests/python/unittest/test_runtime_module_based_interface.py +++ b/tests/python/unittest/test_runtime_module_based_interface.py @@ -19,7 +19,7 @@ from tvm.relay import testing import tvm from tvm.contrib import graph_executor -from tvm.contrib.debugger import debug_runtime +from 
tvm.contrib.debugger import debug_executor from tvm.contrib.cuda_graph import cuda_graph_executor import tvm.testing @@ -527,7 +527,7 @@ def test_debug_graph_executor(): tvm.testing.assert_allclose(out, verify(data), atol=1e-5) # debug graph executor wrapper - debug_g_mod = debug_runtime.GraphModuleDebug( + debug_g_mod = debug_executor.GraphModuleDebug( complied_graph_lib["debug_create"]("default", dev), [dev], complied_graph_lib.get_json(), diff --git a/vta/scripts/tune_resnet.py b/vta/scripts/tune_resnet.py index f3246ebcd298..dfb74b129718 100644 --- a/vta/scripts/tune_resnet.py +++ b/vta/scripts/tune_resnet.py @@ -29,7 +29,7 @@ from tvm.autotvm.measure.measure_methods import request_remote from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner from tvm.contrib import graph_executor, utils, download -from tvm.contrib.debugger import debug_runtime +from tvm.contrib.debugger import debug_executor import vta from vta.testing import simulator from vta.top import graph_pack @@ -325,7 +325,7 @@ def tune_tasks( # If detailed runtime info is needed build with debug runtime if opt.debug_profile: - m = debug_runtime.create(graph, lib, ctx) + m = debug_executor.create(graph, lib, ctx) else: m = graph_executor.create(graph, lib, ctx) diff --git a/vta/tutorials/frontend/deploy_classification.py b/vta/tutorials/frontend/deploy_classification.py index 808972e97979..f9db824eafa3 100644 --- a/vta/tutorials/frontend/deploy_classification.py +++ b/vta/tutorials/frontend/deploy_classification.py @@ -53,7 +53,7 @@ from tvm import te from tvm import rpc, autotvm, relay from tvm.contrib import graph_executor, utils, download -from tvm.contrib.debugger import debug_runtime +from tvm.contrib.debugger import debug_executor from tvm.relay import transform import vta From 40e797b572a2646ab20f4647732d928970a7fb02 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Sun, 28 Mar 2021 21:11:09 -0700 Subject: [PATCH 16/16] black format --- python/tvm/relay/build_module.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py index e6c4a14ab04d..4795a2d38685 100644 --- a/python/tvm/relay/build_module.py +++ b/python/tvm/relay/build_module.py @@ -392,7 +392,9 @@ def _make_executor(self, expr=None): self.mod = InferType()(self.mod) ret_type = self.mod["main"].checked_type.ret_type if _ty.is_dynamic(ret_type): - raise ValueError("Graph Executor only supports static graphs, got output type", ret_type) + raise ValueError( + "Graph Executor only supports static graphs, got output type", ret_type + ) mod = build(self.mod, target=self.target) gmodule = _graph_rt.GraphModule(mod["default"](self.device))
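To close out the series, the renamed debug path reads end to end as below, mirroring the docs/dev/debugger.rst hunk and the GraphModuleDebug construction from test_runtime_module_based_interface.py above. This is a sketch, not part of the patches: it assumes TVM was configured with USE_GRAPH_EXECUTOR_DEBUG ON and reuses the toy MLP workload and "data" input name from the earlier sketch::

    import numpy as np
    import tvm
    from tvm import relay
    from tvm.relay import testing
    from tvm.contrib.debugger import debug_executor

    mod, params = testing.mlp.get_workload(batch_size=1)  # illustrative workload
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target="llvm", params=params)
    dev = tvm.cpu(0)

    # Wrap the factory's debug_create entry point, as the updated test does.
    m = debug_executor.GraphModuleDebug(
        lib["debug_create"]("default", dev),
        [dev],
        lib.get_json(),
        dump_root="/tmp/tvmdbg",
    )
    m.set_input("data", tvm.nd.array(np.random.rand(1, 1, 28, 28).astype("float32")))
    m.run()  # per-node traces are written under dump_root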