diff --git a/python/tvm/relay/backend/interpreter.py b/python/tvm/relay/backend/interpreter.py
index b62fca86668d..81edf74a0a03 100644
--- a/python/tvm/relay/backend/interpreter.py
+++ b/python/tvm/relay/backend/interpreter.py
@@ -227,6 +227,8 @@ def _make_executor(self, expr=None):
         if expr is None or isinstance(expr, GlobalVar):
             assert self.mod is not None
 
+        _intrp = _backend.CreateInterpreter(self.optimize(), self.device, self.target)
+
         def _interp_wrapper(*args, **kwargs):
             if expr is None:
                 args = self._convert_args(self.mod["main"], args, kwargs)
@@ -253,7 +255,6 @@ def _interp_wrapper(*args, **kwargs):
 
             mod = self.optimize()
             opt_expr = Call(mod["main"], relay_args)
-            _intrp = _backend.CreateInterpreter(mod, self.device, self.target)
             return _intrp(opt_expr)
 
         return _interp_wrapper
diff --git a/python/tvm/relay/testing/__init__.py b/python/tvm/relay/testing/__init__.py
index bfe797d844a8..de85ed69238a 100644
--- a/python/tvm/relay/testing/__init__.py
+++ b/python/tvm/relay/testing/__init__.py
@@ -154,15 +154,16 @@ def check_grad(
         assert len(grads) > 0, "You must test at least one gradient."
 
         # Get numeric gradients for each dimension of each param, using two-sided approximation.
+        fwd_func_compiled = intrp.evaluate(fwd_func)
         approx_grads = []
         for x in test_inputs:
             approx_grad = np.zeros(x.shape)
             for i in np.ndindex(*x.shape):
                 x_i = x[i]
                 x[i] = x_i + eps
-                fwd_plus = intrp.evaluate(fwd_func)(*inputs).numpy().astype("float64")
+                fwd_plus = fwd_func_compiled(*inputs).numpy().astype("float64")
                 x[i] = x_i - eps
-                fwd_minus = intrp.evaluate(fwd_func)(*inputs).numpy().astype("float64")
+                fwd_minus = fwd_func_compiled(*inputs).numpy().astype("float64")
                 x[i] = x_i
                 approx_grad[i] = np.sum((fwd_plus - fwd_minus) / (2 * eps))
             approx_grads.append(approx_grad)
diff --git a/python/tvm/topi/arm_cpu/group_conv2d.py b/python/tvm/topi/arm_cpu/group_conv2d.py
index d852b9acef66..81b2c7260f05 100644
--- a/python/tvm/topi/arm_cpu/group_conv2d.py
+++ b/python/tvm/topi/arm_cpu/group_conv2d.py
@@ -42,7 +42,9 @@ def schedule_group_conv2d_nchw(outs):
     return schedule_group_conv2d_nchwc(outs)
 
 
-def _get_default_config(cfg, data, kernel, strides, padding, groups, out_dtype, layout="NCHW"):
+def _get_default_config(
+    cfg, data, kernel, strides, padding, dilation, groups, out_dtype, layout="NCHW"
+):
     """
     Get default schedule config for the workload
     """
@@ -54,7 +56,7 @@ def _get_default_config(cfg, data, kernel, strides, padding, groups, out_dtype,
             static_data_shape.append(dim)
     data = te.placeholder(static_data_shape, dtype=data.dtype)
 
-    wkl = _get_conv2d_workload(data, kernel, strides, padding, out_dtype, layout)
+    wkl = _get_conv2d_workload(data, kernel, strides, padding, dilation, out_dtype, layout)
     _fallback_schedule(cfg, wkl)
 
 
@@ -158,6 +160,7 @@ def group_conv2d_nchw_spatial_pack(
             ),
             strides,
             padding,
+            dilation,
             groups,
             out_dtype,
         )