diff --git a/python/tvm/topi/cuda/ssd/multibox.py b/python/tvm/topi/cuda/ssd/multibox.py
index bccabaa069da..fba293934d31 100644
--- a/python/tvm/topi/cuda/ssd/multibox.py
+++ b/python/tvm/topi/cuda/ssd/multibox.py
@@ -234,7 +234,7 @@ def transform_loc_pre(cls_prob, valid_count, temp_valid_count, temp_cls_id, temp
                     temp_valid_count[tid * num_anchors + k] += temp_valid_count[
                         tid * num_anchors + k - 1
                     ]
-            valid_count[i] = temp_valid_count[tid * num_anchors + num_anchors - 1]
+            valid_count[tid] = temp_valid_count[tid * num_anchors + num_anchors - 1]
 
     return ib.get()
 
@@ -342,7 +342,7 @@ def transform_loc(loc, loc_base_idx, anchor, anchor_base_idx, clip, vx, vy, vw,
         j = idxm(tid, num_anchors)
 
         with ib.if_scope(cls_id[tid] > 0):
-            with ib.if_scope(tid == 0):
+            with ib.if_scope(j == 0):
                 out_base_idx = i * num_anchors * 6
                 out_loc[out_base_idx] = cls_id[tid] - 1.0
                 out_loc[out_base_idx + 1] = score[tid]
diff --git a/python/tvm/topi/vision/ssd/multibox.py b/python/tvm/topi/vision/ssd/multibox.py
index cbb2c1b0a0a8..c1d0d958e2d0 100644
--- a/python/tvm/topi/vision/ssd/multibox.py
+++ b/python/tvm/topi/vision/ssd/multibox.py
@@ -137,17 +137,17 @@ def multibox_prior(data, sizes=(1,), ratios=(1,), steps=(-1, -1), offsets=(0.5,
 
 
 @hybrid.script
-def _hybridy_transform_loc(box, pred_loc, variance, clip):
+def _hybridy_transform_loc(anchor, pred_loc, variance, clip, batch_idx, anchor_idx):
     """Transform prior anchor box to output box through location predictions."""
-    al = box[0]
-    at = box[1]
-    ar = box[2]
-    ab = box[3]
+    al = anchor[0, anchor_idx, 0]
+    at = anchor[0, anchor_idx, 1]
+    ar = anchor[0, anchor_idx, 2]
+    ab = anchor[0, anchor_idx, 3]
 
-    px = pred_loc[0]
-    py = pred_loc[1]
-    pw = pred_loc[2]
-    ph = pred_loc[3]
+    px = pred_loc[batch_idx, 0]
+    py = pred_loc[batch_idx, 1]
+    pw = pred_loc[batch_idx, 2]
+    ph = pred_loc[batch_idx, 3]
 
     vx = variance[0]
     vy = variance[1]
@@ -206,8 +206,13 @@ def hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor, clip, threshold, v
     batch_size = cls_prob.shape[0]
     num_classes = cls_prob.shape[1]
     num_anchors = cls_prob.shape[2]
-    box_coord = allocate((4,), loc_pred.dtype)
-    pred_coord = allocate((4,), loc_pred.dtype)
+    pred_coord = allocate(
+        (
+            batch_size,
+            4,
+        ),
+        loc_pred.dtype,
+    )
     out_loc = output_tensor((batch_size, num_anchors, 6), loc_pred.dtype)
     valid_count = output_tensor((batch_size,), "int32")
 
@@ -230,9 +235,8 @@ def hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor, clip, threshold, v
                 out_loc[i, valid_count[i], 0] = cls_id - 1.0
                 out_loc[i, valid_count[i], 1] = score
                 for l in range(4):
-                    box_coord[l] = anchor[0, j, l]
-                    pred_coord[l] = loc_pred[i, j * 4 + l]
-                out_coord = _hybridy_transform_loc(box_coord, pred_coord, variances, clip)
+                    pred_coord[i, l] = loc_pred[i, j * 4 + l]
+                out_coord = _hybridy_transform_loc(anchor, pred_coord, variances, clip, i, j)
                 out_loc[i, valid_count[i], 2] = out_coord[0]
                 out_loc[i, valid_count[i], 3] = out_coord[1]
                 out_loc[i, valid_count[i], 4] = out_coord[2]
diff --git a/tests/python/topi/python/test_topi_vision.py b/tests/python/topi/python/test_topi_vision.py
index 5cc064944b63..86594ab1241b 100644
--- a/tests/python/topi/python/test_topi_vision.py
+++ b/tests/python/topi/python/test_topi_vision.py
@@ -364,41 +364,57 @@ def test_multibox_prior(
         tvm.testing.assert_allclose(tvm_out.numpy(), np_out, rtol=1e-3)
 
 
-def test_multibox_detection(target, dev):
-    batch_size = 1
-    num_anchors = 3
-    num_classes = 3
-    cls_prob = te.placeholder((batch_size, num_anchors, num_classes), name="cls_prob")
-    loc_preds = te.placeholder((batch_size, num_anchors * 4), name="loc_preds")
-    anchors = te.placeholder((1, num_anchors, 4), name="anchors")
-
-    # Manually create test case
-    np_cls_prob = np.array([[[0.2, 0.5, 0.3], [0.25, 0.3, 0.45], [0.7, 0.1, 0.2]]])
-    np_loc_preds = np.array([[0.1, -0.2, 0.3, 0.2, 0.2, 0.4, 0.5, -0.3, 0.7, -0.2, -0.4, -0.8]])
-    np_anchors = np.array([[[-0.1, -0.1, 0.1, 0.1], [-0.2, -0.2, 0.2, 0.2], [1.2, 1.2, 1.5, 1.5]]])
-
-    expected_np_out = np.array(
-        [
+class TestMultiboxDetection:
+    (batch_size,) = tvm.testing.parameters((1,), (6,))  # batch sizes under test: 1 and 6
+
+    @tvm.testing.fixture(cache_return_value=True)
+    def ref_data(
+        self,
+        batch_size,
+    ):
+        # Manually created single-sample case; each array is repeated batch_size times on axis 0
+        np_cls_prob = np.array([[[0.2, 0.5, 0.3], [0.25, 0.3, 0.45], [0.7, 0.1, 0.2]]] * batch_size)
+        np_loc_preds = np.array(
+            [[0.1, -0.2, 0.3, 0.2, 0.2, 0.4, 0.5, -0.3, 0.7, -0.2, -0.4, -0.8]] * batch_size
+        )
+        np_anchors = np.array(
+            [[[-0.1, -0.1, 0.1, 0.1], [-0.2, -0.2, 0.2, 0.2], [1.2, 1.2, 1.5, 1.5]]] * batch_size
+        )
+        expected_np_out = np.array(
             [
-                [1, 0.69999999, 0, 0, 0.10818365, 0.10008108],
-                [0, 0.44999999, 1, 1, 1, 1],
-                [0, 0.30000001, 0, 0, 0.22903419, 0.20435292],
+                [
+                    [1, 0.69999999, 0, 0, 0.10818365, 0.10008108],
+                    [0, 0.44999999, 1, 1, 1, 1],
+                    [0, 0.30000001, 0, 0, 0.22903419, 0.20435292],
+                ]
             ]
-        ]
-    )
+            * batch_size
+        )
+        return np_cls_prob, np_loc_preds, np_anchors, expected_np_out
 
-    fcompute, fschedule = tvm.topi.testing.dispatch(target, _multibox_detection_implement)
-    with tvm.target.Target(target):
-        out = fcompute(cls_prob, loc_preds, anchors)
-        s = fschedule(out)
+    def test_multibox_detection(self, target, dev, ref_data):
+        # Build the op for `target`, run it on the reference inputs, compare to expected_np_out.
+        np_cls_prob, np_loc_preds, np_anchors, expected_np_out = ref_data
+
+        batch_size = np_cls_prob.shape[0]  # recovered from the fixture data
+        num_anchors = 3
+        num_classes = 3
+        cls_prob = te.placeholder((batch_size, num_anchors, num_classes), name="cls_prob")
+        loc_preds = te.placeholder((batch_size, num_anchors * 4), name="loc_preds")
+        anchors = te.placeholder((batch_size, num_anchors, 4), name="anchors")
+
+        fcompute, fschedule = tvm.topi.testing.dispatch(target, _multibox_detection_implement)
+        with tvm.target.Target(target):
+            out = fcompute(cls_prob, loc_preds, anchors)
+            s = fschedule(out)
 
-    tvm_cls_prob = tvm.nd.array(np_cls_prob.astype(cls_prob.dtype), dev)
-    tvm_loc_preds = tvm.nd.array(np_loc_preds.astype(loc_preds.dtype), dev)
-    tvm_anchors = tvm.nd.array(np_anchors.astype(anchors.dtype), dev)
-    tvm_out = tvm.nd.array(np.zeros((batch_size, num_anchors, 6)).astype(out.dtype), dev)
-    f = tvm.build(s, [cls_prob, loc_preds, anchors, out], target)
-    f(tvm_cls_prob, tvm_loc_preds, tvm_anchors, tvm_out)
-    tvm.testing.assert_allclose(tvm_out.numpy(), expected_np_out, rtol=1e-4)
+        tvm_cls_prob = tvm.nd.array(np_cls_prob.astype(cls_prob.dtype), dev)
+        tvm_loc_preds = tvm.nd.array(np_loc_preds.astype(loc_preds.dtype), dev)
+        tvm_anchors = tvm.nd.array(np_anchors.astype(anchors.dtype), dev)
+        tvm_out = tvm.nd.array(np.zeros((batch_size, num_anchors, 6)).astype(out.dtype), dev)
+        f = tvm.build(s, [cls_prob, loc_preds, anchors, out], target)
+        f(tvm_cls_prob, tvm_loc_preds, tvm_anchors, tvm_out)
+        tvm.testing.assert_allclose(tvm_out.numpy(), expected_np_out, rtol=1e-4)
 
 
 class TestRoiAlign: