diff --git a/python/tvm/testing/utils.py b/python/tvm/testing/utils.py
index 8227530f7ab7..40790bc1797e 100644
--- a/python/tvm/testing/utils.py
+++ b/python/tvm/testing/utils.py
@@ -903,6 +903,9 @@ def _multi_gpu_exists():
 # Mark a test as requiring NCCL support
 requires_nccl = Feature("nccl", "NCCL", cmake_flag="USE_NCCL", parent_features="cuda")
 
+# Mark a test as requiring RCCL support
+requires_rccl = Feature("rccl", "RCCL", cmake_flag="USE_RCCL", parent_features="rocm")
+
 # Mark a test as requiring the NVPTX compilation on the CUDA runtime
 requires_nvptx = Feature(
     "nvptx",
diff --git a/tests/python/disco/test_ccl.py b/tests/python/disco/test_ccl.py
index c29ece957245..0f630c3efc39 100644
--- a/tests/python/disco/test_ccl.py
+++ b/tests/python/disco/test_ccl.py
@@ -32,7 +32,13 @@
 from tvm.script import relax as R
 
 _all_session_kinds = [di.ThreadedSession, di.ProcessSession]
-_ccl = [get_global_func("runtime.disco.compiled_ccl")()]
+_ccl = [
+    pytest.param(
+        ccl,
+        marks=tvm.testing.Feature._all_features[ccl].marks(),
+    )
+    for ccl in ["nccl", "rccl"]
+]
 
 
 def create_device_target(ccl):
@@ -445,7 +451,6 @@ def main(
             W1: R.Tensor((128, 128), "float32"),
             W2: R.Tensor((128, 128), "float32"),
         ) -> R.Tensor((128, 128), "float32"):
-            R.func_attr({"global_symbol": "main"})
             with R.dataflow():
                 lv0: R.Tensor((128, 128), "float32") = R.matmul(x, W1)
                 lv1: R.Tensor((128, 128), "float32") = R.nn.gelu(lv0)
@@ -461,7 +466,6 @@ def main(
             W1: R.Tensor((128, 64), "float32"),  # shard along axis 1
             W2: R.Tensor((64, 128), "float32"),  # shard along axis 0
         ) -> R.Tensor((128, 128), "float32"):
-            R.func_attr({"global_symbol": "main"})
             with R.dataflow():
                 broadcast_x: R.Tensor((128, 128), "float32") = R.ccl.broadcast_from_worker0(x)
                 lv0: R.Tensor((128, 64), "float32") = R.matmul(broadcast_x, W1)
@@ -538,7 +542,6 @@ def main(  # pylint: disable=too-many-locals
             Wv: R.Tensor((128, 512), "float32"),
             Wo: R.Tensor((512, 128), "float32"),
         ) -> R.Tensor((128, 128), "float32"):
-            R.func_attr({"global_symbol": "main"})
             with R.dataflow():
                 # q
                 lv0: R.Tensor((1, 10, 512), "float32") = R.matmul(x, Wq)
@@ -578,7 +581,6 @@ def main(  # pylint: disable=too-many-locals
             Wv: R.Tensor((128, 256), "float32"),  # shard along axis 1
             Wo: R.Tensor((256, 128), "float32"),  # shard along axis 0
         ) -> R.Tensor((128, 128), "float32"):
-            R.func_attr({"global_symbol": "main"})
             with R.dataflow():
                 broadcast_x: R.Tensor((1, 10, 128), "float32") = R.ccl.broadcast_from_worker0(x)
                 # q
diff --git a/tests/python/disco/test_custom_allreduce.py b/tests/python/disco/test_custom_allreduce.py
index 4aed32c052d9..cd075d6ba2b3 100644
--- a/tests/python/disco/test_custom_allreduce.py
+++ b/tests/python/disco/test_custom_allreduce.py
@@ -44,7 +44,13 @@ class AllReduceStrategyType(enum.IntEnum):
     AllReduceStrategyType.AUTO,
 ]
 
-_ccl = [ccl for ccl in tvm.get_global_func("runtime.disco.compiled_ccl")() if ccl == "nccl"]
+_ccl = [
+    pytest.param(
+        ccl,
+        marks=tvm.testing.Feature._all_features[ccl].marks(),
+    )
+    for ccl in ["nccl"]
+]
 
 
 @pytest.mark.parametrize("shape", _shapes)
diff --git a/tests/python/disco/test_loader.py b/tests/python/disco/test_loader.py
index b4e2440857e6..77fbe4b2667f 100644
--- a/tests/python/disco/test_loader.py
+++ b/tests/python/disco/test_loader.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 """Test sharded loader"""
+
 # pylint: disable=missing-docstring
 import json
 import tempfile
@@ -120,6 +121,7 @@ def _simulate_presharded_weights(base_path, param_dict, num_shards, shard_info):
     )
 
 
+@tvm.testing.requires_nccl
 def test_load_shard():
     devices = [0, 1]
     num_shards = len(devices)
@@ -177,6 +179,7 @@ def _create_presharded_loader(sess, path):
     return loader
 
 
+@tvm.testing.requires_nccl
 def test_load_presharded():
     devices = [0, 1]
     param_dict = {
@@ -217,6 +220,7 @@ def test_load_presharded():
         )
 
 
+@tvm.testing.requires_nccl
 def test_load_shard_in_relax():
     devices = [0, 1]
     num_shards = len(devices)
@@ -248,7 +252,6 @@ class Module:  # pylint: disable=too-few-public-methods
         def main(
             loader: R.Object,
         ) -> R.Tuple(R.Tensor((64, 64), "float32"), R.Tensor((16, 128), "float32")):
-            R.func_attr({"global_symbol": "main"})
             with R.dataflow():
                 lv0: R.Tensor((64, 64), "float32") = R.call_pure_packed(
                     "runtime.disco.ShardLoaderLoad",
@@ -309,6 +312,7 @@ def relax_build(mod, target):
         )
 
 
+@tvm.testing.requires_nccl
 def test_load_shard_all():
     devices = [0, 1]
     num_shards = len(devices)
@@ -346,6 +350,7 @@ def test_load_shard_all():
         np.testing.assert_equal(param_dict["param_1"][16:32, :], p_1[1].numpy())
 
 
+@tvm.testing.requires_nccl
 def test_load_all_presharded():
     devices = [0, 1]
     num_shards = len(devices)
@@ -375,6 +380,7 @@ def test_load_all_presharded():
         np.testing.assert_equal(param_dict["param_1"][:, 64:128], p_1[1].numpy())
 
 
+@tvm.testing.requires_nccl
 def test_load_shard_broadcast():
     devices = [0, 1]
     param_dict = {
@@ -396,6 +402,7 @@ def test_load_shard_broadcast():
         np.testing.assert_equal(param_dict["param_1"], p_1[1].numpy())
 
 
+@tvm.testing.requires_nccl
 def test_load_qkv_proj_shard():  # pylint: disable=too-many-locals
     devices = [0, 1]
     num_shards = len(devices)
diff --git a/tests/python/disco/test_session.py b/tests/python/disco/test_session.py
index 38aa757bf8f1..b780eb97ab01 100644
--- a/tests/python/disco/test_session.py
+++ b/tests/python/disco/test_session.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 """Basic tests for a Disco session"""
+
 # pylint: disable=missing-docstring
 import tempfile
 
@@ -260,7 +261,6 @@ def t2(A: T.Buffer((16, 8), "float32"), B: T.Buffer((8, 16), "float32")):
         def transpose_1(
             A: R.Tensor((8, 16), dtype="float32")
         ) -> R.Tensor((16, 8), dtype="float32"):
-            R.func_attr({"global_symbol": "transpose_1"})
             cls = TestMod
             with R.dataflow():
                 B = R.call_tir(cls.t1, (A,), out_sinfo=R.Tensor((16, 8), dtype="float32"))
@@ -271,7 +271,6 @@ def transpose_1(
         def transpose_2(
             A: R.Tensor((16, 8), dtype="float32")
         ) -> R.Tensor((8, 16), dtype="float32"):
-            R.func_attr({"global_symbol": "transpose_2"})
             cls = TestMod
             with R.dataflow():
                 B = R.call_tir(cls.t2, (A,), out_sinfo=R.Tensor((8, 16), dtype="float32"))
diff --git a/tests/scripts/task_python_unittest.sh b/tests/scripts/task_python_unittest.sh
index 5b07b5256ea5..764383976f77 100755
--- a/tests/scripts/task_python_unittest.sh
+++ b/tests/scripts/task_python_unittest.sh
@@ -39,6 +39,7 @@ TEST_FILES=(
   "auto_scheduler"
   "autotvm"
   "codegen"
+  "disco"
   "ir"
   "meta_schedule"
   "micro"