diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
index 3b10108e6a..42dc8d015b 100644
--- a/.github/workflows/cicd-main.yml
+++ b/.github/workflows/cicd-main.yml
@@ -221,8 +221,8 @@ jobs:
           UNIT_TEST_SCRIPT: |
             cd /opt/nemo-rl
             if [[ "${{ needs.pre-flight.outputs.test_level }}" =~ ^(L0|L1|L2)$ ]]; then
-              uv run --no-sync bash -x ./tests/run_unit.sh --cov=nemo_rl -m \"not mcore\"
-              uv run --extra mcore bash -x ./tests/run_unit.sh --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json -m mcore
+              uv run --no-sync bash -x ./tests/run_unit.sh --cov=nemo_rl --hf-gated
+              uv run --extra mcore bash -x ./tests/run_unit.sh --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --mcore-only
             else
               echo Skipping unit tests for docs-only level
             fi
diff --git a/docs/testing.md b/docs/testing.md
index 4d44b141fb..8ce97346b9 100644
--- a/docs/testing.md
+++ b/docs/testing.md
@@ -10,14 +10,19 @@ Unit tests require 2 GPUs to test the full suite.
 
 ```sh
 # Run the unit tests using local GPUs
+
+# Configuration 1: Default tests only - excludes both hf_gated and mcore tests
 uv run --group test bash tests/run_unit.sh
-```
-
-:::{note}
-Tests can also be run on Slurm with `ray.sub`, but note that some tests will be skipped
-due to no GPUs being located on the head node. To run the full suite of tests, please
-launch on a regular GPU allocation.
-:::
+
+# Configuration 2: Default + HF gated tests, excluding mcore tests
+uv run --group test bash tests/run_unit.sh --hf-gated
+
+# Configuration 3: ONLY mcore tests, excluding ones with hf_gated
+uv run --extra mcore --group test bash tests/run_unit.sh --mcore-only
+
+# Configuration 4: ONLY mcore tests, including ones with hf_gated
+uv run --extra mcore --group test bash tests/run_unit.sh --mcore-only --hf-gated
+```
 
 ### Run Unit Tests in a Hermetic Environment
diff --git a/pyproject.toml b/pyproject.toml
index cddda79abe..643f1cdb1a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -174,6 +174,7 @@ testpaths = ["tests"]
 python_files = "test_*.py"
 markers = [
     "mcore: marks tests that require the mcore extra",
+    "hf_gated: marks tests that require HuggingFace token access for gated models",
 ]
 
 [tool.coverage.run]
diff --git a/tests/unit/algorithms/test_utils.py b/tests/unit/algorithms/test_utils.py
index 82338de026..2dd00a5eeb 100755
--- a/tests/unit/algorithms/test_utils.py
+++ b/tests/unit/algorithms/test_utils.py
@@ -77,6 +77,7 @@ def get_format_with_simple_role_header(messages):
     return message
 
 
+@pytest.mark.hf_gated
 def test_get_tokenizer_no_chat_template(conversation_messages):
     """Test get_tokenizer when no chat template is specified in config"""
     config = {"name": "meta-llama/Llama-3.2-1B-Instruct"}
@@ -89,6 +90,7 @@ def test_get_tokenizer_no_chat_template(conversation_messages):
     assert formatted == expected
 
 
+@pytest.mark.hf_gated
 def test_get_tokenizer_default_chat_template(conversation_messages):
     """Test get_tokenizer when chat_template is 'default' in config"""
     config = {"name": "meta-llama/Llama-3.2-1B-Instruct", "chat_template": "default"}
@@ -100,6 +102,7 @@ def test_get_tokenizer_default_chat_template(conversation_messages):
     assert formatted == expected
 
 
+@pytest.mark.hf_gated
 def test_get_tokenizer_null_chat_template(conversation_messages):
     """Test get_tokenizer when chat_template is None in config"""
     config = {"name": "meta-llama/Llama-3.2-1B-Instruct", "chat_template": None}
@@ -113,6 +116,7 @@ def test_get_tokenizer_null_chat_template(conversation_messages):
     assert formatted == expected
 
 
+@pytest.mark.hf_gated
 def test_get_tokenizer_custom_jinja_template(conversation_messages):
     """Test get_tokenizer when a custom jinja template is specified"""
     custom_template = COMMON_CHAT_TEMPLATES.simple_role_header
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
index 3197fa2d57..1346a1173d 100644
--- a/tests/unit/conftest.py
+++ b/tests/unit/conftest.py
@@ -30,6 +30,63 @@
 dir_path = os.path.dirname(os.path.abspath(__file__))
 
+
+def pytest_addoption(parser):
+    """Add custom command line options for controlling test execution."""
+    parser.addoption(
+        "--hf-gated",
+        action="store_true",
+        default=False,
+        help="Include tests that require HuggingFace token access",
+    )
+    parser.addoption(
+        "--mcore-only",
+        action="store_true",
+        default=False,
+        help="Run ONLY mcore tests (combine with --hf-gated to include mcore+hf_gated tests)",
+    )
+
+
+def pytest_collection_modifyitems(config, items):
+    """Modify test collection to skip tests based on markers unless explicitly requested."""
+    run_hf_gated = config.getoption("--hf-gated")
+    run_mcore_only = config.getoption("--mcore-only")
+    marker_expr = config.getoption("-m", default="")
+
+    # If user specified -m marker expressions, let pytest handle everything normally
+    if marker_expr:
+        return
+
+    # Filter tests based on the desired configurations
+    new_items = []
+
+    if run_mcore_only and run_hf_gated:
+        # Configuration 4: Only mcore tests, including ones with hf_gated
+        new_items = [item for item in items if item.get_closest_marker("mcore")]
+    elif run_mcore_only:
+        # Configuration 3: Only mcore tests, excluding ones with hf_gated
+        new_items = [
+            item
+            for item in items
+            if item.get_closest_marker("mcore")
+            and not item.get_closest_marker("hf_gated")
+        ]
+    elif run_hf_gated:
+        # Configuration 2: Default tests + hf_gated tests, excluding mcore
+        new_items = [item for item in items if not item.get_closest_marker("mcore")]
+    else:
+        # Configuration 1: Default only - exclude both hf_gated and mcore
+        new_items = [
+            item
+            for item in items
+            if not item.get_closest_marker("hf_gated")
+            and not item.get_closest_marker("mcore")
+        ]
+
+    # Update the items list in-place
+    items[:] = new_items
+
+
 TEST_ASSETS_DIR = os.path.join(dir_path, "test_assets")
 UNIT_RESULTS_FILE = os.path.join(dir_path, "unit_results.json")
 UNIT_RESULTS_FILE_DATED = os.path.join(
@@ -37,25 +94,6 @@
 )
 
 
-# Mapping between asset and absolute path (each are populated from a session level fixture)
-class TEST_ASSETS:
-    TINY_LLAMA_MODEL_PATH = os.path.join(
-        TEST_ASSETS_DIR, "tiny_llama_with_llama3.2_tokenizer"
-    )
-    TINY_LLAMA_TIED_MODEL_PATH = os.path.join(
-        TEST_ASSETS_DIR, "tiny_llama_tied_with_llama3.2_tokenizer"
-    )
-    TINY_QWEN2_MODEL_PATH = os.path.join(
-        TEST_ASSETS_DIR, "tiny_qwen2_with_qwen2_tokenizer"
-    )
-    TINY_QWEN3_MODEL_PATH = os.path.join(
-        TEST_ASSETS_DIR, "tiny_qwen3_with_qwen3_tokenizer"
-    )
-    TINY_GEMMA3_MODEL_PATH = os.path.join(
-        TEST_ASSETS_DIR, "tiny_gemma3_with_gemma3_tokenizer"
-    )
-
-
 class UnitTestData(TypedDict):
     exit_status: int | str
     git_commit: str
@@ -198,6 +236,10 @@ def log_max_mem(self, metric_name: str):
         session_data["metrics"][qualified_name]["_elapsed"] = end_time - start_time
 
 
+def pytest_sessionstart(session):
+    os.makedirs(TEST_ASSETS_DIR, exist_ok=True)
+
+
 def pytest_sessionfinish(session, exitstatus):
     if not hasattr(session.config, "_unit_test_data"):
         return
@@ -394,14 +436,14 @@ def mock_2gpu_distributed_env():
 #######################
 
 
-@pytest.fixture(scope="session", autouse=True)
+@pytest.fixture(scope="session")
 def tiny_llama_model_path():
     """Fixture that returns a path to a tiny llama model with a dummy tokenizer."""
     import shutil
 
     from transformers import AutoTokenizer, LlamaConfig, LlamaForCausalLM
 
-    model_path = TEST_ASSETS.TINY_LLAMA_MODEL_PATH
+    model_path = os.path.join(TEST_ASSETS_DIR, "tiny_llama_with_llama3.2_tokenizer")
     # hidden_size//num_attention_heads = 32 (smallest value to not error due to vllm paged attention)
     # vocab_size=128256 (so we can re-use llama3.2 1b tokenizer)
     config = LlamaConfig(
@@ -422,14 +464,16 @@ def tiny_llama_model_path():
     yield model_path
 
 
-@pytest.fixture(scope="session", autouse=True)
+@pytest.fixture(scope="session")
 def tiny_llama_tied_model_path():
     """Fixture that returns a path to a tiny llama model with a dummy tokenizer."""
     import shutil
 
     from transformers import AutoTokenizer, LlamaConfig, LlamaForCausalLM
 
-    model_path = TEST_ASSETS.TINY_LLAMA_TIED_MODEL_PATH
+    model_path = os.path.join(
+        TEST_ASSETS_DIR, "tiny_llama_tied_with_llama3.2_tokenizer"
+    )
     # hidden_size//num_attention_heads = 32 (smallest value to not error due to vllm paged attention)
     # vocab_size=128256 (so we can re-use llama3.2 1b tokenizer)
     config = LlamaConfig(
@@ -450,14 +494,14 @@ def tiny_llama_tied_model_path():
     yield model_path
 
 
-@pytest.fixture(scope="session", autouse=True)
+@pytest.fixture(scope="session")
 def tiny_qwen2_model_path():
     """Fixture that returns a path to a tiny llama model with a dummy tokenizer."""
     import shutil
 
     from transformers import AutoTokenizer, Qwen2Config, Qwen2ForCausalLM
 
-    model_path = TEST_ASSETS.TINY_QWEN2_MODEL_PATH
+    model_path = os.path.join(TEST_ASSETS_DIR, "tiny_qwen2_with_qwen2_tokenizer")
     # hidden_size//num_attention_heads = 32 (smallest value to not error due to vllm paged attention)
     # vocab_size=151936 (so we can re-use qwen2 1.5b tokenizer)
     config = Qwen2Config(
@@ -478,14 +522,14 @@ def tiny_qwen2_model_path():
     yield model_path
 
 
-@pytest.fixture(scope="session", autouse=True)
+@pytest.fixture(scope="session")
 def tiny_qwen3_model_path():
     """Fixture that returns a path to a tiny llama model with a dummy tokenizer."""
     import shutil
 
     from transformers import AutoTokenizer, Qwen3Config, Qwen3ForCausalLM
 
-    model_path = TEST_ASSETS.TINY_QWEN3_MODEL_PATH
+    model_path = os.path.join(TEST_ASSETS_DIR, "tiny_qwen3_with_qwen3_tokenizer")
     # hidden_size//num_attention_heads = 32 (smallest value to not error due to vllm paged attention)
     # vocab_size=151936 (so we can re-use qwen2 1.5b tokenizer)
     config = Qwen3Config(
@@ -506,14 +550,14 @@ def tiny_qwen3_model_path():
     yield model_path
 
 
-@pytest.fixture(scope="session", autouse=True)
+@pytest.fixture(scope="session")
 def tiny_gemma3_model_path():
     """Fixture that returns a path to a tiny llama model with a dummy tokenizer."""
     import shutil
 
     from transformers import AutoTokenizer, Gemma3ForCausalLM, Gemma3TextConfig
 
-    model_path = TEST_ASSETS.TINY_GEMMA3_MODEL_PATH
+    model_path = os.path.join(TEST_ASSETS_DIR, "tiny_gemma3_with_gemma3_tokenizer")
    # hidden_size//num_attention_heads = 32 (smallest value to not error due to vllm paged attention)
     # vocab_size=262144 so we can re-use gemma-3-1b tokenizer
     config = Gemma3TextConfig(
diff --git a/tests/unit/data/hf_datasets/test_oai_format_dataset.py b/tests/unit/data/hf_datasets/test_oai_format_dataset.py
index 4ba75a6a1d..ae6b878779 100644
--- a/tests/unit/data/hf_datasets/test_oai_format_dataset.py
+++ b/tests/unit/data/hf_datasets/test_oai_format_dataset.py
@@ -87,6 +87,7 @@ def test_custom_keys(sample_data):
     assert dataset.system_prompt == "You are a helpful assistant."
 
 
+@pytest.mark.hf_gated
 @pytest.mark.parametrize("sample_data", [("messages", "system_key")], indirect=True)
 def test_message_formatting(sample_data):
     train_path, val_path = sample_data
diff --git a/tests/unit/data/hf_datasets/test_prompt_response.py b/tests/unit/data/hf_datasets/test_prompt_response.py
index 8ff7f5c5f6..cbf18977a4 100644
--- a/tests/unit/data/hf_datasets/test_prompt_response.py
+++ b/tests/unit/data/hf_datasets/test_prompt_response.py
@@ -76,6 +76,7 @@ def test_custom_keys(sample_data):
     assert dataset.output_key == "answer"
 
 
+@pytest.mark.hf_gated
 @pytest.mark.parametrize("sample_data", [("question", "answer")], indirect=True)
 def test_message_formatting(sample_data):
     train_path, val_path = sample_data
diff --git a/tests/unit/data/hf_datasets/test_squad.py b/tests/unit/data/hf_datasets/test_squad.py
index 5e736ee8ac..f5e01b250a 100644
--- a/tests/unit/data/hf_datasets/test_squad.py
+++ b/tests/unit/data/hf_datasets/test_squad.py
@@ -17,6 +17,7 @@
 from nemo_rl.data.hf_datasets.squad import SquadDataset
 
 
+@pytest.mark.hf_gated
 @pytest.mark.skip(reason="dataset download is flaky")
 def test_squad_dataset():
     tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
diff --git a/tests/unit/data/test_llm_message_utils.py b/tests/unit/data/test_llm_message_utils.py
index 6f9c3f3129..91ae2e41b7 100644
--- a/tests/unit/data/test_llm_message_utils.py
+++ b/tests/unit/data/test_llm_message_utils.py
@@ -328,6 +328,7 @@ def test_batch_pad_message_log_custom_pad_value(
     )
 
 
+@pytest.mark.hf_gated
 def test_get_formatted_message_log_llama(
     raw_chat_message_log: LLMMessageLogType,
 ) -> None:
@@ -372,6 +373,7 @@ def test_get_formatted_message_log_llama(
     assert actual_text == expected_text
 
 
+@pytest.mark.hf_gated
 def test_get_formatted_message_log_add_generation_prompt_llama(
     raw_chat_message_log: LLMMessageLogType,
 ) -> None:
@@ -499,6 +501,7 @@ def test_get_formatted_message_log_add_generation_prompt_qwen(
     assert actual_text == expected_text
 
 
+@pytest.mark.hf_gated
 def test_formatted_message_log_empty_message():
     message_logs = [
         [
diff --git a/tests/unit/models/dtensor/test_parallelize.py b/tests/unit/models/dtensor/test_parallelize.py
index 5acb7addc4..192fa354ac 100644
--- a/tests/unit/models/dtensor/test_parallelize.py
+++ b/tests/unit/models/dtensor/test_parallelize.py
@@ -26,6 +26,7 @@
 )
 
 
+@pytest.mark.hf_gated
 @pytest.mark.parametrize(
     "model_name, parallelize_func, sequence_parallel",
     [
diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py
index 8a38e5c61e..94280a03c5 100644
--- a/tests/unit/models/generation/test_vllm_generation.py
+++ b/tests/unit/models/generation/test_vllm_generation.py
@@ -1620,13 +1620,15 @@ def test_vllm_megatron_weight_update_with_packing(cluster, test_input_data):
         # Enable packing during test
         os.environ["NEMO_RL_MEGATRON_IPC_TENSOR_PACKING_THRESHOLD"] = "1"
 
-        # Both policies must use the same model (Qwen2.5-0.5B) for weight transfer compatibility
-        model_name = "Qwen/Qwen2.5-0.5B"
+        # Both policies must use the same model for weight transfer compatibility
+        # NOTE: We have tried using Qwen/Qwen2.5-0.5B, but some small models exhibit
+        # variance depending on which hardware they run on.
+ model_name = "Qwen/Qwen3-0.6B" tokenizer = get_tokenizer({"name": model_name}) # Create Policy megatron_config = get_basic_megatron_test_config( - tp=1, pp=1, precision="float32" + tp=1, pp=1, precision="bfloat16" ) megatron_config["model_name"] = model_name megatron_config["tokenizer"]["name"] = model_name @@ -1653,8 +1655,8 @@ def test_vllm_megatron_weight_update_with_packing(cluster, test_input_data): output_ids = outputs["output_ids"] generated_texts = tokenizer.batch_decode(output_ids, skip_special_tokens=True) assert generated_texts == [ - "Hello, my name is John. I am a", - "The capital of France is Paris. It is the", + "Hello, my name is Lina. I'm", + "The capital of France is Paris. The capital of", ], "Output should be the same as the expected output" finally: diff --git a/tests/unit/models/generation/test_vllm_large_model.py b/tests/unit/models/generation/test_vllm_large_model.py index d24a0c0f31..7b93ef46d1 100644 --- a/tests/unit/models/generation/test_vllm_large_model.py +++ b/tests/unit/models/generation/test_vllm_large_model.py @@ -125,6 +125,7 @@ def test_input_data(tokenizer): # skip this test for now @pytest.mark.skip(reason="Skipping large model test until we have resources in CI.") +@pytest.mark.hf_gated @pytest.mark.asyncio @pytest.mark.parametrize("tensor_parallel_size", [4, 8]) @pytest.mark.parametrize("pipeline_parallel_size", [2]) diff --git a/tests/unit/models/huggingface/test_common.py b/tests/unit/models/huggingface/test_common.py index faf06fbdb7..95da64b0b4 100644 --- a/tests/unit/models/huggingface/test_common.py +++ b/tests/unit/models/huggingface/test_common.py @@ -17,6 +17,7 @@ from nemo_rl.models.huggingface.common import ModelFlag, is_gemma_model +@pytest.mark.hf_gated @pytest.mark.parametrize( "model_name", [ @@ -42,6 +43,7 @@ def test_gemma_models(model_name): assert ModelFlag.VLLM_LOAD_FORMAT_AUTO.matches(model_name) +@pytest.mark.hf_gated @pytest.mark.parametrize( "model_name", [ diff --git a/tests/unit/models/policy/test_dtensor_worker.py b/tests/unit/models/policy/test_dtensor_worker.py index c176082698..33a91c37eb 100644 --- a/tests/unit/models/policy/test_dtensor_worker.py +++ b/tests/unit/models/policy/test_dtensor_worker.py @@ -31,12 +31,11 @@ from nemo_rl.models.generation import configure_generation_config from nemo_rl.models.policy import PolicyConfig from nemo_rl.models.policy.lm_policy import Policy -from tests.unit.conftest import TEST_ASSETS from tests.unit.test_utils import SimpleLoss def create_test_config( - model_name: str = TEST_ASSETS.TINY_LLAMA_MODEL_PATH, + model_name: str, tp: int = 1, cp: int = 1, sequence_parallel: bool = False, @@ -145,9 +144,9 @@ def gc_collect(): @pytest.fixture -def policy_setup(two_gpu_virtual_cluster): +def policy_setup(two_gpu_virtual_cluster, tiny_llama_model_path): """Setup and teardown for policy tests - creates a virtual cluster and policy.""" - config = create_test_config() + config = create_test_config(tiny_llama_model_path) tokenizer = get_tokenizer(config["tokenizer"]) config["generation"] = configure_generation_config(config["generation"], tokenizer) @@ -160,6 +159,7 @@ def policy_setup(two_gpu_virtual_cluster): policy.shutdown() +@pytest.mark.hf_gated @pytest.mark.timeout(180) def test_lm_policy_init(policy_setup): policy = policy_setup @@ -240,9 +240,17 @@ def test_lm_policy_init(policy_setup): @pytest.fixture def training_setup(request, two_gpu_virtual_cluster): """Setup and teardown specifically for training tests.""" - model_name, tp, cp, sequence_parallel, cpu_offload, 
activation_checkpointing = ( - request.param - ) + ( + model_fixture_name, + tp, + cp, + sequence_parallel, + cpu_offload, + activation_checkpointing, + ) = request.param + + # Get the actual model path from the requested fixture + model_name = request.getfixturevalue(model_fixture_name) policy = None data = None loss_fn = None @@ -299,37 +307,38 @@ def training_setup(request, two_gpu_virtual_cluster): policy.shutdown() +@pytest.mark.hf_gated @pytest.mark.timeout(60) @pytest.mark.parametrize( "training_setup", [ - # model_name tp cp sp cpu act - (TEST_ASSETS.TINY_LLAMA_MODEL_PATH, 1, 1, False, False, False), - (TEST_ASSETS.TINY_LLAMA_MODEL_PATH, 1, 1, True, False, False), - (TEST_ASSETS.TINY_LLAMA_MODEL_PATH, 1, 1, False, True, False), - (TEST_ASSETS.TINY_LLAMA_MODEL_PATH, 1, 1, False, False, True), - (TEST_ASSETS.TINY_LLAMA_MODEL_PATH, 1, 2, False, False, False), - (TEST_ASSETS.TINY_QWEN2_MODEL_PATH, 1, 1, True, True, False), - (TEST_ASSETS.TINY_QWEN2_MODEL_PATH, 1, 1, True, False, True), - (TEST_ASSETS.TINY_QWEN2_MODEL_PATH, 1, 1, False, True, True), - (TEST_ASSETS.TINY_QWEN2_MODEL_PATH, 1, 1, True, True, True), - (TEST_ASSETS.TINY_QWEN2_MODEL_PATH, 1, 2, False, False, False), - (TEST_ASSETS.TINY_QWEN3_MODEL_PATH, 1, 1, True, True, False), - (TEST_ASSETS.TINY_QWEN3_MODEL_PATH, 1, 1, True, False, True), - (TEST_ASSETS.TINY_QWEN3_MODEL_PATH, 1, 1, False, True, True), - (TEST_ASSETS.TINY_QWEN3_MODEL_PATH, 1, 1, True, True, True), - (TEST_ASSETS.TINY_QWEN3_MODEL_PATH, 1, 2, False, False, False), + # model_fixture_name tp cp sp cpu act + ("tiny_llama_model_path", 1, 1, False, False, False), + ("tiny_llama_model_path", 1, 1, True, False, False), + ("tiny_llama_model_path", 1, 1, False, True, False), + ("tiny_llama_model_path", 1, 1, False, False, True), + ("tiny_llama_model_path", 1, 2, False, False, False), + ("tiny_qwen2_model_path", 1, 1, True, True, False), + ("tiny_qwen2_model_path", 1, 1, True, False, True), + ("tiny_qwen2_model_path", 1, 1, False, True, True), + ("tiny_qwen2_model_path", 1, 1, True, True, True), + ("tiny_qwen2_model_path", 1, 2, False, False, False), + ("tiny_qwen3_model_path", 1, 1, True, True, False), + ("tiny_qwen3_model_path", 1, 1, True, False, True), + ("tiny_qwen3_model_path", 1, 1, False, True, True), + ("tiny_qwen3_model_path", 1, 1, True, True, True), + ("tiny_qwen3_model_path", 1, 2, False, False, False), ( - TEST_ASSETS.TINY_GEMMA3_MODEL_PATH, + "tiny_gemma3_model_path", 1, 1, True, True, False, ), # gemma3 doesn't support spda - (TEST_ASSETS.TINY_GEMMA3_MODEL_PATH, 1, 1, True, False, True), - (TEST_ASSETS.TINY_GEMMA3_MODEL_PATH, 1, 1, False, True, True), - (TEST_ASSETS.TINY_GEMMA3_MODEL_PATH, 1, 1, True, True, True), + ("tiny_gemma3_model_path", 1, 1, True, False, True), + ("tiny_gemma3_model_path", 1, 1, False, True, True), + ("tiny_gemma3_model_path", 1, 1, True, True, True), # CP doesn't support gemma3 due to spda input has attent_mask != None. 
     ],
     indirect=True,
@@ -372,9 +381,17 @@ def verify_loss_tensor(loss_tensor):
 
 
 @pytest.fixture
 def logprob_setup(request, two_gpu_virtual_cluster):
     """Setup and teardown specifically for training tests."""
-    model_name, tp, cp, sequence_parallel, cpu_offload, activation_checkpointing = (
-        request.param
-    )
+    (
+        model_fixture_name,
+        tp,
+        cp,
+        sequence_parallel,
+        cpu_offload,
+        activation_checkpointing,
+    ) = request.param
+
+    # Get the actual model path from the requested fixture
+    model_name = request.getfixturevalue(model_fixture_name)
     policy = None
     data = None
@@ -449,28 +466,29 @@ def logprob_setup(request, two_gpu_virtual_cluster):
     policy.shutdown()
 
 
+@pytest.mark.hf_gated
 @pytest.mark.timeout(360)
 @pytest.mark.parametrize(
     "logprob_setup",
     [
         # TP=2, CP=1
-        (TEST_ASSETS.TINY_QWEN2_MODEL_PATH, 2, 1, False, True, False),
-        (TEST_ASSETS.TINY_QWEN2_MODEL_PATH, 2, 1, False, False, False),
-        (TEST_ASSETS.TINY_LLAMA_MODEL_PATH, 2, 1, False, False, False),
-        (TEST_ASSETS.TINY_LLAMA_MODEL_PATH, 2, 1, False, True, False),
-        (TEST_ASSETS.TINY_LLAMA_MODEL_PATH, 2, 1, False, True, True),
-        (TEST_ASSETS.TINY_QWEN3_MODEL_PATH, 2, 1, False, True, False),
-        (TEST_ASSETS.TINY_QWEN3_MODEL_PATH, 2, 1, False, False, False),
-        (TEST_ASSETS.TINY_GEMMA3_MODEL_PATH, 2, 1, False, True, False),
-        (TEST_ASSETS.TINY_GEMMA3_MODEL_PATH, 2, 1, False, False, False),
+        ("tiny_qwen2_model_path", 2, 1, False, True, False),
+        ("tiny_qwen2_model_path", 2, 1, False, False, False),
+        ("tiny_llama_model_path", 2, 1, False, False, False),
+        ("tiny_llama_model_path", 2, 1, False, True, False),
+        ("tiny_llama_model_path", 2, 1, False, True, True),
+        ("tiny_qwen3_model_path", 2, 1, False, True, False),
+        ("tiny_qwen3_model_path", 2, 1, False, False, False),
+        ("tiny_gemma3_model_path", 2, 1, False, True, False),
+        ("tiny_gemma3_model_path", 2, 1, False, False, False),
         # TP=1, CP=2
-        (TEST_ASSETS.TINY_QWEN2_MODEL_PATH, 1, 2, False, True, False),
-        (TEST_ASSETS.TINY_QWEN2_MODEL_PATH, 1, 2, False, False, False),
-        (TEST_ASSETS.TINY_LLAMA_MODEL_PATH, 1, 2, False, False, False),
-        (TEST_ASSETS.TINY_LLAMA_MODEL_PATH, 1, 2, False, True, False),
-        (TEST_ASSETS.TINY_LLAMA_MODEL_PATH, 1, 2, False, True, True),
-        (TEST_ASSETS.TINY_QWEN3_MODEL_PATH, 1, 2, False, True, False),
-        (TEST_ASSETS.TINY_QWEN3_MODEL_PATH, 1, 2, False, False, False),
+        ("tiny_qwen2_model_path", 1, 2, False, True, False),
+        ("tiny_qwen2_model_path", 1, 2, False, False, False),
+        ("tiny_llama_model_path", 1, 2, False, False, False),
+        ("tiny_llama_model_path", 1, 2, False, True, False),
+        ("tiny_llama_model_path", 1, 2, False, True, True),
+        ("tiny_qwen3_model_path", 1, 2, False, True, False),
+        ("tiny_qwen3_model_path", 1, 2, False, False, False),
     ],
     indirect=True,
 )
@@ -491,7 +509,10 @@ def test_dtensor_worker_logprob_tp2_or_cp2_matches_unsharded(logprob_setup):
     )
 
 
-def test_dtensor_tp_and_tied_model_with_custom_parallel_plan(two_gpu_virtual_cluster):
+@pytest.mark.hf_gated
+def test_dtensor_tp_and_tied_model_with_custom_parallel_plan(
+    two_gpu_virtual_cluster, tiny_llama_tied_model_path
+):
     """Test that DTensor with a tp > 1 and a tied model with a custom parallel plan works."""
     from torch.distributed.tensor.parallel import ColwiseParallel
     from torch.distributed.tensor.placement_types import Replicate
@@ -501,7 +522,7 @@ def test_dtensor_tp_and_tied_model_with_custom_parallel_plan(two_gpu_virtual_clu
         "model.embed_tokens": ColwiseParallel(output_layouts=Replicate()),
     }
     config = create_test_config(
-        model_name=TEST_ASSETS.TINY_LLAMA_TIED_MODEL_PATH,
+        model_name=tiny_llama_tied_model_path,
         tp=2,
         cp=1,
         sequence_parallel=False,
@@ -534,8 +555,11 @@ def test_dtensor_tp_and_tied_model_with_custom_parallel_plan(two_gpu_virtual_clu
     policy.shutdown()
 
 
+@pytest.mark.hf_gated
 @pytest.mark.timeout(180)
-def test_dtensor_loss_independent_of_microbatch_size_two_gpus(two_gpu_virtual_cluster):
+def test_dtensor_loss_independent_of_microbatch_size_two_gpus(
+    two_gpu_virtual_cluster, tiny_llama_model_path
+):
     """Tests that changing microbatch size while keeping global batch size constant does not affect loss values in DTensor."""
     # Create test batch with global batch size of 8
     global_batch_size = 8
@@ -569,7 +593,7 @@ def test_dtensor_loss_independent_of_microbatch_size_two_gpus(two_gpu_virtual_cl
     )
 
     # Test with mbs=1, 2 microbatches per GPU
-    config = create_test_config()
+    config = create_test_config(tiny_llama_model_path)
     tokenizer = get_tokenizer(config["tokenizer"])
 
     print("Creating training Policy with mbs=1...")
@@ -605,7 +629,7 @@ def test_dtensor_loss_independent_of_microbatch_size_two_gpus(two_gpu_virtual_cl
     policy_mbs1.worker_group.shutdown()
 
     # Test with mbs=2, 1 microbatch per GPU
-    config = create_test_config()
+    config = create_test_config(tiny_llama_model_path)
     config["train_micro_batch_size"] = 2
     config["generation"] = configure_generation_config(config["generation"], tokenizer)
diff --git a/tests/unit/models/policy/test_megatron_worker.py b/tests/unit/models/policy/test_megatron_worker.py
index a399bca0d5..38607ba59f 100644
--- a/tests/unit/models/policy/test_megatron_worker.py
+++ b/tests/unit/models/policy/test_megatron_worker.py
@@ -28,12 +28,11 @@
 from nemo_rl.models.generation import configure_generation_config
 from nemo_rl.models.policy import PolicyConfig
 from nemo_rl.models.policy.lm_policy import Policy
-from tests.unit.conftest import TEST_ASSETS
 from tests.unit.test_utils import SimpleLoss
 
 
 def create_megatron_test_config(
-    model_name: str = TEST_ASSETS.TINY_LLAMA_MODEL_PATH,
+    model_name: str,
     tp: int = 1,
     pp: int = 1,
     precision: str = "float32",
@@ -152,7 +151,7 @@ def gc_collect():
 
 
 @pytest.fixture
-def policy_setup(request):
+def policy_setup(request, tiny_llama_model_path):
     """Setup and teardown for policy tests - creates a virtual cluster and policy."""
     # Get parameters from request
     if hasattr(request, "param") and request.param is not None:
@@ -177,7 +176,7 @@
         max_colocated_worker_groups=1,
     )
 
-    config = create_megatron_test_config(tp=tp, pp=pp)
+    config = create_megatron_test_config(tiny_llama_model_path, tp=tp, pp=pp)
     tokenizer = get_tokenizer(config["tokenizer"])
     config["generation"] = configure_generation_config(
         config["generation"], tokenizer
@@ -199,18 +198,21 @@
 @pytest.fixture
 def training_setup(request):
     """Setup and teardown specifically for training tests."""
-    # Parse parameters: (num_gpus, tp, pp, model_name, config_updates)
+    # Parse parameters: (num_gpus, tp, pp, model_fixture_name, config_updates)
     if hasattr(request, "param") and request.param is not None:
-        num_gpus, tp, pp, model_name, config_updates = request.param
+        num_gpus, tp, pp, model_fixture_name, config_updates = request.param
     else:
-        num_gpus, tp, pp, model_name, config_updates = (
+        num_gpus, tp, pp, model_fixture_name, config_updates = (
             2,
             1,
             1,
-            TEST_ASSETS.TINY_LLAMA_MODEL_PATH,
+            "tiny_llama_model_path",
             {},
         )
 
+    # Get the actual model path from the requested fixture
+    model_name = request.getfixturevalue(model_fixture_name)
+
     policy = None
     cluster = None
     data = None
@@ -317,24 +319,25 @@ def training_setup(request):
     cluster.shutdown()
 
 
+@pytest.mark.hf_gated
 @pytest.mark.timeout(300)
 @pytest.mark.parametrize(
     "training_setup",
     [
-        # (num_gpus, tp, pp, model_name, config_updates)
-        (2, 1, 1, TEST_ASSETS.TINY_LLAMA_MODEL_PATH, {}),
-        (2, 2, 1, TEST_ASSETS.TINY_LLAMA_MODEL_PATH, {}),
-        (2, 1, 1, TEST_ASSETS.TINY_QWEN2_MODEL_PATH, {}),
-        (2, 2, 1, TEST_ASSETS.TINY_QWEN2_MODEL_PATH, {}),
-        (2, 1, 1, TEST_ASSETS.TINY_LLAMA_MODEL_PATH, {"precision": "bfloat16"}),
+        # (num_gpus, tp, pp, model_fixture_name, config_updates)
+        (2, 1, 1, "tiny_llama_model_path", {}),
+        (2, 2, 1, "tiny_llama_model_path", {}),
+        (2, 1, 1, "tiny_qwen2_model_path", {}),
+        (2, 2, 1, "tiny_qwen2_model_path", {}),
+        (2, 1, 1, "tiny_llama_model_path", {"precision": "bfloat16"}),
         (
             2,
             1,
             1,
-            TEST_ASSETS.TINY_LLAMA_MODEL_PATH,
+            "tiny_llama_model_path",
             {"activation_checkpointing": True},
         ),
-        (2, 2, 1, TEST_ASSETS.TINY_LLAMA_MODEL_PATH, {"sequence_parallel": True}),
+        (2, 2, 1, "tiny_llama_model_path", {"sequence_parallel": True}),
     ],
     indirect=True,
     ids=[
@@ -386,7 +389,7 @@ def verify_loss_tensor(loss_tensor):
 
 
 @pytest.fixture
-def generation_setup(request):
+def generation_setup(request, tiny_llama_model_path):
     """Setup and teardown specifically for generation tests."""
     # Parse parameters: (num_gpus, tp, pp, generation_backend)
     if hasattr(request, "param") and request.param is not None:
@@ -415,6 +418,7 @@
     )
 
     config = create_megatron_test_config(
+        tiny_llama_model_path,
         tp=tp,
         pp=pp,
         generation_backend=generation_backend,
@@ -536,11 +540,14 @@ def test_megatron_policy_generation(generation_setup):
 
 
 @pytest.fixture
 def logprob_setup(request):
     """Setup and teardown specifically for logprob tests."""
-    # Parse parameters: (num_gpus, tp, pp, model_name)
+    # Parse parameters: (num_gpus, tp, pp, model_fixture_name)
     if hasattr(request, "param") and request.param is not None:
-        num_gpus, tp, pp, model_name = request.param
+        num_gpus, tp, pp, model_fixture_name = request.param
     else:
-        num_gpus, tp, pp, model_name = 2, 1, 1, TEST_ASSETS.TINY_LLAMA_MODEL_PATH
+        num_gpus, tp, pp, model_fixture_name = 2, 1, 1, "tiny_llama_model_path"
+
+    # Get the actual model path from the requested fixture
+    model_name = request.getfixturevalue(model_fixture_name)
 
     policy = None
     cluster = None
@@ -616,14 +623,15 @@ def logprob_setup(request):
 
 
 @pytest.mark.timeout(180)
+@pytest.mark.hf_gated
 @pytest.mark.parametrize(
     "logprob_setup",
     [
-        # (num_gpus, tp, pp, model_name)
-        (2, 1, 1, TEST_ASSETS.TINY_LLAMA_MODEL_PATH),
-        (2, 2, 1, TEST_ASSETS.TINY_LLAMA_MODEL_PATH),
-        (2, 1, 1, TEST_ASSETS.TINY_QWEN2_MODEL_PATH),
-        (2, 2, 1, TEST_ASSETS.TINY_QWEN2_MODEL_PATH),
+        # (num_gpus, tp, pp, model_fixture_name)
+        (2, 1, 1, "tiny_llama_model_path"),
+        (2, 2, 1, "tiny_llama_model_path"),
+        (2, 1, 1, "tiny_qwen2_model_path"),
+        (2, 2, 1, "tiny_qwen2_model_path"),
     ],
     indirect=True,
     ids=["2gpu_dp2_llama", "2gpu_tp2_llama", "2gpu_dp2_qwen2", "2gpu_tp2_qwen2"],
@@ -656,7 +664,8 @@ def test_megatron_policy_logprobs(logprob_setup):
 
 
 @pytest.mark.timeout(240)
-def test_megatron_loss_independent_of_microbatch_size():
+@pytest.mark.hf_gated
+def test_megatron_loss_independent_of_microbatch_size(tiny_llama_model_path):
     """Test that changing microbatch size while keeping global batch size constant does not affect loss values."""
     num_gpus = 2
     global_batch_size = 8
@@ -697,7 +706,7 @@ def test_megatron_loss_independent_of_microbatch_size():
         max_colocated_worker_groups=1,
     )
 
-    config1 = create_megatron_test_config()
+    config1 = create_megatron_test_config(tiny_llama_model_path)
     config1["train_micro_batch_size"] = 1
     tokenizer = get_tokenizer(config1["tokenizer"])
     config1["generation"] = configure_generation_config(
@@ -745,7 +754,7 @@ def test_megatron_loss_independent_of_microbatch_size():
         max_colocated_worker_groups=1,
     )
 
-    config2 = create_megatron_test_config()
+    config2 = create_megatron_test_config(tiny_llama_model_path)
     config2["train_micro_batch_size"] = 2
     config2["generation"] = configure_generation_config(
         config2["generation"], tokenizer
@@ -774,7 +783,8 @@ def test_megatron_loss_independent_of_microbatch_size():
 
 
 @pytest.mark.timeout(300)
-def test_megatron_reference_policy_functionality():
+@pytest.mark.hf_gated
+def test_megatron_reference_policy_functionality(tiny_llama_model_path):
     """Test Megatron reference policy functionality."""
     num_gpus = 2
 
@@ -786,7 +796,7 @@ def test_megatron_reference_policy_functionality():
         max_colocated_worker_groups=1,
     )
 
-    config = create_megatron_test_config()
+    config = create_megatron_test_config(tiny_llama_model_path)
     config["megatron_cfg"]["optimizer"]["lr"] = 1e-2  # Increase from 5e-6 to 1e-2
     config["megatron_cfg"]["optimizer"]["min_lr"] = 1e-3  # Increase min_lr as well
@@ -894,6 +904,7 @@ def test_megatron_reference_policy_functionality():
 
 
 @pytest.mark.timeout(400)
+@pytest.mark.hf_gated
 @pytest.mark.parametrize(
     "num_gpus,tp,pp",
     [
@@ -903,12 +914,14 @@
     ],
     ids=["2gpu_dp2_save_restore", "2gpu_pp2_save_restore", "2gpu_tp2_save_restore"],
 )
-def test_megatron_checkpoint_save_kill_and_restore(num_gpus, tp, pp):
+def test_megatron_checkpoint_save_kill_and_restore(
+    num_gpus, tp, pp, tiny_llama_model_path
+):
     """Test full checkpoint save/restore cycle: save -> kill worker -> restart -> verify restore."""
     from copy import deepcopy
 
     # Use tiny model for faster testing
-    model_name = TEST_ASSETS.TINY_LLAMA_MODEL_PATH
+    model_name = tiny_llama_model_path
     tokenizer = get_tokenizer({"name": model_name})
 
     with tempfile.TemporaryDirectory(prefix="megatron_save_restore_") as temp_dir:
@@ -1146,7 +1159,8 @@ def test_megatron_checkpoint_save_kill_and_restore(num_gpus, tp, pp):
 
 
 @pytest.mark.timeout(300)
-def test_megatron_dpo_training():
+@pytest.mark.hf_gated
+def test_megatron_dpo_training(tiny_llama_model_path):
     """Test DPO training with Megatron backend."""
     num_gpus = 2
     batch_size = 8
@@ -1184,7 +1198,7 @@ def test_megatron_dpo_training():
         max_colocated_worker_groups=1,
     )
 
-    config = create_megatron_test_config()
+    config = create_megatron_test_config(tiny_llama_model_path)
     tokenizer = get_tokenizer(config["tokenizer"])
 
     policy = Policy(
@@ -1242,7 +1256,8 @@ def test_megatron_dpo_training():
 
 
 @pytest.mark.timeout(300)
-def test_megatron_sft_training():
+@pytest.mark.hf_gated
+def test_megatron_sft_training(tiny_llama_model_path):
     """Test SFT training with Megatron backend."""
     num_gpus = 2
     batch_size = 8
@@ -1277,7 +1292,7 @@ def test_megatron_sft_training():
         max_colocated_worker_groups=1,
     )
 
-    config = create_megatron_test_config()
+    config = create_megatron_test_config(tiny_llama_model_path)
     tokenizer = get_tokenizer(config["tokenizer"])
 
     policy = Policy(
@@ -1323,8 +1338,9 @@
     cluster.shutdown()
 
 
+@pytest.mark.hf_gated
 @pytest.mark.timeout(300)
-def test_megatron_context_parallel_logprob_agreement():
+def test_megatron_context_parallel_logprob_agreement(tiny_llama_model_path):
     """Test that CP and non-CP models produce identical logprobs with sequence packing enabled."""
     num_gpus = 2
     batch_size = 4
@@ -1362,7 +1378,9 @@ def test_megatron_context_parallel_logprob_agreement():
         max_colocated_worker_groups=1,
     )
 
-    config_no_cp = create_megatron_test_config(tp=1, pp=1, precision="bfloat16")
+    config_no_cp = create_megatron_test_config(
+        tiny_llama_model_path, tp=1, pp=1, precision="bfloat16"
+    )
     # Ensure context parallel is disabled
     config_no_cp["megatron_cfg"]["context_parallel_size"] = 1
@@ -1442,7 +1460,9 @@ def test_megatron_context_parallel_logprob_agreement():
         max_colocated_worker_groups=1,
     )
 
-    config_cp = create_megatron_test_config(tp=1, pp=1, precision="bfloat16")
+    config_cp = create_megatron_test_config(
+        tiny_llama_model_path, tp=1, pp=1, precision="bfloat16"
+    )
     # Enable context parallel
     config_cp["megatron_cfg"]["context_parallel_size"] = 2
@@ -1521,8 +1541,9 @@ def test_megatron_context_parallel_logprob_agreement():
     )
 
 
+@pytest.mark.hf_gated
 @pytest.mark.timeout(300)
-def test_megatron_context_parallel_training_agreement():
+def test_megatron_context_parallel_training_agreement(tiny_llama_model_path):
     """Test that CP and non-CP models produce consistent training results with ClippedPG loss and sequence packing."""
     num_gpus = 2
     batch_size = 2
@@ -1580,7 +1601,9 @@ def test_megatron_context_parallel_training_agreement():
         max_colocated_worker_groups=1,
     )
 
-    config_no_cp = create_megatron_test_config(tp=1, pp=1, precision="bfloat16")
+    config_no_cp = create_megatron_test_config(
+        tiny_llama_model_path, tp=1, pp=1, precision="bfloat16"
+    )
     # Ensure context parallel is disabled
     config_no_cp["megatron_cfg"]["context_parallel_size"] = 1
     config_no_cp["train_global_batch_size"] = 2
@@ -1642,7 +1665,9 @@ def test_megatron_context_parallel_training_agreement():
         max_colocated_worker_groups=1,
     )
 
-    config_cp = create_megatron_test_config(tp=1, pp=1, precision="bfloat16")
+    config_cp = create_megatron_test_config(
+        tiny_llama_model_path, tp=1, pp=1, precision="bfloat16"
+    )
     # Enable context parallel
     config_cp["megatron_cfg"]["context_parallel_size"] = 2
     config_cp["train_global_batch_size"] = 2
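
For illustration only (not part of the patch), a minimal sketch of how a test opts into
the collection scheme introduced above. The test names, model choice, and assertions
below are hypothetical; only the markers and CLI flags come from this change:

    import pytest
    from transformers import AutoTokenizer


    @pytest.mark.hf_gated  # dropped in Configuration 1; collected when --hf-gated is passed
    def test_gated_tokenizer_loads():
        # meta-llama checkpoints are gated, so this download requires HF token access
        tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
        assert tokenizer.bos_token is not None


    @pytest.mark.mcore
    @pytest.mark.hf_gated  # collected only in Configuration 4 (--mcore-only --hf-gated)
    def test_gated_mcore_feature():
        ...

Note that pytest_collection_modifyitems returns early when an explicit -m expression is
given, so invocations such as `uv run pytest -m hf_gated` keep stock pytest marker
behavior.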