diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index e0f0a6532b..c38cc2dd87 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -150,7 +150,7 @@ jobs: if: ${{ needs.pre-flight.outputs.test_level != 'none' }} with: RUNNER: self-hosted-azure - TIMEOUT: 60 + TIMEOUT: 75 UNIT_TEST_SCRIPT: | cd /opt/nemo-rl if [[ "${{ needs.pre-flight.outputs.test_level }}" =~ ^(L0|L1|L2)$ ]]; then @@ -168,10 +168,10 @@ jobs: FUNCTIONAL_TEST_SCRIPT: | cd /opt/nemo-rl if [[ "${{ needs.pre-flight.outputs.test_level }}" =~ ^(L1|L2)$ ]]; then - uv run --no-sync bash ./tests/functional/sft.sh - uv run --no-sync bash ./tests/functional/grpo.sh - uv run --no-sync bash ./tests/functional/grpo_multiturn.sh - uv run --no-sync bash ./tests/functional/dpo.sh + time uv run --no-sync bash ./tests/functional/sft.sh + time uv run --no-sync bash ./tests/functional/grpo.sh + time uv run --no-sync bash ./tests/functional/grpo_multiturn.sh + time uv run --no-sync bash ./tests/functional/dpo.sh else echo Skipping functional tests for level ${{ needs.pre-flight.outputs.test_level }} fi diff --git a/.gitignore b/.gitignore index 478990ddc8..12121a4155 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,8 @@ apidocs/ dist/ *.egg-info/ *.vscode/ +release_run* +ckpts/ # Test coverage.json diff --git a/docker/Dockerfile b/docker/Dockerfile index b1977a4ac9..2baf5d4ea3 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -17,7 +17,7 @@ RUN chmod 755 /home/ray/.cache FROM base AS hermetic -WORKDIR /opt/reinforcer +WORKDIR /opt/nemo-rl # First copy only the dependency files COPY --chown=ray --chmod=755 pyproject.toml uv.lock ./ diff --git a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.yaml b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.yaml new file mode 100644 index 0000000000..ba6ba255f3 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.yaml @@ -0,0 +1,109 @@ +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + max_rollout_turns: 1 + max_num_steps: 500 + normalize_rewards: true + use_leave_one_out_baseline: true + val_period: 10 + val_at_start: false + max_val_samples: 256 + val_batch_size: 256 +loss_fn: + reference_policy_kl_penalty: 0.01 + ratio_eps_min: 0.2 + ratio_eps_max: 0.2 + use_on_policy_kl_approximation: false + use_importance_sampling_correction: false +checkpointing: + enabled: true + checkpoint_dir: results/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long + metric_name: val_reward + higher_is_better: true + keep_top_k: 3 + save_period: 10 +policy: + model_name: meta-llama/Llama-3.1-8B-Instruct + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + train_global_batch_size: 512 + train_micro_batch_size: 1 + generation_batch_size: 32 + logprob_batch_size: 2 + max_total_sequence_length: 4096 + precision: bfloat16 + fsdp_offload_enabled: false + activation_checkpointing_enabled: false + refit_buffer_size_gb: 4 + dtensor_cfg: + enabled: true + cpu_offload: false + sequence_parallel: false + activation_checkpointing: false + tensor_parallel_size: 1 + make_sequence_length_divisible_by: 1 + max_grad_norm: 1 + optimizer: + name: torch.optim.AdamW + kwargs: + lr: 3e-07 + weight_decay: 0.01 + betas: + - 0.9 + - 0.999 + eps: 1e-08 + foreach: false + fused: false + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1 + total_iters: 50 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1 + 
total_iters: 10000000000 + - milestones: + - 50 + generation: + backend: vllm + max_new_tokens: 4096 + temperature: 1 + top_p: 1 + top_k: null + stop_token_ids: + - 128009 + stop_strings: null + vllm_cfg: + tensor_parallel_size: 1 + gpu_memory_utilization: 0.6 + max_model_len: 4096 + load_format: dummy + skip_tokenizer_init: true + pad_token_id: 128009 + model_name: meta-llama/Llama-3.1-8B-Instruct +data: + max_input_seq_length: 4096 + prompt_file: examples/prompts/cot.txt + system_prompt_file: null + dataset_name: OpenMathInstruct-2 +env: + math: + num_workers: 8 +logger: + log_dir: logs/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long + num_val_samples_to_print: 0 + wandb_enabled: true + tensorboard_enabled: true + monitor_gpus: true + wandb: + project: nemo-rl + name: grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long + tensorboard: {} + gpu_monitoring: + collection_interval: 10 + flush_interval: 10 +cluster: + gpus_per_node: 8 + num_nodes: 4 diff --git a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.yaml b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.yaml new file mode 100644 index 0000000000..96e8e023cb --- /dev/null +++ b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.yaml @@ -0,0 +1,109 @@ +grpo: + num_prompts_per_step: 32 + num_generations_per_prompt: 16 + max_rollout_turns: 1 + max_num_steps: 500 + normalize_rewards: true + use_leave_one_out_baseline: true + val_period: 10 + val_at_start: false + max_val_samples: 256 + val_batch_size: 256 +loss_fn: + reference_policy_kl_penalty: 0.01 + ratio_eps_min: 0.2 + ratio_eps_max: 0.2 + use_on_policy_kl_approximation: false + use_importance_sampling_correction: false +checkpointing: + enabled: true + checkpoint_dir: results/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1 + metric_name: val_reward + higher_is_better: true + keep_top_k: 3 + save_period: 10 +policy: + model_name: meta-llama/Llama-3.2-1B-Instruct + tokenizer: + name: meta-llama/Llama-3.2-1B-Instruct + train_global_batch_size: 512 + train_micro_batch_size: 4 + generation_batch_size: 32 + logprob_batch_size: 4 + max_total_sequence_length: 512 + precision: bfloat16 + fsdp_offload_enabled: false + activation_checkpointing_enabled: false + refit_buffer_size_gb: 4 + dtensor_cfg: + enabled: true + cpu_offload: false + sequence_parallel: false + activation_checkpointing: false + tensor_parallel_size: 1 + make_sequence_length_divisible_by: 1 + max_grad_norm: 1 + optimizer: + name: torch.optim.AdamW + kwargs: + lr: 5e-06 + weight_decay: 0.01 + betas: + - 0.9 + - 0.999 + eps: 1e-08 + foreach: false + fused: false + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1 + total_iters: 50 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1 + total_iters: 10000000000 + - milestones: + - 50 + generation: + backend: vllm + max_new_tokens: 512 + temperature: 1 + top_p: 1 + top_k: null + stop_token_ids: + - 128009 + stop_strings: null + vllm_cfg: + tensor_parallel_size: 1 + gpu_memory_utilization: 0.6 + max_model_len: 512 + load_format: dummy + skip_tokenizer_init: true + pad_token_id: 128009 + model_name: meta-llama/Llama-3.2-1B-Instruct +data: + max_input_seq_length: 512 + prompt_file: examples/prompts/cot.txt + system_prompt_file: null + dataset_name: OpenMathInstruct-2 +env: + math: + num_workers: 8 +logger: + log_dir: logs/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1 + num_val_samples_to_print: 0 + wandb_enabled: true + tensorboard_enabled: true + monitor_gpus: true + wandb: + 
project: nemo-rl + name: grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1 + tensorboard: {} + gpu_monitoring: + collection_interval: 10 + flush_interval: 10 +cluster: + gpus_per_node: 8 + num_nodes: 1 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.yaml new file mode 100644 index 0000000000..3693ac4677 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.yaml @@ -0,0 +1,109 @@ +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + max_rollout_turns: 1 + max_num_steps: 20 + normalize_rewards: true + use_leave_one_out_baseline: true + val_period: 10 + val_at_start: false + max_val_samples: 256 + val_batch_size: 256 +loss_fn: + reference_policy_kl_penalty: 0.01 + ratio_eps_min: 0.2 + ratio_eps_max: 0.2 + use_on_policy_kl_approximation: false + use_importance_sampling_correction: false +checkpointing: + enabled: true + checkpoint_dir: results/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long + metric_name: val_reward + higher_is_better: true + keep_top_k: 3 + save_period: 10 +policy: + model_name: Qwen/Qwen2.5-32B + tokenizer: + name: Qwen/Qwen2.5-32B + train_global_batch_size: 512 + train_micro_batch_size: 1 + generation_batch_size: 32 + logprob_batch_size: 2 + max_total_sequence_length: 16384 + precision: bfloat16 + fsdp_offload_enabled: false + activation_checkpointing_enabled: false + refit_buffer_size_gb: 4 + dtensor_cfg: + enabled: true + cpu_offload: false + sequence_parallel: true + activation_checkpointing: true + tensor_parallel_size: 8 + make_sequence_length_divisible_by: 8 + max_grad_norm: 1 + optimizer: + name: torch.optim.AdamW + kwargs: + lr: 3e-07 + weight_decay: 0.01 + betas: + - 0.9 + - 0.999 + eps: 1e-08 + foreach: false + fused: false + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1 + total_iters: 50 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1 + total_iters: 10000000000 + - milestones: + - 50 + generation: + backend: vllm + max_new_tokens: 16384 + temperature: 1 + top_p: 1 + top_k: null + stop_token_ids: + - 151643 + stop_strings: null + vllm_cfg: + tensor_parallel_size: 4 + gpu_memory_utilization: 0.6 + max_model_len: 16384 + load_format: dummy + skip_tokenizer_init: true + pad_token_id: 151643 + model_name: Qwen/Qwen2.5-32B +data: + max_input_seq_length: 16384 + prompt_file: examples/prompts/cot.txt + system_prompt_file: null + dataset_name: OpenMathInstruct-2 +env: + math: + num_workers: 8 +logger: + log_dir: logs/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long + num_val_samples_to_print: 0 + wandb_enabled: true + tensorboard_enabled: true + monitor_gpus: true + wandb: + project: nemo-rl + name: grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long + tensorboard: {} + gpu_monitoring: + collection_interval: 10 + flush_interval: 10 +cluster: + gpus_per_node: 8 + num_nodes: 16 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.yaml new file mode 100644 index 0000000000..aed12183a8 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.yaml @@ -0,0 +1,109 @@ +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + max_rollout_turns: 1 + max_num_steps: 2 + normalize_rewards: true + use_leave_one_out_baseline: true + val_period: 10 + val_at_start: false + max_val_samples: 256 + 
val_batch_size: 256 +loss_fn: + reference_policy_kl_penalty: 0.01 + ratio_eps_min: 0.2 + ratio_eps_max: 0.2 + use_on_policy_kl_approximation: false + use_importance_sampling_correction: false +checkpointing: + enabled: true + checkpoint_dir: results/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt + metric_name: val_reward + higher_is_better: true + keep_top_k: 3 + save_period: 10 +policy: + model_name: Qwen/Qwen2.5-32B + tokenizer: + name: Qwen/Qwen2.5-32B + train_global_batch_size: 512 + train_micro_batch_size: 1 + generation_batch_size: 32 + logprob_batch_size: 2 + max_total_sequence_length: 16384 + precision: bfloat16 + fsdp_offload_enabled: false + activation_checkpointing_enabled: false + refit_buffer_size_gb: 4 + dtensor_cfg: + enabled: true + cpu_offload: false + sequence_parallel: true + activation_checkpointing: true + tensor_parallel_size: 8 + make_sequence_length_divisible_by: 8 + max_grad_norm: 1 + optimizer: + name: torch.optim.AdamW + kwargs: + lr: 3e-07 + weight_decay: 0.01 + betas: + - 0.9 + - 0.999 + eps: 1e-08 + foreach: false + fused: false + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1 + total_iters: 50 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1 + total_iters: 10000000000 + - milestones: + - 50 + generation: + backend: vllm + max_new_tokens: 16384 + temperature: 1 + top_p: 1 + top_k: null + stop_token_ids: + - 151643 + stop_strings: null + vllm_cfg: + tensor_parallel_size: 4 + gpu_memory_utilization: 0.6 + max_model_len: 16384 + load_format: dummy + skip_tokenizer_init: true + pad_token_id: 151643 + model_name: Qwen/Qwen2.5-32B +data: + max_input_seq_length: 16384 + prompt_file: examples/prompts/cot.txt + system_prompt_file: null + dataset_name: OpenMathInstruct-2 +env: + math: + num_workers: 8 +logger: + log_dir: logs/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt + num_val_samples_to_print: 0 + wandb_enabled: true + tensorboard_enabled: true + monitor_gpus: true + wandb: + project: nemo-rl + name: grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt + tensorboard: {} + gpu_monitoring: + collection_interval: 10 + flush_interval: 10 +cluster: + gpus_per_node: 8 + num_nodes: 16 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.yaml new file mode 100644 index 0000000000..27211ddc7e --- /dev/null +++ b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.yaml @@ -0,0 +1,109 @@ +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + max_rollout_turns: 1 + max_num_steps: 30 + normalize_rewards: true + use_leave_one_out_baseline: true + val_period: 10 + val_at_start: false + max_val_samples: 256 + val_batch_size: 256 +loss_fn: + reference_policy_kl_penalty: 0.01 + ratio_eps_min: 0.2 + ratio_eps_max: 0.2 + use_on_policy_kl_approximation: false + use_importance_sampling_correction: false +checkpointing: + enabled: true + checkpoint_dir: results/grpo-qwen2.5-7b-instruct-4n8g-fsdp1 + metric_name: val_reward + higher_is_better: true + keep_top_k: 3 + save_period: 10 +policy: + model_name: Qwen/Qwen2.5-7B-Instruct + tokenizer: + name: Qwen/Qwen2.5-7B-Instruct + train_global_batch_size: 512 + train_micro_batch_size: 1 + generation_batch_size: 32 + logprob_batch_size: 2 + max_total_sequence_length: 4096 + precision: bfloat16 + fsdp_offload_enabled: false + activation_checkpointing_enabled: false + refit_buffer_size_gb: 4 + dtensor_cfg: + enabled: false + cpu_offload: false + sequence_parallel: false + 
activation_checkpointing: false + tensor_parallel_size: 1 + make_sequence_length_divisible_by: 1 + max_grad_norm: 1 + optimizer: + name: torch.optim.AdamW + kwargs: + lr: 3e-07 + weight_decay: 0.01 + betas: + - 0.9 + - 0.999 + eps: 1e-08 + foreach: false + fused: false + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1 + total_iters: 50 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1 + total_iters: 10000000000 + - milestones: + - 50 + generation: + backend: vllm + max_new_tokens: 4096 + temperature: 1 + top_p: 1 + top_k: null + stop_token_ids: + - 151645 + stop_strings: null + vllm_cfg: + tensor_parallel_size: 1 + gpu_memory_utilization: 0.6 + max_model_len: 4096 + load_format: dummy + skip_tokenizer_init: true + pad_token_id: 151643 + model_name: Qwen/Qwen2.5-7B-Instruct +data: + max_input_seq_length: 4096 + prompt_file: examples/prompts/cot.txt + system_prompt_file: null + dataset_name: OpenMathInstruct-2 +env: + math: + num_workers: 8 +logger: + log_dir: logs/grpo-qwen2.5-7b-instruct-4n8g-fsdp1 + num_val_samples_to_print: 0 + wandb_enabled: true + tensorboard_enabled: true + monitor_gpus: true + wandb: + project: nemo-rl + name: grpo-qwen2.5-7b-instruct-4n8g-fsdp1 + tensorboard: {} + gpu_monitoring: + collection_interval: 10 + flush_interval: 10 +cluster: + gpus_per_node: 8 + num_nodes: 4 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.yaml new file mode 100644 index 0000000000..87e2c592c0 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.yaml @@ -0,0 +1,109 @@ +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + max_rollout_turns: 1 + max_num_steps: 30 + normalize_rewards: true + use_leave_one_out_baseline: true + val_period: 10 + val_at_start: false + max_val_samples: 256 + val_batch_size: 256 +loss_fn: + reference_policy_kl_penalty: 0.01 + ratio_eps_min: 0.2 + ratio_eps_max: 0.2 + use_on_policy_kl_approximation: false + use_importance_sampling_correction: false +checkpointing: + enabled: true + checkpoint_dir: results/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp + metric_name: val_reward + higher_is_better: true + keep_top_k: 3 + save_period: 10 +policy: + model_name: Qwen/Qwen2.5-7B-Instruct + tokenizer: + name: Qwen/Qwen2.5-7B-Instruct + train_global_batch_size: 512 + train_micro_batch_size: 1 + generation_batch_size: 32 + logprob_batch_size: 2 + max_total_sequence_length: 4096 + precision: bfloat16 + fsdp_offload_enabled: false + activation_checkpointing_enabled: false + refit_buffer_size_gb: 4 + dtensor_cfg: + enabled: true + cpu_offload: false + sequence_parallel: true + activation_checkpointing: false + tensor_parallel_size: 4 + make_sequence_length_divisible_by: 4 + max_grad_norm: 1 + optimizer: + name: torch.optim.AdamW + kwargs: + lr: 3e-07 + weight_decay: 0.01 + betas: + - 0.9 + - 0.999 + eps: 1e-08 + foreach: false + fused: false + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1 + total_iters: 50 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1 + total_iters: 10000000000 + - milestones: + - 50 + generation: + backend: vllm + max_new_tokens: 4096 + temperature: 1 + top_p: 1 + top_k: null + stop_token_ids: + - 151645 + stop_strings: null + vllm_cfg: + tensor_parallel_size: 4 + gpu_memory_utilization: 0.6 + max_model_len: 4096 + load_format: dummy + skip_tokenizer_init: 
true + pad_token_id: 151643 + model_name: Qwen/Qwen2.5-7B-Instruct +data: + max_input_seq_length: 4096 + prompt_file: examples/prompts/cot.txt + system_prompt_file: null + dataset_name: OpenMathInstruct-2 +env: + math: + num_workers: 8 +logger: + log_dir: logs/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp + num_val_samples_to_print: 0 + wandb_enabled: true + tensorboard_enabled: true + monitor_gpus: true + wandb: + project: nemo-rl + name: grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp + tensorboard: {} + gpu_monitoring: + collection_interval: 10 + flush_interval: 10 +cluster: + gpus_per_node: 8 + num_nodes: 4 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.yaml new file mode 100644 index 0000000000..9f5762f173 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.yaml @@ -0,0 +1,109 @@ +grpo: + num_prompts_per_step: 32 + num_generations_per_prompt: 16 + max_rollout_turns: 1 + max_num_steps: 450 + normalize_rewards: true + use_leave_one_out_baseline: true + val_period: 10 + val_at_start: false + max_val_samples: 256 + val_batch_size: 256 +loss_fn: + reference_policy_kl_penalty: 0.01 + ratio_eps_min: 0.2 + ratio_eps_max: 0.2 + use_on_policy_kl_approximation: false + use_importance_sampling_correction: false +checkpointing: + enabled: true + checkpoint_dir: results/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1 + metric_name: val_reward + higher_is_better: true + keep_top_k: 3 + save_period: 10 +policy: + model_name: Qwen/Qwen2.5-Math-1.5B-Instruct + tokenizer: + name: Qwen/Qwen2.5-Math-1.5B-Instruct + train_global_batch_size: 512 + train_micro_batch_size: 4 + generation_batch_size: 32 + logprob_batch_size: 4 + max_total_sequence_length: 512 + precision: bfloat16 + fsdp_offload_enabled: false + activation_checkpointing_enabled: false + refit_buffer_size_gb: 4 + dtensor_cfg: + enabled: true + cpu_offload: false + sequence_parallel: false + activation_checkpointing: false + tensor_parallel_size: 1 + make_sequence_length_divisible_by: 1 + max_grad_norm: 1 + optimizer: + name: torch.optim.AdamW + kwargs: + lr: 5e-06 + weight_decay: 0.01 + betas: + - 0.9 + - 0.999 + eps: 1e-08 + foreach: false + fused: false + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1 + total_iters: 50 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1 + total_iters: 10000000000 + - milestones: + - 50 + generation: + backend: vllm + max_new_tokens: 512 + temperature: 1 + top_p: 1 + top_k: null + stop_token_ids: + - 151645 + stop_strings: null + vllm_cfg: + tensor_parallel_size: 1 + gpu_memory_utilization: 0.6 + max_model_len: 512 + load_format: dummy + skip_tokenizer_init: true + pad_token_id: 151643 + model_name: Qwen/Qwen2.5-Math-1.5B-Instruct +data: + max_input_seq_length: 512 + prompt_file: examples/prompts/cot.txt + system_prompt_file: null + dataset_name: OpenMathInstruct-2 +env: + math: + num_workers: 8 +logger: + log_dir: logs/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1 + num_val_samples_to_print: 0 + wandb_enabled: true + tensorboard_enabled: true + monitor_gpus: true + wandb: + project: nemo-rl + name: grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1 + tensorboard: {} + gpu_monitoring: + collection_interval: 10 + flush_interval: 10 +cluster: + gpus_per_node: 8 + num_nodes: 1 diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.yaml 
b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.yaml new file mode 100644 index 0000000000..da0140a73e --- /dev/null +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.yaml @@ -0,0 +1,67 @@ +sft: + max_num_epochs: 1 + max_num_steps: 250 + val_period: 10 + val_batches: 8 + val_global_batch_size: 32 + val_micro_batch_size: 1 + val_at_start: true + seed: 42 +checkpointing: + enabled: true + checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-fsdp1 + metric_name: val_loss + higher_is_better: false + keep_top_k: 3 + save_period: 10 +policy: + model_name: meta-llama/Llama-3.1-8B-Instruct + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' + train_global_batch_size: 32 + train_micro_batch_size: 1 + max_total_sequence_length: 1024 + precision: bfloat16 + fsdp_offload_enabled: false + activation_checkpointing_enabled: false + dtensor_cfg: + enabled: false + cpu_offload: false + sequence_parallel: false + activation_checkpointing: false + tensor_parallel_size: 1 + make_sequence_length_divisible_by: 1 + max_grad_norm: 1 + optimizer: + name: torch.optim.AdamW + kwargs: + lr: 5e-06 + weight_decay: 0.1 + betas: + - 0.9 + - 0.98 + eps: 1e-05 + foreach: false + fused: false +data: + max_input_seq_length: 1024 + dataset_name: squad + add_bos: true + add_eos: true +logger: + log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp1 + wandb_enabled: true + tensorboard_enabled: true + monitor_gpus: true + wandb: + project: nemo-rl + name: sft-llama3.1-8b-instruct-1n8g-fsdp1 + tensorboard: + log_dir: tb_logs-sft-dev-squad + gpu_monitoring: + collection_interval: 10 + flush_interval: 10 +cluster: + gpus_per_node: 8 + num_nodes: 1 diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.yaml new file mode 100644 index 0000000000..288f365c1a --- /dev/null +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.yaml @@ -0,0 +1,67 @@ +sft: + max_num_epochs: 1 + max_num_steps: 2730 + val_period: 10 + val_batches: 8 + val_global_batch_size: 32 + val_micro_batch_size: 1 + val_at_start: true + seed: 42 +checkpointing: + enabled: true + checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long + metric_name: val_loss + higher_is_better: false + keep_top_k: 3 + save_period: 10 +policy: + model_name: meta-llama/Llama-3.1-8B-Instruct + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' + train_global_batch_size: 32 + train_micro_batch_size: 1 + max_total_sequence_length: 1024 + precision: bfloat16 + fsdp_offload_enabled: false + activation_checkpointing_enabled: false + dtensor_cfg: + enabled: true + cpu_offload: false + sequence_parallel: false + activation_checkpointing: false + tensor_parallel_size: 1 + make_sequence_length_divisible_by: 1 + 
max_grad_norm: 1 + optimizer: + name: torch.optim.AdamW + kwargs: + lr: 5e-06 + weight_decay: 0.1 + betas: + - 0.9 + - 0.98 + eps: 1e-05 + foreach: false + fused: false +data: + max_input_seq_length: 1024 + dataset_name: squad + add_bos: true + add_eos: true +logger: + log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long + wandb_enabled: true + tensorboard_enabled: true + monitor_gpus: true + wandb: + project: nemo-rl + name: sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long + tensorboard: + log_dir: tb_logs-sft-dev-squad + gpu_monitoring: + collection_interval: 10 + flush_interval: 10 +cluster: + gpus_per_node: 8 + num_nodes: 1 diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.yaml new file mode 100644 index 0000000000..f065b5cd34 --- /dev/null +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.yaml @@ -0,0 +1,67 @@ +sft: + max_num_epochs: 1 + max_num_steps: 350 + val_period: 10 + val_batches: 8 + val_global_batch_size: 32 + val_micro_batch_size: 1 + val_at_start: true + seed: 42 +checkpointing: + enabled: true + checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp + metric_name: val_loss + higher_is_better: false + keep_top_k: 3 + save_period: 10 +policy: + model_name: meta-llama/Llama-3.1-8B-Instruct + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' + train_global_batch_size: 32 + train_micro_batch_size: 1 + max_total_sequence_length: 1024 + precision: bfloat16 + fsdp_offload_enabled: false + activation_checkpointing_enabled: false + dtensor_cfg: + enabled: true + cpu_offload: false + sequence_parallel: true + activation_checkpointing: false + tensor_parallel_size: 2 + make_sequence_length_divisible_by: 2 + max_grad_norm: 1 + optimizer: + name: torch.optim.AdamW + kwargs: + lr: 5e-06 + weight_decay: 0.1 + betas: + - 0.9 + - 0.98 + eps: 1e-05 + foreach: false + fused: false +data: + max_input_seq_length: 1024 + dataset_name: squad + add_bos: true + add_eos: true +logger: + log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp + wandb_enabled: true + tensorboard_enabled: true + monitor_gpus: true + wandb: + project: nemo-rl + name: sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp + tensorboard: + log_dir: tb_logs-sft-dev-squad + gpu_monitoring: + collection_interval: 10 + flush_interval: 10 +cluster: + gpus_per_node: 8 + num_nodes: 1 diff --git a/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.yaml b/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.yaml new file mode 100644 index 0000000000..7c4bd357ed --- /dev/null +++ b/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.yaml @@ -0,0 +1,67 @@ +sft: + max_num_epochs: 1 + max_num_steps: 500 + val_period: 10 + val_batches: 8 + val_global_batch_size: 32 + val_micro_batch_size: 1 + val_at_start: true + seed: 42 +checkpointing: + enabled: true + checkpoint_dir: results/sft-llama3.2-1b-1n8g-fsdp2tp1 + metric_name: val_loss + higher_is_better: false + keep_top_k: 3 + save_period: 10 +policy: + model_name: meta-llama/Llama-3.2-1B + tokenizer: + name: meta-llama/Llama-3.2-1B + chat_template: '{% for message in messages %}{%- if 
message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' + train_global_batch_size: 32 + train_micro_batch_size: 1 + max_total_sequence_length: 1024 + precision: float32 + fsdp_offload_enabled: false + activation_checkpointing_enabled: false + dtensor_cfg: + enabled: true + cpu_offload: false + sequence_parallel: false + activation_checkpointing: false + tensor_parallel_size: 1 + make_sequence_length_divisible_by: 1 + max_grad_norm: 1 + optimizer: + name: torch.optim.AdamW + kwargs: + lr: 5e-06 + weight_decay: 0.1 + betas: + - 0.9 + - 0.98 + eps: 1e-05 + foreach: false + fused: false +data: + max_input_seq_length: 1024 + dataset_name: squad + add_bos: true + add_eos: true +logger: + log_dir: logs/sft-llama3.2-1b-1n8g-fsdp2tp1 + wandb_enabled: true + tensorboard_enabled: true + monitor_gpus: true + wandb: + project: nemo-rl + name: sft-llama3.2-1b-1n8g-fsdp2tp1 + tensorboard: + log_dir: tb_logs-sft-dev-squad + gpu_monitoring: + collection_interval: 10 + flush_interval: 10 +cluster: + gpus_per_node: 8 + num_nodes: 1 diff --git a/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.yaml b/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.yaml new file mode 100644 index 0000000000..4cd1a5387c --- /dev/null +++ b/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.yaml @@ -0,0 +1,67 @@ +sft: + max_num_epochs: 1 + max_num_steps: 20 + val_period: 10 + val_batches: 8 + val_global_batch_size: 32 + val_micro_batch_size: 1 + val_at_start: true + seed: 42 +checkpointing: + enabled: true + checkpoint_dir: results/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt + metric_name: val_loss + higher_is_better: false + keep_top_k: 3 + save_period: 10 +policy: + model_name: Qwen/Qwen2.5-32B + tokenizer: + name: Qwen/Qwen2.5-32B + chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' + train_global_batch_size: 32 + train_micro_batch_size: 1 + max_total_sequence_length: 16000 + precision: bfloat16 + fsdp_offload_enabled: false + activation_checkpointing_enabled: false + dtensor_cfg: + enabled: true + cpu_offload: false + sequence_parallel: true + activation_checkpointing: true + tensor_parallel_size: 8 + make_sequence_length_divisible_by: 8 + max_grad_norm: 1 + optimizer: + name: torch.optim.AdamW + kwargs: + lr: 5e-06 + weight_decay: 0.1 + betas: + - 0.9 + - 0.98 + eps: 1e-05 + foreach: false + fused: false +data: + max_input_seq_length: 16000 + dataset_name: squad + add_bos: true + add_eos: true +logger: + log_dir: logs/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt + wandb_enabled: true + tensorboard_enabled: true + monitor_gpus: true + wandb: + project: nemo-rl + name: sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt + tensorboard: + log_dir: tb_logs-sft-dev-squad + gpu_monitoring: + collection_interval: 10 + flush_interval: 10 +cluster: + gpus_per_node: 8 + num_nodes: 4 diff --git a/nemo_rl/__init__.py b/nemo_rl/__init__.py index 1606956b87..c755e5ed0f 100644 --- a/nemo_rl/__init__.py +++ b/nemo_rl/__init__.py @@ -1,3 +1,16 @@ +# Copyright (c) 2025, 
NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os from nemo_rl.package_info import ( __contact_emails__, diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000000..4e51a6efad --- /dev/null +++ b/tests/README.md @@ -0,0 +1,20 @@ +# Tests + +## Launching Release Tests + +```sh +# Assuming you are in the NeMo RL project root + +cd tools/ + +IS_RELEASE=1 CONTAINER=... ACCOUNT=... PARTITION=... ./launch ... + +# DRYRUN=1 to get a rough estimate of compute +DRYRUN=1 IS_RELEASE=1 CONTAINER=... ACCOUNT=... PARTITION=... ./launch ... + +# DRYRUN=2 will create a code snapshot with a fully hermetic example +DRYRUN=2 IS_RELEASE=1 CONTAINER=... ACCOUNT=... PARTITION=... ./launch ... + +# Run all (Caution: this will use a lot of compute; consider listing out the jobs) +IS_RELEASE=1 CONTAINER=... ACCOUNT=... PARTITION=... ./launch ../../recipes/**/*.sh +``` diff --git a/tests/functional/check_metrics.py b/tests/check_metrics.py similarity index 100% rename from tests/functional/check_metrics.py rename to tests/check_metrics.py diff --git a/tests/functional/dpo.sh b/tests/functional/dpo.sh index 2421c5da6a..200a08cdd7 100755 --- a/tests/functional/dpo.sh +++ b/tests/functional/dpo.sh @@ -7,18 +7,19 @@ git config --global --add safe.directory $PROJECT_ROOT set -eou pipefail -LOG_DIR=$SCRIPT_DIR/$(basename $0 .sh)-logs -JSON_METRICS=$LOG_DIR/$(basename $0 .sh).json -RUN_LOG=$LOG_DIR/$(basename $0 .sh).log -export RAY_DEDUP_LOGS=0 +EXP_NAME=$(basename $0 .sh) +EXP_DIR=$SCRIPT_DIR/$EXP_NAME +LOG_DIR=$EXP_DIR/logs +JSON_METRICS=$EXP_DIR/metrics.json +RUN_LOG=$EXP_DIR/run.log export UV_CACHE_DIR=${UV_CACHE_DIR:-$PROJECT_ROOT/uv_cache} export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} -rm -rf $LOG_DIR -mkdir -p $LOG_DIR +rm -rf $EXP_DIR $LOG_DIR +mkdir -p $EXP_DIR $LOG_DIR cd $PROJECT_ROOT -python -u $PROJECT_ROOT/examples/run_dpo.py \ +uv run $PROJECT_ROOT/examples/run_dpo.py \ cluster.gpus_per_node=2 \ dpo.max_num_steps=3 \ dpo.val_batches=1 \ @@ -31,9 +32,8 @@ python -u $PROJECT_ROOT/examples/run_dpo.py \ $@ \ 2>&1 | tee $RUN_LOG -cd $SCRIPT_DIR -python json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS -python check_metrics.py $JSON_METRICS \ +uv run tests/check_metrics.py $JSON_METRICS \ 'data["train/loss"]["2"] < 0.694' \ diff --git a/tests/functional/grpo.sh b/tests/functional/grpo.sh index b61442227b..bbbbd44a11 100755 --- a/tests/functional/grpo.sh +++ b/tests/functional/grpo.sh @@ -2,22 +2,24 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..) 
-# Mark the current repo as safe, since wandb fetchs metadata about the repo +# Mark the current repo as safe, since wandb fetches metadata about the repo git config --global --add safe.directory $PROJECT_ROOT set -eou pipefail -LOG_DIR=$SCRIPT_DIR/$(basename $0 .sh)-logs -JSON_METRICS=$LOG_DIR/$(basename $0 .sh).json -RUN_LOG=$LOG_DIR/$(basename $0 .sh).log +EXP_NAME=$(basename $0 .sh) +EXP_DIR=$SCRIPT_DIR/$EXP_NAME +LOG_DIR=$EXP_DIR/logs +JSON_METRICS=$EXP_DIR/metrics.json +RUN_LOG=$EXP_DIR/run.log export UV_CACHE_DIR=${UV_CACHE_DIR:-$PROJECT_ROOT/uv_cache} export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} -rm -rf $LOG_DIR -mkdir -p $LOG_DIR +rm -rf $EXP_DIR $LOG_DIR +mkdir -p $EXP_DIR $LOG_DIR cd $PROJECT_ROOT -python -u $PROJECT_ROOT/examples/run_grpo_math.py \ +uv run $PROJECT_ROOT/examples/run_grpo_math.py \ cluster.gpus_per_node=2 \ grpo.max_num_steps=3 \ logger.tensorboard_enabled=true \ @@ -27,9 +29,8 @@ python -u $PROJECT_ROOT/examples/run_grpo_math.py \ $@ \ 2>&1 | tee $RUN_LOG -cd $SCRIPT_DIR -python json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS -python check_metrics.py $JSON_METRICS \ +uv run tests/check_metrics.py $JSON_METRICS \ 'max(data["train/token_mult_prob_error"]) < 1.1' \ diff --git a/tests/functional/grpo_multiturn.sh b/tests/functional/grpo_multiturn.sh index ff9befcdd7..a22153c729 100755 --- a/tests/functional/grpo_multiturn.sh +++ b/tests/functional/grpo_multiturn.sh @@ -7,17 +7,19 @@ git config --global --add safe.directory $PROJECT_ROOT set -eou pipefail -LOG_DIR=$SCRIPT_DIR/$(basename $0 .sh)-logs -JSON_METRICS=$LOG_DIR/$(basename $0 .sh).json -RUN_LOG=$LOG_DIR/$(basename $0 .sh).log +EXP_NAME=$(basename $0 .sh) +EXP_DIR=$SCRIPT_DIR/$EXP_NAME +LOG_DIR=$EXP_DIR/logs +JSON_METRICS=$EXP_DIR/metrics.json +RUN_LOG=$EXP_DIR/run.log export UV_CACHE_DIR=${UV_CACHE_DIR:-$PROJECT_ROOT/uv_cache} export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} -rm -rf $LOG_DIR -mkdir -p $LOG_DIR +rm -rf $EXP_DIR $LOG_DIR +mkdir -p $EXP_DIR $LOG_DIR cd $PROJECT_ROOT -python -u $PROJECT_ROOT/examples/run_grpo_sliding_puzzle.py \ +uv run $PROJECT_ROOT/examples/run_grpo_sliding_puzzle.py \ cluster.gpus_per_node=2 \ grpo.max_rollout_turns=10 \ grpo.max_num_steps=3 \ @@ -32,9 +34,8 @@ python -u $PROJECT_ROOT/examples/run_grpo_sliding_puzzle.py \ $@ \ 2>&1 | tee $RUN_LOG -cd $SCRIPT_DIR -python json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS -python check_metrics.py $JSON_METRICS \ +uv run tests/check_metrics.py $JSON_METRICS \ 'max(data["train/token_mult_prob_error"]) < 1.1' \ diff --git a/tests/functional/sft.sh b/tests/functional/sft.sh index f3474fb0fd..90985ae2c1 100755 --- a/tests/functional/sft.sh +++ b/tests/functional/sft.sh @@ -1,26 +1,28 @@ #!/bin/bash -## clean up checkpoint directory on exit +# clean up checkpoint directory on exit trap "rm -rf /tmp/sft_checkpoints" EXIT SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..) 
-# Mark the current repo as safe, since wandb fetchs metadata about the repo +# Mark the current repo as safe, since wandb fetches metadata about the repo git config --global --add safe.directory $PROJECT_ROOT set -eou pipefail -LOG_DIR=$SCRIPT_DIR/$(basename $0 .sh)-logs -JSON_METRICS=$LOG_DIR/$(basename $0 .sh).json -RUN_LOG=$LOG_DIR/$(basename $0 .sh).log +EXP_NAME=$(basename $0 .sh) +EXP_DIR=$SCRIPT_DIR/$EXP_NAME +LOG_DIR=$EXP_DIR/logs +JSON_METRICS=$EXP_DIR/metrics.json +RUN_LOG=$EXP_DIR/run.log export UV_CACHE_DIR=${UV_CACHE_DIR:-$PROJECT_ROOT/uv_cache} export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} -rm -rf $LOG_DIR -mkdir -p $LOG_DIR +rm -rf $EXP_DIR $LOG_DIR +mkdir -p $EXP_DIR $LOG_DIR cd $PROJECT_ROOT -python -u $PROJECT_ROOT/examples/run_sft.py \ +uv run $PROJECT_ROOT/examples/run_sft.py \ policy.model_name=meta-llama/Llama-3.2-1B \ cluster.gpus_per_node=2 \ sft.max_num_steps=10 \ @@ -34,10 +36,9 @@ python -u $PROJECT_ROOT/examples/run_sft.py \ $@ \ 2>&1 | tee $RUN_LOG -cd $SCRIPT_DIR -python json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS # TODO: loss is very noisy, this check is mainly for sanity of immediate divergence -python check_metrics.py $JSON_METRICS \ +uv run tests/check_metrics.py $JSON_METRICS \ 'data["train/loss"]["9"] < 1500' \ diff --git a/tests/functional/json_dump_tb_logs.py b/tests/json_dump_tb_logs.py similarity index 100% rename from tests/functional/json_dump_tb_logs.py rename to tests/json_dump_tb_logs.py diff --git a/tests/test_suites/README.md b/tests/test_suites/README.md new file mode 100644 index 0000000000..3ccf0d75c9 --- /dev/null +++ b/tests/test_suites/README.md @@ -0,0 +1,67 @@ +# Recipes + +## Naming + +Each test is named: +``` +<algo>-<model>-<num_nodes>n<gpus_per_node>g-<parallelism>[-<variant>].sh +``` + +Examples: +* sft-llama3.2-1b-1n8g-fsdp2tp1.sh +* grpo-qwen2-1.5B-instruct-4n8g-fsdp2tp2.sh +* grpo-qwen2-1.5B-instruct-4n8g-fsdp2tp2-long.sh + +## Running manually + +Each recipe can be run on the head node: + +```sh +uv run ./llm/sft-llama3.2-1b-1n8g-fsdp2tp1.sh +``` + +and the result directory can be found at the same level as the script (w/o the `.sh` suffix): + +```sh +ls -lh llm/sft-llama3.2-1b-1n8g-fsdp2tp1/ +# drwxr-xr-x 2 terryk dip 4.0K Apr 23 18:07 ckpts +# drwxr-xr-x 3 terryk dip 4.0K Apr 23 18:07 logs +# -rw-r--r-- 1 terryk dip 142K Apr 23 18:23 metrics.json +# -rw-r--r-- 1 terryk dip 94K Apr 23 18:23 run.log +``` + +## Launching with code snapshots + +We provide a convenience script that will create a code snapshot and launch `NUM_RUNS` slurm jobs (`NUM_RUNS` is defined in the script itself). We create a code snapshot to +ensure that even as the master repo changes its code, you can always run your experiment with +the snapshot of the code at the time the experiment was initially launched. + +```sh +# Launch +CONTAINER=... ACCOUNT=... PARTITION=... ../tools/launch ./llm/sft-llama3.2-1b-1n8g-fsdp2tp1.sh + +# Prints Estimated GPUhrs and then exits +DRYRUN=1 CONTAINER=... ACCOUNT=... PARTITION=... ../tools/launch ./llm/sft-llama3.2-1b-1n8g-fsdp2tp1.sh + +# Prints Estimated GPUhrs, creates code snapshot, then exits +DRYRUN=2 CONTAINER=... ACCOUNT=... PARTITION=... ../tools/launch ./llm/sft-llama3.2-1b-1n8g-fsdp2tp1.sh +``` 
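+ +For reference, each recipe script declares what the launcher needs in a config block near the top of the script. The sketch below is copied from the GRPO Llama 3.1 8B recipe in this repo; the inline comments are informal descriptions of each field, not guaranteed launcher semantics: + +```sh +# ===== BEGIN CONFIG ===== +NUM_NODES=4 # nodes used by each slurm job +STEPS_PER_RUN=100 # training steps expected to complete within one job +MAX_STEPS=500 # total steps across all runs +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=240 # walltime requested per run +# ===== END CONFIG ===== +``` 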
+ +After the launch completes, you can find the result under + +```sh +ls -lh ../code_snapshots/sft-llama3.2-1b-1n8g-fsdp2tp1/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1/ +# drwxr-xr-x 2 terryk dip 4.0K Apr 23 18:07 ckpts +# drwxr-xr-x 3 terryk dip 4.0K Apr 23 18:07 logs +# -rw-r--r-- 1 terryk dip 142K Apr 23 18:23 metrics.json +# -rw-r--r-- 1 terryk dip 94K Apr 23 18:23 run.log +``` + +As a convenience, there's also a `continue.sh` script under the code snapshot +directory that will launch another run using the same arguments. This is helpful +if your job was unexpectedly cancelled or you want to run it for a little longer. + +```sh +# This launches one more run of the same experiment +../code_snapshots/sft-llama3.2-1b-1n8g-fsdp2tp1/continue.sh +``` diff --git a/tests/test_suites/llm/common.env b/tests/test_suites/llm/common.env new file mode 100644 index 0000000000..c2008292b9 --- /dev/null +++ b/tests/test_suites/llm/common.env @@ -0,0 +1,46 @@ +#!/bin/bash +# Source this file before running a test to set up the environment: +# +# source ./common.env +set -eou pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +# Mark all repos as safe in the test context, since wandb fetches metadata about the repo and it's a +# catch-22 to get the project root (via git) and mark it safe when you don't yet know the project root +git config --global --add safe.directory "*" +PROJECT_ROOT=$(git rev-parse --show-toplevel) + +exit_if_max_steps_reached() { + # Early stopping to save compute if max step has been reached; + # the jq query extracts the highest step recorded under train/loss (falls back to 0 if the metrics file is missing) + STEPS_SO_FAR=$(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS || echo 0) + if [[ $STEPS_SO_FAR -ge $MAX_STEPS ]]; then + echo "[INFO] Target step $MAX_STEPS reached, skipping run" + exit 0 + fi + echo "[INFO] Steps so far: $STEPS_SO_FAR, running till $MAX_STEPS steps" +} + +EXP_NAME=$(basename $0 .sh) +EXP_DIR=$SCRIPT_DIR/$EXP_NAME +LOG_DIR=$EXP_DIR/logs +CKPT_DIR=$EXP_DIR/ckpts +JSON_METRICS=$EXP_DIR/metrics.json +RUN_LOG=$EXP_DIR/run.log + +# Test script has path: tests/test_suites/llm/${EXP_NAME}.sh +# where config has path: examples/configs/recipes/llm/${EXP_NAME}.yaml +# We will assume/check the path matches this pattern +CONFIG_PATH=$(echo $SCRIPT_DIR/${EXP_NAME}.yaml | sed 's#tests/test_suites#examples/configs/recipes#') +if [[ ! 
-f $CONFIG_PATH ]]; then + echo "[ERROR] Config file $CONFIG_PATH not found" + exit 1 +fi + +export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} + +if [[ -n "${TEST_DRYRUN:-}" ]]; then + echo "[INFO] TEST_DRYRUN mode: used for testing" + exit +fi + +mkdir -p $EXP_DIR $LOG_DIR $CKPT_DIR diff --git a/tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.sh b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.sh new file mode 100755 index 0000000000..6e64876058 --- /dev/null +++ b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.sh @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=4 +STEPS_PER_RUN=100 +MAX_STEPS=500 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=240 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["100"] < 1.1' +fi + diff --git a/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.sh b/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.sh new file mode 100755 index 0000000000..45cfad6e83 --- /dev/null +++ b/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.sh @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=500 +MAX_STEPS=500 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=120 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["500"] < 1.1' +fi + diff --git a/tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.sh b/tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.sh new file mode 100755 
index 0000000000..69c9899ccd --- /dev/null +++ b/tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.sh @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=16 +STEPS_PER_RUN=10 +MAX_STEPS=20 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=240 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["20"] < 1.1' +fi + diff --git a/tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.sh b/tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.sh new file mode 100755 index 0000000000..ccdef1b2bd --- /dev/null +++ b/tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.sh @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=16 +STEPS_PER_RUN=2 # 40min: step_time: [1341, 801] +MAX_STEPS=2 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=60 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["2"] < 1.1' +fi + diff --git a/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.sh b/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.sh new file mode 100755 index 0000000000..49c96a6f58 --- /dev/null +++ b/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.sh @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=4 +STEPS_PER_RUN=30 +MAX_STEPS=30 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=90 +# ===== END CONFIG ===== + 
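+# exit_if_max_steps_reached comes from common.env: it reads the highest train/loss step recorded in $JSON_METRICS and exits early if $MAX_STEPS has already been reached 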
+exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["30"] < 1.1' +fi + diff --git a/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.sh b/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.sh new file mode 100755 index 0000000000..b3071fb58e --- /dev/null +++ b/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.sh @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=4 +STEPS_PER_RUN=30 +MAX_STEPS=30 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=180 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["30"] < 1.1' +fi + diff --git a/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.sh b/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.sh new file mode 100755 index 0000000000..98df00c25c --- /dev/null +++ b/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.sh @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=450 +MAX_STEPS=450 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=120 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# 
Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["450"] < 1.1' +fi + diff --git a/tests/test_suites/llm/performance/.gitkeep b/tests/test_suites/llm/performance/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.sh b/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.sh new file mode 100755 index 0000000000..1e51c2a78f --- /dev/null +++ b/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.sh @@ -0,0 +1,41 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=250 +MAX_STEPS=250 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=30 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_sft.py \ + --config $CONFIG_PATH \ + sft.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + # TODO: FIGURE OUT CORRECT METRICS + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["1"] < 4' \ + 'data["train/loss"]["250"] < 0.5' \ + 'max(data["ray/node.0.gpu.0.memory"]) < 60000' +fi diff --git a/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.sh b/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.sh new file mode 100755 index 0000000000..32bb6dacb7 --- /dev/null +++ b/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.sh @@ -0,0 +1,43 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# TODO: @ashors real convergence run (dataset only has 2737) +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=2730 +MAX_STEPS=2730 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=120 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_sft.py \ + --config $CONFIG_PATH \ + sft.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# TODO: the memory check is known to OOM. 
see https://github.com/NVIDIA/nemo-rl/issues/263 +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + # TODO: FIGURE OUT CORRECT METRICS + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["1"] < 5' \ + 'data["train/loss"]["2730"] < 0.3' \ + 'max(data["ray/node.0.gpu.0.memory"]) < 45000' +fi diff --git a/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.sh b/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.sh new file mode 100755 index 0000000000..ac441240fc --- /dev/null +++ b/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.sh @@ -0,0 +1,43 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=350 +MAX_STEPS=350 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=45 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_sft.py \ + --config $CONFIG_PATH \ + sft.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# TODO: memory check will fail due to OOM tracked here https://github.com/NVIDIA/nemo-rl/issues/263 + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + # TODO: FIGURE OUT CORRECT METRICS + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["1"] < 5' \ + 'data["train/loss"]["350"] < 0.5' \ + 'max(data["ray/node.0.gpu.0.memory"]) < 45000' +fi diff --git a/tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.sh b/tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.sh new file mode 100755 index 0000000000..24b966c2af --- /dev/null +++ b/tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.sh @@ -0,0 +1,41 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=500 +MAX_STEPS=500 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=15 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_sft.py \ + --config $CONFIG_PATH \ + sft.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["1"] < 2.4' \ 
+ 'data["train/loss"]["500"] < 0.5' \ + 'max(data["ray/node.0.gpu.0.memory"]) < 25000' +fi + diff --git a/tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.sh b/tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.sh new file mode 100755 index 0000000000..9fb5f7839b --- /dev/null +++ b/tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.sh @@ -0,0 +1,43 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# TODO: this config can crash on OOM +# https://github.com/NVIDIA/nemo-rl/issues/263 + +# ===== BEGIN CONFIG ===== +NUM_NODES=4 +STEPS_PER_RUN=20 # step_time ~ 29sec +MAX_STEPS=20 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=30 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_sft.py \ + --config $CONFIG_PATH \ + sft.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["1"] < 1.5' \ + 'data["train/loss"]["20"] < 0.3' \ + 'max(data["ray/node.0.gpu.0.memory"]) < 35000' +fi diff --git a/tests/test_suites/nightly.txt b/tests/test_suites/nightly.txt new file mode 100644 index 0000000000..4c609d5bff --- /dev/null +++ b/tests/test_suites/nightly.txt @@ -0,0 +1,28 @@ +######## +# GRPO # +######## + +# Short 1N/1B runs (go past 200 steps - usually divergence happens by now) -- going to 4 nodes doesn't help that much +tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.sh +tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.sh + +# FSDP1 vs Dtensor (Qwen/Qwen2.5-7B-Instruct) +tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.sh +tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.sh + +# Functional 32b run +tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.sh + +####### +# SFT # +####### + +# 1N 1B/8B runs +tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.sh + +# Dtensor vs fsdp1 (8B) +tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.sh +tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.sh + +# Functional 32b test +tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.sh diff --git a/tests/test_suites/nightly_performance.txt b/tests/test_suites/nightly_performance.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_suites/release.txt b/tests/test_suites/release.txt new file mode 100644 index 0000000000..69735cb0cb --- /dev/null +++ b/tests/test_suites/release.txt @@ -0,0 +1,16 @@ +######## +# GRPO # +######## + +# Long 8b run +tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.sh + +# Long 32b run +tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.sh + +####### +# SFT # +####### + +# Long 8b convergence +tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.sh \ No newline at end of file diff 
--git a/tests/test_suites/release_performance.txt b/tests/test_suites/release_performance.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/test_recipes_and_test_suites.py b/tests/unit/test_recipes_and_test_suites.py new file mode 100644 index 0000000000..edceba3649 --- /dev/null +++ b/tests/unit/test_recipes_and_test_suites.py @@ -0,0 +1,216 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import glob
+import os
+import subprocess
+
+import pytest
+
+dir_path = os.path.dirname(os.path.abspath(__file__))
+project_root = os.path.abspath(os.path.join(dir_path, "..", ".."))
+test_suites_dir = os.path.join(project_root, "tests", "test_suites")
+
+nightly_test_suite_path = os.path.join(test_suites_dir, "nightly.txt")
+release_test_suite_path = os.path.join(test_suites_dir, "release.txt")
+nightly_performance_test_suite_path = os.path.join(
+    test_suites_dir, "nightly_performance.txt"
+)
+release_performance_test_suite_path = os.path.join(
+    test_suites_dir, "release_performance.txt"
+)
+
+
+def _load_test_suite(path):
+    """Return the non-empty, non-comment lines of a test-suite manifest."""
+    suite = []
+    with open(path, "r") as f:
+        for line in f:
+            line = line.strip()
+            if line and not line.startswith("#"):
+                suite.append(line)
+    return suite
+
+
+@pytest.fixture
+def nightly_test_suite():
+    return _load_test_suite(nightly_test_suite_path)
+
+
+@pytest.fixture
+def release_test_suite():
+    return _load_test_suite(release_test_suite_path)
+
+
+@pytest.fixture
+def nightly_performance_test_suite():
+    return _load_test_suite(nightly_performance_test_suite_path)
+
+
+@pytest.fixture
+def release_performance_test_suite():
+    return _load_test_suite(release_performance_test_suite_path)
+
+
+@pytest.fixture
+def all_test_suites(
+    nightly_test_suite,
+    release_test_suite,
+    nightly_performance_test_suite,
+    release_performance_test_suite,
+):
+    return (
+        nightly_test_suite
+        + release_test_suite
+        + nightly_performance_test_suite
+        + release_performance_test_suite
+    )
+
+
+@pytest.mark.parametrize(
+    "test_suite_path",
+    [
+        nightly_test_suite_path,
+        release_test_suite_path,
+        nightly_performance_test_suite_path,
+        release_performance_test_suite_path,
+    ],
+    ids=[
+        "nightly_test_suite",
+        "release_test_suite",
+        "nightly_performance_test_suite",
+        "release_performance_test_suite",
+    ],
+)
+def test_test_suites_exist(test_suite_path):
+    assert os.path.exists(test_suite_path), (
+        f"Test suite {test_suite_path} does not exist"
+    )
+
+
+def test_no_overlap_across_test_suites(all_test_suites):
+    recipes = set(all_test_suites)
+    duplicates = sorted(r for r in recipes if all_test_suites.count(r) > 1)
+    assert len(recipes) == len(all_test_suites), (
+        f"Test suites contain duplicate entries: {duplicates}"
+    )
+
+
+def test_all_recipes_accounted_for_in_test_suites(all_test_suites):
+    all_recipes_in_test_suites = set(all_test_suites)
+
+    all_tests_in_test_suites_dir = set()
+    for recipe_path in glob.glob(
+        os.path.join(test_suites_dir, "**", "*.sh"), recursive=True
+    ):
+        # Strip off the project root and leading slash
+        recipe_name = recipe_path[len(project_root) + 1 :]
+        all_tests_in_test_suites_dir.add(recipe_name)
+
+    assert all_recipes_in_test_suites == all_tests_in_test_suites_dir, (
+        "Not all recipes are accounted for in the test suites; mismatched entries: "
+        f"{all_recipes_in_test_suites ^ all_tests_in_test_suites_dir}"
+    )
+
+
+def test_nightly_compute_stays_below_1024_hours(nightly_test_suite, tracker):
+    command = f"DRYRUN=1 HF_HOME=... HF_DATASETS_CACHE=... CONTAINER= ACCOUNT= PARTITION= ./tools/launch {' '.join(nightly_test_suite)}"
+
+    print(f"Running command: {command}")
+
+    # Run the command from the project root directory
+    result = subprocess.run(
+        command,
+        shell=True,
+        cwd=project_root,
+        capture_output=True,
+        text=True,
+        check=False,  # Don't raise exception on non-zero exit code
+    )
+
+    # Print stdout and stderr for debugging if the test fails
+    print("STDOUT:")
+    print(result.stdout)
+    print("STDERR:")
+    print(result.stderr)
+
+    # Assert that the command exited successfully
+    assert result.returncode == 0, f"Command failed with exit code {result.returncode}"
+
+    # Assert that the last line of stdout contains the expected prefix
+    stdout_lines = result.stdout.strip().splitlines()
+    assert len(stdout_lines) > 0, "Command produced no output"
+    last_line = stdout_lines[-1]
+    assert last_line.startswith("[INFO]: Total GPU hours:"), (
+        f"Last line of output was not as expected: '{last_line}'"
+    )
+    total_gpu_hours = float(last_line.split(":")[-1].strip())
+    assert total_gpu_hours <= 1024, (
+        f"Total GPU hours exceeded 1024: {last_line}. We should revisit the test suites to reduce the total GPU hours."
+    )
+    tracker.track("total_nightly_gpu_hours", total_gpu_hours)
+
+
+def test_dry_run_does_not_fail_and_prints_total_gpu_hours():
+    command = "DRYRUN=1 HF_HOME=... HF_DATASETS_CACHE=... CONTAINER= ACCOUNT= PARTITION= ./tools/launch ./tests/test_suites/**/*.sh"
+
+    # Run the command from the project root directory
+    result = subprocess.run(
+        command,
+        shell=True,
+        cwd=project_root,
+        capture_output=True,
+        text=True,
+        check=False,  # Don't raise exception on non-zero exit code
+    )
+
+    # Print stdout and stderr for debugging if the test fails
+    print("STDOUT:")
+    print(result.stdout)
+    print("STDERR:")
+    print(result.stderr)
+
+    # Assert that the command exited successfully
+    assert result.returncode == 0, f"Command failed with exit code {result.returncode}"
+
+    # Assert that the last line of stdout contains the expected prefix
+    stdout_lines = result.stdout.strip().splitlines()
+    assert len(stdout_lines) > 0, "Command produced no output"
+    last_line = stdout_lines[-1]
+    assert last_line.startswith("[INFO]: Total GPU hours:"), (
+        f"Last line of output was not as expected: '{last_line}'"
+    )
+
+
+def test_all_tests_can_find_config_if_dryrun(all_test_suites):
+    for test_suite in all_test_suites:
+        command = f"TEST_DRYRUN=1 {test_suite}"
+        result = subprocess.run(
+            command,
+            shell=True,
+            cwd=project_root,
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+        assert result.returncode == 0, (
+            f"Command failed with exit code {result.returncode}"
+        )
diff --git a/tools/autoformat.sh b/tools/autoformat.sh old mode 100644 new mode 100755 diff --git a/tools/code_snapshot.sh b/tools/code_snapshot.sh new file mode 100644 index 0000000000..62136a8632 --- /dev/null +++ b/tools/code_snapshot.sh @@ -0,0 +1,40 @@
+#!/bin/bash
+
+set -euo pipefail
+
+SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
+PROJECT_ROOT=${SCRIPT_DIR}/..
+cd ${PROJECT_ROOT}
+
+echo2() {
+    echo "$@" >&2
+}
+
+if [[ ! -e "$PROJECT_ROOT/.git" ]]; then
+    echo2 "[Error]: This script was not run from the root of the NeMo RL git repo. Please clone it first."
+    exit 1
+elif [[ $# -lt 1 ]]; then
+    echo2 "[Error]: This script requires one argument: the name of the experiment to be used as the snapshot directory name"
+    echo2 "Usage: bash tools/code_snapshot.sh <exp_name>"
+    exit 1
+fi
+
+EXP_NAME=$1
+
+SNAPSHOT_DIR="$PROJECT_ROOT/code_snapshots/${EXP_NAME}"
+if [[ ! -d "$SNAPSHOT_DIR" ]]; then
+    echo2 "Creating new code snapshot in $SNAPSHOT_DIR"
+    mkdir -p $SNAPSHOT_DIR
+else
+    echo2 "Using existing code snapshot in $SNAPSHOT_DIR"
+    # Echo the snapshot directory so the caller can use it to `cd` into it
+    echo ${SNAPSHOT_DIR}
+    exit
+fi
+
+echo2 "Copying git-tracked files..."
+rsync -a --files-from=<(git ls-files) ./ $SNAPSHOT_DIR/
+
+
+# Echo the snapshot directory so the caller can use it to `cd` into it
+echo ${SNAPSHOT_DIR}
\ No newline at end of file
diff --git a/tools/launch b/tools/launch new file mode 100755 index 0000000000..4c76cee78d --- /dev/null +++ b/tools/launch @@ -0,0 +1,175 @@
+#!/bin/bash
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
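+
+# Budget note: one submitted script accounts for
+#   NUM_RUNS * NUM_NODES * 8 * (NUM_MINUTES / 60)
+# GPU hours, with 8 GPUs per node assumed throughout (matching --gres=gpu:8 below).
+# For example, a single run on 4 nodes with a 30-minute time limit costs
+#   1 * 4 * 8 * 30 / 60 = 16 GPU hours.
+# The dry-run modes sum this estimate over every script passed in.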
+################################################################################
+# This is a helper script to launch a release test on slurm.
+# It reads a demarcated section of the script to extract the config,
+# and uses that to determine how many nodes and how many chained jobs to launch.
+#
+# It also creates a code snapshot to ensure that the code is reproducible and that
+# subsequent jobs can be launched with the same code. It also creates a continue.sh
+# in the code snapshot directory to continue launching the job even if the original
+# invocation was forgotten.
+#
+# Usage:
+#   CONTAINER=... ACCOUNT=... PARTITION=... ./launch <script> [<script> ...]
+#
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+PROJECT_ROOT=$(realpath $SCRIPT_DIR/..)
+
+# Function to extract config from a script
+extract_config() {
+    local script_path="$1"
+    local config=$(sed -n '/^# =\+ BEGIN CONFIG =\+/,/^# =\+ END CONFIG =\+/p' "$script_path" |
+        grep -v "^#" |
+        grep "=" )
+    if [[ -z "$config" ]]; then
+        echo "[ERROR]: No config section found in script_path=$script_path"
+        echo "[ERROR]: Please add and update a section in the script with these variables:"
+        echo
+        echo "# ===== BEGIN CONFIG ====="
+        echo "NUM_NODES=1 # How many nodes this job uses"
+        echo "STEPS_PER_RUN=60 # Approximately how many steps reached in one job"
+        echo "MAX_STEPS=60 # Max training steps"
+        echo 'NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up'
+        echo "NUM_MINUTES=240 # How many minutes one job is (SLURM specific)"
+        echo "# ===== END CONFIG ====="
+        return 1
+    fi 1>&2
+    echo "$config"
+}
+
+check_file_in_version_control_and_get_relpath_from_git_root() {
+    local script_path="$1"
+    local rel_path_from_git_root
+    # Check if the script is tracked in git (assumes we're in the repo already).
+    # Guarding the assignment with `if !` keeps `set -e` from aborting the script
+    # before the error message below can be printed.
+    if ! rel_path_from_git_root=$(git ls-files --full-name --error-unmatch "$script_path"); then
+        echo "[ERROR]: Script '$script_path' is not tracked in version control." >&2
+        echo "[ERROR]: This may cause reproducibility issues. Add it to git to continue." >&2
+        return 1
+    fi
+    echo "$rel_path_from_git_root"
+}
+
+set -eou pipefail
+
+if [[ $# -eq 0 ]]; then
+    echo "Error: No script provided."
+    echo "Usage: CONTAINER=... ACCOUNT=... PARTITION=... $0 <script> [<script> ...]"
+    exit 1
+fi
+
+# Check for mandatory environment variables
+for VAR in "HF_HOME" "HF_DATASETS_CACHE"; do
+    if [[ -z "${!VAR:-}" ]]; then
+        echo "[ERROR]: $VAR environment variable is not set."
+        echo "[ERROR]: Please set $VAR to specify the appropriate Hugging Face directory."
+        echo "Example: export $VAR=/path/to/appropriate/directory"
+        exit 1
+    fi
+done
+
+CONTAINER=$CONTAINER
+ACCOUNT=$ACCOUNT
+PARTITION=$PARTITION
+MOUNTS=${MOUNTS:-}
+# DRYRUN=1 prints the runs and how much compute they use
+# DRYRUN=2 additionally creates the snapshots (helpful to run a hermetic example manually or share a repro)
+DRYRUN=${DRYRUN:-}
+IS_RELEASE=${IS_RELEASE:-} # Adds extra configuration for wandb to track this in the right project
+NOW=$(date '+%y%m%d-%H%M%S')
+
+if [[ -n "$MOUNTS" ]]; then
+    # Comma needed since we always mount PWD
+    MOUNTS=",$MOUNTS"
+fi
+
+SCRIPTS=""
+for SCRIPT in $@; do
+    if [[ ! -f "$SCRIPT" ]]; then
+        echo "Error: Script '$SCRIPT' does not exist or is not a file."
+        echo "Please provide a valid script path."
+        exit 1
+    fi
+    SCRIPTS+=" $SCRIPT"
+done
+
+total_gpu_hours=0
+
+for SCRIPT in $SCRIPTS; do
+    # Extract and evaluate the config
+    if ! config=$(extract_config $SCRIPT); then
+        # Error message is already printed by extract_config
+        exit 1
+    fi
+    eval "$config"
+
+    # NUM_RUNS * NUM_NODES * NUM_GPUS * (NUM_MINUTES / 60)
+    gpu_hours=$((NUM_RUNS * NUM_NODES * 8 * NUM_MINUTES / 60))
+    total_gpu_hours=$((total_gpu_hours + gpu_hours))
+    echo "[INFO]: $gpu_hours GPU hours to run $SCRIPT"
+    if [[ "${DRYRUN}" -eq 1 ]]; then
+        echo "[DRY_RUN]: Skipping creation of snapshot and submission of $SCRIPT."
+        continue
+    fi
+
+    rel_script=$(check_file_in_version_control_and_get_relpath_from_git_root $SCRIPT)
+
+    EXP_NAME=$(basename $SCRIPT .sh)
+    SNAPSHOT_DIR=$(bash $PROJECT_ROOT/tools/code_snapshot.sh $EXP_NAME)
+
+    # Now use the variables
+    for i in $(seq 1 $NUM_RUNS); do
+        echo "Submitting $i/$NUM_RUNS job with ${NUM_NODES} nodes for $(basename $SCRIPT)"
+        JOB_NAME=$(basename $SCRIPT .sh)
+
+        RELEASE_ARGS=()
+        if [[ -n "${IS_RELEASE}" ]]; then
+            RELEASE_ARGS=(
+                logger.wandb.project=nemo-rl-release
+                logger.wandb.name=$(basename $SCRIPT .sh)-$(git rev-parse --short HEAD)
+            )
+        fi
+
+        # TODO: jq install is just to be backward compatible with older containers. Should eventually remove.
+        cat <<EOF >$SNAPSHOT_DIR/continue.sh
+#!/bin/bash
+SCRIPT_DIR=\$( cd -- "\$( dirname -- "\${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+cd \$SCRIPT_DIR
+
+HF_HOME=$HF_HOME \\
+HF_DATASETS_CACHE=$HF_DATASETS_CACHE \\
+COMMAND="apt install -y jq && uv run $rel_script ${RELEASE_ARGS[@]}" \\
+CONTAINER=$CONTAINER \\
+MOUNTS="$SNAPSHOT_DIR:$SNAPSHOT_DIR${MOUNTS}" \\
+sbatch \\
+    --nodes=$NUM_NODES \\
+    --account=$ACCOUNT \\
+    --job-name=$ACCOUNT:$JOB_NAME \\
+    --partition=$PARTITION \\
+    --time=0:${NUM_MINUTES}:0 \\
+    --gres=gpu:8 \\
+    --output=slurm-${NOW}-%j-${JOB_NAME}-${i}.${NUM_RUNS}.out \\
+    ray.sub
+EOF
+        if [[ "${DRYRUN}" -eq 2 ]]; then
+            echo "[DRY_RUN]: Skipping submission of $SCRIPT. Find the snapshot at $SNAPSHOT_DIR and manually launch with 'bash continue.sh'"
+        else
+            bash $SNAPSHOT_DIR/continue.sh
+        fi
+    done
+done
+echo "[INFO]: Total GPU hours: $total_gpu_hours"
diff --git a/tools/package_release_runs.sh b/tools/package_release_runs.sh new file mode 100755 index 0000000000..357c9ad618 --- /dev/null +++ b/tools/package_release_runs.sh @@ -0,0 +1,53 @@
+#!/bin/bash
+
+# This script packages all release runs into a tarball tagged with a git SHA so that we can
+# upload it to our release page. The SHA avoids conflicts with previous runs; when we upload,
+# we should strip it so that users can rely on the name release_runs.tar.gz (the rename can
+# be done in the GitHub Release UI).
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+PROJECT_ROOT=$(realpath $SCRIPT_DIR/..)
+cd $PROJECT_ROOT
+
+set -eou pipefail
+# Enable recursive globbing
+shopt -s globstar
+
+OUTPUT_TAR="release_runs-$(git rev-parse --short HEAD).tar.gz"
+
+TB_EVENTS=$(ls code_snapshots/*/tests/test_suites/**/logs/*/tensorboard/events* || true)
+
+# Check if the glob expanded to any files
+if [[ -z "$TB_EVENTS" ]]; then
+  echo "Error: No tensorboard event files found matching the pattern."
+  exit 1
+elif [[ -f $OUTPUT_TAR ]]; then
+  echo "Error: $OUTPUT_TAR already exists. Clean it up before continuing."
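+  # (e.g. rm "$OUTPUT_TAR", or move it aside first if you still need it)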
+  exit 1
+fi
+
+TMP_DIR=$(mktemp -d)
+echo "Created temporary directory: $TMP_DIR"
+
+# Set up trap to clean up temporary directory on exit
+trap "echo 'Cleaning up temporary directory $TMP_DIR'; rm -rf $TMP_DIR" EXIT
+
+# Loop over all the recipe runs and package them into a tarball
+for tbevent in $TB_EVENTS; do
+  exp_name=$(basename -- $(cut -d/ -f2 <<<$tbevent) -logs)
+  # Obfuscate the hostname, keeping the timestamp and trailing fields, e.g.
+  # events.out.tfevents.1744822578.<hostname>.780899.0 -> events.out.tfevents.1744822578.HOSTNAME.780899.0
+  obfuscated_event_path=$(basename $tbevent | awk -F. '{print $1"."$2"."$3"."$4".HOSTNAME."$(NF-1)"."$NF}')
+
+  # Create subdirectory for experiment if it doesn't exist
+  mkdir -p "$TMP_DIR/$exp_name"
+
+  # Copy the event file with obfuscated name to the experiment subdirectory
+  cp "$tbevent" "$TMP_DIR/$exp_name/$obfuscated_event_path"
+
+  echo "[$exp_name] Copied $tbevent to $TMP_DIR/$exp_name/$obfuscated_event_path"
+done
+
+# Create a tarball of all the processed event files
+tar -czf "$OUTPUT_TAR" -C "$TMP_DIR" .
+echo "Created tarball: $OUTPUT_TAR"
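
Note on the step gate shared by the test-suite scripts above: check_metrics.py is only meaningful once training has actually reached MAX_STEPS, so each script first asks jq for the largest step recorded under "train/loss". A minimal sketch of that filter, assuming json_dump_tb_logs.py emits a {metric: {step: value}} mapping (the file below is fabricated for illustration):

# Fabricated metrics file matching the schema the jq filter expects
cat > /tmp/metrics.json <<'EOF'
{
  "train/loss": {"1": 2.1, "15": 0.9, "30": 0.4},
  "train/token_mult_prob_error": {"1": 1.02, "30": 1.05}
}
EOF

# Pick the "train/loss" entry, treat its keys (steps) as numbers,
# and report the largest step that was actually logged
jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' /tmp/metrics.json
# prints 30, so a script with MAX_STEPS=30 would proceed to run check_metrics.py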