1 change: 1 addition & 0 deletions .github/workflows/cicd-main.yml
@@ -188,6 +188,7 @@ jobs:
time uv run --no-sync bash ./tests/functional/sft.sh
time uv run --no-sync bash ./tests/functional/grpo.sh
time uv run --no-sync bash ./tests/functional/grpo_multiturn.sh
time uv run --no-sync bash ./tests/functional/grpo_non_colocated.sh
time uv run --no-sync bash ./tests/functional/dpo.sh
time uv run --no-sync bash ./tests/functional/eval.sh
time uv run --no-sync bash ./tests/functional/test_mcore_extra_installed_correctly.sh
8 changes: 8 additions & 0 deletions examples/configs/grpo-deepscaler-1.5b-8K.yaml
@@ -101,6 +101,14 @@ policy:
# For most cases, use "dummy" to load the initial weights, since they will be overwritten during refit
# For Gemma models, we need to use "auto" due to a vllm bug
load_format: dummy
colocated:
# true: generation shares the training GPUs
# false: generation runs on dedicated resources
enabled: true
# Only relevant when enabled is false
resources:
gpus_per_node: null # Number of GPUs dedicated to generation on a single-node cluster (i.e., cluster.num_nodes == 1)
num_nodes: null # Number of nodes dedicated to generation

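As a concrete illustration of the non-colocated mode described by the comments above, the block below reserves dedicated generation resources on a single-node cluster. The specific values are illustrative assumptions, not taken from this PR:

```yaml
policy:
  generation:
    colocated:
      enabled: false          # generation does not share training GPUs
      resources:
        gpus_per_node: 2      # e.g., reserve 2 of the node's GPUs for generation (single-node cluster)
        num_nodes: null       # unused when cluster.num_nodes == 1
```

With `enabled: true` (the default in these configs), both `resources` fields are ignored and generation time-shares the training GPUs.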
data:
max_input_seq_length: ${policy.max_total_sequence_length} # upper bound, real truncation occurs at vllm.max_model_len
8 changes: 8 additions & 0 deletions examples/configs/grpo_math_1B.yaml
@@ -106,6 +106,14 @@ policy:
pipeline_parallel_size: 1
gpu_memory_utilization: 0.6
max_model_len: ${policy.max_total_sequence_length}
colocated:
# true: generation shares the training GPUs
# false: generation runs on dedicated resources
enabled: true
# Only relevant when enabled is false
resources:
gpus_per_node: null # Number of GPUs dedicated to generation on a single-node cluster (i.e., cluster.num_nodes == 1)
num_nodes: null # Number of nodes dedicated to generation

data:
max_input_seq_length: ${policy.max_total_sequence_length} # upper bound, real truncation occurs at vllm.max_model_len
@@ -88,6 +88,11 @@ policy:
pipeline_parallel_size: 1
gpu_memory_utilization: 0.6
max_model_len: 512
colocated:
enabled: true
resources:
gpus_per_node: null
num_nodes: null
data:
max_input_seq_length: 512
prompt_file: examples/prompts/cot.txt
@@ -89,6 +89,11 @@ policy:
pipeline_parallel_size: 1
gpu_memory_utilization: 0.6
max_model_len: 16384
colocated:
enabled: true
resources:
gpus_per_node: null
num_nodes: null
data:
max_input_seq_length: 16384
prompt_file: examples/prompts/cot.txt
@@ -89,6 +89,11 @@ policy:
pipeline_parallel_size: 1
gpu_memory_utilization: 0.6
max_model_len: 4096
colocated:
enabled: true
resources:
gpus_per_node: null
num_nodes: null
data:
max_input_seq_length: 4096
prompt_file: examples/prompts/cot.txt
@@ -89,6 +89,11 @@ policy:
pipeline_parallel_size: 1
gpu_memory_utilization: 0.6
max_model_len: 512
colocated:
enabled: true
resources:
gpus_per_node: null
num_nodes: null
data:
max_input_seq_length: 512
prompt_file: examples/prompts/cot.txt
@@ -89,6 +89,11 @@ policy:
pipeline_parallel_size: 1
gpu_memory_utilization: 0.6
max_model_len: 16384
colocated:
enabled: true
resources:
gpus_per_node: null
num_nodes: null
data:
max_input_seq_length: 16384
prompt_file: examples/prompts/cot.txt
@@ -89,6 +89,11 @@ policy:
pipeline_parallel_size: 1
gpu_memory_utilization: 0.6
max_model_len: 16384
colocated:
enabled: true
resources:
gpus_per_node: null
num_nodes: null
data:
max_input_seq_length: 16384
prompt_file: examples/prompts/cot.txt
@@ -86,6 +86,11 @@ policy:
pipeline_parallel_size: 1
gpu_memory_utilization: 0.6
max_model_len: 4096
colocated:
enabled: true
resources:
gpus_per_node: null
num_nodes: null
data:
max_input_seq_length: 4096
prompt_file: examples/prompts/cot.txt
@@ -89,6 +89,11 @@ policy:
pipeline_parallel_size: 1
gpu_memory_utilization: 0.6
max_model_len: 4096
colocated:
enabled: true
resources:
gpus_per_node: null
num_nodes: null
data:
max_input_seq_length: 4096
prompt_file: examples/prompts/cot.txt
@@ -89,6 +89,11 @@ policy:
pipeline_parallel_size: 1
gpu_memory_utilization: 0.6
max_model_len: 512
colocated:
enabled: true
resources:
gpus_per_node: null
num_nodes: null
data:
max_input_seq_length: 512
prompt_file: examples/prompts/cot.txt