diff --git a/examples/configs/grpo_math_8B_megatron.yaml b/examples/configs/grpo_math_8B_megatron.yaml index 41ce4412fc..fc839c8239 100644 --- a/examples/configs/grpo_math_8B_megatron.yaml +++ b/examples/configs/grpo_math_8B_megatron.yaml @@ -34,7 +34,8 @@ policy: empty_unused_memory_level: 0 converter_type: "LlamaForCausalLM" tensor_model_parallel_size: 1 - pipeline_model_parallel_size: 1 + # On H100, this can be lowered to pp=1 for better performance with expandable segments (which is enabled by default) + pipeline_model_parallel_size: 2 context_parallel_size: 1 pipeline_dtype: ${policy.precision} @@ -66,7 +67,7 @@ policy: stop_strings: null vllm_cfg: tensor_parallel_size: 1 - gpu_memory_utilization: 0.95 + gpu_memory_utilization: 0.8 max_model_len: ${policy.max_total_sequence_length} cluster: