diff --git a/docs/guides/async-grpo.md b/docs/guides/async-grpo.md index 8c0d8e6baa..50f84b4e66 100644 --- a/docs/guides/async-grpo.md +++ b/docs/guides/async-grpo.md @@ -39,6 +39,7 @@ policy: ```yaml grpo: async_grpo: + enabled: true max_trajectory_age_steps: 1 # Maximum age, in training steps, for trajectories ``` @@ -62,6 +63,7 @@ grpo: num_prompts_per_step: 32 num_generations_per_prompt: 4 async_grpo: + enabled: true max_trajectory_age_steps: 1 cluster: