From 04011b5185e320fa94421b83b6f124743487c5f1 Mon Sep 17 00:00:00 2001 From: ashors1 Date: Tue, 29 Apr 2025 15:18:22 -0700 Subject: [PATCH] remove outdated comment in DPO config Signed-off-by: ashors1 --- examples/configs/dpo.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/configs/dpo.yaml b/examples/configs/dpo.yaml index 44e2491227..34367dee20 100755 --- a/examples/configs/dpo.yaml +++ b/examples/configs/dpo.yaml @@ -55,8 +55,6 @@ policy: # makes the training sequence length divisible by the tensor parallel size # this is useful for sequence parallel training make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} - ## NOTE: there is a known issue with gradient clipping when using Dtensor - ## if using dtensor, set max_grad_norm to NULL max_grad_norm: 1.0 optimizer: