diff --git a/applications/ColossalChat/examples/training_scripts/train_dpo.py b/applications/ColossalChat/examples/training_scripts/train_dpo.py index b8de6396f172..3b324ee784e0 100755 --- a/applications/ColossalChat/examples/training_scripts/train_dpo.py +++ b/applications/ColossalChat/examples/training_scripts/train_dpo.py @@ -279,10 +279,7 @@ def train(args): beta=args.beta, gamma=args.gamma, length_normalization=args.length_normalization, -<<<<<<< HEAD -======= apply_loss_mask=not args.disable_loss_mask, ->>>>>>> main ) trainer.fit( @@ -351,10 +348,7 @@ def train(args): default=False, help="Disable the reference model (enabled by default)", ) -<<<<<<< HEAD -======= parser.add_argument("--disable_loss_mask", default=False, action="store_true") ->>>>>>> main parser.add_argument("--mixed_precision", type=str, default="fp16", choices=["fp16", "bf16"], help="Mixed precision") parser.add_argument("--lora_config", type=str, default=None, help="low-rank adaptation config file path") parser.add_argument("--save_interval", type=int, default=1000, help="number of step between two checkpoints")