From da3ed94235469111facde3a75095483d6ccfc707 Mon Sep 17 00:00:00 2001
From: ashors1
Date: Tue, 22 Apr 2025 09:10:08 -0700
Subject: [PATCH 1/2] decrease global batch size in dpo functional test

Signed-off-by: ashors1
---
 tests/functional/dpo.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/functional/dpo.sh b/tests/functional/dpo.sh
index 1431f17e61..2fb6a6c9b4 100755
--- a/tests/functional/dpo.sh
+++ b/tests/functional/dpo.sh
@@ -22,6 +22,7 @@ python -u $PROJECT_ROOT/examples/run_dpo.py \
     cluster.gpus_per_node=2 \
     dpo.max_num_steps=3 \
     dpo.val_batches=1 \
+    policy.train_global_batch_size=8 \
     logger.tensorboard_enabled=true \
     logger.log_dir=$LOG_DIR \
     logger.wandb_enabled=false \

From 12085e8761d1ca80d3ef6088fac854daa665ab7d Mon Sep 17 00:00:00 2001
From: ashors1
Date: Tue, 22 Apr 2025 11:31:09 -0700
Subject: [PATCH 2/2] decrease val batch size

Signed-off-by: ashors1
---
 tests/functional/dpo.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/functional/dpo.sh b/tests/functional/dpo.sh
index 2fb6a6c9b4..2421c5da6a 100755
--- a/tests/functional/dpo.sh
+++ b/tests/functional/dpo.sh
@@ -22,6 +22,7 @@ python -u $PROJECT_ROOT/examples/run_dpo.py \
     cluster.gpus_per_node=2 \
     dpo.max_num_steps=3 \
     dpo.val_batches=1 \
+    dpo.val_global_batch_size=8 \
     policy.train_global_batch_size=8 \
     logger.tensorboard_enabled=true \
     logger.log_dir=$LOG_DIR \