conda create -n Alpha python=3.10
conda activate Alpha
pip install -r requirements.txt
wandb login
Open ./examples/ppo_trainer/qwen3B-instruct.sh, fill in your checkpoint save directory, and then start running qwen3B-instruct.
bash ./examples/ppo_trainer/qwen3B-instruct.sh
Open the merge script and fill in your specific checkpoint path and hf_model_path to get the final model.
Please refer to the official verl README.
Open ./Evaluation