From c4f1ee23bb350c0fcec869f5f84b090d3fe48960 Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Wed, 23 Apr 2025 20:28:05 +0000 Subject: [PATCH 1/2] fix: fix broken eval script Signed-off-by: Parth Chadha --- nemo_reinforcer/evals/eval.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nemo_reinforcer/evals/eval.py b/nemo_reinforcer/evals/eval.py index a1a4cad74b..d0c27044d8 100644 --- a/nemo_reinforcer/evals/eval.py +++ b/nemo_reinforcer/evals/eval.py @@ -165,10 +165,10 @@ def run_env_eval(vllm_generation, dataloader, env, master_config): get_keys_from_message_log(batch["message_log"][i], ["role", "content"]) for i in range(len(batch["message_log"])) ] - _, _, rewards, _ = ray.get(env.step.remote(to_env, batch["extra_env_info"])) + env_return = ray.get(env.step.remote(to_env, batch["extra_env_info"])) - score += rewards.sum().item() - count += len(rewards) + score += env_return.rewards.sum().item() + count += len(env_return.rewards) # Cleanup before printing results ray.get(env.shutdown.remote()) From 94aaf332a2368ad0c42140c2baa6f2d3715afbae Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Wed, 23 Apr 2025 20:48:01 +0000 Subject: [PATCH 2/2] Update eval.md to reflect how to eval grpo/sft model Signed-off-by: Parth Chadha --- docs/guides/eval.md | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/docs/guides/eval.md b/docs/guides/eval.md index 8ac5ab5675..f547e19ff8 100644 --- a/docs/guides/eval.md +++ b/docs/guides/eval.md @@ -3,6 +3,11 @@ ## Start Evaluation ### Start Script + +**Evaluating Standard Models:** + +To run evaluation using a model directly from Hugging Face Hub or a local path already in HF format, use the `run_eval.py` script. 
```sh
# To run the evaluation with default config (examples/configs/eval.yaml)
uv run python examples/run_eval.py
@@ -10,10 +15,34 @@ uv run python examples/run_eval.py
# Specify a custom config file
uv run python examples/run_eval.py --config path/to/custom_config.yaml
-# Override specific config values via command line
+# Override specific config values via command line (e.g., model name)
uv run python examples/run_eval.py generation.model_name="Qwen/Qwen2.5-Math-7B-Instruct"
```
+**Evaluating Models Trained with DCP Checkpoints (GRPO/SFT):**
+
+If you have trained a model using GRPO or SFT and saved the checkpoint in the PyTorch DCP format, you first need to convert it to the Hugging Face format before running evaluation.
+
+1. **Convert DCP to HF:**
+   Use the `examples/convert_dcp_to_hf.py` script. You'll need the path to the training configuration file (`config.json`), the DCP checkpoint directory, and specify an output path for the HF format model.
+
+   ```sh
+   # Example for a GRPO checkpoint at step 170
+   uv run python examples/convert_dcp_to_hf.py \
+   --config results/grpo/step_170/config.json \
+   --dcp-ckpt-path results/grpo/step_170/policy/weights/ \
+   --hf-ckpt-path results/grpo/hf
+   ```
+   *Note: Adjust the paths according to your training output directory structure.*
+
+2. **Run Evaluation on Converted Model:**
+   Once the conversion is complete, run the evaluation script, overriding the `generation.model_name` to point to the directory containing the converted HF model.
+
+   ```sh
+   # Example using the converted HF model from the previous step
+   uv run python examples/run_eval.py generation.model_name=$PWD/results/grpo/hf
+   ```
+
 ### Example Output
 ```