From c4f1ee23bb350c0fcec869f5f84b090d3fe48960 Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Wed, 23 Apr 2025 20:28:05 +0000 Subject: [PATCH 1/2] fix: fix broken eval script Signed-off-by: Parth Chadha --- nemo_reinforcer/evals/eval.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nemo_reinforcer/evals/eval.py b/nemo_reinforcer/evals/eval.py index a1a4cad74b..d0c27044d8 100644 --- a/nemo_reinforcer/evals/eval.py +++ b/nemo_reinforcer/evals/eval.py @@ -165,10 +165,10 @@ def run_env_eval(vllm_generation, dataloader, env, master_config): get_keys_from_message_log(batch["message_log"][i], ["role", "content"]) for i in range(len(batch["message_log"])) ] - _, _, rewards, _ = ray.get(env.step.remote(to_env, batch["extra_env_info"])) + env_return = ray.get(env.step.remote(to_env, batch["extra_env_info"])) - score += rewards.sum().item() - count += len(rewards) + score += env_return.rewards.sum().item() + count += len(env_return.rewards) # Cleanup before printing results ray.get(env.shutdown.remote()) From 94aaf332a2368ad0c42140c2baa6f2d3715afbae Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Wed, 23 Apr 2025 20:48:01 +0000 Subject: [PATCH 2/2] Update eval.md to reflect how to eval grpo/sft model Signed-off-by: Parth Chadha --- docs/guides/eval.md | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/docs/guides/eval.md b/docs/guides/eval.md index 8ac5ab5675..f547e19ff8 100644 --- a/docs/guides/eval.md +++ b/docs/guides/eval.md @@ -3,6 +3,11 @@ ## Start Evaluation ### Start Script + +**Evaluating Standard Models:** + +To run evaluation using a model directly from Hugging Face Hub or a local path already in HF format, use the `run_eval.py` script. 
```sh
# To run the evaluation with default config (examples/configs/eval.yaml)
uv run python examples/run_eval.py
@@ -10,10 +15,34 @@ uv run python examples/run_eval.py
# Specify a custom config file
uv run python examples/run_eval.py --config path/to/custom_config.yaml
-# Override specific config values via command line
+# Override specific config values via command line (e.g., model name)
uv run python examples/run_eval.py generation.model_name="Qwen/Qwen2.5-Math-7B-Instruct"
```
+**Evaluating Models Trained with DCP Checkpoints (GRPO/SFT):**
+
+If you have trained a model using GRPO or SFT and saved the checkpoint in the PyTorch DCP format, you first need to convert it to the Hugging Face format before running evaluation.
+
+1. **Convert DCP to HF:**
+   Use the `examples/convert_dcp_to_hf.py` script. You'll need the path to the training configuration file (`config.json`), the DCP checkpoint directory, and specify an output path for the HF format model.
+
+   ```sh
+   # Example for a GRPO checkpoint at step 170
+   uv run python examples/convert_dcp_to_hf.py \
+   --config results/grpo/step_170/config.json \
+   --dcp-ckpt-path results/grpo/step_170/policy/weights/ \
+   --hf-ckpt-path results/grpo/hf
+   ```
+   *Note: Adjust the paths according to your training output directory structure.*
+
+2. **Run Evaluation on Converted Model:**
+   Once the conversion is complete, run the evaluation script, overriding the `generation.model_name` to point to the directory containing the converted HF model.
+
+   ```sh
+   # Example using the converted HF model from the previous step
+   uv run python examples/run_eval.py generation.model_name=$PWD/results/grpo/hf
+   ```
+
 ### Example Output
 ```