4 changes: 4 additions & 0 deletions examples/configs/eval.yaml
@@ -14,6 +14,10 @@ generation:
     gpu_memory_utilization: 0.9
     max_model_len: 2048
 
+tokenizer:
+  name: ${generation.model_name} ## specify if you'd like to use a tokenizer different from the model's default
+  chat_template: "default"
+
 data:
   max_input_seq_length: ${generation.vllm_cfg.max_model_len} # useless since we directly use prompts in evaluation
   prompt_file: null
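The new top-level `tokenizer` block resolves to the generation model's own tokenizer via interpolation, and `chat_template: "default"` keeps that tokenizer's built-in chat template. The unit test below annotates this mapping as `TokenizerConfig` from `nemo_reinforcer.models.policy`; a minimal sketch of what that shape could look like, inferred from the two keys used in this diff (the `TypedDict` form itself is an assumption):

```python
# Hypothetical sketch only; the real TokenizerConfig is defined in
# nemo_reinforcer.models.policy.
from typing import TypedDict

class TokenizerConfig(TypedDict, total=False):
    name: str           # HF model ID or local path
    chat_template: str  # "default" keeps the tokenizer's built-in template
```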
2 changes: 1 addition & 1 deletion examples/run_eval.py
@@ -114,7 +114,7 @@ def main():
     init_ray()
 
     # Setup tokenizer
-    tokenizer = get_tokenizer(config["generation"]["model_name"])
+    tokenizer = get_tokenizer(config["tokenizer"])
     config["generation"] = configure_generation_config(
         config["generation"], tokenizer, is_eval=True
     )
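`get_tokenizer` now receives the whole `config["tokenizer"]` mapping rather than a bare model name. A plausible sketch of what it might do with it, assuming it wraps `AutoTokenizer.from_pretrained` (the actual implementation lives in `nemo_reinforcer.algorithms.utils`):

```python
from transformers import AutoTokenizer

def get_tokenizer(tokenizer_cfg):
    # Load whichever tokenizer the config names; this may differ from
    # generation.model_name when tokenizer.name is overridden.
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_cfg["name"])
    # Assumed convention: "default" (or a missing key) keeps the
    # tokenizer's built-in chat template, anything else replaces it.
    template = tokenizer_cfg.get("chat_template", "default")
    if template != "default":
        tokenizer.chat_template = template
    return tokenizer
```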
3 changes: 0 additions & 3 deletions examples/run_grpo_math.py
@@ -114,15 +114,13 @@ def math_data_processor(
     solution = str(datum_dict["expected_answer"])
     extra_env_info = {"ground_truth": solution}
 
-    template = task_data_spec.custom_template
     message_log: LLMMessageLogType = []
 
     # system prompt
     if task_data_spec.system_prompt:
         sys_message = {"role": "system", "content": task_data_spec.system_prompt}
         message = tokenizer.apply_chat_template(
             [sys_message],
-            chat_template=template,
             tokenize=False,
             add_generation_prompt=False,
             add_special_tokens=False,
@@ -138,7 +136,6 @@ def math_data_processor(
     user_message = {"role": "user", "content": problem}
     message = tokenizer.apply_chat_template(
         [user_message],
-        chat_template=template,
         tokenize=False,
         add_generation_prompt=True,
         add_special_tokens=False,
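With the `chat_template=template` argument gone, `apply_chat_template` falls back to the tokenizer's own `chat_template` attribute (standard Hugging Face behavior), so the template is chosen once when the tokenizer is built instead of at every call site. For illustration:

```python
# No chat_template argument: the tokenizer's chat_template attribute,
# configured once via the new tokenizer config, drives the rendering.
text = tokenizer.apply_chat_template(
    [{"role": "user", "content": "What is 2 + 2?"}],
    tokenize=False,
    add_generation_prompt=True,
)
```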
61 changes: 61 additions & 0 deletions tests/unit/data/test_data_processor.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import pytest
+import sys
+from datasets import Dataset
+
+abspath = os.path.abspath(__file__)
+sys.path.append("/".join(abspath.split("/")[:-4]))
+
+from examples.run_grpo_math import math_data_processor
+from nemo_reinforcer.algorithms.utils import get_tokenizer
+from nemo_reinforcer.data.datasets import AllTaskProcessedDataset
+from nemo_reinforcer.data.interfaces import TaskDataSpec
+from nemo_reinforcer.models.policy import TokenizerConfig
+
+
+basic_tokenizer_test_config: TokenizerConfig = {
+    "name": "Qwen/Qwen2.5-Math-1.5B-Instruct",
+    "chat_template": "default",
+}
+
+
+def test_math_data_processor():
+    raw_dataset = Dataset.from_list(
+        [
+            {"problem": "problem1", "expected_answer": "answer1"},
+            {"problem": "problem2", "expected_answer": "answer2"},
+        ]
+    )
+
+    tokenizer = get_tokenizer(basic_tokenizer_test_config)
+
+    math_task_spec = TaskDataSpec(
+        task_name="math",
+        prompt_file=None,
+        system_prompt_file=None,
+    )
+
+    dataset = AllTaskProcessedDataset(
+        dataset=raw_dataset,
+        tokenizer=tokenizer,
+        default_task_data_spec=math_task_spec,
+        task_data_processors=math_data_processor,
+        max_seq_length=128,
+    )
+
+    assert dataset[0]["extra_env_info"]["ground_truth"] == "answer1"
+    assert dataset[1]["extra_env_info"]["ground_truth"] == "answer2"
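The assertions only cover `extra_env_info`, which is enough to show the processor ran end to end with the config-driven tokenizer. To run just this file (a typical invocation; exact flags depend on the project's pytest setup):

```python
# Equivalent to running `pytest -q tests/unit/data/test_data_processor.py`
# from the repository root.
import pytest

raise SystemExit(pytest.main(["-q", "tests/unit/data/test_data_processor.py"]))
```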