diff --git a/examples/configs/eval.yaml b/examples/configs/eval.yaml
index e319276094..da5420dc8a 100644
--- a/examples/configs/eval.yaml
+++ b/examples/configs/eval.yaml
@@ -14,6 +14,10 @@ generation:
     gpu_memory_utilization: 0.9
     max_model_len: 2048
 
+tokenizer:
+  name: ${generation.model_name} ## specify if you'd like to use a tokenizer different from the model's default
+  chat_template: "default"
+
 data:
   max_input_seq_length: ${generation.vllm_cfg.max_model_len} # useless since we directly use prompts in evaluation
   prompt_file: null
diff --git a/examples/run_eval.py b/examples/run_eval.py
index 90a7c23235..9c9cb8d9a3 100644
--- a/examples/run_eval.py
+++ b/examples/run_eval.py
@@ -114,7 +114,7 @@ def main():
     init_ray()
 
     # Setup tokenizer
-    tokenizer = get_tokenizer(config["generation"]["model_name"])
+    tokenizer = get_tokenizer(config["tokenizer"])
     config["generation"] = configure_generation_config(
         config["generation"], tokenizer, is_eval=True
     )
diff --git a/examples/run_grpo_math.py b/examples/run_grpo_math.py
index e147d167f9..7a686d6a9e 100644
--- a/examples/run_grpo_math.py
+++ b/examples/run_grpo_math.py
@@ -114,7 +114,6 @@ def math_data_processor(
     solution = str(datum_dict["expected_answer"])
 
     extra_env_info = {"ground_truth": solution}
-    template = task_data_spec.custom_template
 
     message_log: LLMMessageLogType = []
     # system prompt
@@ -122,7 +121,6 @@
         sys_message = {"role": "system", "content": task_data_spec.system_prompt}
         message = tokenizer.apply_chat_template(
             [sys_message],
-            chat_template=template,
             tokenize=False,
             add_generation_prompt=False,
             add_special_tokens=False,
@@ -138,7 +136,6 @@
     user_message = {"role": "user", "content": problem}
     message = tokenizer.apply_chat_template(
         [user_message],
-        chat_template=template,
         tokenize=False,
         add_generation_prompt=True,
         add_special_tokens=False,
diff --git a/tests/unit/data/test_data_processor.py b/tests/unit/data/test_data_processor.py
new file mode 100644
index 0000000000..2f32f117f0
--- /dev/null
+++ b/tests/unit/data/test_data_processor.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import pytest
+import sys
+from datasets import Dataset
+
+abspath = os.path.abspath(__file__)
+sys.path.append("/".join(abspath.split("/")[:-4]))
+
+from examples.run_grpo_math import math_data_processor
+from nemo_reinforcer.algorithms.utils import get_tokenizer
+from nemo_reinforcer.data.datasets import AllTaskProcessedDataset
+from nemo_reinforcer.data.interfaces import TaskDataSpec
+from nemo_reinforcer.models.policy import TokenizerConfig
+
+
+basic_tokenizer_test_config: TokenizerConfig = {
+    "name": "Qwen/Qwen2.5-Math-1.5B-Instruct",
+    "chat_template": "default",
+}
+
+
+def test_math_data_processor():
+    raw_dataset = Dataset.from_list(
+        [
+            {"problem": "problem1", "expected_answer": "answer1"},
+            {"problem": "problem2", "expected_answer": "answer2"},
+        ]
+    )
+
+    tokenizer = get_tokenizer(basic_tokenizer_test_config)
+
+    math_task_spec = TaskDataSpec(
+        task_name="math",
+        prompt_file=None,
+        system_prompt_file=None,
+    )
+
+    dataset = AllTaskProcessedDataset(
+        dataset=raw_dataset,
+        tokenizer=tokenizer,
+        default_task_data_spec=math_task_spec,
+        task_data_processors=math_data_processor,
+        max_seq_length=128,
+    )
+
+    assert dataset[0]["extra_env_info"]["ground_truth"] == "answer1"
+    assert dataset[1]["extra_env_info"]["ground_truth"] == "answer2"
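Reviewer note: a minimal sketch of the tokenizer-loading contract these changes assume. `get_tokenizer` now receives the whole `tokenizer` config section (a `TokenizerConfig` dict with `name` and `chat_template`) instead of a bare model-name string. The actual implementation lives in `nemo_reinforcer.algorithms.utils` and is not part of this diff; the `"default"` handling below is an assumption inferred from the yaml comment and the test config above.

```python
# Illustrative sketch only; not the real nemo_reinforcer implementation.
from transformers import AutoTokenizer, PreTrainedTokenizerBase


def get_tokenizer_sketch(tokenizer_config: dict) -> PreTrainedTokenizerBase:
    """Load a tokenizer from a TokenizerConfig-style dict.

    Assumed keys, matching examples/configs/eval.yaml above:
      name          -- HF tokenizer id; may differ from generation.model_name
      chat_template -- "default" keeps the tokenizer's built-in chat template;
                       any other value is assumed to be a Jinja template override
    """
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_config["name"])
    if tokenizer_config.get("chat_template", "default") != "default":
        # Bake the override into the tokenizer once, at load time.
        tokenizer.chat_template = tokenizer_config["chat_template"]
    return tokenizer
```

Under this contract, any template override is baked into the tokenizer when it is loaded, so the `apply_chat_template(...)` calls in `math_data_processor` no longer need an explicit `chat_template=` argument; that is what the removals in `examples/run_grpo_math.py` rely on.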