From 3b2cb04ba9ca0acc54c08b0fb8834797369fa765 Mon Sep 17 00:00:00 2001 From: Yizhen Date: Sun, 23 Jun 2024 03:26:32 +0800 Subject: [PATCH 01/13] [Feature] reward model inferencer support --- examples/rm_inference.py | 61 +++++ examples/vllm_inference.py | 4 +- scripts/run_rm_inference.sh | 67 +++++ src/lmflow/args.py | 15 +- src/lmflow/datasets/dataset.py | 21 +- src/lmflow/models/hf_decoder_model.py | 6 +- src/lmflow/models/hf_model_mixin.py | 241 ++++++++---------- src/lmflow/models/hf_text_regression_model.py | 217 ++++++++++++---- src/lmflow/pipeline/auto_pipeline.py | 6 +- src/lmflow/pipeline/rm_inferencer.py | 205 +++++++++++++++ src/lmflow/pipeline/rm_tuner.py | 6 +- .../utils/memory_safe_vllm_inference.py | 4 +- .../tokenization/hf_text_regression_model.py | 71 ++++++ src/lmflow/utils/constants.py | 20 ++ 14 files changed, 747 insertions(+), 197 deletions(-) create mode 100644 examples/rm_inference.py create mode 100644 scripts/run_rm_inference.sh create mode 100644 src/lmflow/pipeline/rm_inferencer.py diff --git a/examples/rm_inference.py b/examples/rm_inference.py new file mode 100644 index 000000000..0f8acf7ce --- /dev/null +++ b/examples/rm_inference.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved. 
+import logging +import os +import sys + +from transformers import ( + HfArgumentParser +) + +from lmflow.datasets import Dataset +from lmflow.models.auto_model import AutoModel +from lmflow.pipeline.auto_pipeline import AutoPipeline +from lmflow.args import ( + ModelArguments, + DatasetArguments, + AutoArguments, +) + + +logger = logging.getLogger(__name__) + + +def main(): + # Parses arguments + pipeline_name = "rm_inferencer" + PipelineArguments = AutoArguments.get_pipeline_args_class(pipeline_name) + + parser = HfArgumentParser(( + ModelArguments, + DatasetArguments, + PipelineArguments + )) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, pipeline_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, pipeline_args = parser.parse_args_into_dataclasses() + + dataset = Dataset(data_args) + model = AutoModel.get_model(model_args, tune_strategy='none', use_accelerator=pipeline_args.use_accelerator) + inferencer = AutoPipeline.get_pipeline( + pipeline_name=pipeline_name, + model_args=model_args, + data_args=data_args, + pipeline_args=pipeline_args + ) + + res = inferencer.inference( + model, + dataset, + ) + + if pipeline_args.save_results: + res.save(pipeline_args.results_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/vllm_inference.py b/examples/vllm_inference.py index 83f89b008..f9d329cb5 100644 --- a/examples/vllm_inference.py +++ b/examples/vllm_inference.py @@ -10,7 +10,7 @@ ) from lmflow.datasets import Dataset -from lmflow.models.hf_decoder_model import HFDecoderModel +from lmflow.models.auto_model import AutoModel from lmflow.pipeline.auto_pipeline import AutoPipeline from lmflow.args import ( ModelArguments, @@ -40,7 +40,7 @@ def main(): model_args, data_args, pipeline_args = 
parser.parse_args_into_dataclasses() dataset = Dataset(data_args) - model = HFDecoderModel(model_args) + model = AutoModel.get_model(model_args, tune_strategy='none') inferencer = AutoPipeline.get_pipeline( pipeline_name=pipeline_name, model_args=model_args, diff --git a/scripts/run_rm_inference.sh b/scripts/run_rm_inference.sh new file mode 100644 index 000000000..40a8c87af --- /dev/null +++ b/scripts/run_rm_inference.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved. + +# Parses arguments +run_name=rm_inference +# model_name_or_path=sfairXC/FsfairX-LLaMA3-RM-v0.1 +model_name_or_path=/vol/yizhenjia/projs/RLHFlow-fox/models/rm/sfairXC-FsfairX-LLaMA3-RM-v0.1 +dataset_path=data/alpaca/test +output_dir=data/rm_inference_results +output_file_name=results.json + +# Safety related arguments +trust_remote_code=0 + +while [[ $# -ge 1 ]]; do + key="$1" + case ${key} in + -r|--run_name) + run_name="$2" + shift + ;; + -m|--model_name_or_path) + model_name_or_path="$2" + shift + ;; + -d|--dataset_path) + dataset_path="$2" + shift + ;; + --output_dir) + output_dir="$2" + shift + ;; + --output_file_name) + output_file_name="$2" + shift + ;; + --trust_remote_code) + trust_remote_code="$2" + shift + ;; + *) + echo "error: unknown option \"${key}\"" 1>&2 + exit 1 + esac + shift +done + +# inference +project_dir=$(cd "$(dirname $0)"/..; pwd) +log_dir=${project_dir}/log/${run_name} +output_file_path=${output_dir}/${run_name}/${output_file_name} +mkdir -p ${output_dir}/${run_name} ${log_dir} + +accelerate launch --config_file configs/accelerator_multigpu_config.yaml \ + examples/rm_inference.py \ + --trust_remote_code ${trust_remote_code} \ + --model_name_or_path ${model_name_or_path} \ + --arch_type text_regression \ + --use_accelerator True \ + --block_size 4096 \ + --inference_batch_size 16 \ + --dataset_path ${dataset_path} \ + --preprocessing_num_workers 16 \ + --save_results True \ + --results_path 
${output_file_path} \ + 2>&1 | tee ${log_dir}/rm_inference.log \ No newline at end of file diff --git a/src/lmflow/args.py b/src/lmflow/args.py index 56d8d43e3..e3a7efe10 100644 --- a/src/lmflow/args.py +++ b/src/lmflow/args.py @@ -648,7 +648,7 @@ class FinetunerArguments(TrainingArguments): @dataclass -class RewardModelingArguments(FinetunerArguments): +class RewardModelTunerArguments(FinetunerArguments): """ Arguments for reward modeling. """ @@ -825,18 +825,15 @@ class InferencerArguments: local_rank : str For distributed training: local_rank - random_seed : int, default = 1 - + inference_batch_size : int, default = 1 deepspeed : Enable deepspeed and pass the path to deepspeed json config file (e.g. ds_config.json) or an already loaded json file as a dict mixed_precision : str, choice from ["bf16","fp16"]. mixed precision mode, whether to use bf16 or fp16 - temperature : float An argument of model.generate in huggingface to control the diversity of generation. - repetition_penalty : float An argument of model.generate in huggingface to penalize repetitions. 
use_beam_search : Optional[bool] @@ -882,7 +879,10 @@ class InferencerArguments: metadata={"help": "For distributed training: local_rank" }, ) - + inference_batch_size: int = field( + default=1, + metadata={"help": "batch size for inference"}, + ) temperature: float = field( default=0.0, metadata={"help": "Temperature during inference."}, @@ -1251,9 +1251,10 @@ class IterativeAlignerArguments(InferencerArguments): "evaluator": EvaluatorArguments, "inferencer": InferencerArguments, "vllm_inferencer": InferencerArguments, + "rm_inferencer": InferencerArguments, "raft_aligner": RaftAlignerArguments, "dpo_aligner": DPOAlignerArguments, - "rm_tuner": RewardModelingArguments, + "rm_tuner": RewardModelTunerArguments, } diff --git a/src/lmflow/datasets/dataset.py b/src/lmflow/datasets/dataset.py index 826217f48..952fd9439 100644 --- a/src/lmflow/datasets/dataset.py +++ b/src/lmflow/datasets/dataset.py @@ -12,6 +12,7 @@ # Importing necessary libraries and modules import copy import json +from pathlib import Path from cmath import e from pathlib import Path @@ -24,6 +25,7 @@ from lmflow.utils.constants import ( DATASET_DESCRIPTION_MAP, TEXT_ONLY_DATASET_DESCRIPTION, + SCORED_TEXT_ONLY_DATASET_DESCRIPTION, TEXT2TEXT_DATASET_DESCRIPTION, FLOAT_ONLY_DATASET_DESCRIPTION, INSTANCE_FIELDS_MAP, @@ -42,6 +44,7 @@ KEY_TYPE = "type" KEY_INSTANCES = "instances" +KEY_SCORES = "score" class Dataset: r""" @@ -236,7 +239,7 @@ def from_dict(self, dict_obj: dict, *args, **kwargs): f' {list(fields)}: should be {list(correct_fields)}.\n' f'The bad instance triggers the error, the {i}-th instance:\n' f' {instance}' - ) + ) try: hf_dict = {} @@ -427,4 +430,18 @@ def get_type(self): self.type """ - return self.type \ No newline at end of file + return self.type + + + def save(self, file_path: str): + r""" + Save the dataset to a json file. + + Parameters + ------------ + file_path : str. + The path to the file where the dataset will be saved. 
+ """ + assert Path(file_path).suffix == ".json", "The file path must have a .json extension." + with open(file_path, "w") as fout: + json.dump(self.to_dict(), fout, indent=2) \ No newline at end of file diff --git a/src/lmflow/models/hf_decoder_model.py b/src/lmflow/models/hf_decoder_model.py index ee4e94ff5..d987bfb8e 100644 --- a/src/lmflow/models/hf_decoder_model.py +++ b/src/lmflow/models/hf_decoder_model.py @@ -333,7 +333,7 @@ def decode(self, input, *args, **kwargs ) -> Union[str, List[str]]: else: # Can be list of ints or a Tensor return self.tokenizer.decode(input, *args, **kwargs) - + def inference( self, @@ -380,7 +380,7 @@ def inference( return res - def __inference(self, inputs, use_accelerator=False, *args, **kwargs): + def __inference(self, inputs, *args, **kwargs): """ Perform generation process of the model. @@ -401,7 +401,7 @@ def __inference(self, inputs, use_accelerator=False, *args, **kwargs): The generated sequence output """ with torch.no_grad(): - if use_accelerator: + if self.use_accelerator: outputs = self.backend_model.generate( input_ids=inputs, pad_token_id=self.tokenizer.pad_token_id, diff --git a/src/lmflow/models/hf_model_mixin.py b/src/lmflow/models/hf_model_mixin.py index c01e916da..1dce5c57a 100644 --- a/src/lmflow/models/hf_model_mixin.py +++ b/src/lmflow/models/hf_model_mixin.py @@ -4,7 +4,8 @@ import gc import os import logging -from typing import Union, Optional, Dict +from typing import Union, Optional, Dict, List +import copy import torch import deepspeed @@ -26,7 +27,7 @@ prepare_model_for_kbit_training ) from peft.utils.constants import TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING -from vllm import LLM +from vllm import LLM, SamplingParams from vllm.distributed.parallel_state import destroy_model_parallel from lmflow.models.base_model import BaseModel @@ -94,6 +95,7 @@ def __init__( self.hf_auto_model = HF_AUTOMODEL_MAPPING[model_args.arch_type] self.use_accelerator = use_accelerator self.ds_config = ds_config + 
self.do_train = do_train self.tokenizer = self.__prepare_tokenizer(model_args) self.torch_dtype = self.__prepare_dtype(model_args) @@ -105,22 +107,23 @@ def __init__( # Some implementations require custom modules to be injected into the model. self.__model_module_inject(model_args) - if do_train: + if self.do_train: self.__prepare_model_for_training(model_args, self.hf_auto_model) - - # some post processing - if self.tokenizer.eos_token_id is None: - self.tokenizer.eos_token_id = self.backend_model.config.eos_token_id - if self.tokenizer.pad_token_id is None: - self.tokenizer.pad_token_id = self.tokenizer.eos_token_id - if self.backend_model.config.pad_token_id is None: - self.backend_model.config.pad_token_id = self.tokenizer.pad_token_id - + def __prepare_tokenizer( self, model_args: ModelArguments, ) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]: + tokenizer_name = model_args.tokenizer_name or model_args.model_name_or_path + if not tokenizer_name: + raise ValueError( + "You are instantiating a new tokenizer from scratch. This is" + " not supported by this script. You can do it from another" + " script, save it, and load it from here, using" + " --tokenizer_name." + ) + tokenizer_kwargs = { "cache_dir": model_args.cache_dir, "use_fast": model_args.use_fast_tokenizer, @@ -132,39 +135,15 @@ def __prepare_tokenizer( tokenizer_kwargs["padding_side"] = model_args.padding_side try: - if model_args.tokenizer_name: - tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs) - elif model_args.model_name_or_path: - tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs) - else: - raise ValueError( - "You are instantiating a new tokenizer from scratch. This is" - " not supported by this script. You can do it from another" - " script, save it, and load it from here, using" - " --tokenizer_name." 
- ) - + tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, **tokenizer_kwargs) except RecursionError: logger.warning( "The tokenizer_config.json file doesn't set the special tokens. Using default values: " ", , for unknown token, bos token and eos token respectively.") - if model_args.tokenizer_name: - tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, unk_token="", - bos_token="", - eos_token="", - **tokenizer_kwargs) - elif model_args.model_name_or_path: - tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, unk_token="", - bos_token="", - eos_token="", - **tokenizer_kwargs) - else: - raise ValueError( - "You are instantiating a new tokenizer from scratch. This is" - " not supported by this script. You can do it from another" - " script, save it, and load it from here, using" - " --tokenizer_name." - ) + tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, unk_token="", + bos_token="", + eos_token="", + **tokenizer_kwargs) tokenizer.truncation_side = model_args.truncation_side or tokenizer.truncation_side tokenizer.model_max_length = model_args.model_max_length or tokenizer.model_max_length @@ -249,19 +228,25 @@ def __prepare_quant_config( model_args: ModelArguments, ): quant_config = None - if model_args.use_qlora: - quant_config = BitsAndBytesConfig( - load_in_4bit=model_args.bits == 4, - load_in_8bit=model_args.bits == 8, - llm_int8_threshold=6.0, - llm_int8_has_fp16_weight=False, - bnb_4bit_compute_dtype=self.torch_dtype, - bnb_4bit_use_double_quant=model_args.double_quant, - bnb_4bit_quant_type=model_args.quant_type, - ) - + if self.do_train: + if model_args.use_qlora: + quant_config = BitsAndBytesConfig( + load_in_4bit=model_args.bits == 4, + load_in_8bit=model_args.bits == 8, + llm_int8_threshold=6.0, + llm_int8_has_fp16_weight=False, + bnb_4bit_compute_dtype=self.torch_dtype, + bnb_4bit_use_double_quant=model_args.double_quant, + bnb_4bit_quant_type=model_args.quant_type, + ) + else: # inference + if 
model_args.use_int8: + quant_config = BitsAndBytesConfig( + load_in_8bit = model_args.use_int8, + ) + return quant_config - + def __prepare_peft_config( self, @@ -318,8 +303,9 @@ def __prepare_model_for_training( model_args: ModelArguments, hf_auto_model: HF_AUTOMODEL_TYPE, ): + assert self.do_train, "To prepare the model for training, set do_train=True." # TODO: change to accelerate - logger.info("Preparing model for training") + logger.warning("Preparing model for training") if model_args.model_name_or_path: model = hf_auto_model.from_pretrained( model_args.model_name_or_path, @@ -351,6 +337,7 @@ def __prepare_model_for_training( model.resize_token_embeddings(len(self.tokenizer)) self.backend_model = model + self.__prepare_model_post_process() def __prepare_model_for_inference( @@ -360,85 +347,68 @@ def __prepare_model_for_inference( use_accelerator: bool, ds_config ): - if not hasattr(self, "backend_model"): - # TODO: change to accelerate - logger.info("Preparing model for inference") - if use_accelerator: - peft_model_id = model_args.lora_model_path - self.backend_model = hf_auto_model.from_pretrained( - model_args.model_name_or_path, - config=self.hf_model_config, - device_map="auto", - offload_folder="offload", - offload_state_dict=True, - load_in_8bit = model_args.use_int8, - ) - if peft_model_id is not None: - self.backend_model = PeftModel.from_pretrained( - self.backend_model, - peft_model_id, - ) - else: - from transformers.integrations import HfDeepSpeedConfig - dschf = HfDeepSpeedConfig(ds_config) - peft_model_id = model_args.lora_model_path - # NOTE: Currently offload is not supported by llama - if self.hf_model_config.model_type == "llama" and model_args.use_ram_optimized_load: - logger.warning( - "llama does not support RAM optimized load. Automatically" - " use original load instead." 
- ) - model_args.use_ram_optimized_load = False - - if model_args.use_ram_optimized_load and peft_model_id is None: - try: - # RAM-optimized load - self.backend_model = hf_auto_model.from_pretrained( - model_args.model_name_or_path, - config=self.hf_model_config, - device_map="auto", - offload_folder="offload", - offload_state_dict=True, - ) - except: - logger.warning( - "Failed to use RAM optimized load. Automatically" - " use original load instead." - ) - # Normal load - self.backend_model = hf_auto_model.from_pretrained( - model_args.model_name_or_path, - config=self.hf_model_config, - ) - else: - if peft_model_id is not None: - logger.warning( - "LoRA does not support RAM optimized load currently." - " Automatically use original load instead." - ) - self.backend_model = hf_auto_model.from_pretrained( - model_args.model_name_or_path, - config=self.hf_model_config, - ) - - self.backend_model_full = self.backend_model - if peft_model_id is not None: - self.backend_model = PeftModel.from_pretrained( - self.backend_model, peft_model_id - ) - - if self.device == "gpu": - deepspeed.init_distributed() - self.ds_engine = deepspeed.initialize(model=self.backend_model, config_params=ds_config)[0] - self.ds_engine.module.eval() - - # backend model already initialized - else: + logger.info(f"Backend model already initialized, moving to device: {self.device}") + if hasattr(self, "backend_model"): if self.backend_model.device == torch.device("cpu"): self.backend_model.to(self.device) - else: - return - + return + + # TODO: change to accelerate + logger.info("Preparing model for inference") + inference_load_kwargs = {} + inference_load_kwargs_bak = copy.deepcopy(inference_load_kwargs) + ram_optimized_load_kwargs = { + "device_map": "auto", + "offload_folder": "offload", + "offload_state_dict": True, + } + + if model_args.lora_model_path is not None: + logger.warning( + "LoRA does not support RAM optimized load currently. Using original load." 
+ ) + model_args.use_ram_optimized_load = False + + if use_accelerator or model_args.use_ram_optimized_load: + inference_load_kwargs.update(ram_optimized_load_kwargs) + + if not use_accelerator: + from transformers.integrations import HfDeepSpeedConfig + dschf = HfDeepSpeedConfig(ds_config) + + try: + self.backend_model = hf_auto_model.from_pretrained( + model_args.model_name_or_path, + config=self.hf_model_config, + quantization_config=self.quant_config, + **inference_load_kwargs, + ) + except: + logger.warning( + "Failed to use RAM optimized load. Using original load instead." + ) + self.backend_model = hf_auto_model.from_pretrained( + model_args.model_name_or_path, + config=self.hf_model_config, + quantization_config=self.quant_config, + **inference_load_kwargs_bak, + ) + + self.backend_model_full = self.backend_model + + if model_args.lora_model_path is not None: + self.backend_model = PeftModel.from_pretrained( + self.backend_model, + model_args.lora_model_path, + ) + + if (not use_accelerator) and self.device == "gpu": + deepspeed.init_distributed() + self.ds_engine = deepspeed.initialize(model=self.backend_model, config_params=ds_config)[0] + self.ds_engine.module.eval() + + self.__prepare_model_post_process() + def __prepare_model_for_vllm_inference( self, @@ -456,6 +426,15 @@ def __prepare_model_for_vllm_inference( ) + def __prepare_model_post_process(self): + if self.tokenizer.eos_token_id is None: + self.tokenizer.eos_token_id = self.backend_model.config.eos_token_id + if self.tokenizer.pad_token_id is None: + self.tokenizer.pad_token_id = self.tokenizer.eos_token_id + if self.backend_model.config.pad_token_id is None: + self.backend_model.config.pad_token_id = self.tokenizer.pad_token_id + + def activate_model_for_inference( self, use_vllm: bool=False, diff --git a/src/lmflow/models/hf_text_regression_model.py b/src/lmflow/models/hf_text_regression_model.py index 4bce86306..a6bdf3305 100644 --- a/src/lmflow/models/hf_text_regression_model.py +++ 
b/src/lmflow/models/hf_text_regression_model.py @@ -5,6 +5,7 @@ import hashlib import logging from pathlib import Path +from typing import List, Union, Dict, Optional import torch import deepspeed @@ -17,27 +18,25 @@ get_peft_model, prepare_model_for_kbit_training ) -from transformers import ( - BitsAndBytesConfig, - CONFIG_MAPPING, - AutoConfig, - AutoTokenizer, - AutoModelForSequenceClassification, -) -from transformers.deepspeed import HfDeepSpeedConfig -from transformers.testing_utils import CaptureLogger +from transformers.modeling_outputs import SequenceClassifierOutputWithPast +from vllm import SamplingParams +from lmflow.args import ModelArguments from lmflow.datasets import Dataset from lmflow.models.interfaces.tunable import Tunable from lmflow.models.hf_model_mixin import HFModelMixin from lmflow.models.text_regression_model import TextRegressionModel -from lmflow.tokenization.hf_text_regression_model import paired_conversation_tokenize_function, tokenize_function +from lmflow.tokenization.hf_text_regression_model import ( + paired_conversation_tokenize_function, + conversation_tokenize_function, + tokenize_function, +) from lmflow.utils.conversation_template import PRESET_TEMPLATES from lmflow.utils.constants import ( PAIRED_CONVERSATION_DATASET_DESCRIPTION, TEXT2TEXT_DATASET_DESCRIPTION, TEXT_ONLY_DATASET_DESCRIPTION, - CONVERSATION_ROLE_NAMES, + CONVERSATION_DATASET_DESCRIPTION, ) @@ -69,8 +68,8 @@ class HFTextRegressionModel(TextRegressionModel, HFModelMixin, Tunable): def __init__( self, - model_args, - tune_strategy='normal', + model_args: ModelArguments, + tune_strategy: str='normal', ds_config=None, device="gpu", use_accelerator=False, @@ -83,6 +82,10 @@ def __init__( :param tune_strategy: tuning strategy: normal, none, lora or adapter :param ds_config: deepspeed configuration for distributed training """ + assert model_args.arch_type == "text_regression", ( + f"Invalid model architecture type: {model_args.arch_type}. 
" + f"Expected: text_regression" + ) config_additional_args = {"num_labels": 1} HFModelMixin.__init__( self, @@ -138,6 +141,11 @@ def tokenize( hf_raw_datasets = dataset.get_backend_dataset() column_names = list(hf_raw_datasets.features) # in paired conversation, for example, would be 'chosen' and 'rejected' data_args = raw_datasets.get_data_args() + + # Whether to truncate long sequences to fit into max_length + use_truncation = False + if model_args.use_lora or data_args.disable_group_texts: + use_truncation = True # Requires three types of information for tokenizing different datasets # 1) Which fields require tokenization, e.g. @@ -149,16 +157,32 @@ def tokenize( # 3) Which fields require loss in final computation, e.g. # "text_only": "text" # "text2text": "output" only - tokenized_column_order = None # Handles 1) and 2) - label_columns = None # Handles 3) + tokenize_fn = None + tokenize_fn_kwargs = { + "data_args": data_args, + "tokenizer": self.tokenizer, + "column_names": column_names, + } if dataset_type == "text_only": - tokenized_column_order = ["text"] - label_columns = ["text"] + tokenize_fn = tokenize_function + tokenize_fn_kwargs["tokenized_column_order"] = ["text"] + tokenize_fn_kwargs["label_columns"] = ["text"] + tokenize_fn_kwargs["add_special_tokens"] = add_special_tokens + tokenize_fn_kwargs["use_truncation"] = use_truncation + elif dataset_type == "text2text": - tokenized_column_order = ["input", "output"] - label_columns = ["output"] - add_special_tokens = False - elif dataset_type == "paired_conversation": + tokenize_fn = tokenize_function + tokenize_fn_kwargs["tokenized_column_order"] = ["input", "output"] + tokenize_fn_kwargs["label_columns"] = ["output"] + tokenize_fn_kwargs["add_special_tokens"] = False + tokenize_fn_kwargs["use_truncation"] = use_truncation + + elif dataset_type in ["conversation", "paired_conversation"]: + if dataset_type == "conversation": + tokenize_fn = conversation_tokenize_function + elif dataset_type == 
"paired_conversation": + tokenize_fn = paired_conversation_tokenize_function + if data_args.conversation_template: if data_args.conversation_template in PRESET_TEMPLATES.keys(): conversation_template = PRESET_TEMPLATES[data_args.conversation_template] @@ -169,36 +193,19 @@ def tokenize( else: logger.warning("No conversation template provided. Using default template.") conversation_template = PRESET_TEMPLATES['empty'] - + tokenize_fn_kwargs["conversation_template"] = conversation_template logger.warning(f"Conversation template: {conversation_template}") + else: raise NotImplementedError( f"Dataset type \"{dataset_type}\" is not supported, currently" " only support following data types for HFTextRegressionModel:\n" - f" 1) {TEXT_ONLY_DATASET_DESCRIPTION}\n" - f" 2) {TEXT2TEXT_DATASET_DESCRIPTION}\n" - f" 3) {PAIRED_CONVERSATION_DATASET_DESCRIPTION}\n" + f" 1) [Inference]{TEXT_ONLY_DATASET_DESCRIPTION}\n" + f" 2) [Inference]{TEXT2TEXT_DATASET_DESCRIPTION}\n" + f" 3) [Training]{PAIRED_CONVERSATION_DATASET_DESCRIPTION}\n" + f" 4) [Inference]{CONVERSATION_DATASET_DESCRIPTION}\n" ) - - # Whether to truncate long sequences to fit into max_length - use_truncation = False - if model_args.use_lora or data_args.disable_group_texts: - use_truncation = True - - tokenize_fn = paired_conversation_tokenize_function if "conversation" in dataset_type else tokenize_function - tokenize_fn_kwargs = { - "data_args": data_args, - "tokenizer": self.tokenizer, - "column_names": column_names, - } - if "conversation" in dataset_type: - tokenize_fn_kwargs["conversation_template"] = conversation_template - else: - tokenize_fn_kwargs["label_columns"] = label_columns - tokenize_fn_kwargs["tokenized_column_order"] = tokenized_column_order - tokenize_fn_kwargs["add_special_tokens"] = add_special_tokens - tokenize_fn_kwargs["use_truncation"] = use_truncation - + tokenize_kwargs = {} if not data_args.streaming: fingerprint = hashlib.md5( @@ -226,8 +233,128 @@ def tokenize( **tokenize_kwargs ) return 
tokenized_datasets + + + def inference( + self, + inputs, + release_gpu: bool = False, + use_vllm: bool = False, + **kwargs + ) -> Union[List[float], SequenceClassifierOutputWithPast]: + """ + Perform generation process of the model. + + Parameters + ------------ + inputs : + The sequence used as a prompt for the generation or as model inputs to the model. + When using vllm inference, this should be a string or a list of strings. + When using normal inference, this should be a tensor. + release_gpu : bool, optional + Whether to release the GPU resource after inference, by default False. + use_vllm : bool, optional + Whether to use VLLM for inference, by default False. + kwargs : Optional. + Keyword arguments. + + Returns + ------------ + outputs : + The generated sequence output + """ + if use_vllm: + logger.warning( + "VLLM inference is not supported for text regression model, using normal inference instead." + ) + use_vllm = False + + if not self._activated: + self.activate_model_for_inference( + use_vllm=use_vllm, + **kwargs, + ) + + if use_vllm: + res = self.__vllm_inference(inputs, **kwargs) + else: + res = self.__inference(inputs, **kwargs) + if release_gpu: + self.deactivate_model_for_inference(use_vllm=use_vllm) + return res + + + def __inference( + self, + inputs, + **kwargs + ): + """ + Perform generation process of the model. + + Parameters + ------------ + inputs : + The **tokenized** sequence used as a prompt for the generation or as model inputs to the model. + kwargs : Optional. + Keyword arguments. 
+ + Returns + ------------ + outputs : + The generated sequence output + """ + with torch.no_grad(): + if self.use_accelerator: + outputs = self.backend_model( + input_ids=inputs, + **kwargs, + ) + else: + if self.device == "gpu": + outputs = self.ds_engine.module( + input_ids=inputs, + synced_gpus=True, + **kwargs, + ) + elif self.device == "cpu": + outputs = self.backend_model( + input_ids=inputs, + synced_gpus=True, + **kwargs, + ) + else: + raise NotImplementedError( + f"device \"{self.device}\" is not supported" + ) + return outputs + + + def __vllm_inference( + self, + inputs: Union[str, List[str]], + sampling_params: Optional[SamplingParams] = None, + **kwargs, + ) -> Union[List[List[str]], List[List[List[int]]]]: + """Perform VLLM inference process of the model. + + Parameters + ---------- + inputs : Union[str, List[str]] + Prompt(s), string or a list of strings. + sampling_params : Optional[SamplingParams], optional + vllm SamplingParams object, by default None. + + Returns + ------- + """ + raise NotImplementedError( + "VLLM inference is not supported for text regression model." + ) + + def save(self, dir, *args, **kwargs): """ Perform generation process of the model. 
diff --git a/src/lmflow/pipeline/auto_pipeline.py b/src/lmflow/pipeline/auto_pipeline.py index a4c053cf6..a5e815636 100644 --- a/src/lmflow/pipeline/auto_pipeline.py +++ b/src/lmflow/pipeline/auto_pipeline.py @@ -19,14 +19,16 @@ def is_package_version_at_least(package_name, min_version): from lmflow.pipeline.inferencer import Inferencer from lmflow.pipeline.vllm_inferencer import VLLMInferencer from lmflow.pipeline.dpo_aligner import DPOAligner -from lmflow.pipeline.rm_tuner import RewardModelingTuner +from lmflow.pipeline.rm_tuner import RewardModelTuner +from lmflow.pipeline.rm_inferencer import RewardModelInferencer PIPELINE_MAPPING = { "evaluator": Evaluator, "finetuner": Finetuner, "inferencer": Inferencer, "vllm_inferencer": VLLMInferencer, + "rm_inferencer": RewardModelInferencer, "dpo_aligner": DPOAligner, - "rm_tuner": RewardModelingTuner, + "rm_tuner": RewardModelTuner, } if not is_package_version_at_least('transformers', '4.35.0'): diff --git a/src/lmflow/pipeline/rm_inferencer.py b/src/lmflow/pipeline/rm_inferencer.py new file mode 100644 index 000000000..b5a3d81e1 --- /dev/null +++ b/src/lmflow/pipeline/rm_inferencer.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved. 
+import copy +import os +import torch +import wandb +import deepspeed +import sys +import numpy as np +import datetime +import json +import time +import logging +from typing import Dict, List, Union + +from accelerate import Accelerator +import torch +from tqdm import tqdm +from transformers import AutoConfig +from transformers.modeling_outputs import SequenceClassifierOutputWithPast +import torch.distributed as dist +import torch.nn.functional as F + +from lmflow.args import ( + DatasetArguments, + ModelArguments, + InferencerArguments, +) +from lmflow.datasets.dataset import Dataset +from lmflow.models.hf_text_regression_model import HFTextRegressionModel +from lmflow.pipeline.base_pipeline import BasePipeline +from lmflow.utils.data_utils import ( + set_random_seed, + batchlize +) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warnings about parallelism in tokenizers +logger = logging.getLogger(__name__) + + +class RewardModelInferencer(BasePipeline): + """ + Initializes the `Inferencer` class with given arguments. + + Parameters + ------------ + model_args : ModelArguments object. + Contains the arguments required to load the model. + + data_args : DatasetArguments object. + Contains the arguments required to load the dataset. + + inferencer_args : InferencerArguments object. + Contains the arguments required to perform inference. 
+ """ + def __init__( + self, + model_args: ModelArguments, + data_args: DatasetArguments, + inferencer_args: InferencerArguments, + ): + self.data_args = data_args + self.inferencer_args = inferencer_args + self.model_args = model_args + + set_random_seed(self.inferencer_args.random_seed) + + self.local_rank = int(os.getenv("LOCAL_RANK", "0")) + self.world_size = int(os.getenv("WORLD_SIZE", "1")) + if inferencer_args.device == "gpu": + torch.cuda.set_device(self.local_rank) # NOTE: cpu-only machine will have error + deepspeed.init_distributed() + else: + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "15000" + dist.init_process_group( + "gloo", rank=self.local_rank, world_size=self.world_size + ) + + if inferencer_args.use_accelerator: + self.accelerator = Accelerator() + self.accelerator.wait_for_everyone() + + + def inference( + self, + model: HFTextRegressionModel, + dataset: Dataset, + transform_dataset_in_place: bool=True, + use_vllm: bool = False, + ) -> Dataset: + if use_vllm: + logger.warning("VLLM doesn't support reward model inference, using normal inference instead.") + use_vllm = False + + assert isinstance(model, HFTextRegressionModel), "model should be HFTextRegressionModel" + if not transform_dataset_in_place: + dataset = copy.deepcopy(dataset) + output_dict = { + "type": f"scored_{dataset.get_type()}", + "instances": dataset.to_dict()["instances"], + } + + if use_vllm: + scores = self.__vllm_inference(model, dataset) + else: + scores = self.__inference(model, dataset) + + for i, score in enumerate(scores): + output_dict["instances"][i]["score"] = score + + output_dataset_args = copy.deepcopy(self.data_args) + output_dataset_args.dataset_path = None + output_dataset_args.dataset_name = f"scored_{output_dataset_args.dataset_name}" + output_dataset = Dataset(output_dataset_args) + output_dataset = output_dataset.from_dict(output_dict) + + return output_dataset + + + def __inference( + self, + model: HFTextRegressionModel, + 
dataset: Dataset, + ) -> List[float]: + tokenized_dataset = model.tokenize(dataset) + dataloader, _ = self.create_dataloader( + dataset=tokenized_dataset, + batch_size=self.inferencer_args.inference_batch_size, + random_shuffle=False, # no need to shuffle when inference + ) + num_batches = len(dataloader) + final_output = [] + + for batch_index, batched_input_ids in tqdm( + iterable=enumerate(dataloader), + total=num_batches, + desc="Inference", + unit="batch" + ): + # len(batch) = batch_size, and batch element is dataset sample + model_input = torch.LongTensor(batched_input_ids).to("cpu" if model.device == "cpu" else "cuda") + if self.inferencer_args.use_accelerator: + with self.accelerator.autocast(): + batch_output = model.inference( + inputs=model_input, + use_vllm=False, + ) + else: + batch_output = model.inference( + inputs=model_input, + use_vllm=False, + ) + + batch_output = self.__post_process_model_output(batch_output) + final_output.extend(batch_output) + + return final_output + + + def __vllm_inference( + self, + model: HFTextRegressionModel, + dataset: Dataset, + ) -> List[float]: + raise NotImplementedError("VLLM inference for reward model is not implemented yet.") + + + def __post_process_model_output( + self, + model_output: SequenceClassifierOutputWithPast, + ) -> List[float]: + final_output = model_output.logits.to("cpu").reshape(-1).tolist() + + return final_output + + + def create_dataloader( + self, + dataset: Dataset, + batch_size: int = 1, + random_shuffle: bool = False, + ): + r"""Batchlize dataset and format it to dataloader. 
+ + Args: + dataset (Dataset): the dataset object + + Output: + dataloader (batchlize): the dataloader object + dataset_size (int): the length of the dataset + + """ + inputs = dataset.get_backend_dataset()["input_ids"] # this comes from lmflow model.tokenize(dataset) + dataset_size = len(inputs) + + dataloader = batchlize( + inputs, + batch_size=batch_size, + random_shuffle=random_shuffle, + ) + return dataloader, dataset_size + \ No newline at end of file diff --git a/src/lmflow/pipeline/rm_tuner.py b/src/lmflow/pipeline/rm_tuner.py index 2997aba2a..06b2094f1 100644 --- a/src/lmflow/pipeline/rm_tuner.py +++ b/src/lmflow/pipeline/rm_tuner.py @@ -23,8 +23,8 @@ logger = logging.getLogger(__name__) -class RewardModelingTuner(Finetuner): - """Initializes the `RewardModelingTuner` class. +class RewardModelTuner(Finetuner): + """Initializes the `RewardModelTuner` class. Parameters ---------- @@ -34,7 +34,7 @@ class RewardModelingTuner(Finetuner): data_args : DatasetArguments object. Contains the arguments required to load the dataset. - finetuner_args : RewardModelingArguments object. + finetuner_args : RewardModelTunerArguments object. Contains the arguments required to perform finetuning. args : Optional. 
diff --git a/src/lmflow/pipeline/utils/memory_safe_vllm_inference.py b/src/lmflow/pipeline/utils/memory_safe_vllm_inference.py index 3502d13e2..86f765acb 100644 --- a/src/lmflow/pipeline/utils/memory_safe_vllm_inference.py +++ b/src/lmflow/pipeline/utils/memory_safe_vllm_inference.py @@ -16,7 +16,7 @@ ) from lmflow.datasets import Dataset -from lmflow.models.hf_decoder_model import HFDecoderModel +from lmflow.models.auto_model import AutoModel from lmflow.pipeline.vllm_inferencer import VLLMInferencer from lmflow.args import ( ModelArguments, @@ -47,7 +47,7 @@ def main(): model_args, data_args, pipeline_args = parser.parse_args_into_dataclasses() dataset = Dataset(data_args) - model = HFDecoderModel(model_args) + model = AutoModel.get_model(model_args, tune_strategy='none') inferencer = VLLMInferencer(model_args, data_args, pipeline_args) res = inferencer.inference( diff --git a/src/lmflow/tokenization/hf_text_regression_model.py b/src/lmflow/tokenization/hf_text_regression_model.py index 483f9db58..806ff5654 100644 --- a/src/lmflow/tokenization/hf_text_regression_model.py +++ b/src/lmflow/tokenization/hf_text_regression_model.py @@ -191,6 +191,77 @@ def paired_conversation_tokenize_function( ) return token_dict + +def conversation_tokenize_function( + examples, + data_args: DatasetArguments, + tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast], + column_names, + conversation_template: ConversationTemplate, +) -> Dict: + """Handels conversation datasets tokenization + """ + num_example = len(examples[column_names[0]]) + token_dict = { + "input_ids": [[] for _ in range(num_example)], + "attention_mask": [[] for _ in range(num_example)], + "labels": [[] for _ in range(num_example)], + } + with CaptureLogger(tok_logger) as cl: + for i in range(len(examples["messages"])): + messages = examples["messages"][i] + system = examples.get("system", [None] * num_example)[i] + tools = examples.get("tools", [None] * num_example)[i] + if len(messages) < 2 or 
messages[0]['role'] != CONVERSATION_ROLE_NAMES['user']: + tok_logger.warning( + "Invalid instance encountered. Either the conversation has less than " + "one round or the first message is not from the user." + ) + continue + + if len(messages) % 2 != 0: + logger.warning( + "The number of messages is not even, the last message will be ignored." + ) + messages = messages[:-1] + + encoded_conversation = conversation_template.encode_conversation( + tokenizer=tokenizer, + messages=messages, + system=system, + tools=tools, + ) + + input_ids, labels = [], [] + for turn_idx, (user_input, assistant_result) in enumerate(encoded_conversation): + input_ids += user_input + assistant_result + + if data_args.train_on_prompt: + labels += user_input + assistant_result + else: + labels += [-100] * len(user_input) + assistant_result + + token_dict["input_ids"][i].extend(input_ids) + token_dict["attention_mask"][i].extend([1] * len(input_ids)) + token_dict["labels"][i].extend(labels) + + if data_args.disable_group_texts: + token_dict = blocking( + token_dict=token_dict, + block_size=data_args.block_size, + model_max_length=tokenizer.model_max_length, + pad_token_id=tokenizer.pad_token_id, + padding_side=tokenizer.padding_side, + ) + + # clm input could be much much longer than block_size + if "Token indices sequence length is longer than the" in cl.out: + tok_logger.warning( + "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits" + " before being passed to the model." 
+ ) + return token_dict + def tokenize_function( examples, diff --git a/src/lmflow/utils/constants.py b/src/lmflow/utils/constants.py index bb17899dc..323e4d723 100644 --- a/src/lmflow/utils/constants.py +++ b/src/lmflow/utils/constants.py @@ -20,6 +20,23 @@ ).lstrip("\n") +SCORED_TEXT_ONLY_DATASET_DESCRIPTION = ( +""" +"scored_text_only": a dataset with only raw text instances and corresponding scores, with following format: + + { + "type": "text_only", + "instances": [ + { "text": "TEXT_1" }, + { "text": "TEXT_2" }, + ... + ], + "scores": [1.0, 0.5, ...] + } +""" +).lstrip("\n") + + TEXT_ONLY_DATASET_DETAILS = ( """ For example, @@ -282,6 +299,9 @@ "paired_conversation": ["chosen", "rejected"], "float_only": ["value"], "image_text": ["images", "text"], + "scored_text_only": ["text", "score"], + "scored_text2text": ["input", "output", "score"], + "scored_conversation": ["messages", "score"], } CONVERSATION_ROLE_NAMES = { From 43ec933ac958c37b03913ea351f816fd648f2325 Mon Sep 17 00:00:00 2001 From: Yizhen Date: Sun, 23 Jun 2024 03:26:32 +0800 Subject: [PATCH 02/13] [Feature] reward model inferencer support --- examples/rm_inference.py | 61 +++++ examples/vllm_inference.py | 4 +- scripts/run_rm_inference.sh | 67 +++++ src/lmflow/args.py | 15 +- src/lmflow/datasets/dataset.py | 21 +- src/lmflow/models/hf_decoder_model.py | 6 +- src/lmflow/models/hf_model_mixin.py | 241 ++++++++---------- src/lmflow/models/hf_text_regression_model.py | 217 ++++++++++++---- src/lmflow/pipeline/auto_pipeline.py | 6 +- src/lmflow/pipeline/rm_inferencer.py | 205 +++++++++++++++ src/lmflow/pipeline/rm_tuner.py | 6 +- .../utils/memory_safe_vllm_inference.py | 4 +- .../tokenization/hf_text_regression_model.py | 71 ++++++ src/lmflow/utils/constants.py | 20 ++ 14 files changed, 747 insertions(+), 197 deletions(-) create mode 100644 examples/rm_inference.py create mode 100644 scripts/run_rm_inference.sh create mode 100644 src/lmflow/pipeline/rm_inferencer.py diff --git a/examples/rm_inference.py 
b/examples/rm_inference.py new file mode 100644 index 000000000..0f8acf7ce --- /dev/null +++ b/examples/rm_inference.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved. +import logging +import os +import sys + +from transformers import ( + HfArgumentParser +) + +from lmflow.datasets import Dataset +from lmflow.models.auto_model import AutoModel +from lmflow.pipeline.auto_pipeline import AutoPipeline +from lmflow.args import ( + ModelArguments, + DatasetArguments, + AutoArguments, +) + + +logger = logging.getLogger(__name__) + + +def main(): + # Parses arguments + pipeline_name = "rm_inferencer" + PipelineArguments = AutoArguments.get_pipeline_args_class(pipeline_name) + + parser = HfArgumentParser(( + ModelArguments, + DatasetArguments, + PipelineArguments + )) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. 
+ model_args, data_args, pipeline_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, pipeline_args = parser.parse_args_into_dataclasses() + + dataset = Dataset(data_args) + model = AutoModel.get_model(model_args, tune_strategy='none', use_accelerator=pipeline_args.use_accelerator) + inferencer = AutoPipeline.get_pipeline( + pipeline_name=pipeline_name, + model_args=model_args, + data_args=data_args, + pipeline_args=pipeline_args + ) + + res = inferencer.inference( + model, + dataset, + ) + + if pipeline_args.save_results: + res.save(pipeline_args.results_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/vllm_inference.py b/examples/vllm_inference.py index 83f89b008..f9d329cb5 100644 --- a/examples/vllm_inference.py +++ b/examples/vllm_inference.py @@ -10,7 +10,7 @@ ) from lmflow.datasets import Dataset -from lmflow.models.hf_decoder_model import HFDecoderModel +from lmflow.models.auto_model import AutoModel from lmflow.pipeline.auto_pipeline import AutoPipeline from lmflow.args import ( ModelArguments, @@ -40,7 +40,7 @@ def main(): model_args, data_args, pipeline_args = parser.parse_args_into_dataclasses() dataset = Dataset(data_args) - model = HFDecoderModel(model_args) + model = AutoModel.get_model(model_args, tune_strategy='none') inferencer = AutoPipeline.get_pipeline( pipeline_name=pipeline_name, model_args=model_args, diff --git a/scripts/run_rm_inference.sh b/scripts/run_rm_inference.sh new file mode 100644 index 000000000..40a8c87af --- /dev/null +++ b/scripts/run_rm_inference.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved. 
+ +# Parses arguments +run_name=rm_inference +# model_name_or_path=sfairXC/FsfairX-LLaMA3-RM-v0.1 +model_name_or_path=/vol/yizhenjia/projs/RLHFlow-fox/models/rm/sfairXC-FsfairX-LLaMA3-RM-v0.1 +dataset_path=data/alpaca/test +output_dir=data/rm_inference_results +output_file_name=results.json + +# Safety related arguments +trust_remote_code=0 + +while [[ $# -ge 1 ]]; do + key="$1" + case ${key} in + -r|--run_name) + run_name="$2" + shift + ;; + -m|--model_name_or_path) + model_name_or_path="$2" + shift + ;; + -d|--dataset_path) + dataset_path="$2" + shift + ;; + --output_dir) + output_dir="$2" + shift + ;; + --output_file_name) + output_file_name="$2" + shift + ;; + --trust_remote_code) + trust_remote_code="$2" + shift + ;; + *) + echo "error: unknown option \"${key}\"" 1>&2 + exit 1 + esac + shift +done + +# inference +project_dir=$(cd "$(dirname $0)"/..; pwd) +log_dir=${project_dir}/log/${run_name} +output_file_path=${output_dir}/${run_name}/${output_file_name} +mkdir -p ${output_dir}/${run_name} ${log_dir} + +accelerate launch --config_file configs/accelerator_multigpu_config.yaml \ + examples/rm_inference.py \ + --trust_remote_code ${trust_remote_code} \ + --model_name_or_path ${model_name_or_path} \ + --arch_type text_regression \ + --use_accelerator True \ + --block_size 4096 \ + --inference_batch_size 16 \ + --dataset_path ${dataset_path} \ + --preprocessing_num_workers 16 \ + --save_results True \ + --results_path ${output_file_path} \ + 2>&1 | tee ${log_dir}/rm_inference.log \ No newline at end of file diff --git a/src/lmflow/args.py b/src/lmflow/args.py index c4e87491c..9b4d9b7cb 100644 --- a/src/lmflow/args.py +++ b/src/lmflow/args.py @@ -682,7 +682,7 @@ class FinetunerArguments(TrainingArguments): @dataclass -class RewardModelingArguments(FinetunerArguments): +class RewardModelTunerArguments(FinetunerArguments): """ Arguments for reward modeling. 
""" @@ -859,18 +859,15 @@ class InferencerArguments: local_rank : str For distributed training: local_rank - random_seed : int, default = 1 - + inference_batch_size : int, default = 1 deepspeed : Enable deepspeed and pass the path to deepspeed json config file (e.g. ds_config.json) or an already loaded json file as a dict mixed_precision : str, choice from ["bf16","fp16"]. mixed precision mode, whether to use bf16 or fp16 - temperature : float An argument of model.generate in huggingface to control the diversity of generation. - repetition_penalty : float An argument of model.generate in huggingface to penalize repetitions. use_beam_search : Optional[bool] @@ -916,7 +913,10 @@ class InferencerArguments: metadata={"help": "For distributed training: local_rank" }, ) - + inference_batch_size: int = field( + default=1, + metadata={"help": "batch size for inference"}, + ) temperature: float = field( default=0.0, metadata={"help": "Temperature during inference."}, @@ -1285,9 +1285,10 @@ class IterativeAlignerArguments(InferencerArguments): "evaluator": EvaluatorArguments, "inferencer": InferencerArguments, "vllm_inferencer": InferencerArguments, + "rm_inferencer": InferencerArguments, "raft_aligner": RaftAlignerArguments, "dpo_aligner": DPOAlignerArguments, - "rm_tuner": RewardModelingArguments, + "rm_tuner": RewardModelTunerArguments, } diff --git a/src/lmflow/datasets/dataset.py b/src/lmflow/datasets/dataset.py index 826217f48..952fd9439 100644 --- a/src/lmflow/datasets/dataset.py +++ b/src/lmflow/datasets/dataset.py @@ -12,6 +12,7 @@ # Importing necessary libraries and modules import copy import json +from pathlib import Path from cmath import e from pathlib import Path @@ -24,6 +25,7 @@ from lmflow.utils.constants import ( DATASET_DESCRIPTION_MAP, TEXT_ONLY_DATASET_DESCRIPTION, + SCORED_TEXT_ONLY_DATASET_DESCRIPTION, TEXT2TEXT_DATASET_DESCRIPTION, FLOAT_ONLY_DATASET_DESCRIPTION, INSTANCE_FIELDS_MAP, @@ -42,6 +44,7 @@ KEY_TYPE = "type" KEY_INSTANCES = "instances" 
+KEY_SCORES = "score" class Dataset: r""" @@ -236,7 +239,7 @@ def from_dict(self, dict_obj: dict, *args, **kwargs): f' {list(fields)}: should be {list(correct_fields)}.\n' f'The bad instance triggers the error, the {i}-th instance:\n' f' {instance}' - ) + ) try: hf_dict = {} @@ -427,4 +430,18 @@ def get_type(self): self.type """ - return self.type \ No newline at end of file + return self.type + + + def save(self, file_path: str): + r""" + Save the dataset to a json file. + + Parameters + ------------ + file_path : str. + The path to the file where the dataset will be saved. + """ + assert Path(file_path).suffix == ".json", "The file path must have a .json extension." + with open(file_path, "w") as fout: + json.dump(self.to_dict(), fout, indent=2) \ No newline at end of file diff --git a/src/lmflow/models/hf_decoder_model.py b/src/lmflow/models/hf_decoder_model.py index ee4e94ff5..d987bfb8e 100644 --- a/src/lmflow/models/hf_decoder_model.py +++ b/src/lmflow/models/hf_decoder_model.py @@ -333,7 +333,7 @@ def decode(self, input, *args, **kwargs ) -> Union[str, List[str]]: else: # Can be list of ints or a Tensor return self.tokenizer.decode(input, *args, **kwargs) - + def inference( self, @@ -380,7 +380,7 @@ def inference( return res - def __inference(self, inputs, use_accelerator=False, *args, **kwargs): + def __inference(self, inputs, *args, **kwargs): """ Perform generation process of the model. 
@@ -401,7 +401,7 @@ def __inference(self, inputs, use_accelerator=False, *args, **kwargs): The generated sequence output """ with torch.no_grad(): - if use_accelerator: + if self.use_accelerator: outputs = self.backend_model.generate( input_ids=inputs, pad_token_id=self.tokenizer.pad_token_id, diff --git a/src/lmflow/models/hf_model_mixin.py b/src/lmflow/models/hf_model_mixin.py index c01e916da..1dce5c57a 100644 --- a/src/lmflow/models/hf_model_mixin.py +++ b/src/lmflow/models/hf_model_mixin.py @@ -4,7 +4,8 @@ import gc import os import logging -from typing import Union, Optional, Dict +from typing import Union, Optional, Dict, List +import copy import torch import deepspeed @@ -26,7 +27,7 @@ prepare_model_for_kbit_training ) from peft.utils.constants import TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING -from vllm import LLM +from vllm import LLM, SamplingParams from vllm.distributed.parallel_state import destroy_model_parallel from lmflow.models.base_model import BaseModel @@ -94,6 +95,7 @@ def __init__( self.hf_auto_model = HF_AUTOMODEL_MAPPING[model_args.arch_type] self.use_accelerator = use_accelerator self.ds_config = ds_config + self.do_train = do_train self.tokenizer = self.__prepare_tokenizer(model_args) self.torch_dtype = self.__prepare_dtype(model_args) @@ -105,22 +107,23 @@ def __init__( # Some implementations require custom modules to be injected into the model. 
self.__model_module_inject(model_args) - if do_train: + if self.do_train: self.__prepare_model_for_training(model_args, self.hf_auto_model) - - # some post processing - if self.tokenizer.eos_token_id is None: - self.tokenizer.eos_token_id = self.backend_model.config.eos_token_id - if self.tokenizer.pad_token_id is None: - self.tokenizer.pad_token_id = self.tokenizer.eos_token_id - if self.backend_model.config.pad_token_id is None: - self.backend_model.config.pad_token_id = self.tokenizer.pad_token_id - + def __prepare_tokenizer( self, model_args: ModelArguments, ) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]: + tokenizer_name = model_args.tokenizer_name or model_args.model_name_or_path + if not tokenizer_name: + raise ValueError( + "You are instantiating a new tokenizer from scratch. This is" + " not supported by this script. You can do it from another" + " script, save it, and load it from here, using" + " --tokenizer_name." + ) + tokenizer_kwargs = { "cache_dir": model_args.cache_dir, "use_fast": model_args.use_fast_tokenizer, @@ -132,39 +135,15 @@ def __prepare_tokenizer( tokenizer_kwargs["padding_side"] = model_args.padding_side try: - if model_args.tokenizer_name: - tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs) - elif model_args.model_name_or_path: - tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs) - else: - raise ValueError( - "You are instantiating a new tokenizer from scratch. This is" - " not supported by this script. You can do it from another" - " script, save it, and load it from here, using" - " --tokenizer_name." - ) - + tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, **tokenizer_kwargs) except RecursionError: logger.warning( "The tokenizer_config.json file doesn't set the special tokens. 
Using default values: " ", , for unknown token, bos token and eos token respectively.") - if model_args.tokenizer_name: - tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, unk_token="", - bos_token="", - eos_token="", - **tokenizer_kwargs) - elif model_args.model_name_or_path: - tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, unk_token="", - bos_token="", - eos_token="", - **tokenizer_kwargs) - else: - raise ValueError( - "You are instantiating a new tokenizer from scratch. This is" - " not supported by this script. You can do it from another" - " script, save it, and load it from here, using" - " --tokenizer_name." - ) + tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, unk_token="", + bos_token="", + eos_token="", + **tokenizer_kwargs) tokenizer.truncation_side = model_args.truncation_side or tokenizer.truncation_side tokenizer.model_max_length = model_args.model_max_length or tokenizer.model_max_length @@ -249,19 +228,25 @@ def __prepare_quant_config( model_args: ModelArguments, ): quant_config = None - if model_args.use_qlora: - quant_config = BitsAndBytesConfig( - load_in_4bit=model_args.bits == 4, - load_in_8bit=model_args.bits == 8, - llm_int8_threshold=6.0, - llm_int8_has_fp16_weight=False, - bnb_4bit_compute_dtype=self.torch_dtype, - bnb_4bit_use_double_quant=model_args.double_quant, - bnb_4bit_quant_type=model_args.quant_type, - ) - + if self.do_train: + if model_args.use_qlora: + quant_config = BitsAndBytesConfig( + load_in_4bit=model_args.bits == 4, + load_in_8bit=model_args.bits == 8, + llm_int8_threshold=6.0, + llm_int8_has_fp16_weight=False, + bnb_4bit_compute_dtype=self.torch_dtype, + bnb_4bit_use_double_quant=model_args.double_quant, + bnb_4bit_quant_type=model_args.quant_type, + ) + else: # inference + if model_args.use_int8: + quant_config = BitsAndBytesConfig( + load_in_8bit = model_args.use_int8, + ) + return quant_config - + def __prepare_peft_config( self, @@ -318,8 +303,9 @@ def 
__prepare_model_for_training( model_args: ModelArguments, hf_auto_model: HF_AUTOMODEL_TYPE, ): + assert self.do_train, "To prepare the model for training, set do_train=True." # TODO: change to accelerate - logger.info("Preparing model for training") + logger.warning("Preparing model for training") if model_args.model_name_or_path: model = hf_auto_model.from_pretrained( model_args.model_name_or_path, @@ -351,6 +337,7 @@ def __prepare_model_for_training( model.resize_token_embeddings(len(self.tokenizer)) self.backend_model = model + self.__prepare_model_post_process() def __prepare_model_for_inference( @@ -360,85 +347,68 @@ def __prepare_model_for_inference( use_accelerator: bool, ds_config ): - if not hasattr(self, "backend_model"): - # TODO: change to accelerate - logger.info("Preparing model for inference") - if use_accelerator: - peft_model_id = model_args.lora_model_path - self.backend_model = hf_auto_model.from_pretrained( - model_args.model_name_or_path, - config=self.hf_model_config, - device_map="auto", - offload_folder="offload", - offload_state_dict=True, - load_in_8bit = model_args.use_int8, - ) - if peft_model_id is not None: - self.backend_model = PeftModel.from_pretrained( - self.backend_model, - peft_model_id, - ) - else: - from transformers.integrations import HfDeepSpeedConfig - dschf = HfDeepSpeedConfig(ds_config) - peft_model_id = model_args.lora_model_path - # NOTE: Currently offload is not supported by llama - if self.hf_model_config.model_type == "llama" and model_args.use_ram_optimized_load: - logger.warning( - "llama does not support RAM optimized load. Automatically" - " use original load instead." 
- ) - model_args.use_ram_optimized_load = False - - if model_args.use_ram_optimized_load and peft_model_id is None: - try: - # RAM-optimized load - self.backend_model = hf_auto_model.from_pretrained( - model_args.model_name_or_path, - config=self.hf_model_config, - device_map="auto", - offload_folder="offload", - offload_state_dict=True, - ) - except: - logger.warning( - "Failed to use RAM optimized load. Automatically" - " use original load instead." - ) - # Normal load - self.backend_model = hf_auto_model.from_pretrained( - model_args.model_name_or_path, - config=self.hf_model_config, - ) - else: - if peft_model_id is not None: - logger.warning( - "LoRA does not support RAM optimized load currently." - " Automatically use original load instead." - ) - self.backend_model = hf_auto_model.from_pretrained( - model_args.model_name_or_path, - config=self.hf_model_config, - ) - - self.backend_model_full = self.backend_model - if peft_model_id is not None: - self.backend_model = PeftModel.from_pretrained( - self.backend_model, peft_model_id - ) - - if self.device == "gpu": - deepspeed.init_distributed() - self.ds_engine = deepspeed.initialize(model=self.backend_model, config_params=ds_config)[0] - self.ds_engine.module.eval() - - # backend model already initialized - else: + logger.info(f"Backend model already initialized, moving to device: {self.device}") + if hasattr(self, "backend_model"): if self.backend_model.device == torch.device("cpu"): self.backend_model.to(self.device) - else: - return - + return + + # TODO: change to accelerate + logger.info("Preparing model for inference") + inference_load_kwargs = {} + inference_load_kwargs_bak = copy.deepcopy(inference_load_kwargs) + ram_optimized_load_kwargs = { + "device_map": "auto", + "offload_folder": "offload", + "offload_state_dict": True, + } + + if model_args.lora_model_path is not None: + logger.warning( + "LoRA does not support RAM optimized load currently. Using original load." 
+ ) + model_args.use_ram_optimized_load = False + + if use_accelerator or model_args.use_ram_optimized_load: + inference_load_kwargs.update(ram_optimized_load_kwargs) + + if not use_accelerator: + from transformers.integrations import HfDeepSpeedConfig + dschf = HfDeepSpeedConfig(ds_config) + + try: + self.backend_model = hf_auto_model.from_pretrained( + model_args.model_name_or_path, + config=self.hf_model_config, + quantization_config=self.quant_config, + **inference_load_kwargs, + ) + except: + logger.warning( + "Failed to use RAM optimized load. Using original load instead." + ) + self.backend_model = hf_auto_model.from_pretrained( + model_args.model_name_or_path, + config=self.hf_model_config, + quantization_config=self.quant_config, + **inference_load_kwargs_bak, + ) + + self.backend_model_full = self.backend_model + + if model_args.lora_model_path is not None: + self.backend_model = PeftModel.from_pretrained( + self.backend_model, + model_args.lora_model_path, + ) + + if (not use_accelerator) and self.device == "gpu": + deepspeed.init_distributed() + self.ds_engine = deepspeed.initialize(model=self.backend_model, config_params=ds_config)[0] + self.ds_engine.module.eval() + + self.__prepare_model_post_process() + def __prepare_model_for_vllm_inference( self, @@ -456,6 +426,15 @@ def __prepare_model_for_vllm_inference( ) + def __prepare_model_post_process(self): + if self.tokenizer.eos_token_id is None: + self.tokenizer.eos_token_id = self.backend_model.config.eos_token_id + if self.tokenizer.pad_token_id is None: + self.tokenizer.pad_token_id = self.tokenizer.eos_token_id + if self.backend_model.config.pad_token_id is None: + self.backend_model.config.pad_token_id = self.tokenizer.pad_token_id + + def activate_model_for_inference( self, use_vllm: bool=False, diff --git a/src/lmflow/models/hf_text_regression_model.py b/src/lmflow/models/hf_text_regression_model.py index 4bce86306..a6bdf3305 100644 --- a/src/lmflow/models/hf_text_regression_model.py +++ 
b/src/lmflow/models/hf_text_regression_model.py @@ -5,6 +5,7 @@ import hashlib import logging from pathlib import Path +from typing import List, Union, Dict, Optional import torch import deepspeed @@ -17,27 +18,25 @@ get_peft_model, prepare_model_for_kbit_training ) -from transformers import ( - BitsAndBytesConfig, - CONFIG_MAPPING, - AutoConfig, - AutoTokenizer, - AutoModelForSequenceClassification, -) -from transformers.deepspeed import HfDeepSpeedConfig -from transformers.testing_utils import CaptureLogger +from transformers.modeling_outputs import SequenceClassifierOutputWithPast +from vllm import SamplingParams +from lmflow.args import ModelArguments from lmflow.datasets import Dataset from lmflow.models.interfaces.tunable import Tunable from lmflow.models.hf_model_mixin import HFModelMixin from lmflow.models.text_regression_model import TextRegressionModel -from lmflow.tokenization.hf_text_regression_model import paired_conversation_tokenize_function, tokenize_function +from lmflow.tokenization.hf_text_regression_model import ( + paired_conversation_tokenize_function, + conversation_tokenize_function, + tokenize_function, +) from lmflow.utils.conversation_template import PRESET_TEMPLATES from lmflow.utils.constants import ( PAIRED_CONVERSATION_DATASET_DESCRIPTION, TEXT2TEXT_DATASET_DESCRIPTION, TEXT_ONLY_DATASET_DESCRIPTION, - CONVERSATION_ROLE_NAMES, + CONVERSATION_DATASET_DESCRIPTION, ) @@ -69,8 +68,8 @@ class HFTextRegressionModel(TextRegressionModel, HFModelMixin, Tunable): def __init__( self, - model_args, - tune_strategy='normal', + model_args: ModelArguments, + tune_strategy: str='normal', ds_config=None, device="gpu", use_accelerator=False, @@ -83,6 +82,10 @@ def __init__( :param tune_strategy: tuning strategy: normal, none, lora or adapter :param ds_config: deepspeed configuration for distributed training """ + assert model_args.arch_type == "text_regression", ( + f"Invalid model architecture type: {model_args.arch_type}. 
" + f"Expected: text_regression" + ) config_additional_args = {"num_labels": 1} HFModelMixin.__init__( self, @@ -138,6 +141,11 @@ def tokenize( hf_raw_datasets = dataset.get_backend_dataset() column_names = list(hf_raw_datasets.features) # in paired conversation, for example, would be 'chosen' and 'rejected' data_args = raw_datasets.get_data_args() + + # Whether to truncate long sequences to fit into max_length + use_truncation = False + if model_args.use_lora or data_args.disable_group_texts: + use_truncation = True # Requires three types of information for tokenizing different datasets # 1) Which fields require tokenization, e.g. @@ -149,16 +157,32 @@ def tokenize( # 3) Which fields require loss in final computation, e.g. # "text_only": "text" # "text2text": "output" only - tokenized_column_order = None # Handles 1) and 2) - label_columns = None # Handles 3) + tokenize_fn = None + tokenize_fn_kwargs = { + "data_args": data_args, + "tokenizer": self.tokenizer, + "column_names": column_names, + } if dataset_type == "text_only": - tokenized_column_order = ["text"] - label_columns = ["text"] + tokenize_fn = tokenize_function + tokenize_fn_kwargs["tokenized_column_order"] = ["text"] + tokenize_fn_kwargs["label_columns"] = ["text"] + tokenize_fn_kwargs["add_special_tokens"] = add_special_tokens + tokenize_fn_kwargs["use_truncation"] = use_truncation + elif dataset_type == "text2text": - tokenized_column_order = ["input", "output"] - label_columns = ["output"] - add_special_tokens = False - elif dataset_type == "paired_conversation": + tokenize_fn = tokenize_function + tokenize_fn_kwargs["tokenized_column_order"] = ["input", "output"] + tokenize_fn_kwargs["label_columns"] = ["output"] + tokenize_fn_kwargs["add_special_tokens"] = False + tokenize_fn_kwargs["use_truncation"] = use_truncation + + elif dataset_type in ["conversation", "paired_conversation"]: + if dataset_type == "conversation": + tokenize_fn = conversation_tokenize_function + elif dataset_type == 
"paired_conversation": + tokenize_fn = paired_conversation_tokenize_function + if data_args.conversation_template: if data_args.conversation_template in PRESET_TEMPLATES.keys(): conversation_template = PRESET_TEMPLATES[data_args.conversation_template] @@ -169,36 +193,19 @@ def tokenize( else: logger.warning("No conversation template provided. Using default template.") conversation_template = PRESET_TEMPLATES['empty'] - + tokenize_fn_kwargs["conversation_template"] = conversation_template logger.warning(f"Conversation template: {conversation_template}") + else: raise NotImplementedError( f"Dataset type \"{dataset_type}\" is not supported, currently" " only support following data types for HFTextRegressionModel:\n" - f" 1) {TEXT_ONLY_DATASET_DESCRIPTION}\n" - f" 2) {TEXT2TEXT_DATASET_DESCRIPTION}\n" - f" 3) {PAIRED_CONVERSATION_DATASET_DESCRIPTION}\n" + f" 1) [Inference]{TEXT_ONLY_DATASET_DESCRIPTION}\n" + f" 2) [Inference]{TEXT2TEXT_DATASET_DESCRIPTION}\n" + f" 3) [Training]{PAIRED_CONVERSATION_DATASET_DESCRIPTION}\n" + f" 4) [Inference]{CONVERSATION_DATASET_DESCRIPTION}\n" ) - - # Whether to truncate long sequences to fit into max_length - use_truncation = False - if model_args.use_lora or data_args.disable_group_texts: - use_truncation = True - - tokenize_fn = paired_conversation_tokenize_function if "conversation" in dataset_type else tokenize_function - tokenize_fn_kwargs = { - "data_args": data_args, - "tokenizer": self.tokenizer, - "column_names": column_names, - } - if "conversation" in dataset_type: - tokenize_fn_kwargs["conversation_template"] = conversation_template - else: - tokenize_fn_kwargs["label_columns"] = label_columns - tokenize_fn_kwargs["tokenized_column_order"] = tokenized_column_order - tokenize_fn_kwargs["add_special_tokens"] = add_special_tokens - tokenize_fn_kwargs["use_truncation"] = use_truncation - + tokenize_kwargs = {} if not data_args.streaming: fingerprint = hashlib.md5( @@ -226,8 +233,128 @@ def tokenize( **tokenize_kwargs ) return 
tokenized_datasets + + + def inference( + self, + inputs, + release_gpu: bool = False, + use_vllm: bool = False, + **kwargs + ) -> Union[List[float], SequenceClassifierOutputWithPast]: + """ + Perform generation process of the model. + + Parameters + ------------ + inputs : + The sequence used as a prompt for the generation or as model inputs to the model. + When using vllm inference, this should be a string or a list of strings. + When using normal inference, this should be a tensor. + release_gpu : bool, optional + Whether to release the GPU resource after inference, by default False. + use_vllm : bool, optional + Whether to use VLLM for inference, by default False. + kwargs : Optional. + Keyword arguments. + + Returns + ------------ + outputs : + The generated sequence output + """ + if use_vllm: + logger.warning( + "VLLM inference is not supported for text regression model, using normal inference instead." + ) + use_vllm = False + + if not self._activated: + self.activate_model_for_inference( + use_vllm=use_vllm, + **kwargs, + ) + + if use_vllm: + res = self.__vllm_inference(inputs, **kwargs) + else: + res = self.__inference(inputs, **kwargs) + if release_gpu: + self.deactivate_model_for_inference(use_vllm=use_vllm) + return res + + + def __inference( + self, + inputs, + **kwargs + ): + """ + Perform generation process of the model. + + Parameters + ------------ + inputs : + The **tokenized** sequence used as a prompt for the generation or as model inputs to the model. + kwargs : Optional. + Keyword arguments. 
+ + Returns + ------------ + outputs : + The generated sequence output + """ + with torch.no_grad(): + if self.use_accelerator: + outputs = self.backend_model( + input_ids=inputs, + **kwargs, + ) + else: + if self.device == "gpu": + outputs = self.ds_engine.module( + input_ids=inputs, + synced_gpus=True, + **kwargs, + ) + elif self.device == "cpu": + outputs = self.backend_model( + input_ids=inputs, + synced_gpus=True, + **kwargs, + ) + else: + raise NotImplementedError( + f"device \"{self.device}\" is not supported" + ) + return outputs + + + def __vllm_inference( + self, + inputs: Union[str, List[str]], + sampling_params: Optional[SamplingParams] = None, + **kwargs, + ) -> Union[List[List[str]], List[List[List[int]]]]: + """Perform VLLM inference process of the model. + + Parameters + ---------- + inputs : Union[str, List[str]] + Prompt(s), string or a list of strings. + sampling_params : Optional[SamplingParams], optional + vllm SamplingParams object, by default None. + + Returns + ------- + """ + raise NotImplementedError( + "VLLM inference is not supported for text regression model." + ) + + def save(self, dir, *args, **kwargs): """ Perform generation process of the model. 
diff --git a/src/lmflow/pipeline/auto_pipeline.py b/src/lmflow/pipeline/auto_pipeline.py index a4c053cf6..a5e815636 100644 --- a/src/lmflow/pipeline/auto_pipeline.py +++ b/src/lmflow/pipeline/auto_pipeline.py @@ -19,14 +19,16 @@ def is_package_version_at_least(package_name, min_version): from lmflow.pipeline.inferencer import Inferencer from lmflow.pipeline.vllm_inferencer import VLLMInferencer from lmflow.pipeline.dpo_aligner import DPOAligner -from lmflow.pipeline.rm_tuner import RewardModelingTuner +from lmflow.pipeline.rm_tuner import RewardModelTuner +from lmflow.pipeline.rm_inferencer import RewardModelInferencer PIPELINE_MAPPING = { "evaluator": Evaluator, "finetuner": Finetuner, "inferencer": Inferencer, "vllm_inferencer": VLLMInferencer, + "rm_inferencer": RewardModelInferencer, "dpo_aligner": DPOAligner, - "rm_tuner": RewardModelingTuner, + "rm_tuner": RewardModelTuner, } if not is_package_version_at_least('transformers', '4.35.0'): diff --git a/src/lmflow/pipeline/rm_inferencer.py b/src/lmflow/pipeline/rm_inferencer.py new file mode 100644 index 000000000..b5a3d81e1 --- /dev/null +++ b/src/lmflow/pipeline/rm_inferencer.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved. 
+import copy +import os +import torch +import wandb +import deepspeed +import sys +import numpy as np +import datetime +import json +import time +import logging +from typing import Dict, List, Union + +from accelerate import Accelerator +import torch +from tqdm import tqdm +from transformers import AutoConfig +from transformers.modeling_outputs import SequenceClassifierOutputWithPast +import torch.distributed as dist +import torch.nn.functional as F + +from lmflow.args import ( + DatasetArguments, + ModelArguments, + InferencerArguments, +) +from lmflow.datasets.dataset import Dataset +from lmflow.models.hf_text_regression_model import HFTextRegressionModel +from lmflow.pipeline.base_pipeline import BasePipeline +from lmflow.utils.data_utils import ( + set_random_seed, + batchlize +) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warnings about parallelism in tokenizers +logger = logging.getLogger(__name__) + + +class RewardModelInferencer(BasePipeline): + """ + Initializes the `Inferencer` class with given arguments. + + Parameters + ------------ + model_args : ModelArguments object. + Contains the arguments required to load the model. + + data_args : DatasetArguments object. + Contains the arguments required to load the dataset. + + inferencer_args : InferencerArguments object. + Contains the arguments required to perform inference. 
+ """ + def __init__( + self, + model_args: ModelArguments, + data_args: DatasetArguments, + inferencer_args: InferencerArguments, + ): + self.data_args = data_args + self.inferencer_args = inferencer_args + self.model_args = model_args + + set_random_seed(self.inferencer_args.random_seed) + + self.local_rank = int(os.getenv("LOCAL_RANK", "0")) + self.world_size = int(os.getenv("WORLD_SIZE", "1")) + if inferencer_args.device == "gpu": + torch.cuda.set_device(self.local_rank) # NOTE: cpu-only machine will have error + deepspeed.init_distributed() + else: + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "15000" + dist.init_process_group( + "gloo", rank=self.local_rank, world_size=self.world_size + ) + + if inferencer_args.use_accelerator: + self.accelerator = Accelerator() + self.accelerator.wait_for_everyone() + + + def inference( + self, + model: HFTextRegressionModel, + dataset: Dataset, + transform_dataset_in_place: bool=True, + use_vllm: bool = False, + ) -> Dataset: + if use_vllm: + logger.warning("VLLM doesn't support reward model inference, using normal inference instead.") + use_vllm = False + + assert isinstance(model, HFTextRegressionModel), "model should be HFTextRegressionModel" + if not transform_dataset_in_place: + dataset = copy.deepcopy(dataset) + output_dict = { + "type": f"scored_{dataset.get_type()}", + "instances": dataset.to_dict()["instances"], + } + + if use_vllm: + scores = self.__vllm_inference(model, dataset) + else: + scores = self.__inference(model, dataset) + + for i, score in enumerate(scores): + output_dict["instances"][i]["score"] = score + + output_dataset_args = copy.deepcopy(self.data_args) + output_dataset_args.dataset_path = None + output_dataset_args.dataset_name = f"scored_{output_dataset_args.dataset_name}" + output_dataset = Dataset(output_dataset_args) + output_dataset = output_dataset.from_dict(output_dict) + + return output_dataset + + + def __inference( + self, + model: HFTextRegressionModel, + 
dataset: Dataset, + ) -> List[float]: + tokenized_dataset = model.tokenize(dataset) + dataloader, _ = self.create_dataloader( + dataset=tokenized_dataset, + batch_size=self.inferencer_args.inference_batch_size, + random_shuffle=False, # no need to shuffle when inference + ) + num_batches = len(dataloader) + final_output = [] + + for batch_index, batched_input_ids in tqdm( + iterable=enumerate(dataloader), + total=num_batches, + desc="Inference", + unit="batch" + ): + # len(batch) = batch_size, and batch element is dataset sample + model_input = torch.LongTensor(batched_input_ids).to("cpu" if model.device == "cpu" else "cuda") + if self.inferencer_args.use_accelerator: + with self.accelerator.autocast(): + batch_output = model.inference( + inputs=model_input, + use_vllm=False, + ) + else: + batch_output = model.inference( + inputs=model_input, + use_vllm=False, + ) + + batch_output = self.__post_process_model_output(batch_output) + final_output.extend(batch_output) + + return final_output + + + def __vllm_inference( + self, + model: HFTextRegressionModel, + dataset: Dataset, + ) -> List[float]: + raise NotImplementedError("VLLM inference for reward model is not implemented yet.") + + + def __post_process_model_output( + self, + model_output: SequenceClassifierOutputWithPast, + ) -> List[float]: + final_output = model_output.logits.to("cpu").reshape(-1).tolist() + + return final_output + + + def create_dataloader( + self, + dataset: Dataset, + batch_size: int = 1, + random_shuffle: bool = False, + ): + r"""Batchlize dataset and format it to dataloader. 
+ + Args: + dataset (Dataset): the dataset object + + Output: + dataloader (batchlize): the dataloader object + dataset_size (int): the length of the dataset + + """ + inputs = dataset.get_backend_dataset()["input_ids"] # this comes from lmflow model.tokenize(dataset) + dataset_size = len(inputs) + + dataloader = batchlize( + inputs, + batch_size=batch_size, + random_shuffle=random_shuffle, + ) + return dataloader, dataset_size + \ No newline at end of file diff --git a/src/lmflow/pipeline/rm_tuner.py b/src/lmflow/pipeline/rm_tuner.py index 2997aba2a..06b2094f1 100644 --- a/src/lmflow/pipeline/rm_tuner.py +++ b/src/lmflow/pipeline/rm_tuner.py @@ -23,8 +23,8 @@ logger = logging.getLogger(__name__) -class RewardModelingTuner(Finetuner): - """Initializes the `RewardModelingTuner` class. +class RewardModelTuner(Finetuner): + """Initializes the `RewardModelTuner` class. Parameters ---------- @@ -34,7 +34,7 @@ class RewardModelingTuner(Finetuner): data_args : DatasetArguments object. Contains the arguments required to load the dataset. - finetuner_args : RewardModelingArguments object. + finetuner_args : RewardModelTunerArguments object. Contains the arguments required to perform finetuning. args : Optional. 
diff --git a/src/lmflow/pipeline/utils/memory_safe_vllm_inference.py b/src/lmflow/pipeline/utils/memory_safe_vllm_inference.py index 3502d13e2..86f765acb 100644 --- a/src/lmflow/pipeline/utils/memory_safe_vllm_inference.py +++ b/src/lmflow/pipeline/utils/memory_safe_vllm_inference.py @@ -16,7 +16,7 @@ ) from lmflow.datasets import Dataset -from lmflow.models.hf_decoder_model import HFDecoderModel +from lmflow.models.auto_model import AutoModel from lmflow.pipeline.vllm_inferencer import VLLMInferencer from lmflow.args import ( ModelArguments, @@ -47,7 +47,7 @@ def main(): model_args, data_args, pipeline_args = parser.parse_args_into_dataclasses() dataset = Dataset(data_args) - model = HFDecoderModel(model_args) + model = AutoModel.get_model(model_args, tune_strategy='none') inferencer = VLLMInferencer(model_args, data_args, pipeline_args) res = inferencer.inference( diff --git a/src/lmflow/tokenization/hf_text_regression_model.py b/src/lmflow/tokenization/hf_text_regression_model.py index 483f9db58..806ff5654 100644 --- a/src/lmflow/tokenization/hf_text_regression_model.py +++ b/src/lmflow/tokenization/hf_text_regression_model.py @@ -191,6 +191,77 @@ def paired_conversation_tokenize_function( ) return token_dict + +def conversation_tokenize_function( + examples, + data_args: DatasetArguments, + tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast], + column_names, + conversation_template: ConversationTemplate, +) -> Dict: + """Handles conversation dataset tokenization + """ + num_example = len(examples[column_names[0]]) + token_dict = { + "input_ids": [[] for _ in range(num_example)], + "attention_mask": [[] for _ in range(num_example)], + "labels": [[] for _ in range(num_example)], + } + with CaptureLogger(tok_logger) as cl: + for i in range(len(examples["messages"])): + messages = examples["messages"][i] + system = examples.get("system", [None] * num_example)[i] + tools = examples.get("tools", [None] * num_example)[i] + if len(messages) < 2 or
messages[0]['role'] != CONVERSATION_ROLE_NAMES['user']: + tok_logger.warning( + "Invalid instance encountered. Either the conversation has less than " + "one round or the first message is not from the user." + ) + continue + + if len(messages) % 2 != 0: + logger.warning( + "The number of messages is not even, the last message will be ignored." + ) + messages = messages[:-1] + + encoded_conversation = conversation_template.encode_conversation( + tokenizer=tokenizer, + messages=messages, + system=system, + tools=tools, + ) + + input_ids, labels = [], [] + for turn_idx, (user_input, assistant_result) in enumerate(encoded_conversation): + input_ids += user_input + assistant_result + + if data_args.train_on_prompt: + labels += user_input + assistant_result + else: + labels += [-100] * len(user_input) + assistant_result + + token_dict["input_ids"][i].extend(input_ids) + token_dict["attention_mask"][i].extend([1] * len(input_ids)) + token_dict["labels"][i].extend(labels) + + if data_args.disable_group_texts: + token_dict = blocking( + token_dict=token_dict, + block_size=data_args.block_size, + model_max_length=tokenizer.model_max_length, + pad_token_id=tokenizer.pad_token_id, + padding_side=tokenizer.padding_side, + ) + + # clm input could be much much longer than block_size + if "Token indices sequence length is longer than the" in cl.out: + tok_logger.warning( + "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits" + " before being passed to the model." 
+ ) + return token_dict + def tokenize_function( examples, diff --git a/src/lmflow/utils/constants.py b/src/lmflow/utils/constants.py index bb17899dc..323e4d723 100644 --- a/src/lmflow/utils/constants.py +++ b/src/lmflow/utils/constants.py @@ -20,6 +20,23 @@ ).lstrip("\n") +SCORED_TEXT_ONLY_DATASET_DESCRIPTION = ( +""" +"scored_text_only": a dataset with only raw text instances and corresponding scores, with following format: + + { + "type": "text_only", + "instances": [ + { "text": "TEXT_1" }, + { "text": "TEXT_2" }, + ... + ], + "scores": [1.0, 0.5, ...] + } +""" +).lstrip("\n") + + TEXT_ONLY_DATASET_DETAILS = ( """ For example, @@ -282,6 +299,9 @@ "paired_conversation": ["chosen", "rejected"], "float_only": ["value"], "image_text": ["images", "text"], + "scored_text_only": ["text", "score"], + "scored_text2text": ["input", "output", "score"], + "scored_conversation": ["messages", "score"], } CONVERSATION_ROLE_NAMES = { From abd9759bb181be04dde1993514f0d50f74a818b0 Mon Sep 17 00:00:00 2001 From: Yizhen Date: Sun, 23 Jun 2024 03:43:00 +0800 Subject: [PATCH 03/13] [Feature] change demo model path --- scripts/run_rm_inference.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/run_rm_inference.sh b/scripts/run_rm_inference.sh index 40a8c87af..78d90aa1e 100644 --- a/scripts/run_rm_inference.sh +++ b/scripts/run_rm_inference.sh @@ -3,8 +3,7 @@ # Parses arguments run_name=rm_inference -# model_name_or_path=sfairXC/FsfairX-LLaMA3-RM-v0.1 -model_name_or_path=/vol/yizhenjia/projs/RLHFlow-fox/models/rm/sfairXC-FsfairX-LLaMA3-RM-v0.1 +model_name_or_path=sfairXC/FsfairX-LLaMA3-RM-v0.1 dataset_path=data/alpaca/test output_dir=data/rm_inference_results output_file_name=results.json From c9c93de1ca61207f5883b1dd0872f3789dfb6fee Mon Sep 17 00:00:00 2001 From: Yizhen Date: Sun, 23 Jun 2024 11:25:24 +0800 Subject: [PATCH 04/13] [Feature] add conversation template arg for rm infer --- scripts/run_rm_inference.sh | 6 ++++++ 1 file changed, 6 insertions(+) 
diff --git a/scripts/run_rm_inference.sh b/scripts/run_rm_inference.sh index 78d90aa1e..f6446986d 100644 --- a/scripts/run_rm_inference.sh +++ b/scripts/run_rm_inference.sh @@ -7,6 +7,7 @@ model_name_or_path=sfairXC/FsfairX-LLaMA3-RM-v0.1 dataset_path=data/alpaca/test output_dir=data/rm_inference_results output_file_name=results.json +conversation_template=llama3 # Safety related arguments trust_remote_code=0 @@ -26,6 +27,10 @@ while [[ $# -ge 1 ]]; do dataset_path="$2" shift ;; + --conversation_template) + conversation_template="$2" + shift + ;; --output_dir) output_dir="$2" shift @@ -60,6 +65,7 @@ accelerate launch --config_file configs/accelerator_multigpu_config.yaml \ --block_size 4096 \ --inference_batch_size 16 \ --dataset_path ${dataset_path} \ + --conversation_template ${conversation_template} \ --preprocessing_num_workers 16 \ --save_results True \ --results_path ${output_file_path} \ From 9f0a1da3a8d02229afcdafd1dfb4832cdeffeaa1 Mon Sep 17 00:00:00 2001 From: Yizhen Date: Sun, 23 Jun 2024 12:56:45 +0800 Subject: [PATCH 05/13] [Usability] better to set overwrite_cache for rm inference in online rlhf --- scripts/run_rm_inference.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/run_rm_inference.sh b/scripts/run_rm_inference.sh index f6446986d..701daf120 100644 --- a/scripts/run_rm_inference.sh +++ b/scripts/run_rm_inference.sh @@ -65,6 +65,7 @@ accelerate launch --config_file configs/accelerator_multigpu_config.yaml \ --block_size 4096 \ --inference_batch_size 16 \ --dataset_path ${dataset_path} \ + --overwrite_cache True \ --conversation_template ${conversation_template} \ --preprocessing_num_workers 16 \ --save_results True \ From 8f9f9d8fadf785fa46879be1fabd0e246a2d35ec Mon Sep 17 00:00:00 2001 From: Yizhen Date: Tue, 25 Jun 2024 01:46:06 +0800 Subject: [PATCH 06/13] [Feature] Add dpo support --- src/lmflow/args.py | 77 +++++ src/lmflow/datasets/dataset.py | 46 ++- src/lmflow/models/hf_decoder_model.py | 8 +- 
src/lmflow/models/hf_model_mixin.py | 18 +- src/lmflow/models/hf_text_regression_model.py | 8 + src/lmflow/pipeline/dpov2_aligner.py | 269 ++++++++++++++++++ src/lmflow/pipeline/rm_inferencer.py | 73 ++--- .../pipeline/utils/dpov2_dataprocessor.py | 195 +++++++++++++ src/lmflow/pipeline/utils/dpov2_trainer.py | 245 ++++++++++++++++ .../tokenization/hf_text_regression_model.py | 90 +++++- src/lmflow/utils/constants.py | 32 ++- 11 files changed, 987 insertions(+), 74 deletions(-) create mode 100644 src/lmflow/pipeline/dpov2_aligner.py create mode 100644 src/lmflow/pipeline/utils/dpov2_dataprocessor.py create mode 100644 src/lmflow/pipeline/utils/dpov2_trainer.py diff --git a/src/lmflow/args.py b/src/lmflow/args.py index 9b4d9b7cb..6a1aa438e 100644 --- a/src/lmflow/args.py +++ b/src/lmflow/args.py @@ -99,6 +99,10 @@ class ModelArguments: Model architecture type. padding_side : str The side on which the tokenizer should have padding applied. + eos_padding : bool + whether to pad with eos token instead of pad token. + ignore_bias_buffers : bool + fix for DDP issues with LM bias/mask buffers (invalid scalar type, in-place operation). """ model_name_or_path: Optional[str] = field( @@ -312,6 +316,18 @@ class ModelArguments: "choices": ["right", "left", "auto"], } ) + eos_padding: Optional[bool] = field( + default=False, + metadata={"help": "whether to pad with eos token"} + ) + ignore_bias_buffers: Optional[bool] = field( + default=False, + metadata={ + # debug argument for distributed training + "help": "fix for DDP issues with LM bias/mask buffers - invalid scalar type,`inplace operation. See" + "https://github.com/huggingface/transformers/issues/22482#issuecomment-1595790992" + }, + ) def __post_init__(self): @@ -1272,6 +1288,67 @@ class DPOAlignerArguments: ) + +@dataclass +class DPOv2AlignerArguments(TrainingArguments): + """ + The arguments for the DPOv2 training script.
+ """ + + # data parameters, i.e., the KL penalty in the paper + beta: Optional[float] = field(default=0.1, metadata={"help": "the beta parameter for DPO loss"}) + + # training parameters + eval_dir: Optional[str] = field( + default="/export/home/hanze/project/vllm-gen/uf_split0_offline_reward.json", # "/export/home/data/gemma_it_2b_3w_k8_with_pairrm_rewards.json", + metadata={"help": "the location of the evalset name or path"}, + ) + learning_rate: Optional[float] = field(default=5e-7, metadata={"help": "optimizer learning rate"}) + lr_scheduler_type: Optional[str] = field( + default="constant_with_warmup", metadata={"help": "the lr scheduler type"} + ) + warmup_steps: Optional[int] = field(default=100, metadata={"help": "the number of warmup steps"}) + weight_decay: Optional[float] = field(default=0.01, metadata={"help": "the weight decay"}) + + per_device_train_batch_size: Optional[int] = field(default=1, metadata={"help": "train batch size per device"}) + per_device_eval_batch_size: Optional[int] = field(default=1, metadata={"help": "eval batch size per device"}) + gradient_accumulation_steps: Optional[int] = field( + default=16, metadata={"help": "the number of gradient accumulation steps"} + ) + gradient_checkpointing: Optional[bool] = field( + default=True, metadata={"help": "whether to use gradient checkpointing"} + ) + + + lora_alpha: Optional[float] = field(default=16, metadata={"help": "the lora alpha parameter"}) + lora_dropout: Optional[float] = field(default=0.05, metadata={"help": "the lora dropout parameter"}) + lora_r: Optional[int] = field(default=8, metadata={"help": "the lora r parameter"}) + + margin_scale: Optional[float] = field(default=1.0, metadata={"help": "the margin scale"}) + + max_prompt_length: Optional[int] = field(default=1000, metadata={"help": "the maximum prompt length"}) + max_length: Optional[int] = field(default=2048, metadata={"help": "the maximum sequence length"}) + num_train_epochs: Optional[int] = field(default=2, 
metadata={"help": "max number of training epochs"}) + logging_steps: Optional[int] = field(default=2, metadata={"help": "the logging frequency"}) + save_strategy: Optional[str] = field(default="epoch", metadata={"help": "the saving strategy"}) + save_steps: Optional[int] = field(default=50000, metadata={"help": "the saving frequency"}) + eval_steps: Optional[int] = field(default=100, metadata={"help": "the evaluation frequency"}) + run_name: Optional[str] = field(default="dpo_soft", metadata={"help": "the run name"}) + loss_type: Optional[str] = field(default="sigmoid", metadata={"help": "the loss type"}) + output_dir: Optional[str] = field(default="./dpo_soft", metadata={"help": "the output directory"}) + log_freq: Optional[int] = field(default=1, metadata={"help": "the logging frequency"}) + + # instrumentation + sampling_paired_method: Optional[str] = field(default="max_random", metadata={"help": "the choose type"}) + + mask_prompt: Optional[bool] = field(default=False, metadata={"help": "mask prompt"}) + length_penalty: Optional[float] = field(default=0, metadata={"help": "the length penalty"}) + + # need to add + evaluation_strategy: Optional[str] = field( + default="steps", + metadata={"help": "the evaluation strategy"} + ) + @dataclass class IterativeAlignerArguments(InferencerArguments): """ diff --git a/src/lmflow/datasets/dataset.py b/src/lmflow/datasets/dataset.py index 952fd9439..333ff4e65 100644 --- a/src/lmflow/datasets/dataset.py +++ b/src/lmflow/datasets/dataset.py @@ -39,12 +39,15 @@ "float_only", "image_text", "conversation", - "paired_conversation" + "paired_conversation", + "paired_text2text", + "grouped_text2text", + "grouped_conversation", ] KEY_TYPE = "type" KEY_INSTANCES = "instances" -KEY_SCORES = "score" +KEY_SCORES = "scores" class Dataset: r""" @@ -150,19 +153,11 @@ def _check_data_format(self): data_type = data_dict[KEY_TYPE] fields = self.get_backend_dataset().features correct_fields = INSTANCE_FIELDS_MAP[data_type] - # TODO: this can 
not guarantee every instance has correct fields. - if set(fields) != set(correct_fields): - if data_type == "conversation": - if "messages" not in fields: - raise ValueError( - f'Conversation dataset should have "messages" field' - f' but got {list(fields)}' - ) - else: - raise ValueError( - f'Data instance fields incorrect' - f' {list(fields)}: should be {list(correct_fields)}.' - ) + if not set(correct_fields).issubset(set(fields)): + raise ValueError( + f'data instance fields incorrect' + f' {list(correct_fields)} are required.' + ) def from_dict(self, dict_obj: dict, *args, **kwargs): @@ -226,20 +221,11 @@ def from_dict(self, dict_obj: dict, *args, **kwargs): for i, instance in enumerate(dict_obj[KEY_INSTANCES]): fields = instance.keys() - if set(fields) != set(correct_fields): - if self.type == "conversation": - if "messages" not in fields: - raise ValueError( - f'Conversation dataset should have "messages" field' - f' but got {list(fields)}' - ) - else: - raise ValueError( - f'data instance fields incorrect' - f' {list(fields)}: should be {list(correct_fields)}.\n' - f'The bad instance triggers the error, the {i}-th instance:\n' - f' {instance}' - ) + if not set(correct_fields).issubset(set(fields)): + raise ValueError( + f'data instance fields incorrect' + f' {list(correct_fields)} are required.' + ) try: hf_dict = {} @@ -423,7 +409,7 @@ def get_data_args(self): return self.data_args - def get_type(self): + def get_type(self) -> str: r""" Returns --------- diff --git a/src/lmflow/models/hf_decoder_model.py b/src/lmflow/models/hf_decoder_model.py index d987bfb8e..9e3630bc8 100644 --- a/src/lmflow/models/hf_decoder_model.py +++ b/src/lmflow/models/hf_decoder_model.py @@ -145,7 +145,13 @@ def __init__( ) - def tokenize(self, dataset, add_special_tokens=True, *args, **kwargs): + def tokenize( + self, + dataset, + add_special_tokens=True, + *args, + **kwargs + ) -> Dataset: """ Tokenize the full dataset. 
diff --git a/src/lmflow/models/hf_model_mixin.py b/src/lmflow/models/hf_model_mixin.py index 1dce5c57a..4fc35e451 100644 --- a/src/lmflow/models/hf_model_mixin.py +++ b/src/lmflow/models/hf_model_mixin.py @@ -144,7 +144,7 @@ def __prepare_tokenizer( bos_token="", eos_token="", **tokenizer_kwargs) - + tokenizer.truncation_side = model_args.truncation_side or tokenizer.truncation_side tokenizer.model_max_length = model_args.model_max_length or tokenizer.model_max_length @@ -322,6 +322,11 @@ def __prepare_model_for_training( logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params") self.backend_model_full = model + if model_args.ignore_bias_buffers: + model._ddp_params_and_buffers_to_ignore = [ + name for name, buffer in model.named_buffers() if buffer.dtype == torch.bool + ] + if model_args.use_lora: model.enable_input_require_grads() model = get_peft_model(model, self.peft_config) @@ -427,12 +432,19 @@ def __prepare_model_for_vllm_inference( def __prepare_model_post_process(self): + # old models/tokenizers may not have these attributes, fixing + if self.tokenizer.eos_token is None: + self.tokenizer.eos_token = self.backend_model.config.eos_token if self.tokenizer.eos_token_id is None: self.tokenizer.eos_token_id = self.backend_model.config.eos_token_id + + if self.tokenizer.pad_token is None: + self.tokenizer.pad_token = self.tokenizer.eos_token if self.tokenizer.pad_token_id is None: self.tokenizer.pad_token_id = self.tokenizer.eos_token_id - if self.backend_model.config.pad_token_id is None: - self.backend_model.config.pad_token_id = self.tokenizer.pad_token_id + + if self.model_args.eos_padding: + self.tokenizer.pad_token = self.tokenizer.eos_token def activate_model_for_inference( diff --git a/src/lmflow/models/hf_text_regression_model.py b/src/lmflow/models/hf_text_regression_model.py index a6bdf3305..8b8d2f19e 100644 --- a/src/lmflow/models/hf_text_regression_model.py +++ b/src/lmflow/models/hf_text_regression_model.py @@ -30,12 
+30,14 @@ paired_conversation_tokenize_function, conversation_tokenize_function, tokenize_function, + grouped_text2text_tokenize_function, ) from lmflow.utils.conversation_template import PRESET_TEMPLATES from lmflow.utils.constants import ( PAIRED_CONVERSATION_DATASET_DESCRIPTION, TEXT2TEXT_DATASET_DESCRIPTION, TEXT_ONLY_DATASET_DESCRIPTION, + GROUPED_TEXT2TEXT_DATASET_DESCRIPTION, CONVERSATION_DATASET_DESCRIPTION, ) @@ -196,6 +198,11 @@ def tokenize( tokenize_fn_kwargs["conversation_template"] = conversation_template logger.warning(f"Conversation template: {conversation_template}") + elif dataset_type == "grouped_text2text": + tokenize_fn = grouped_text2text_tokenize_function + tokenize_fn_kwargs["add_special_tokens"] = add_special_tokens + tokenize_fn_kwargs["use_truncation"] = use_truncation + else: raise NotImplementedError( f"Dataset type \"{dataset_type}\" is not supported, currently" @@ -204,6 +211,7 @@ def tokenize( f" 2) [Inference]{TEXT2TEXT_DATASET_DESCRIPTION}\n" f" 3) [Training]{PAIRED_CONVERSATION_DATASET_DESCRIPTION}\n" f" 4) [Inference]{CONVERSATION_DATASET_DESCRIPTION}\n" + f" 5) [Inference]{GROUPED_TEXT2TEXT_DATASET_DESCRIPTION}\n" ) tokenize_kwargs = {} diff --git a/src/lmflow/pipeline/dpov2_aligner.py b/src/lmflow/pipeline/dpov2_aligner.py new file mode 100644 index 000000000..2109c5d15 --- /dev/null +++ b/src/lmflow/pipeline/dpov2_aligner.py @@ -0,0 +1,269 @@ +import copy +import logging +import os +from typing import Optional, List, Tuple, Dict, Union + +import numpy as np +from tqdm import tqdm +from transformers import TrainingArguments + +from lmflow.pipeline.utils.dpov2_trainer import PreferenceTrainer +from lmflow.pipeline.base_aligner import BaseAligner +from lmflow.args import ( + ModelArguments, + DatasetArguments, + DPOv2AlignerArguments +) +from lmflow.models.hf_decoder_model import HFDecoderModel +from lmflow.datasets.dataset import Dataset, KEY_SCORES, KEY_TYPE, KEY_INSTANCES + + +logger = logging.getLogger(__name__) + + +class 
DPOv2Aligner(BaseAligner): + def __init__( + self, + model_args: ModelArguments, + ref_model_args: ModelArguments, + data_args: DatasetArguments, + aligner_args: DPOv2AlignerArguments, + ): + self.model_args = model_args + self.ref_model_args = ref_model_args + self.data_args = data_args + self.aligner_args = aligner_args + + + def align( + self, + model: HFDecoderModel, + ref_model: HFDecoderModel, + train_dataset: Dataset, + eval_dataset: Dataset, + transform_dataset_in_place: bool=True, + ): + # step 0. setting up + if self.aligner_args.gradient_checkpointing: + logger.warning( + "Setting backend_model.config.use_cache to False since using gradient checkpointing" + ) + model.get_backend_model().config.use_cache = False + ref_model.get_backend_model().config.use_cache = False + + # step 1. prepare datasets + paired_train_dataset = self.convert_grouped_to_paired_dataset( + grouped_dataset=train_dataset, + sampling_paired_method=self.aligner_args.sampling_paired_method, + length_penalty=self.aligner_args.length_penalty, + margin_scale=self.aligner_args.margin_scale, + use_fast=False, + ) + if self.data_args.max_train_samples: + paired_train_dataset.backend_dataset = paired_train_dataset.backend_dataset.select(range(self.data_args.max_train_samples)) + + paired_eval_dataset = self.convert_grouped_to_paired_dataset( + grouped_dataset=eval_dataset, + sampling_paired_method=self.aligner_args.sampling_paired_method, + margin_scale=self.aligner_args.margin_scale, + use_fast=False, + ) + + # step 2. 
prepare trainer + dpo_trainer = PreferenceTrainer( + model.get_backend_model(), + ref_model.get_backend_model(), + train_dataset=paired_train_dataset.get_backend_dataset(), # tokenization is done in the trainer + eval_dataset=paired_eval_dataset.get_backend_dataset(), + tokenizer=model.tokenizer, + args=self.__prepare_training_args(self.aligner_args), + beta=self.aligner_args.beta, + loss_type=self.aligner_args.loss_type, + max_prompt_length=self.aligner_args.max_prompt_length, + max_length=self.aligner_args.max_length, + mask_prompt=self.aligner_args.mask_prompt, + len_penalty=self.aligner_args.length_penalty, + ) + + # step 3. train + dpo_trainer.train() + dpo_trainer.save_model(self.aligner_args.output_dir) + + # step 4. save + output_dir = os.path.join(self.aligner_args.output_dir, "final_checkpoint") + dpo_trainer.model.save_pretrained(output_dir) + + + def __prepare_training_args( + self, + aligner_args: DPOv2AlignerArguments, + ) -> TrainingArguments: + training_args = TrainingArguments( + per_device_train_batch_size=aligner_args.per_device_train_batch_size, + per_device_eval_batch_size=aligner_args.per_device_eval_batch_size, + num_train_epochs=aligner_args.num_train_epochs, + save_strategy=aligner_args.save_strategy, + logging_steps=aligner_args.logging_steps, + save_steps=aligner_args.save_steps, + gradient_accumulation_steps=aligner_args.gradient_accumulation_steps, + gradient_checkpointing=aligner_args.gradient_checkpointing, + learning_rate=aligner_args.learning_rate, + evaluation_strategy=aligner_args.evaluation_strategy, + eval_steps=aligner_args.eval_steps, + output_dir=aligner_args.output_dir, + lr_scheduler_type=aligner_args.lr_scheduler_type, + warmup_steps=aligner_args.warmup_steps, + optim=aligner_args.optim, + bf16=aligner_args.bf16, + report_to=aligner_args.report_to, + run_name=aligner_args.run_name, + remove_unused_columns=False, # DO NOT CHANGE THIS, may cause error + ) + logger.warning(f"Actual training arguments for dpo trainer:
{training_args}") + + return training_args + + + def convert_grouped_to_paired_dataset( + self, + grouped_dataset: Dataset, + sampling_paired_method: str="random", + length_penalty: float=0.0, + margin_scale: float=1.0, + use_fast: bool=False, + ) -> Dataset: + """Convert a grouped dataset to a paired dataset by rejection sampling. + """ + output_dict = { + KEY_TYPE: f"paired_{grouped_dataset.get_type().replace('grouped_','')}", + KEY_INSTANCES: [] + } + + for sample in tqdm(grouped_dataset.get_backend_dataset(), desc="Converting to paired dataset"): + sample_output_dict = {} + lengths = self._calc_response_lengths(sample["outputs"], grouped_dataset.get_type()) + penalized_rewards = self._calc_reward_with_length_penalty( + rewards=sample[KEY_SCORES], + lengths=lengths, + length_penalty=length_penalty + ) + chosen_idx, rejected_idx = self.sampling_paired_idx_from_rewards( + rewards=penalized_rewards, + sampling_paired_method=sampling_paired_method, + use_fast=use_fast + ) + + sample_output_dict["prompt"] = sample["input"] + sample_output_dict["chosen"] = sample["outputs"][chosen_idx] + sample_output_dict["rejected"] = sample["outputs"][rejected_idx] + sample_output_dict["margin"] = (sample[KEY_SCORES][chosen_idx] - sample[KEY_SCORES][rejected_idx]) * margin_scale + output_dict[KEY_INSTANCES].append(sample_output_dict) + + output_dataset_args = copy.deepcopy(grouped_dataset.data_args) + output_dataset_args.dataset_path = None + output_dataset_args.dataset_name = f"paired_{output_dataset_args.dataset_name}" + output_dataset = Dataset(output_dataset_args) + output_dataset = output_dataset.from_dict(output_dict) + + return output_dataset + + + def _calc_response_lengths( + self, + outputs: List[Union[str, Dict[str, str]]], + dataset_type: str, + ) -> List[int]: + all_lengths = [] + if dataset_type == 'grouped_text2text': + all_lengths = [len(output) for output in outputs] + + else: + raise NotImplementedError( + f"Unknown dataset type {dataset_type} when calculating the 
response length." + ) + + return all_lengths + + + def _calc_reward_with_length_penalty( + self, + rewards: List[float], + lengths: List[int], + length_penalty: float, + ) -> List[float]: + """When length_penalty > 0, penalize the longer sequence by subtracting + length_penalty * length from the reward. Vice versa when length_penalty < 0. + """ + assert len(rewards) == len(lengths) + return [reward - length_penalty * length for reward, length in zip(rewards, lengths)] + + + def sampling_paired_idx_from_rewards( + self, + rewards: List[float], + sampling_paired_method: str="random", + use_fast: bool=False, + ) -> Tuple[int, int]: + """Prepare the dataset for DPO training by rejection sampling. + We implement different strategies to select pairs, including + random: randomly select two instances + max_min: best v.s. worst + max_max: best v.s. second best + max_random: best v.s. random from the remaining + """ + if use_fast: + return self._sampling_paired_idx_from_rewards_fast(rewards, sampling_paired_method) + else: + return self._sampling_paired_idx_from_rewards(rewards, sampling_paired_method) + + + def _sampling_paired_idx_from_rewards( + self, + rewards: List[float], + sampling_paired_method: str="random" + ) -> Tuple[int, int]: + idx_0, idx_1 = -1, -1 + + if sampling_paired_method == "random": + idx_0, idx_1 = np.random.choice(len(rewards), size=2, replace=False) + elif sampling_paired_method == "max_min": + idx_0, idx_1 = np.argmax(rewards), np.argmin(rewards) + elif sampling_paired_method == "max_max": + sorted_indices = np.argsort(rewards) + idx_0, idx_1 = sorted_indices[-1], sorted_indices[-2] + elif sampling_paired_method == "max_random": + idx_0 = np.argmax(rewards) + idx_1 = np.random.choice([i for i in range(len(rewards)) if i != idx_0]) + else: + raise ValueError(f"Unknown sampling method: {sampling_paired_method}") + + chosen_idx, rejected_idx = (idx_0, idx_1) if rewards[idx_0] > rewards[idx_1] else (idx_1, idx_0) + + return chosen_idx, rejected_idx + 
+ + def _sampling_paired_idx_from_rewards_fast( + self, + rewards: List[float], + sampling_paired_method: str="random" + ) -> Tuple[int, int]: + idx_0, idx_1 = -1, -1 + + if sampling_paired_method == "random": + idx_0, idx_1 = 0, 1 + elif sampling_paired_method == "max_min": + idx_0, idx_1 = np.argmax(rewards), np.argmin(rewards) + elif sampling_paired_method == "max_max": + sorted_indices = np.argsort(rewards) + idx_0, idx_1 = sorted_indices[-1], sorted_indices[-2] + elif sampling_paired_method == "max_random": + idx_0 = np.argmax(rewards) + idx_1 = 0 if idx_0 != 0 else 1 + else: + raise ValueError(f"Unknown sampling method: {sampling_paired_method}") + + chosen_idx, rejected_idx = (idx_0, idx_1) if rewards[idx_0] > rewards[idx_1] else (idx_1, idx_0) + + return chosen_idx, rejected_idx + + \ No newline at end of file diff --git a/src/lmflow/pipeline/rm_inferencer.py b/src/lmflow/pipeline/rm_inferencer.py index b5a3d81e1..087f4dec4 100644 --- a/src/lmflow/pipeline/rm_inferencer.py +++ b/src/lmflow/pipeline/rm_inferencer.py @@ -12,7 +12,7 @@ import json import time import logging -from typing import Dict, List, Union +from typing import Dict, List, Union, Tuple from accelerate import Accelerator import torch @@ -34,6 +34,7 @@ set_random_seed, batchlize ) +from lmflow.datasets.dataset import KEY_SCORES os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warnings about parallelism in tokenizers @@ -99,7 +100,7 @@ def inference( if not transform_dataset_in_place: dataset = copy.deepcopy(dataset) output_dict = { - "type": f"scored_{dataset.get_type()}", + "type": f"grouped_{dataset.get_type().lstrip('grouped_')}", "instances": dataset.to_dict()["instances"], } @@ -109,11 +110,11 @@ def inference( scores = self.__inference(model, dataset) for i, score in enumerate(scores): - output_dict["instances"][i]["score"] = score + output_dict["instances"][i][KEY_SCORES] = score output_dataset_args = copy.deepcopy(self.data_args) output_dataset_args.dataset_path = None - 
output_dataset_args.dataset_name = f"scored_{output_dataset_args.dataset_name}" + output_dataset_args.dataset_name = f"{output_dataset_args.dataset_name}_scored" output_dataset = Dataset(output_dataset_args) output_dataset = output_dataset.from_dict(output_dict) @@ -124,12 +125,17 @@ def __inference( self, model: HFTextRegressionModel, dataset: Dataset, - ) -> List[float]: + ) -> Union[List[float], List[List[float]]]: tokenized_dataset = model.tokenize(dataset) - dataloader, _ = self.create_dataloader( - dataset=tokenized_dataset, + if 'grouped_' in dataset.get_type(): + model_input_ids, num_outputs = self.flatten_list(tokenized_dataset.get_backend_dataset()["input_ids"]) + else: + model_input_ids = tokenized_dataset.get_backend_dataset()["input_ids"] + + dataloader = batchlize( + examples=model_input_ids, batch_size=self.inferencer_args.inference_batch_size, - random_shuffle=False, # no need to shuffle when inference + random_shuffle=False, # DO NOT shuffle when inference ) num_batches = len(dataloader) final_output = [] @@ -157,6 +163,9 @@ def __inference( batch_output = self.__post_process_model_output(batch_output) final_output.extend(batch_output) + if 'grouped_' in dataset.get_type(): + final_output = self.compress_list(final_output, num_outputs) + return final_output @@ -175,31 +184,27 @@ def __post_process_model_output( final_output = model_output.logits.to("cpu").reshape(-1).tolist() return final_output - - - def create_dataloader( + + + def flatten_list( self, - dataset: Dataset, - batch_size: int = 1, - random_shuffle: bool = False, - ): - r"""Batchlize dataset and format it to dataloader. 
- - Args: - dataset (Dataset): the dataset object - - Output: - dataloader (batchlize): the dataloader object - dataset_size (int): the length of the dataset - - """ - inputs = dataset.get_backend_dataset()["input_ids"] # this comes from lmflow model.tokenize(dataset) - dataset_size = len(inputs) + list_of_list: List[List] + ) -> Tuple[List, List[int]]: + sublist_lengths = [len(sublist) for sublist in list_of_list] + flattened_list = [item for sublist in list_of_list for item in sublist] + return flattened_list, sublist_lengths + - dataloader = batchlize( - inputs, - batch_size=batch_size, - random_shuffle=random_shuffle, - ) - return dataloader, dataset_size - \ No newline at end of file + def compress_list( + self, + list_to_compress: List, + sublist_lengths: List[int] + ) -> List[List]: + assert sum(sublist_lengths) == len(list_to_compress), "Sum of sublist lengths should be equal to length of list to compress." + compressed_list = [] + start_index = 0 + for length in sublist_lengths: + sublist = list_to_compress[start_index: start_index + length] + compressed_list.append(sublist) + start_index += length + return compressed_list diff --git a/src/lmflow/pipeline/utils/dpov2_dataprocessor.py b/src/lmflow/pipeline/utils/dpov2_dataprocessor.py new file mode 100644 index 000000000..4c74c8306 --- /dev/null +++ b/src/lmflow/pipeline/utils/dpov2_dataprocessor.py @@ -0,0 +1,195 @@ +from dataclasses import dataclass +import logging +from typing import Optional, Union, Dict, List, Any + +import torch +from torch.nn.utils.rnn import pad_sequence +from transformers import ( + PreTrainedModel, + PreTrainedTokenizerBase, +) + + +logger = logging.getLogger(__name__) + + +@dataclass +class PreferenceDataCollatorWithPadding: + tokenizer: PreTrainedTokenizerBase + model: Optional[PreTrainedModel] = None + padding: Union[bool, str] = True + max_length: Optional[int] = None + max_prompt_length: Optional[int] = None + label_pad_token_id: int = -100 + padding_value: int = 0 + 
truncation_mode: str = "keep_end" + is_encoder_decoder: Optional[bool] = False + max_target_length: Optional[int] = None + mask_prompt: Optional[bool] = False + + + def tokenize_batch_element( + self, + prompt: str, + chosen: str, + rejected: str, + ) -> Dict: + """Tokenize a single batch element. + + At this stage, we don't convert to PyTorch tensors yet; we just handle the truncation + in case the prompt + chosen or prompt + rejected responses is/are too long. First + we truncate the prompt; if we're still too long, we truncate the chosen/rejected. + + We also create the labels for the chosen/rejected responses, which are of length equal to + the sum of the length of the prompt and the chosen/rejected response, with + label_pad_token_id for the prompt tokens. + """ + batch = {} + + if self.is_encoder_decoder: + raise NotImplementedError + + chosen_tokens = self.tokenizer(chosen, add_special_tokens=False) + rejected_tokens = self.tokenizer(rejected, add_special_tokens=False) + prompt_tokens = self.tokenizer(prompt, add_special_tokens=False) + + eos_token_id = self.tokenizer.eos_token_id + # Get indices in list prompt_tokens["input_ids"] that equals the EOS token (often 0) + eos_indices_prompt = [i for i, x in enumerate(prompt_tokens["input_ids"]) if x == eos_token_id] + # attention mask these indices to eos_token_id + if self.mask_prompt: + new_attention_mask = [0 for i, p in enumerate(prompt_tokens["attention_mask"])] + else: + new_attention_mask = [ + 0 if i in eos_indices_prompt else p for i, p in enumerate(prompt_tokens["attention_mask"]) + ] + prompt_tokens["attention_mask"] = new_attention_mask + + # do the same for chosen and rejected + eos_indices_chosen = [i for i, x in enumerate(chosen_tokens["input_ids"]) if x == eos_token_id] + new_attention_mask_c = [ + 0 if i in eos_indices_chosen else p for i, p in enumerate(chosen_tokens["attention_mask"]) + ] + chosen_tokens["attention_mask"] = new_attention_mask_c + + eos_indices_rejected = [i for i, x in 
enumerate(rejected_tokens["input_ids"]) if x == eos_token_id] + new_attention_mask_r = [ + 0 if i in eos_indices_rejected else p for i, p in enumerate(rejected_tokens["attention_mask"]) + ] + rejected_tokens["attention_mask"] = new_attention_mask_r + + # add EOS token to end of prompt + + chosen_tokens["input_ids"].append(self.tokenizer.eos_token_id) + chosen_tokens["attention_mask"].append(1) + + rejected_tokens["input_ids"].append(self.tokenizer.eos_token_id) + rejected_tokens["attention_mask"].append(1) + + longer_response_length = max(len(chosen_tokens["input_ids"]), len(rejected_tokens["input_ids"])) + + # if combined sequence is too long, truncate the prompt + if len(prompt_tokens["input_ids"]) + longer_response_length > self.max_length: + if self.truncation_mode == "keep_start": + prompt_tokens = {k: v[: self.max_prompt_length] for k, v in prompt_tokens.items()} + elif self.truncation_mode == "keep_end": + prompt_tokens = {k: v[-self.max_prompt_length :] for k, v in prompt_tokens.items()} + else: + raise ValueError(f"Unknown truncation mode: {self.truncation_mode}") + + # if that's still too long, truncate the response + if len(prompt_tokens["input_ids"]) + longer_response_length > self.max_length: + chosen_tokens = {k: v[: self.max_length - self.max_prompt_length] for k, v in chosen_tokens.items()} + rejected_tokens = { + k: v[: self.max_length - self.max_prompt_length] for k, v in rejected_tokens.items() + } + + # Create labels + chosen_sequence_tokens = {k: prompt_tokens[k] + chosen_tokens[k] for k in chosen_tokens} + rejected_sequence_tokens = {k: prompt_tokens[k] + rejected_tokens[k] for k in rejected_tokens} + chosen_sequence_tokens["labels"] = chosen_sequence_tokens["input_ids"][:] + chosen_sequence_tokens["labels"][: len(prompt_tokens["input_ids"])] = [self.label_pad_token_id] * len( + prompt_tokens["input_ids"] + ) + rejected_sequence_tokens["labels"] = rejected_sequence_tokens["input_ids"][:] + rejected_sequence_tokens["labels"][: 
len(prompt_tokens["input_ids"])] = [self.label_pad_token_id] * len( + prompt_tokens["input_ids"] + ) + + for k, toks in { + "chosen": chosen_sequence_tokens, + "rejected": rejected_sequence_tokens, + "prompt": prompt_tokens, + }.items(): + for type_key, tokens in toks.items(): + if type_key == "token_type_ids": + continue + batch[f"{k}_{type_key}"] = tokens + + + + batch["prompt"] = prompt + batch["chosen"] = prompt + chosen + batch["rejected"] = prompt + rejected + batch["chosen_response_only"] = chosen + batch["rejected_response_only"] = rejected + + return batch + + + def collate(self, batch): + # first, pad everything to the same length + padded_batch = {} + for k in batch[0].keys(): + if k.endswith("_input_ids") or k.endswith("_attention_mask") or k.endswith("_labels"): + if self.is_encoder_decoder: + to_pad = [torch.LongTensor(ex[k]) for ex in batch] + + if (k.startswith("prompt")) and (k.endswith("input_ids")): + padding_value = self.tokenizer.pad_token_id + elif k.endswith("_attention_mask"): + padding_value = 0 + elif (k.startswith("chosen")) or (k.startswith("rejected")) or ("decoder" in k): + padding_value = self.label_pad_token_id + else: + raise ValueError(f"Unexpected key in batch '{k}'") + padded_batch[k] = pad_sequence(to_pad, batch_first=True, padding_value=padding_value) + else: + # adapted from https://stackoverflow.com/questions/73256206 + if "prompt" in k: + to_pad = [torch.LongTensor(ex[k][::-1]) for ex in batch] + else: + to_pad = [torch.LongTensor(ex[k]) for ex in batch] + if k.endswith("_input_ids"): + padding_value = self.tokenizer.pad_token_id + elif k.endswith("_labels"): + padding_value = self.label_pad_token_id + elif k.endswith("_attention_mask"): + padding_value = self.padding_value + else: + raise ValueError(f"Unexpected key in batch '{k}'") + + padded_batch[k] = pad_sequence(to_pad, batch_first=True, padding_value=padding_value) + # for the prompt, flip back so padding is on left side + if "prompt" in k: + padded_batch[k] = 
padded_batch[k].flip(dims=[1]) + else: + padded_batch[k] = [ex[k] for ex in batch] + + return padded_batch + + + def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]: + tokenized_batch = [] + + for feature in features: + prompt = feature["prompt"] + chosen = feature["chosen"] + rejected = feature["rejected"] + + batch_element = self.tokenize_batch_element(prompt, chosen, rejected) + batch_element["margin"] = feature["margin"] + tokenized_batch.append(batch_element) + + # return collated batch + return self.collate(tokenized_batch) \ No newline at end of file diff --git a/src/lmflow/pipeline/utils/dpov2_trainer.py b/src/lmflow/pipeline/utils/dpov2_trainer.py new file mode 100644 index 000000000..a43e96f2d --- /dev/null +++ b/src/lmflow/pipeline/utils/dpov2_trainer.py @@ -0,0 +1,245 @@ +import logging +from typing import Optional, Union, Dict, List, Any, Tuple, Callable, Literal + +from datasets import Dataset +import torch +import torch.nn as nn +import torch.nn.functional as F +from transformers import ( + PreTrainedModel, + PreTrainedTokenizerBase, + DataCollator, + TrainingArguments, + TrainerCallback +) +from transformers.trainer_callback import TrainerCallback +from transformers.trainer_utils import EvalLoopOutput +from trl import DPOTrainer + +from lmflow.pipeline.utils.dpov2_dataprocessor import PreferenceDataCollatorWithPadding + + +logger = logging.getLogger(__name__) + + +class PreferenceTrainer(DPOTrainer): + def __init__( + self, + model: Union[PreTrainedModel, nn.Module] = None, + ref_model: Optional[Union[PreTrainedModel, nn.Module]] = None, + beta: float = 0.1, + loss_type: Literal["sigmoid", "hinge", "cross_entropy", "kl", "rev_kl", "raft"] = "rev_kl", + args: TrainingArguments = None, + data_collator: Optional[DataCollator] = None, + label_pad_token_id: int = -100, + padding_value: int = 0, + truncation_mode: str = "keep_end", + train_dataset: Optional[Dataset] = None, + eval_dataset: Optional[Union[Dataset, Dict[str, Dataset]]] = 
None, + tokenizer: Optional[PreTrainedTokenizerBase] = None, + model_init: Optional[Callable[[], PreTrainedModel]] = None, + callbacks: Optional[List[TrainerCallback]] = None, + optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = ( + None, + None, + ), + preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, + max_length: Optional[int] = None, + max_prompt_length: Optional[int] = None, + max_target_length: Optional[int] = None, + peft_config: Optional[Dict] = None, + is_encoder_decoder: Optional[bool] = None, + disable_dropout: bool = True, + generate_during_eval: bool = False, + compute_metrics: Optional[Callable[[EvalLoopOutput], Dict]] = None, + mask_prompt: Optional[bool] = False, + len_penalty: float = 0, + ): + + if data_collator is None: + data_collator = PreferenceDataCollatorWithPadding( + tokenizer, + max_length=max_length, + max_prompt_length=max_prompt_length, + label_pad_token_id=label_pad_token_id, + padding_value=padding_value, + truncation_mode=truncation_mode, + is_encoder_decoder=False, + max_target_length=max_target_length, + mask_prompt=mask_prompt, + ) + super().__init__( + model=model, + ref_model=ref_model, + beta=beta, + loss_type=loss_type, + args=args, + data_collator=data_collator, + label_pad_token_id=label_pad_token_id, + padding_value=padding_value, + truncation_mode=truncation_mode, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + tokenizer=tokenizer, + model_init=model_init, + callbacks=callbacks, + optimizers=optimizers, + preprocess_logits_for_metrics=preprocess_logits_for_metrics, + max_length=max_length, + max_prompt_length=max_prompt_length, + max_target_length=max_target_length, + peft_config=peft_config, + is_encoder_decoder=is_encoder_decoder, + disable_dropout=disable_dropout, + generate_during_eval=generate_during_eval, + compute_metrics=compute_metrics, + ) + self.use_dpo_data_collator = True + self.len_penalty = len_penalty + + def 
dpo_loss( + self, + policy_chosen_logps: torch.FloatTensor, + policy_rejected_logps: torch.FloatTensor, + reference_chosen_logps: torch.FloatTensor, + reference_rejected_logps: torch.FloatTensor, + reference_free: bool = False, + margin: Optional[torch.FloatTensor] = None, + len_penalty: float = 0, + ) -> Tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]: + """Compute the DPO loss for a batch of policy and reference model log probabilities. + + Args: + policy_chosen_logps: Log probabilities of the policy model for the chosen responses. Shape: (batch_size,) + policy_rejected_logps: Log probabilities of the policy model for the rejected responses. Shape: (batch_size,) + reference_chosen_logps: Log probabilities of the reference model for the chosen responses. Shape: (batch_size,) + reference_rejected_logps: Log probabilities of the reference model for the rejected responses. Shape: (batch_size,) + beta: Temperature parameter for the DPO loss, typically something in the range of 0.1 to 0.5. We ignore the reference model as beta -> 0. + reference_free: If True, we ignore the _provided_ reference model and implicitly use a reference model that assigns equal probability to all responses. + + Returns: + A tuple of three tensors: (losses, chosen_rewards, rejected_rewards). + The losses tensor contains the DPO loss for each example in the batch. + The chosen_rewards and rejected_rewards tensors contain the rewards for the chosen and rejected responses, respectively. 
+ """ + pi_logratios = policy_chosen_logps - policy_rejected_logps + ref_logratios = reference_chosen_logps - reference_rejected_logps + len_penalty + + if reference_free: + ref_logratios = 0 + + if self.loss_type == "sigmoid": + logits = pi_logratios - ref_logratios + losses = -F.logsigmoid(self.beta * logits) + elif self.loss_type == "hinge": + logits = pi_logratios - ref_logratios + losses = torch.relu(1 - self.beta * logits) + elif self.loss_type == "cross_entropy": + logits = policy_chosen_logps - reference_chosen_logps + losses = -F.logsigmoid(self.beta * logits) + elif self.loss_type == "raft": + losses = -policy_chosen_logps # F.logsigmoid(self.beta * logits) + elif self.loss_type == "ipo": + logits = pi_logratios - ref_logratios + # eqn (17) of the paper where beta is the regularization parameter for the IPO loss, denoted by tau in the paper. + losses = (logits - 1 / (2 * self.beta)) ** 2 + elif self.loss_type == "kl": + logits = pi_logratios - ref_logratios + p = F.sigmoid(self.beta * logits) + p = torch.minimum(p, torch.ones_like(p) * 0.999) + p_gt = torch.exp(margin) / (1 + torch.exp(margin) + 1e-3) + losses = p * (torch.log(p) - torch.log(p_gt)) + (1 - p) * (torch.log(1 - p) - torch.log(1 - p_gt)) + elif self.loss_type == "tv": + logits = pi_logratios - ref_logratios + p = F.sigmoid(self.beta * logits) + p_gt = torch.exp(margin) / (1 + torch.exp(margin)) + losses = torch.abs(p - p_gt) + elif self.loss_type == "hellinger": + logits = pi_logratios - ref_logratios + p = F.sigmoid(self.beta * logits) + p = torch.minimum(p, torch.ones_like(p) * 0.999) + p_gt = torch.exp(margin) / (1 + torch.exp(margin)) + losses = 0.5 * ((p**0.5 - p_gt**0.5) ** 2 + ((1 - p) ** 0.5 - (1 - p_gt) ** 0.5) ** 2) + elif self.loss_type == "rev_kl": + logits = pi_logratios - ref_logratios + logp = F.logsigmoid(self.beta * logits) + logp_neg = F.logsigmoid(-self.beta * logits) + p_gt = F.sigmoid(margin) + losses = -p_gt * (logp) - (1 - p_gt) * logp_neg + else: + raise 
ValueError(f"Unknown loss type: {self.loss_type}.") + + chosen_rewards = self.beta * (policy_chosen_logps - reference_chosen_logps).detach() + rejected_rewards = self.beta * (policy_rejected_logps - reference_rejected_logps).detach() + + return losses, chosen_rewards, rejected_rewards + + def get_batch_loss_metrics( + self, + model, + batch: Dict[str, Union[List, torch.LongTensor]], + train_eval: Literal["train", "eval"] = "train", + ): + return self.get_batch_metrics(model, batch, train_eval) + + def get_batch_metrics( + self, + model, + batch: Dict[str, Union[List, torch.LongTensor]], + train_eval: Literal["train", "eval"] = "train", + ): + """Compute the DPO loss and other metrics for the given batch of inputs for train or test.""" + metrics = {} + ( + policy_chosen_logps, + policy_rejected_logps, + policy_chosen_logits, + policy_rejected_logits, + ) = self.concatenated_forward(model, batch) + with torch.no_grad(): + if self.ref_model is None: + with self.accelerator.unwrap_model(self.model).disable_adapter(): + ( + reference_chosen_logps, + reference_rejected_logps, + _, + _, + ) = self.concatenated_forward(self.model, batch) + else: + ( + reference_chosen_logps, + reference_rejected_logps, + _, + _, + ) = self.concatenated_forward(self.ref_model, batch) + if self.len_penalty > 0: + chosen_len = batch["chosen_input_ids"].shape[1] * self.len_penalty + rejected_len = batch["rejected_input_ids"].shape[1] * self.len_penalty + len_penalty = chosen_len - rejected_len + else: + chosen_len = 1 + rejected_len = 1 + len_penalty = 0 + + margin = torch.tensor(batch["margin"], dtype=policy_chosen_logps.dtype).to(self.accelerator.device) + losses, chosen_rewards, rejected_rewards = self.dpo_loss( + policy_chosen_logps, + policy_rejected_logps, + reference_chosen_logps, + reference_rejected_logps, + margin=margin, + len_penalty=len_penalty, + ) + reward_accuracies = (chosen_rewards > rejected_rewards).float() + + prefix = "eval_" if train_eval == "eval" else "" + 
metrics[f"{prefix}rewards/chosen"] = chosen_rewards.cpu().mean() + metrics[f"{prefix}rewards/rejected"] = rejected_rewards.cpu().mean() + metrics[f"{prefix}rewards/accuracies"] = reward_accuracies.cpu().mean() + metrics[f"{prefix}rewards/margins"] = (chosen_rewards - rejected_rewards).cpu().mean() + metrics[f"{prefix}logps/rejected"] = policy_rejected_logps.detach().cpu().mean() + metrics[f"{prefix}logps/chosen"] = policy_chosen_logps.detach().cpu().mean() + metrics[f"{prefix}logits/rejected"] = policy_rejected_logits.detach().cpu().mean() + metrics[f"{prefix}logits/chosen"] = policy_chosen_logits.detach().cpu().mean() + + return losses.mean(), metrics diff --git a/src/lmflow/tokenization/hf_text_regression_model.py b/src/lmflow/tokenization/hf_text_regression_model.py index 806ff5654..cb1344543 100644 --- a/src/lmflow/tokenization/hf_text_regression_model.py +++ b/src/lmflow/tokenization/hf_text_regression_model.py @@ -63,7 +63,7 @@ def blocking_paired( if block_size_warning_num > 0: logger.warning( f"There are {block_size_warning_num} of {num_example} samples where" - f"block_size {block_size} < model_max_length" + f" block_size {block_size} < model_max_length" f" {model_max_length}, use block_size" " for maximum tokenized sequence length" ) @@ -119,7 +119,52 @@ def blocking( if block_size_warning_num > 0: logger.warning( f"There are {block_size_warning_num} of {num_example} samples where" - f"block_size {block_size} < model_max_length" + f" block_size {block_size} < model_max_length" + f" {model_max_length}, use block_size" + " for maximum tokenized sequence length" + ) + + return token_dict + + +def blocking_grouped_text2text( + token_dict: Dict, + block_size: int, + model_max_length: int, + pad_token_id: int, + padding_side: str, +) -> Dict: + block_size_warning_num = 0 + num_example = len(token_dict[list(token_dict.keys())[0]]) + max_length = min(block_size, model_max_length) + + for example_idx in range(num_example): + for content_idx in 
range(len(token_dict["input_ids"][example_idx])): + pad_length = max_length - len(token_dict["input_ids"][example_idx][content_idx]) + if block_size < model_max_length: + block_size_warning_num += 1 + if pad_length < 0: + # Truncates too long samples + token_dict["input_ids"][example_idx][content_idx] = token_dict["input_ids"][example_idx][content_idx][:pad_length] + else: + if padding_side == 'right': + # Pads too short samples + token_dict["input_ids"][example_idx][content_idx].extend( + [pad_token_id for _ in range(pad_length)] + ) + elif padding_side == 'left': + # Pads too short samples + token_dict["input_ids"][example_idx][content_idx] = ( + [pad_token_id for _ in range(pad_length)] + token_dict["input_ids"][example_idx][content_idx] + ) + else: + raise ValueError( + f"padding_side should be either 'right' or 'left', got {padding_side}" + ) + if block_size_warning_num > 0: + logger.warning( + f"There are {block_size_warning_num} of {num_example} samples where" + f" block_size {block_size} < model_max_length" f" {model_max_length}, use block_size" " for maximum tokenized sequence length" ) @@ -321,4 +366,43 @@ def tokenize_function( "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits" " before being passed to the model." ) - return token_dict \ No newline at end of file + return token_dict + + +def grouped_text2text_tokenize_function( + examples, + data_args: DatasetArguments, + tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast], + column_names, + add_special_tokens, + use_truncation, +) -> Dict: + """For rm inference, and don't need attn mask and labels. 
+ NOTE: input_ids here refers to the tokenized input_ids of the input **and** output + """ + num_example = len(examples[column_names[0]]) + output_dict = {column_name: examples[column_name] for column_name in column_names} + output_dict["input_ids"] = [[] for _ in range(num_example)] + + for example_idx in range(num_example): + encoded = tokenizer( + [ + examples["input"][example_idx] + examples["outputs"][example_idx][i] + for i in range(len(examples["outputs"][example_idx])) + ], + add_special_tokens=add_special_tokens, + truncation=use_truncation, + ) + + output_dict["input_ids"][example_idx] = encoded["input_ids"] + + if data_args.disable_group_texts: + output_dict = blocking_grouped_text2text( + token_dict=output_dict, + block_size=data_args.block_size, + model_max_length=tokenizer.model_max_length, + pad_token_id=tokenizer.pad_token_id, + padding_side=tokenizer.padding_side, + ) + + return output_dict \ No newline at end of file diff --git a/src/lmflow/utils/constants.py b/src/lmflow/utils/constants.py index 323e4d723..4fda4fd92 100644 --- a/src/lmflow/utils/constants.py +++ b/src/lmflow/utils/constants.py @@ -207,6 +207,32 @@ ).lstrip("\n") +GROUPED_TEXT2TEXT_DATASET_DESCRIPTION = ( +""" +This kind of dataset is commonly used in reward model training/prediction, as well as rl training. 
+{ + "type": "grouped_text2text", + "instances": [ + { + "input": "what's your name?", + "outputs": [ + "My name is John", + "I'm John", + ] + }, + { + "input": "Who are you?", + "outputs": [ + "My name is Amy", + "I'm Amy", + ] + }, + ] +} +""" +) + + TEXT2TEXT_DATASET_DETAILS = ( """ For example, @@ -297,11 +323,11 @@ "text2text": ["input", "output"], "conversation": ["messages"], # system, tools and conversation_id are optional "paired_conversation": ["chosen", "rejected"], + "paired_text2text": ["prompt", "chosen", "rejected"], "float_only": ["value"], "image_text": ["images", "text"], - "scored_text_only": ["text", "score"], - "scored_text2text": ["input", "output", "score"], - "scored_conversation": ["messages", "score"], + "grouped_text2text": ["input", "outputs"], + "grouped_conversation": ["input", "outputs"], } CONVERSATION_ROLE_NAMES = { From 89138850d30c2dc1a35b74dba8d13314711a8f2c Mon Sep 17 00:00:00 2001 From: Yizhen Date: Tue, 25 Jun 2024 18:11:57 +0800 Subject: [PATCH 07/13] [Usability] vllm inferencer save in utf-8 --- src/lmflow/pipeline/vllm_inferencer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lmflow/pipeline/vllm_inferencer.py b/src/lmflow/pipeline/vllm_inferencer.py index a109aef91..f6c1f71a4 100644 --- a/src/lmflow/pipeline/vllm_inferencer.py +++ b/src/lmflow/pipeline/vllm_inferencer.py @@ -153,8 +153,8 @@ def save_inference_results( outputs: Union[List[List[str]], List[List[List[int]]]], save_file_path: str, ): - with open(save_file_path, "w") as f: - json.dump(outputs, f) + with open(save_file_path, "w", encoding='utf-8') as f: + json.dump(outputs, f, ensure_ascii=False, indent=4) logger.info(f"Inference results are saved to {save_file_path}.") From fc74bf7f5c55e3e94cc2dd4e16ce9e8e063b2a93 Mon Sep 17 00:00:00 2001 From: Yizhen Date: Wed, 26 Jun 2024 23:35:15 +0800 Subject: [PATCH 08/13] [Feature] rm inference and dpov2 --- src/lmflow/datasets/dataset.py | 30 +++-- src/lmflow/models/hf_model_mixin.py | 13 +- 
src/lmflow/models/hf_text_regression_model.py | 39 +++--- src/lmflow/pipeline/dpov2_aligner.py | 115 ++++++++++-------- src/lmflow/pipeline/rm_inferencer.py | 12 +- src/lmflow/pipeline/utils/dpov2_trainer.py | 2 +- .../tokenization/hf_text_regression_model.py | 10 +- src/lmflow/utils/constants.py | 72 ++++++++--- 8 files changed, 177 insertions(+), 116 deletions(-) diff --git a/src/lmflow/datasets/dataset.py b/src/lmflow/datasets/dataset.py index 333ff4e65..377be0465 100644 --- a/src/lmflow/datasets/dataset.py +++ b/src/lmflow/datasets/dataset.py @@ -12,6 +12,7 @@ # Importing necessary libraries and modules import copy import json +import logging from pathlib import Path from cmath import e @@ -25,7 +26,6 @@ from lmflow.utils.constants import ( DATASET_DESCRIPTION_MAP, TEXT_ONLY_DATASET_DESCRIPTION, - SCORED_TEXT_ONLY_DATASET_DESCRIPTION, TEXT2TEXT_DATASET_DESCRIPTION, FLOAT_ONLY_DATASET_DESCRIPTION, INSTANCE_FIELDS_MAP, @@ -33,6 +33,10 @@ from .multi_modal_dataset import CustomMultiModalDataset + +logger = logging.getLogger(__name__) + + DATASET_TYPES = [ "text_only", "text2text", @@ -40,14 +44,14 @@ "image_text", "conversation", "paired_conversation", - "paired_text2text", - "grouped_text2text", - "grouped_conversation", + "paired_text_to_text", + "text_to_textlist", + "text_to_scored_textlist" ] KEY_TYPE = "type" KEY_INSTANCES = "instances" -KEY_SCORES = "scores" +KEY_SCORE = "score" class Dataset: r""" @@ -419,7 +423,11 @@ def get_type(self) -> str: return self.type - def save(self, file_path: str): + def save( + self, + file_path: str, + format: str="json" + ): r""" Save the dataset to a json file. @@ -428,6 +436,10 @@ def save(self, file_path: str): file_path : str. The path to the file where the dataset will be saved. """ - assert Path(file_path).suffix == ".json", "The file path must have a .json extension." 
- with open(file_path, "w") as fout: - json.dump(self.to_dict(), fout, indent=2) \ No newline at end of file + if format == "json": + assert Path(file_path).suffix == ".json", "The file path must have a .json extension." + with open(file_path, "w", encoding='utf-8') as fout: + json.dump(self.to_dict(), fout, indent=4, ensure_ascii=False) + + else: + logger.error(f"Unsupported format when saving the dataset: {format}.") \ No newline at end of file diff --git a/src/lmflow/models/hf_model_mixin.py b/src/lmflow/models/hf_model_mixin.py index 4fc35e451..6b1b6498c 100644 --- a/src/lmflow/models/hf_model_mixin.py +++ b/src/lmflow/models/hf_model_mixin.py @@ -303,9 +303,9 @@ def __prepare_model_for_training( model_args: ModelArguments, hf_auto_model: HF_AUTOMODEL_TYPE, ): - assert self.do_train, "To prepare the model for training, set do_train=True." + assert self.do_train, "To prepare the model for training, please set do_train=True." # TODO: change to accelerate - logger.warning("Preparing model for training") + logger.info("Preparing model for training") if model_args.model_name_or_path: model = hf_auto_model.from_pretrained( model_args.model_name_or_path, @@ -323,6 +323,9 @@ def __prepare_model_for_training( self.backend_model_full = model if model_args.ignore_bias_buffers: + # torch distributed hack + # fix for DDP issues with LM bias/mask buffers - invalid scalar type, inplace operation. + # See: https://github.com/huggingface/transformers/issues/22482#issuecomment-1595790992 model._ddp_params_and_buffers_to_ignore = [ name for name, buffer in model.named_buffers() if buffer.dtype == torch.bool ] @@ -367,12 +370,6 @@ def __prepare_model_for_inference( "offload_folder": "offload", "offload_state_dict": True, } - - if model_args.lora_model_path is not None: - logger.warning( - "LoRA does not support RAM optimized load currently. Using original load." 
- ) - model_args.use_ram_optimized_load = False if use_accelerator or model_args.use_ram_optimized_load: inference_load_kwargs.update(ram_optimized_load_kwargs) diff --git a/src/lmflow/models/hf_text_regression_model.py b/src/lmflow/models/hf_text_regression_model.py index 8b8d2f19e..3cafc6b1a 100644 --- a/src/lmflow/models/hf_text_regression_model.py +++ b/src/lmflow/models/hf_text_regression_model.py @@ -30,14 +30,14 @@ paired_conversation_tokenize_function, conversation_tokenize_function, tokenize_function, - grouped_text2text_tokenize_function, + text_to_textlist_tokenize_function, ) from lmflow.utils.conversation_template import PRESET_TEMPLATES from lmflow.utils.constants import ( PAIRED_CONVERSATION_DATASET_DESCRIPTION, TEXT2TEXT_DATASET_DESCRIPTION, TEXT_ONLY_DATASET_DESCRIPTION, - GROUPED_TEXT2TEXT_DATASET_DESCRIPTION, + TEXT_TO_TEXTLIST_DATASET_DESCRIPTION, CONVERSATION_DATASET_DESCRIPTION, ) @@ -167,17 +167,23 @@ def tokenize( } if dataset_type == "text_only": tokenize_fn = tokenize_function - tokenize_fn_kwargs["tokenized_column_order"] = ["text"] - tokenize_fn_kwargs["label_columns"] = ["text"] - tokenize_fn_kwargs["add_special_tokens"] = add_special_tokens - tokenize_fn_kwargs["use_truncation"] = use_truncation + text_only_tokenize_fn_kwargs = { + "tokenized_column_order": ["text"], + "label_columns": ["text"], + "add_special_tokens": add_special_tokens, + "use_truncation": use_truncation, + } + tokenize_fn_kwargs.update(text_only_tokenize_fn_kwargs) elif dataset_type == "text2text": tokenize_fn = tokenize_function - tokenize_fn_kwargs["tokenized_column_order"] = ["input", "output"] - tokenize_fn_kwargs["label_columns"] = ["output"] - tokenize_fn_kwargs["add_special_tokens"] = False - tokenize_fn_kwargs["use_truncation"] = use_truncation + text2text_tokenize_fn_kwargs = { + "tokenized_column_order": ["input", "output"], + "label_columns": ["output"], + "add_special_tokens": False, + "use_truncation": use_truncation, + } + 
tokenize_fn_kwargs.update(text2text_tokenize_fn_kwargs) elif dataset_type in ["conversation", "paired_conversation"]: if dataset_type == "conversation": @@ -198,10 +204,13 @@ def tokenize( tokenize_fn_kwargs["conversation_template"] = conversation_template logger.warning(f"Conversation template: {conversation_template}") - elif dataset_type == "grouped_text2text": - tokenize_fn = grouped_text2text_tokenize_function - tokenize_fn_kwargs["add_special_tokens"] = add_special_tokens - tokenize_fn_kwargs["use_truncation"] = use_truncation + elif dataset_type == "text_to_textlist": + tokenize_fn = text_to_textlist_tokenize_function + text_to_textlist_tokenize_fn_kwargs = { + "add_special_tokens": add_special_tokens, + "use_truncation": use_truncation, + } + tokenize_fn_kwargs.update(text_to_textlist_tokenize_fn_kwargs) else: raise NotImplementedError( @@ -211,7 +220,7 @@ def tokenize( f" 2) [Inference]{TEXT2TEXT_DATASET_DESCRIPTION}\n" f" 3) [Training]{PAIRED_CONVERSATION_DATASET_DESCRIPTION}\n" f" 4) [Inference]{CONVERSATION_DATASET_DESCRIPTION}\n" - f" 5) [Inference]{GROUPED_TEXT2TEXT_DATASET_DESCRIPTION}\n" + f" 5) [Inference]{TEXT_TO_TEXTLIST_DATASET_DESCRIPTION}\n" ) tokenize_kwargs = {} diff --git a/src/lmflow/pipeline/dpov2_aligner.py b/src/lmflow/pipeline/dpov2_aligner.py index 2109c5d15..151c1794e 100644 --- a/src/lmflow/pipeline/dpov2_aligner.py +++ b/src/lmflow/pipeline/dpov2_aligner.py @@ -7,7 +7,7 @@ from tqdm import tqdm from transformers import TrainingArguments -from lmflow.pipeline.utils.dpov2_trainer import PreferenceTrainer +from lmflow.pipeline.utils.dpov2_trainer import DPOv2Trainer from lmflow.pipeline.base_aligner import BaseAligner from lmflow.args import ( ModelArguments, @@ -15,7 +15,7 @@ DPOv2AlignerArguments ) from lmflow.models.hf_decoder_model import HFDecoderModel -from lmflow.datasets.dataset import Dataset, KEY_SCORES, KEY_TYPE, KEY_INSTANCES +from lmflow.datasets.dataset import Dataset, KEY_SCORE, KEY_TYPE, KEY_INSTANCES logger = 
logging.getLogger(__name__) @@ -43,6 +43,12 @@ def align( eval_dataset: Dataset, transform_dataset_in_place: bool=True, ): + if (train_dataset.get_type() not in ["text_to_scored_textlist", "paired_text_to_text"]) or \ + (eval_dataset.get_type() not in ["text_to_scored_textlist", "paired_text_to_text"]): + raise ValueError( + f"Unsupported dataset type {train_dataset.get_type()} for DPOv2 aligner." + ) + # step 0. setting up if self.aligner_args.gradient_checkpointing: logger.warning( @@ -52,29 +58,31 @@ def align( ref_model.get_backend_model().config.use_cache = False # step 1. prepare datasets - paired_train_dataset = self.convert_grouped_to_paired_dataset( - grouped_dataset=train_dataset, - sampling_paired_method=self.aligner_args.sampling_paired_method, - length_penalty=self.aligner_args.length_penalty, - margin_scale=self.aligner_args.margin_scale, - use_fast=False, - ) + if train_dataset.get_type() == "text_to_scored_textlist": + train_dataset = self.convert_to_paired_dataset( + source_dataset=train_dataset, + sampling_paired_method=self.aligner_args.sampling_paired_method, + length_penalty=self.aligner_args.length_penalty, + margin_scale=self.aligner_args.margin_scale, + use_fast=False, + ) if self.data_args.max_train_samples: - paired_train_dataset.backend_dataset = paired_train_dataset.backend_dataset.select(range(self.data_args.max_train_samples)) + train_dataset.backend_dataset = train_dataset.backend_dataset.select(range(self.data_args.max_train_samples)) - paired_eval_dataset = self.convert_grouped_to_paired_dataset( - grouped_dataset=eval_dataset, - sampling_paired_method=self.aligner_args.sampling_paired_method, - margin_scale=self.aligner_args.margin_scale, - use_fast=False, - ) + if eval_dataset.get_type() == "text_to_scored_textlist": + eval_dataset = self.convert_to_paired_dataset( + source_dataset=eval_dataset, + sampling_paired_method=self.aligner_args.sampling_paired_method, + margin_scale=self.aligner_args.margin_scale, + use_fast=False, + ) # 
step 2. prepare trainer - dpo_trainer = PreferenceTrainer( + dpo_trainer = DPOv2Trainer( model.get_backend_model(), ref_model.get_backend_model(), - train_dataset=paired_eval_dataset.get_backend_dataset(), # tokenization is done in the trainer - eval_dataset=paired_eval_dataset.get_backend_dataset(), + train_dataset=train_dataset.get_backend_dataset(), # tokenization is done in the trainer + eval_dataset=eval_dataset.get_backend_dataset(), tokenizer=model.tokenizer, args=self.__prepare_training_args(self.aligner_args), beta=self.aligner_args.beta, @@ -96,54 +104,55 @@ def align( def __prepare_training_args( self, - aligner_args: DPOv2AlignerArguments, + args: DPOv2AlignerArguments, ) -> TrainingArguments: training_args = TrainingArguments( - per_device_train_batch_size=aligner_args.per_device_train_batch_size, - per_device_eval_batch_size=aligner_args.per_device_eval_batch_size, - num_train_epochs=aligner_args.num_train_epochs, - save_strategy=aligner_args.save_strategy, - logging_steps=aligner_args.logging_steps, - save_steps=aligner_args.save_steps, - gradient_accumulation_steps=aligner_args.gradient_accumulation_steps, - gradient_checkpointing=aligner_args.gradient_checkpointing, - learning_rate=aligner_args.learning_rate, - evaluation_strategy=aligner_args.evaluation_strategy, - eval_steps=aligner_args.eval_steps, - output_dir=aligner_args.output_dir, - lr_scheduler_type=aligner_args.lr_scheduler_type, - warmup_steps=aligner_args.warmup_steps, - optim=aligner_args.optim, - bf16=aligner_args.bf16, - report_to=aligner_args.report_to, - run_name=aligner_args.run_name, - remove_unused_columns=False, # DO NOT CHANGE THIS, may cause error + per_device_train_batch_size=args.per_device_train_batch_size, + per_device_eval_batch_size=args.per_device_eval_batch_size, + num_train_epochs=args.num_train_epochs, + save_strategy=args.save_strategy, + logging_steps=args.logging_steps, + save_steps=args.save_steps, + gradient_accumulation_steps=args.gradient_accumulation_steps, 
+ gradient_checkpointing=args.gradient_checkpointing, + learning_rate=args.learning_rate, + evaluation_strategy=args.evaluation_strategy, + eval_steps=args.eval_steps, + output_dir=args.output_dir, + lr_scheduler_type=args.lr_scheduler_type, + warmup_steps=args.warmup_steps, + optim=args.optim, + bf16=args.bf16, + report_to=args.report_to, + run_name=args.run_name, + remove_unused_columns=False, # DO NOT CHANGE THIS, may cause error https://discuss.huggingface.co/t/indexerror-invalid-key-16-is-out-of-bounds-for-size-0/14298/3 ) logger.warning(f"Actual training arguments for dpo trainer: {training_args}") return training_args - def convert_grouped_to_paired_dataset( + def convert_to_paired_dataset( self, - grouped_dataset: Dataset, + source_dataset: Dataset, sampling_paired_method: str="random", length_penalty: float=0.0, margin_scale: float=1.0, use_fast: bool=False, ) -> Dataset: - """Convert a grouped dataset to a paired dataset by rejection sampling. + """Convert a scored one to multiple (text_to_scored_textlist) to a paired dataset by rejection sampling. 
""" output_dict = { - KEY_TYPE: f"paired_{grouped_dataset.get_type().replace('grouped_','')}", KEY_INSTANCES: [] } + if source_dataset.get_type() in ["text_to_scored_textlist"]: + output_dict[KEY_TYPE] = "paired_text_to_text" - for sample in tqdm(grouped_dataset.get_backend_dataset(), desc="Converting to paired dataset"): + for sample in tqdm(source_dataset.get_backend_dataset(), desc="Converting to paired dataset"): sample_output_dict = {} - lengths = self._calc_response_lengths(sample["outputs"], grouped_dataset.get_type()) + lengths = self._calc_response_lengths(sample["output"], source_dataset.get_type()) penalized_rewards = self._calc_reward_with_length_penalty( - rewards=sample[KEY_SCORES], + rewards=[content[KEY_SCORE] for content in sample["output"]], lengths=lengths, length_penalty=length_penalty ) @@ -154,12 +163,12 @@ def convert_grouped_to_paired_dataset( ) sample_output_dict["prompt"] = sample["input"] - sample_output_dict["chosen"] = sample["outputs"][chosen_idx] - sample_output_dict["rejected"] = sample["outputs"][rejected_idx] - sample_output_dict["margin"] = (sample[KEY_SCORES][chosen_idx] - sample[KEY_SCORES][rejected_idx]) * margin_scale + sample_output_dict["chosen"] = sample["output"][chosen_idx]["text"] + sample_output_dict["rejected"] = sample["output"][rejected_idx]["text"] + sample_output_dict["margin"] = (sample["output"][chosen_idx][KEY_SCORE] - sample["output"][rejected_idx][KEY_SCORE]) * margin_scale output_dict[KEY_INSTANCES].append(sample_output_dict) - output_dataset_args = copy.deepcopy(grouped_dataset.data_args) + output_dataset_args = copy.deepcopy(source_dataset.data_args) output_dataset_args.dataset_path = None output_dataset_args.dataset_name = f"paired_{output_dataset_args.dataset_name}" output_dataset = Dataset(output_dataset_args) @@ -174,8 +183,8 @@ def _calc_response_lengths( dataset_type: str, ) -> List[int]: all_lengths = [] - if dataset_type == 'grouped_text2text': - all_lengths = [len(output) for output in outputs] + 
if dataset_type == "text_to_scored_textlist": + all_lengths = [len(output["text"]) for output in outputs] else: raise NotImplementedError( @@ -194,7 +203,7 @@ def _calc_reward_with_length_penalty( """When length_penalty > 0, penalize the longer sequence by subtracting length_penalty * length from the reward. Vice versa when length_penalty < 0. """ - assert len(rewards) == len(lengths) + assert len(rewards) == len(lengths), "The number of rewards and lengths should be the same." return [reward - length_penalty * length for reward, length in zip(rewards, lengths)] diff --git a/src/lmflow/pipeline/rm_inferencer.py b/src/lmflow/pipeline/rm_inferencer.py index 087f4dec4..e4d9b44b9 100644 --- a/src/lmflow/pipeline/rm_inferencer.py +++ b/src/lmflow/pipeline/rm_inferencer.py @@ -74,8 +74,6 @@ def __init__( torch.cuda.set_device(self.local_rank) # NOTE: cpu-only machine will have error deepspeed.init_distributed() else: - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = "15000" dist.init_process_group( "gloo", rank=self.local_rank, world_size=self.world_size ) @@ -99,10 +97,14 @@ def inference( assert isinstance(model, HFTextRegressionModel), "model should be HFTextRegressionModel" if not transform_dataset_in_place: dataset = copy.deepcopy(dataset) + output_dict = { - "type": f"grouped_{dataset.get_type().lstrip('grouped_')}", "instances": dataset.to_dict()["instances"], } + if dataset.get_type() == "text_to_textlist": + output_dict["type"] = "text_to_scored_textlist" + else: + raise NotImplementedError(f"Dataset type {dataset.get_type()} is not supported for reward model inference.") if use_vllm: scores = self.__vllm_inference(model, dataset) @@ -127,7 +129,7 @@ def __inference( dataset: Dataset, ) -> Union[List[float], List[List[float]]]: tokenized_dataset = model.tokenize(dataset) - if 'grouped_' in dataset.get_type(): + if dataset.get_type() in ["text_to_textlist"]: model_input_ids, num_outputs = 
self.flatten_list(tokenized_dataset.get_backend_dataset()["input_ids"]) else: model_input_ids = tokenized_dataset.get_backend_dataset()["input_ids"] @@ -163,7 +165,7 @@ def __inference( batch_output = self.__post_process_model_output(batch_output) final_output.extend(batch_output) - if 'grouped_' in dataset.get_type(): + if dataset.get_type() in ["text_to_textlist"]: final_output = self.compress_list(final_output, num_outputs) return final_output diff --git a/src/lmflow/pipeline/utils/dpov2_trainer.py b/src/lmflow/pipeline/utils/dpov2_trainer.py index a43e96f2d..032c6e92e 100644 --- a/src/lmflow/pipeline/utils/dpov2_trainer.py +++ b/src/lmflow/pipeline/utils/dpov2_trainer.py @@ -22,7 +22,7 @@ logger = logging.getLogger(__name__) -class PreferenceTrainer(DPOTrainer): +class DPOv2Trainer(DPOTrainer): def __init__( self, model: Union[PreTrainedModel, nn.Module] = None, diff --git a/src/lmflow/tokenization/hf_text_regression_model.py b/src/lmflow/tokenization/hf_text_regression_model.py index cb1344543..b819c5389 100644 --- a/src/lmflow/tokenization/hf_text_regression_model.py +++ b/src/lmflow/tokenization/hf_text_regression_model.py @@ -127,7 +127,7 @@ def blocking( return token_dict -def blocking_grouped_text2text( +def blocking_text_to_textlist( token_dict: Dict, block_size: int, model_max_length: int, @@ -369,7 +369,7 @@ def tokenize_function( return token_dict -def grouped_text2text_tokenize_function( +def text_to_textlist_tokenize_function( examples, data_args: DatasetArguments, tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast], @@ -387,8 +387,8 @@ def grouped_text2text_tokenize_function( for example_idx in range(num_example): encoded = tokenizer( [ - examples["input"][example_idx] + examples["outputs"][example_idx][i] - for i in range(len(examples["outputs"][example_idx])) + examples["input"][example_idx] + examples["output"][example_idx][i] + for i in range(len(examples["output"][example_idx])) ], add_special_tokens=add_special_tokens, 
truncation=use_truncation, @@ -397,7 +397,7 @@ def grouped_text2text_tokenize_function( output_dict["input_ids"][example_idx] = encoded["input_ids"] if data_args.disable_group_texts: - output_dict = blocking_grouped_text2text( + output_dict = blocking_text_to_textlist( token_dict=output_dict, block_size=data_args.block_size, model_max_length=tokenizer.model_max_length, diff --git a/src/lmflow/utils/constants.py b/src/lmflow/utils/constants.py index 4fda4fd92..26f9e2a40 100644 --- a/src/lmflow/utils/constants.py +++ b/src/lmflow/utils/constants.py @@ -20,19 +20,52 @@ ).lstrip("\n") -SCORED_TEXT_ONLY_DATASET_DESCRIPTION = ( +TEXT_TO_SCORED_TEXTLIST_DATASET_DESCRIPTION = ( """ -"scored_text_only": a dataset with only raw text instances and corresponding scores, with following format: +This kind of dataset is commonly used in reward model training/prediction, as well as rl training. +{ + "type": "text_to_scored_textlist", + "instances": [ + { + "input": "what's your name?", + "output": [ + {"score": 1.0, "text": "My name is John"}, + {"score": -0.8, "text": "I'm John"} + ] + }, + { + "input": "Who are you?", + "output": [ + {"score": 1.5, "text": "My name is Amy"}, + {"score": 1.0, "text": "I'm Amy"} + ] + }, + ] +} +""" +).lstrip("\n") - { - "type": "text_only", - "instances": [ - { "text": "TEXT_1" }, - { "text": "TEXT_2" }, - ... - ], - "scores": [1.0, 0.5, ...] - } + +PAIRED_TEXT_TO_TEXT_DATASET_DESCRIPTION = ( +""" +This kind of dataset is commonly used in reward model training as well as rl training. 
+{ + "type": "paired_text_to_text", + "instances": [ + { + "prompt": "Who are you?", + "chosen": "My name is Amy.", + "rejected": "I'm Amy", + "margin": 0.6 + }, + { + "prompt": "what's your name?", + "chosen": "My name is John.", + "rejected": "I'm John", + "margin": 0.5 + } + ] +} """ ).lstrip("\n") @@ -207,22 +240,22 @@ ).lstrip("\n") -GROUPED_TEXT2TEXT_DATASET_DESCRIPTION = ( +TEXT_TO_TEXTLIST_DATASET_DESCRIPTION = ( """ -This kind of dataset is commonly used in reward model training/prediction, as well as rl training. +This kind of dataset is commonly used in reward model inference. { - "type": "grouped_text2text", + "type": "text_to_textlist", "instances": [ { "input": "what's your name?", - "outputs": [ + "output": [ "My name is John", "I'm John", ] }, { "input": "Who are you?", - "outputs": [ + "output": [ "My name is Amy", "I'm Amy", ] @@ -230,7 +263,7 @@ ] } """ -) +).lstrip("\n") TEXT2TEXT_DATASET_DETAILS = ( @@ -323,11 +356,10 @@ "text2text": ["input", "output"], "conversation": ["messages"], # system, tools and conversation_id are optional "paired_conversation": ["chosen", "rejected"], - "paired_text2text": ["prompt", "chosen", "rejected"], + "paired_text_to_text": ["prompt", "chosen", "rejected"], "float_only": ["value"], "image_text": ["images", "text"], - "grouped_text2text": ["input", "outputs"], - "grouped_conversation": ["input", "outputs"], + "text_to_textlist": ["input", "output"], } CONVERSATION_ROLE_NAMES = { From 35347d46d4cc2e2736b09e1c80325e011b5c90df Mon Sep 17 00:00:00 2001 From: Yizhen Date: Thu, 27 Jun 2024 02:22:04 +0800 Subject: [PATCH 09/13] [Bug] dataset bug fix --- src/lmflow/pipeline/rm_inferencer.py | 20 ++++++++++++++------ src/lmflow/utils/constants.py | 1 + 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/lmflow/pipeline/rm_inferencer.py b/src/lmflow/pipeline/rm_inferencer.py index e4d9b44b9..ab1058d63 100644 --- a/src/lmflow/pipeline/rm_inferencer.py +++ b/src/lmflow/pipeline/rm_inferencer.py @@ -34,7 +34,7 
@@ set_random_seed, batchlize ) -from lmflow.datasets.dataset import KEY_SCORES +from lmflow.datasets.dataset import KEY_SCORE os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warnings about parallelism in tokenizers @@ -98,11 +98,18 @@ def inference( if not transform_dataset_in_place: dataset = copy.deepcopy(dataset) - output_dict = { - "instances": dataset.to_dict()["instances"], - } + output_dict = {"type": "", "instances": []} if dataset.get_type() == "text_to_textlist": output_dict["type"] = "text_to_scored_textlist" + for idx, instance in enumerate(dataset.get_backend_dataset()): + if len(instance["output"]) < 2: + logger.warning(f"Instance {idx} has less than 2 outputs, skipping.") + output_dict["instances"].append( + { + "input": instance["input"], + "output": [{"text": text} for text in instance["output"]], + } + ) else: raise NotImplementedError(f"Dataset type {dataset.get_type()} is not supported for reward model inference.") @@ -111,8 +118,9 @@ def inference( else: scores = self.__inference(model, dataset) - for i, score in enumerate(scores): - output_dict["instances"][i][KEY_SCORES] = score + for i, instance_scores in enumerate(scores): + for j, score in enumerate(instance_scores): + output_dict["instances"][i]["output"][j][KEY_SCORE] = score output_dataset_args = copy.deepcopy(self.data_args) output_dataset_args.dataset_path = None diff --git a/src/lmflow/utils/constants.py b/src/lmflow/utils/constants.py index 26f9e2a40..39ee78112 100644 --- a/src/lmflow/utils/constants.py +++ b/src/lmflow/utils/constants.py @@ -360,6 +360,7 @@ "float_only": ["value"], "image_text": ["images", "text"], "text_to_textlist": ["input", "output"], + "text_to_scored_textlist": ["input", "output"], } CONVERSATION_ROLE_NAMES = { From 1715212ee688d4bcea991d664f9782055ae8b422 Mon Sep 17 00:00:00 2001 From: Yizhen Date: Fri, 28 Jun 2024 02:13:36 +0800 Subject: [PATCH 10/13] [Feature] add dpo v2 example --- configs/accelerate_dsz3_config.yaml | 23 ++++++ 
examples/dpov2_train.py | 78 ++++++++++++++++++++ scripts/run_dpo_align.sh | 2 +- scripts/run_dpov2_align.sh | 86 ++++++++++++++++++++++ src/lmflow/args.py | 62 +++------------- src/lmflow/pipeline/auto_pipeline.py | 2 + src/lmflow/pipeline/dpov2_aligner.py | 3 +- src/lmflow/pipeline/utils/dpov2_trainer.py | 2 + 8 files changed, 204 insertions(+), 54 deletions(-) create mode 100644 configs/accelerate_dsz3_config.yaml create mode 100644 examples/dpov2_train.py create mode 100644 scripts/run_dpov2_align.sh diff --git a/configs/accelerate_dsz3_config.yaml b/configs/accelerate_dsz3_config.yaml new file mode 100644 index 000000000..7f2cf9600 --- /dev/null +++ b/configs/accelerate_dsz3_config.yaml @@ -0,0 +1,23 @@ +compute_environment: LOCAL_MACHINE +debug: false +deepspeed_config: + deepspeed_multinode_launcher: standard + offload_optimizer_device: none + offload_param_device: none + zero3_init_flag: true + zero3_save_16bit_model: true + zero_stage: 3 +distributed_type: DEEPSPEED +downcast_bf16: 'no' +machine_rank: 0 +main_training_function: main +mixed_precision: bf16 +num_machines: 1 +num_processes: 4 +gpu_ids: 4,5,6,7 +rdzv_backend: static +same_network: true +tpu_env: [] +tpu_use_cluster: false +tpu_use_sudo: false +use_cpu: false diff --git a/examples/dpov2_train.py b/examples/dpov2_train.py new file mode 100644 index 000000000..ae4ca22d6 --- /dev/null +++ b/examples/dpov2_train.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved. 
+import logging +import os +import sys +import copy + +from transformers import ( + HfArgumentParser +) + +from lmflow.datasets import Dataset +from lmflow.models.auto_model import AutoModel +from lmflow.pipeline.auto_pipeline import AutoPipeline +from lmflow.args import ( + ModelArguments, + DatasetArguments, + AutoArguments, +) +from lmflow.utils.common import remove_dataclass_attr_prefix, create_copied_dataclass + + +logger = logging.getLogger(__name__) + + +ReferenceModelArguments = create_copied_dataclass( + original_dataclass=ModelArguments, + field_prefix="reference_", + class_prefix="Reference" +) + + +def main(): + # Parses arguments + pipeline_name = "dpov2_aligner" + PipelineArguments = AutoArguments.get_pipeline_args_class(pipeline_name) + + parser = HfArgumentParser(( + ModelArguments, + ReferenceModelArguments, + DatasetArguments, + PipelineArguments + )) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. 
+ model_args, ref_model_args, data_args, pipeline_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, ref_model_args, data_args, pipeline_args = parser.parse_args_into_dataclasses() + + ref_model_args_dict = remove_dataclass_attr_prefix(ref_model_args, "reference_") + ref_model_args = ModelArguments(**ref_model_args_dict) + + train_dataset = Dataset(data_args) + eval_data_args = copy.deepcopy(data_args) + eval_data_args.dataset_path = pipeline_args.eval_dataset_path + eval_dataset = Dataset(eval_data_args) + model = AutoModel.get_model(model_args) + ref_model = AutoModel.get_model(ref_model_args) + aligner = AutoPipeline.get_pipeline( + pipeline_name=pipeline_name, + model_args=model_args, + data_args=data_args, + pipeline_args=pipeline_args, + ref_model_args=ref_model_args, + ) + + res = aligner.align( + model=model, + ref_model=ref_model, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + ) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/run_dpo_align.sh b/scripts/run_dpo_align.sh index d9c54f4fc..7d2ee00be 100644 --- a/scripts/run_dpo_align.sh +++ b/scripts/run_dpo_align.sh @@ -20,7 +20,7 @@ while [[ $# -ge 1 ]]; do dataset_path="$2" shift ;; - -o|--output_lora_path) + -o|--output_dir) output_dir="$2" shift ;; diff --git a/scripts/run_dpov2_align.sh b/scripts/run_dpov2_align.sh new file mode 100644 index 000000000..9a2b0faae --- /dev/null +++ b/scripts/run_dpov2_align.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +# Parses arguments +run_name=dpov2_align +model_name_or_path=meta-llama/Meta-Llama-3-8B-Instruct +reference_model_name_or_path=meta-llama/Meta-Llama-3-8B-Instruct +dataset_path=data/iterative-prompt/train +eval_dataset_path=data/iterative-prompt/eval +output_dir=output_models/${run_name} +deepspeed_args="--master_port=11000 --include localhost:4,5,6,7" + +while [[ $# -ge 1 ]]; do + key="$1" + case ${key} in + -r|--run_name) + run_name="$2" + shift + ;; + 
--model_name_or_path) + model_name_or_path="$2" + shift + ;; + --reference_model_name_or_path) + reference_model_name_or_path="$2" + shift + ;; + --dataset_path) + dataset_path="$2" + shift + ;; + --eval_dataset_path) + eval_dataset_path="$2" + shift + ;; + -o|--output_dir) + output_dir="$2" + shift + ;; + --deepspeed_args) + deepspeed_args="$2" + shift + ;; + *) + echo "error: unknown option \"${key}\"" 1>&2 + exit 1 + esac + shift +done + +project_dir=$(cd "$(dirname $0)"/..; pwd) +log_dir=${project_dir}/log/${run_name} +mkdir -p ${output_dir} ${log_dir} + +accelerate launch --config_file configs/accelerate_dsz3_config.yaml \ + examples/dpov2_train.py \ + --model_name_or_path ${model_name_or_path} \ + --reference_model_name_or_path ${reference_model_name_or_path} \ + --do_train True \ + --dataset_path ${dataset_path} \ + --eval_dataset_path ${eval_dataset_path} \ + --bf16 True \ + --learning_rate 5e-7 \ + --lr_scheduler_type cosine \ + --warmup_steps 100 \ + --optim paged_adamw_32bit \ + --per_device_train_batch_size 1 \ + --per_device_eval_batch_size 1 \ + --gradient_accumulation_steps 16 \ + --gradient_checkpointing True \ + --margin_scale 1.0 \ + --max_prompt_length 1000 \ + --num_train_epochs 2 \ + --logging_steps 2 \ + --save_strategy epoch \ + --save_steps 5000 \ + --evaluation_strategy steps \ + --eval_steps 100 \ + --loss_type sigmoid \ + --output_dir ${output_dir} \ + --run_name ${run_name} \ + --sampling_paired_method max_min \ + --report_to wandb \ + --mask_prompt True \ + --length_penalty 0 \ + | tee ${log_dir}/train.log \ + 2> ${log_dir}/train.err \ No newline at end of file diff --git a/src/lmflow/args.py b/src/lmflow/args.py index 6a1aa438e..786f27a8a 100644 --- a/src/lmflow/args.py +++ b/src/lmflow/args.py @@ -1289,65 +1289,22 @@ class DPOAlignerArguments: @dataclass -class DPOv2AlignerArguments(TrainingArguments): +class DPOv2AlignerArguments(FinetunerArguments): """ The arguments for the DPOv2 training script. 
""" - - # data parameters, i.e., the KL penalty in the paper - beta: Optional[float] = field(default=0.1, metadata={"help": "the beta parameter for DPO loss"}) - - # training parameters - eval_dir: Optional[str] = field( - default="/export/home/hanze/project/vllm-gen/uf_split0_offline_reward.json", # "/export/home/data/gemma_it_2b_3w_k8_with_pairrm_rewards.json", - metadata={"help": "the location of the evalset name or path"}, - ) - learning_rate: Optional[float] = field(default=5e-7, metadata={"help": "optimizer learning rate"}) - lr_scheduler_type: Optional[str] = field( - default="constant_with_warmup", metadata={"help": "the lr scheduler type"} - ) - warmup_steps: Optional[int] = field(default=100, metadata={"help": "the number of warmup steps"}) - weight_decay: Optional[float] = field(default=0.01, metadata={"help": "the weight decay"}) - - per_device_train_batch_size: Optional[int] = field(default=1, metadata={"help": "train batch size per device"}) - per_device_eval_batch_size: Optional[int] = field(default=1, metadata={"help": "eval batch size per device"}) - gradient_accumulation_steps: Optional[int] = field( - default=16, metadata={"help": "the number of gradient accumulation steps"} - ) - gradient_checkpointing: Optional[bool] = field( - default=True, metadata={"help": "whether to use gradient checkpointing"} - ) - - - lora_alpha: Optional[float] = field(default=16, metadata={"help": "the lora alpha parameter"}) - lora_dropout: Optional[float] = field(default=0.05, metadata={"help": "the lora dropout parameter"}) - lora_r: Optional[int] = field(default=8, metadata={"help": "the lora r parameter"}) - + # pair sampling args margin_scale: Optional[float] = field(default=1.0, metadata={"help": "the margin scale"}) - - max_prompt_length: Optional[int] = field(default=1000, metadata={"help": "the maximum prompt length"}) - max_length: Optional[int] = field(default=2048, metadata={"help": "the maximum sequence length"}) - num_train_epochs: Optional[int] = 
field(default=2, metadata={"help": "max number of training epochs"}) - logging_steps: Optional[int] = field(default=2, metadata={"help": "the logging frequency"}) - save_strategy: Optional[str] = field(default="epoch", metadata={"help": "the saving strategy"}) - save_steps: Optional[int] = field(default=50000, metadata={"help": "the saving frequency"}) - eval_steps: Optional[int] = field(default=100, metadata={"help": "the evaluation frequency"}) - run_name: Optional[str] = field(default="dpo_soft", metadata={"help": "the run name"}) - loss_type: Optional[str] = field(default="sigmoid", metadata={"help": "the loss type"}) - output_dir: Optional[str] = field(default="./dpo_soft", metadata={"help": "the output directory"}) - log_freq: Optional[int] = field(default=1, metadata={"help": "the logging frequency"}) - - # instrumentation sampling_paired_method: Optional[str] = field(default="max_random", metadata={"help": "the choose type"}) - - mask_prompt: Optional[bool] = field(default=False, metadata={"help": "mask prompt"}) length_penalty: Optional[float] = field(default=0, metadata={"help": "the length penalty"}) + # data collator args + max_length: Optional[int] = field(default=2048, metadata={"help": "the maximum sequence length, prompt + output"}) + max_prompt_length: Optional[int] = field(default=1000, metadata={"help": "the maximum prompt length"}) + mask_prompt: Optional[bool] = field(default=False, metadata={"help": "mask prompt"}) + # dpov2 aligner args + beta: Optional[float] = field(default=0.1, metadata={"help": "the beta parameter for DPO loss"}) + loss_type: Optional[str] = field(default="sigmoid", metadata={"help": "the loss type"}) - # need to add - evaluation_strategy: Optional[str] = field( - default="steps", - metadata={"help": "the evaluation strategy"} - ) @dataclass class IterativeAlignerArguments(InferencerArguments): @@ -1366,6 +1323,7 @@ class IterativeAlignerArguments(InferencerArguments): "raft_aligner": RaftAlignerArguments, "dpo_aligner": 
DPOAlignerArguments, "rm_tuner": RewardModelTunerArguments, + "dpov2_aligner": DPOv2AlignerArguments, } diff --git a/src/lmflow/pipeline/auto_pipeline.py b/src/lmflow/pipeline/auto_pipeline.py index a5e815636..82212802f 100644 --- a/src/lmflow/pipeline/auto_pipeline.py +++ b/src/lmflow/pipeline/auto_pipeline.py @@ -19,6 +19,7 @@ def is_package_version_at_least(package_name, min_version): from lmflow.pipeline.inferencer import Inferencer from lmflow.pipeline.vllm_inferencer import VLLMInferencer from lmflow.pipeline.dpo_aligner import DPOAligner +from lmflow.pipeline.dpov2_aligner import DPOv2Aligner from lmflow.pipeline.rm_tuner import RewardModelTuner from lmflow.pipeline.rm_inferencer import RewardModelInferencer PIPELINE_MAPPING = { @@ -28,6 +29,7 @@ def is_package_version_at_least(package_name, min_version): "vllm_inferencer": VLLMInferencer, "rm_inferencer": RewardModelInferencer, "dpo_aligner": DPOAligner, + "dpov2_aligner": DPOv2Aligner, "rm_tuner": RewardModelTuner, } diff --git a/src/lmflow/pipeline/dpov2_aligner.py b/src/lmflow/pipeline/dpov2_aligner.py index 151c1794e..81770da16 100644 --- a/src/lmflow/pipeline/dpov2_aligner.py +++ b/src/lmflow/pipeline/dpov2_aligner.py @@ -25,9 +25,9 @@ class DPOv2Aligner(BaseAligner): def __init__( self, model_args: ModelArguments, - ref_model_args: ModelArguments, data_args: DatasetArguments, aligner_args: DPOv2AlignerArguments, + ref_model_args: ModelArguments, ): self.model_args = model_args self.ref_model_args = ref_model_args @@ -91,6 +91,7 @@ def align( max_length=self.aligner_args.max_length, mask_prompt=self.aligner_args.mask_prompt, len_penalty=self.aligner_args.length_penalty, + # preprocessing_num_workers=self.data_args.preprocessing_num_workers, # will trigger TypeError: cannot pickle 'torch._C._distributed_c10d.ProcessGroup' object ) # step 3. 
train diff --git a/src/lmflow/pipeline/utils/dpov2_trainer.py b/src/lmflow/pipeline/utils/dpov2_trainer.py index 032c6e92e..735daf635 100644 --- a/src/lmflow/pipeline/utils/dpov2_trainer.py +++ b/src/lmflow/pipeline/utils/dpov2_trainer.py @@ -54,6 +54,7 @@ def __init__( compute_metrics: Optional[Callable[[EvalLoopOutput], Dict]] = None, mask_prompt: Optional[bool] = False, len_penalty: float = 0, + preprocessing_num_workers: int = 1, ): if data_collator is None: @@ -93,6 +94,7 @@ def __init__( disable_dropout=disable_dropout, generate_during_eval=generate_during_eval, compute_metrics=compute_metrics, + dataset_num_proc=preprocessing_num_workers, ) self.use_dpo_data_collator = True self.len_penalty = len_penalty From 1a3e249ae984706ee417ebe83a0f5f216fa8851c Mon Sep 17 00:00:00 2001 From: Yizhen Date: Fri, 28 Jun 2024 02:16:12 +0800 Subject: [PATCH 11/13] [Usability] update accelerate dsz3 cfg --- configs/accelerate_dsz3_config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/accelerate_dsz3_config.yaml b/configs/accelerate_dsz3_config.yaml index 7f2cf9600..b8b1c93cb 100644 --- a/configs/accelerate_dsz3_config.yaml +++ b/configs/accelerate_dsz3_config.yaml @@ -13,8 +13,8 @@ machine_rank: 0 main_training_function: main mixed_precision: bf16 num_machines: 1 -num_processes: 4 -gpu_ids: 4,5,6,7 +num_processes: 8 +gpu_ids: rdzv_backend: static same_network: true tpu_env: [] From 052c836bb97ea47410e9b21d19b1c7a5f11c556c Mon Sep 17 00:00:00 2001 From: Yizhen Date: Fri, 28 Jun 2024 02:20:37 +0800 Subject: [PATCH 12/13] [Usability] dpo v2 example now using accelerate instead of deepspeed --- scripts/run_dpov2_align.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/scripts/run_dpov2_align.sh b/scripts/run_dpov2_align.sh index 9a2b0faae..d96908931 100644 --- a/scripts/run_dpov2_align.sh +++ b/scripts/run_dpov2_align.sh @@ -7,7 +7,6 @@ reference_model_name_or_path=meta-llama/Meta-Llama-3-8B-Instruct 
dataset_path=data/iterative-prompt/train eval_dataset_path=data/iterative-prompt/eval output_dir=output_models/${run_name} -deepspeed_args="--master_port=11000 --include localhost:4,5,6,7" while [[ $# -ge 1 ]]; do key="$1" @@ -36,10 +35,6 @@ while [[ $# -ge 1 ]]; do output_dir="$2" shift ;; - --deepspeed_args) - deepspeed_args="$2" - shift - ;; *) echo "error: unknown option \"${key}\"" 1>&2 exit 1 From 8a9e4d6d8afa7895fe12e4611788f15c60af6a4b Mon Sep 17 00:00:00 2001 From: Yizhen Date: Fri, 28 Jun 2024 08:46:36 +0800 Subject: [PATCH 13/13] [Usability] specify trl version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d31f2e3a6..f526b103d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ peft>=0.10.0 torch>=2.0.1 wandb==0.14.0 deepspeed<=0.14.0 -trl>=0.7.11 +trl==0.8.0 sentencepiece transformers>=4.31.0 flask