diff --git a/src/twinkle/model/megatron/megatron.py b/src/twinkle/model/megatron/megatron.py
index a7a2bb66..12b60b44 100644
--- a/src/twinkle/model/megatron/megatron.py
+++ b/src/twinkle/model/megatron/megatron.py
@@ -1186,13 +1186,10 @@ def _save_hf_format(self, output_dir: str, adapter_name: str, lora_converter=Non
         self.hf_config.save_pretrained(output_dir)
         if isinstance(model[0], PeftModel):
             config = model[0].peft_config[adapter_name]
-            target_modules = None
-            if getattr(config, 'origin_target_modules', None) == 'all-linear':
-                target_modules = config.target_modules
-                config.target_modules = 'all-linear'
+            target_modules = config.target_modules
+            config.target_modules = 'all-linear'
             model[0].peft_config[adapter_name].save_pretrained(output_dir)
-            if getattr(config, 'origin_target_modules', None) == 'all-linear':
-                config.target_modules = target_modules
+            config.target_modules = target_modules
 
     def _save_megatron_format(self, output_dir: str, adapter_name: str, lora_converter=None):
         """Save in Megatron checkpoint format."""
@@ -1296,9 +1293,6 @@ def _patch_adapter(self, adapter_name: str, config_or_dir: Union[PeftConfig, str
             config_or_dir = LoraConfig(**config_or_dir)
         config = config_or_dir
 
-        if config.target_modules == 'all-linear':
-            config.origin_target_modules = 'all-linear'
-
         # Expand target_modules (e.g., 'all-linear' -> actual module names)
         if config.target_modules:
             if isinstance(config.target_modules, str):
@@ -1564,7 +1558,11 @@ def get_peft_config_dict(self, adapter_name: str = None) -> Optional[Dict[str, A
         config = optimizer_config.adapter_config
         if isinstance(config, dict):
             config = config.get(adapter_name, next(iter(config.values())))
-        return config.to_dict() if hasattr(config, 'to_dict') else dict(config)
+        target_modules = config.target_modules
+        config.target_modules = 'all-linear'
+        _peft_config = config.to_dict() if hasattr(config, 'to_dict') else dict(config)
+        _peft_config['target_modules'] = target_modules
+        return _peft_config
 
     @staticmethod
     def get_target_modules(model: 'torch.nn.Module', target_modules: List[str]) -> List[str]:
diff --git a/src/twinkle/model/multi_lora.py b/src/twinkle/model/multi_lora.py
index e3d72690..0373249e 100644
--- a/src/twinkle/model/multi_lora.py
+++ b/src/twinkle/model/multi_lora.py
@@ -419,8 +419,6 @@ def _patch_peft(_module):
     def _patch_megatron(_module):
         # Expand target_modules (e.g., 'all-linear' -> actual module names)
         _config = deepcopy(config)
-        if _config.target_modules == 'all-linear':
-            _config.origin_target_modules = 'all-linear'
         if isinstance(_module, PeftModel):
             _module.add_adapter(lora_tenant.adapter_name, _config)
         else:
diff --git a/src/twinkle/processor/base.py b/src/twinkle/processor/base.py
index 95758e8c..8375f0e9 100644
--- a/src/twinkle/processor/base.py
+++ b/src/twinkle/processor/base.py
@@ -101,6 +101,8 @@ def to_tensor(_input):
             elif (isinstance(value, list)
                   and isinstance(value[0], (int, float, np.number))) or key == 'position_ids':
                 value = torch.tensor(value)
+            elif (isinstance(value, list)) and key in ('completion_mask', 'mm_token_type_ids'):
+                value = torch.tensor(value)
             elif key in self.VLM_CONCAT_FIELDS:
                 if not isinstance(value[0], torch.Tensor):
                     value = [torch.tensor(v) for v in value]
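
Note on the megatron.py changes: with the `origin_target_modules` flag removed, `_save_hf_format` now unconditionally presents the literal `'all-linear'` string while `save_pretrained` writes the adapter config, then restores the expanded module list on the in-memory config, keeping the saved checkpoint portable across model layouts. A minimal sketch of that swap pattern as a context manager (the helper name and the bare `config`/`output_dir` objects are illustrative, not part of the patch):

```python
from contextlib import contextmanager

@contextmanager
def all_linear_target_modules(config):
    """Temporarily present target_modules as the portable 'all-linear'
    shorthand, then restore the expanded module names."""
    expanded = config.target_modules
    config.target_modules = 'all-linear'
    try:
        yield config
    finally:
        # Put the concrete module names back for the live model.
        config.target_modules = expanded

# Hypothetical usage mirroring _save_hf_format:
# with all_linear_target_modules(model[0].peft_config[adapter_name]) as cfg:
#     cfg.save_pretrained(output_dir)
```

`get_peft_config_dict` applies the same temporary swap before serializing, then writes the expanded list back into the returned dict, so callers of the dict API still see the concrete module names.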
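
For context on the `# Expand target_modules` step both call sites rely on: `'all-linear'` is resolved into concrete linear-layer names before the adapter is attached. A rough sketch of what such an expansion can look like; the real `get_target_modules` in megatron.py takes the model plus the raw `target_modules` list and may differ in detail:

```python
import torch.nn as nn

def expand_all_linear(model: nn.Module) -> list:
    """Collect the leaf names of every nn.Linear; PEFT matches target
    modules on name suffixes, so leaf names suffice."""
    names = set()
    for name, module in model.named_modules():
        if isinstance(module, nn.Linear):
            names.add(name.split('.')[-1])
    return sorted(names)

class ToyBlock(nn.Module):
    def __init__(self):
        super().__init__()
        self.up_proj = nn.Linear(8, 16)
        self.down_proj = nn.Linear(16, 8)

print(expand_all_linear(ToyBlock()))  # ['down_proj', 'up_proj']
```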
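
On the processor/base.py hunk: `completion_mask` and `mm_token_type_ids` presumably arrive as nested per-sample lists, so `value[0]` is itself a list and the pre-existing scalar check never fires for them; the added branch converts them explicitly by key. A small standalone repro of the gap the branch closes (not the processor itself):

```python
import numpy as np
import torch

value = [[0, 0, 1, 1], [0, 1, 1, 0]]  # e.g. a batch of mm_token_type_ids

# The existing branch only fires for flat lists of scalars:
assert not isinstance(value[0], (int, float, np.number))

# torch.tensor handles the nesting fine, which the new branch exploits:
print(torch.tensor(value).shape)  # torch.Size([2, 4])
```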