diff --git a/auto_round/inference/convert_model.py b/auto_round/inference/convert_model.py
index e769e1eca..62460abaf 100644
--- a/auto_round/inference/convert_model.py
+++ b/auto_round/inference/convert_model.py
@@ -179,6 +179,43 @@ def get_available_devices():
     return devices
 
 
+def _remap_paths_for_text_model(model, quant_block_list, extra_config):
+    """Remap quantization paths when a composite model checkpoint is loaded as its text sub-model.
+
+    Uses Transformers' conversion_mapping WeightRenaming rules (e.g. "model.language_model" -> "model")
+    to fix path mismatches. Returns (remapped_block_list, remapped_extra_config).
+    """
+    try:
+        from transformers.conversion_mapping import get_checkpoint_conversion_mapping
+    except ImportError:
+        return quant_block_list, extra_config
+
+    model_type = getattr(getattr(model, "config", None), "model_type", None)
+    if model_type is None:
+        return quant_block_list, extra_config
+
+    mapping = get_checkpoint_conversion_mapping(model_type)
+    renamings = [r for r in mapping if type(r).__name__ == "WeightRenaming"]
+    if not renamings:
+        return quant_block_list, extra_config
+
+    def remap(path):
+        for r in renamings:
+            for src, tgt in zip(r.source_patterns, r.target_patterns):
+                new_path = re.sub(src, tgt, path)
+                if new_path != path:
+                    return new_path
+        return path
+
+    new_block_list = [remap(b) for b in quant_block_list]
+    new_extra_config = {remap(k): v for k, v in extra_config.items()} if extra_config else extra_config
+
+    if new_block_list != quant_block_list:
+        logger.info(f"Remapped block_name_to_quantize: {quant_block_list} -> {new_block_list}")
+
+    return new_block_list, new_extra_config
+
+
 def get_layer_config(model, quantization_config):
     """
     get a layer-wise quantization configuration for a given model.
@@ -259,6 +296,17 @@ def get_layer_config(model, quantization_config):
     # Load extra configuration if available
     extra_config = getattr(quantization_config, "extra_config", {})
 
+    # When a composite model (e.g. VLM) is loaded as its text sub-model via AutoModelForCausalLM,
+    # block_name_to_quantize may still reference composite-level paths (e.g. "model.language_model.layers")
+    # while the actual module paths are "model.layers". Use conversion_mapping to remap if no layers matched.
+    if not layer_names and quant_block_list:
+        quant_block_list, extra_config = _remap_paths_for_text_model(model, quant_block_list, extra_config)
+        for n, m in model.named_modules():
+            if type(m) not in SUPPORTED_LAYER_TYPES:
+                continue
+            if check_start_with_block_name(n, quant_block_list):
+                layer_names.append(n)
+
     # Process GPTQ format: identify modules that should be quantized
     if getattr(quantization_config, "modules_in_block_to_quantize", None):
         modules_in_block_to_quantize = flatten_list(
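
A minimal standalone sketch of the remapping behavior, for reviewers who want to try it outside the patch. The SimpleNamespace rule here is a hypothetical stand-in for one Transformers WeightRenaming object; in the patch the real rules come from get_checkpoint_conversion_mapping(model_type).

import re
from types import SimpleNamespace

# Hypothetical stand-in for a WeightRenaming rule; real ones come from
# transformers.conversion_mapping (see _remap_paths_for_text_model above).
rule = SimpleNamespace(source_patterns=[r"model\.language_model"], target_patterns=["model"])

def remap(path, renamings):
    # The first (source, target) pair that changes the path wins,
    # mirroring remap() in the patch.
    for r in renamings:
        for src, tgt in zip(r.source_patterns, r.target_patterns):
            new_path = re.sub(src, tgt, path)
            if new_path != path:
                return new_path
    return path

print(remap("model.language_model.layers", [rule]))  # -> model.layers
print(remap("model.visual.blocks", [rule]))          # no rule matches -> unchanged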