auto_round/inference/convert_model.py (48 additions, 0 deletions)
@@ -179,6 +179,43 @@ def get_available_devices():
    return devices


def _remap_paths_for_text_model(model, quant_block_list, extra_config):
    """Remap quantization paths when a composite model checkpoint is loaded as its text sub-model.

    Uses Transformers' conversion_mapping WeightRenaming rules (e.g. "model.language_model" -> "model")
    to fix path mismatches. Returns (remapped_block_list, remapped_extra_config).
    """
    try:
        from transformers.conversion_mapping import get_checkpoint_conversion_mapping
    except ImportError:
        # Older transformers versions have no conversion_mapping module; nothing to remap.
        return quant_block_list, extra_config

    model_type = getattr(getattr(model, "config", None), "model_type", None)
    if model_type is None:
        return quant_block_list, extra_config

    mapping = get_checkpoint_conversion_mapping(model_type)
    if not mapping:
        # No conversion mapping registered for this model type.
        return quant_block_list, extra_config
    renamings = [r for r in mapping if type(r).__name__ == "WeightRenaming"]
    if not renamings:
        return quant_block_list, extra_config

    def remap(path):
        for r in renamings:
            for src, tgt in zip(r.source_patterns, r.target_patterns):
                new_path = re.sub(src, tgt, path)
                if new_path != path:
                    return new_path
        return path

    new_block_list = [remap(b) for b in quant_block_list]
    new_extra_config = {remap(k): v for k, v in extra_config.items()} if extra_config else extra_config

    if new_block_list != quant_block_list:
        logger.info(f"Remapped block_name_to_quantize: {quant_block_list} -> {new_block_list}")

    return new_block_list, new_extra_config
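
For reviewers, a minimal standalone sketch of the renaming step above; the rule below is illustrative (real rules come from transformers' conversion_mapping, and these pattern strings are an assumption):

import re

# Hypothetical WeightRenaming-style rule: composite-level prefix -> text-model prefix.
source_patterns = [r"model\.language_model"]
target_patterns = ["model"]

def remap(path):
    # Apply the first rule whose substitution actually changes the path.
    for src, tgt in zip(source_patterns, target_patterns):
        new_path = re.sub(src, tgt, path)
        if new_path != path:
            return new_path
    return path

assert remap("model.language_model.layers") == "model.layers"
assert remap("lm_head") == "lm_head"  # paths outside the rules pass through unchanged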


def get_layer_config(model, quantization_config):
"""
get a layer-wise quantization configuration for a given model.
Expand Down Expand Up @@ -259,6 +296,17 @@ def get_layer_config(model, quantization_config):
    # Load extra configuration if available
    extra_config = getattr(quantization_config, "extra_config", {})

    # When a composite model (e.g. VLM) is loaded as its text sub-model via AutoModelForCausalLM,
    # block_name_to_quantize may still reference composite-level paths (e.g. "model.language_model.layers")
    # while the actual module paths are "model.layers". Use conversion_mapping to remap if no layers matched.
    if not layer_names and quant_block_list:
        quant_block_list, extra_config = _remap_paths_for_text_model(model, quant_block_list, extra_config)
        for n, m in model.named_modules():
            if type(m) not in SUPPORTED_LAYER_TYPES:
                continue
            if check_start_with_block_name(n, quant_block_list):
                layer_names.append(n)

    # Process GPTQ format: identify modules that should be quantized
    if getattr(quantization_config, "modules_in_block_to_quantize", None):
        modules_in_block_to_quantize = flatten_list(
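
The retry above assumes check_start_with_block_name does prefix matching of module paths against the (now remapped) block names. A sketch of that assumed behavior, not the actual auto_round helper:

def check_start_with_block_name(name, block_list):
    # A module belongs to a quantized block when its path equals a block name
    # or sits beneath it, e.g. "model.layers.0.self_attn.q_proj" under "model.layers".
    return any(name == block or name.startswith(block + ".") for block in block_list)

# After remapping, "model.language_model.layers" has become "model.layers",
# so modules under "model.layers" match and layer_names is populated on the retry.
print(check_start_with_block_name("model.layers.0.self_attn.q_proj", ["model.layers"]))  # True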