auto_round/inference/convert_model.py (48 additions, 0 deletions)
@@ -179,6 +179,43 @@ def get_available_devices():
    return devices


def _remap_paths_for_text_model(model, quant_block_list, extra_config):
    """Remap quantization paths when a composite model checkpoint is loaded as its text sub-model.

    Uses Transformers' conversion_mapping WeightRenaming rules (e.g. "model.language_model" -> "model")
    to fix path mismatches. Returns (remapped_block_list, remapped_extra_config).
    """
    try:
        from transformers.conversion_mapping import get_checkpoint_conversion_mapping
    except ImportError:
        # Older transformers versions have no conversion_mapping module; nothing to remap.
        return quant_block_list, extra_config

    model_type = getattr(getattr(model, "config", None), "model_type", None)
    if model_type is None:
        return quant_block_list, extra_config

    mapping = get_checkpoint_conversion_mapping(model_type)
    if not mapping:
        # No conversion mapping registered for this model type.
        return quant_block_list, extra_config
    renamings = [r for r in mapping if type(r).__name__ == "WeightRenaming"]
    if not renamings:
        return quant_block_list, extra_config

    def remap(path):
        for r in renamings:
            for src, tgt in zip(r.source_patterns, r.target_patterns):
                new_path = re.sub(src, tgt, path)
                if new_path != path:
                    return new_path
        return path

    new_block_list = [remap(b) for b in quant_block_list]
    new_extra_config = {remap(k): v for k, v in extra_config.items()} if extra_config else extra_config

    if new_block_list != quant_block_list:
        logger.info(f"Remapped block_name_to_quantize: {quant_block_list} -> {new_block_list}")

    return new_block_list, new_extra_config
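
For reviewers, a minimal standalone sketch of the renaming step above; the rule below is illustrative (real rules come from transformers' conversion_mapping, and these pattern strings are an assumption):

import re

# Hypothetical WeightRenaming-style rule: composite-level prefix -> text-model prefix.
source_patterns = [r"model\.language_model"]
target_patterns = ["model"]

def remap(path):
    # Apply the first rule whose substitution actually changes the path.
    for src, tgt in zip(source_patterns, target_patterns):
        new_path = re.sub(src, tgt, path)
        if new_path != path:
            return new_path
    return path

assert remap("model.language_model.layers") == "model.layers"
assert remap("lm_head") == "lm_head"  # paths outside the rules pass through unchanged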


def get_layer_config(model, quantization_config):
"""
get a layer-wise quantization configuration for a given model.
Expand Down Expand Up @@ -259,6 +296,17 @@ def get_layer_config(model, quantization_config):
    # Load extra configuration if available
    extra_config = getattr(quantization_config, "extra_config", {})

    # When a composite model (e.g. VLM) is loaded as its text sub-model via AutoModelForCausalLM,
    # block_name_to_quantize may still reference composite-level paths (e.g. "model.language_model.layers")
    # while the actual module paths are "model.layers". Use conversion_mapping to remap if no layers matched.
    if not layer_names and quant_block_list:
        quant_block_list, extra_config = _remap_paths_for_text_model(model, quant_block_list, extra_config)
        for n, m in model.named_modules():
            if type(m) not in SUPPORTED_LAYER_TYPES:
                continue
            if check_start_with_block_name(n, quant_block_list):
                layer_names.append(n)

    # Process GPTQ format: identify modules that should be quantized
    if getattr(quantization_config, "modules_in_block_to_quantize", None):
        modules_in_block_to_quantize = flatten_list(
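
The retry above assumes check_start_with_block_name does prefix matching of module paths against the (now remapped) block names. A sketch of that assumed behavior, not the actual auto_round helper:

def check_start_with_block_name(name, block_list):
    # A module belongs to a quantized block when its path equals a block name
    # or sits beneath it, e.g. "model.layers.0.self_attn.q_proj" under "model.layers".
    return any(name == block or name.startswith(block + ".") for block in block_list)

# After remapping, "model.language_model.layers" has become "model.layers",
# so modules under "model.layers" match and layer_names is populated on the retry.
print(check_start_with_block_name("model.layers.0.self_attn.q_proj", ["model.layers"]))  # True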