From de23c8afd0c663a82a5be126b74a656cfa48d124 Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Wed, 29 Apr 2026 11:31:04 +0800 Subject: [PATCH 01/11] Fix incompatible weight names Co-authored-by: Copilot --- auto_round/autoround.py | 1 + auto_round/compressors/base.py | 17 +++++++++++++++++ auto_round/compressors/shard_writer.py | 9 ++++++++- auto_round/compressors_new/base.py | 16 ++++++++++++++++ auto_round/compressors_new/shard_writer.py | 9 ++++++++- auto_round/inference/convert_model.py | 9 ++++++++- auto_round/utils/common.py | 19 +++++++++++++++++++ 7 files changed, 77 insertions(+), 3 deletions(-) diff --git a/auto_round/autoround.py b/auto_round/autoround.py index b2bec2651..66ed99b9b 100644 --- a/auto_round/autoround.py +++ b/auto_round/autoround.py @@ -163,6 +163,7 @@ def __new__( """ local_args = {k: v for k, v in locals().items() if k not in cls.SKIP_ARGS} + local_args.update({k: v for k, v in extra_config.to_dict().items() if k in local_args}) if NEW_ARCH: from auto_round.compressors_new.entry import AutoRoundCompatible diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index 0503c8235..ca852ed57 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -111,6 +111,7 @@ set_module, to_device, to_dtype, + revert_checkpoint_conversion_mapping, unsupported_meta_device, ) from auto_round.utils.device import ( @@ -3603,6 +3604,22 @@ def save_quantized( serialization_dict["autoround_version"] = __version__ if "scale_dtype" in serialization_dict.keys(): serialization_dict["scale_dtype"] = str(serialization_dict["scale_dtype"]) + + # to match the original name + if hasattr(self.model, "_checkpoint_conversion_mapping"): + reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} + + if isinstance(serialization_dict["to_quant_block_names"], str): + serialization_dict["to_quant_block_names"] = revert_checkpoint_conversion_mapping( + serialization_dict["to_quant_block_names"], reverse_key_mapping + ) + + elif isinstance(serialization_dict["to_quant_block_names"], list): + for idx in range(len(serialization_dict["to_quant_block_names"])): + serialization_dict["to_quant_block_names"][idx] = revert_checkpoint_conversion_mapping( + serialization_dict["to_quant_block_names"][idx], reverse_key_mapping + ) + compressed_model = format.save_quantized( save_folder, model=self.model, diff --git a/auto_round/compressors/shard_writer.py b/auto_round/compressors/shard_writer.py index af3f510a3..4a7777ba6 100644 --- a/auto_round/compressors/shard_writer.py +++ b/auto_round/compressors/shard_writer.py @@ -20,7 +20,7 @@ from torch.nn import Parameter from auto_round.logger import logger -from auto_round.utils import get_lm_head_name, get_module +from auto_round.utils import get_lm_head_name, get_module, revert_checkpoint_conversion_mapping class ShardWriter: @@ -55,6 +55,9 @@ def __init__(self, rounder): self.shard_meta = [] # List of {tmp_file: str, params: list} self.global_weight_map = {} self.shard_counter = 0 + self.reverse_key_mapping = {} + if hasattr(self.model, "_checkpoint_conversion_mapping"): + self.reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} # Persistent set of all parameter names already flushed to a shard file. # Maintained incrementally in _flush_shard to avoid O(N^2) rebuilds in _add_tensor. 
@@ -105,6 +108,10 @@ def save_module(self, m: torch.nn.Module, name: str = None): self._add_tensor(param_name, v) def _add_tensor(self, name: str, tensor: torch.Tensor): + + # transformers will handle _checkpoint_conversion_mapping automatically if is_immediate_saving=False + name = revert_checkpoint_conversion_mapping(name, self.reverse_key_mapping) + if isinstance(tensor, torch.Tensor) and tensor.device.type == "meta": self.skipped_meta_tensors.append(name) return diff --git a/auto_round/compressors_new/base.py b/auto_round/compressors_new/base.py index 37f5726e8..c827a31a6 100644 --- a/auto_round/compressors_new/base.py +++ b/auto_round/compressors_new/base.py @@ -56,6 +56,7 @@ is_hpex_available, is_quantized_input_module, memory_monitor, + revert_checkpoint_conversion_mapping, ) from auto_round.utils.device import ( _force_trim_malloc, @@ -1148,6 +1149,21 @@ def save_quantized( if "scale_dtype" in serialization_dict.keys(): serialization_dict["scale_dtype"] = str(serialization_dict["scale_dtype"]) + # to match the original name + if hasattr(self.model, "_checkpoint_conversion_mapping"): + reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} + + if isinstance(serialization_dict["to_quant_block_names"], str): + serialization_dict["to_quant_block_names"] = revert_checkpoint_conversion_mapping( + serialization_dict["to_quant_block_names"], reverse_key_mapping + ) + + elif isinstance(serialization_dict["to_quant_block_names"], list): + for idx in range(len(serialization_dict["to_quant_block_names"])): + serialization_dict["to_quant_block_names"][idx] = revert_checkpoint_conversion_mapping( + serialization_dict["to_quant_block_names"][idx], reverse_key_mapping + ) + compressed_model = format.save_quantized( save_folder, model=self.model_context.model, diff --git a/auto_round/compressors_new/shard_writer.py b/auto_round/compressors_new/shard_writer.py index dbdd2cc86..beaf3805b 100644 --- a/auto_round/compressors_new/shard_writer.py +++ b/auto_round/compressors_new/shard_writer.py @@ -22,7 +22,7 @@ from auto_round.context.compress import CompressContext from auto_round.context.model import ModelContext from auto_round.logger import logger -from auto_round.utils import get_lm_head_name, get_module +from auto_round.utils import get_lm_head_name, get_module, revert_checkpoint_conversion_mapping class ShardWriter: @@ -78,6 +78,9 @@ def __init__( self.shard_meta = [] # List of {tmp_file: str, params: list} self.global_weight_map = {} self.shard_counter = 0 + self.reverse_key_mapping = {} + if hasattr(self.model, "_checkpoint_conversion_mapping"): + self.reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} # Persistent set of all parameter names already flushed to a shard file. # Maintained incrementally in _flush_shard to avoid O(N^2) rebuilds in _add_tensor. 
@@ -154,6 +157,10 @@ def save_module(self, m: torch.nn.Module, name: str = None): self._add_tensor(param_name, v) def _add_tensor(self, name: str, tensor: torch.Tensor): + + # transformers will handle _checkpoint_conversion_mapping automatically if is_immediate_saving=False + name = revert_checkpoint_conversion_mapping(name, self.reverse_key_mapping) + if isinstance(tensor, torch.Tensor) and tensor.device.type == "meta": self.skipped_meta_tensors.append(name) return diff --git a/auto_round/inference/convert_model.py b/auto_round/inference/convert_model.py index 70bb311f7..48a74a150 100644 --- a/auto_round/inference/convert_model.py +++ b/auto_round/inference/convert_model.py @@ -35,6 +35,7 @@ from auto_round.special_model_handler import update_module from auto_round.utils import ( SUPPORTED_LAYER_TYPES, + apply_checkpoint_conversion_mapping, check_start_with_block_name, check_to_quantized, find_matching_blocks, @@ -270,12 +271,16 @@ def get_layer_config(model, quantization_config): ) # Determine the quantization block list + checkpoint_conversion_mapping = getattr(model, "_checkpoint_conversion_mapping", {}) quant_block_list = getattr(quantization_config, "quant_block_list", None) if quant_block_list is not None: # Handle nested list format: [[block1, block2, ...], ...] -> [prefix1, ...] if quant_block_list and isinstance(quant_block_list[0], (list, tuple)): for i in range(len(quant_block_list)): - quant_block_list[i] = os.path.commonprefix(quant_block_list[i]).rstrip(".") + quant_block_list[i] = apply_checkpoint_conversion_mapping( + os.path.commonprefix(quant_block_list[i]).rstrip("."), + checkpoint_conversion_mapping + ) elif quant_block_list is None: to_quant_block_names = getattr(quantization_config, "block_name_to_quantize", None) # Prioritize this parameter if to_quant_block_names is None: @@ -292,6 +297,8 @@ def get_layer_config(model, quantization_config): # Speed up the matching for i in range(len(quant_block_list)): quant_block_list[i] = os.path.commonprefix(quant_block_list[i]).rstrip(".") + for i in range(len(quant_block_list)): + quant_block_list[i] = apply_checkpoint_conversion_mapping(quant_block_list[i], checkpoint_conversion_mapping) # Get layer names that will be quantized layer_names = [] diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py index 865134b8b..cc14d5eb7 100644 --- a/auto_round/utils/common.py +++ b/auto_round/utils/common.py @@ -1055,3 +1055,22 @@ def infer_bits_by_data_type(data_type: str): if str.isdigit(data_type[len(supported_dtype)]): return int(data_type[len(supported_dtype)]) return None + + +def revert_checkpoint_conversion_mapping(name: str, reverse_key_mapping: dict[str, str]) -> str: + for pattern, replacement in reverse_key_mapping.items(): + replacement = replacement.lstrip("^") # strip off un-needed chars and patterns + replacement = re.sub(r"\(.*\)", "", replacement) + name, n_replace = re.subn(pattern, replacement, name) + # Early exit of the loop + if n_replace > 0: + break + return name + +def apply_checkpoint_conversion_mapping(name: str, key_mapping: dict[str, str]) -> str: + for pattern, replacement in key_mapping.items(): + name, n_replace = re.subn(pattern, replacement, name) + # Early exit of the loop + if n_replace > 0: + break + return name \ No newline at end of file From c927d44962fa123a7575d81d34527b18eb510c55 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 29 Apr 2026 03:33:25 +0000 Subject: [PATCH 02/11] [pre-commit.ci] auto fixes from 
pre-commit.com hooks for more information, see https://pre-commit.ci --- auto_round/compressors/base.py | 4 ++-- auto_round/compressors_new/base.py | 2 +- auto_round/inference/convert_model.py | 7 ++++--- auto_round/utils/common.py | 3 ++- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index ca852ed57..b62a171c3 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -107,11 +107,11 @@ memory_monitor, mv_module_from_gpu, normalize_no_split_modules, + revert_checkpoint_conversion_mapping, set_amax_for_all_moe_layers, set_module, to_device, to_dtype, - revert_checkpoint_conversion_mapping, unsupported_meta_device, ) from auto_round.utils.device import ( @@ -3605,7 +3605,7 @@ def save_quantized( if "scale_dtype" in serialization_dict.keys(): serialization_dict["scale_dtype"] = str(serialization_dict["scale_dtype"]) - # to match the original name + # to match the original name if hasattr(self.model, "_checkpoint_conversion_mapping"): reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} diff --git a/auto_round/compressors_new/base.py b/auto_round/compressors_new/base.py index c827a31a6..b169deb56 100644 --- a/auto_round/compressors_new/base.py +++ b/auto_round/compressors_new/base.py @@ -1149,7 +1149,7 @@ def save_quantized( if "scale_dtype" in serialization_dict.keys(): serialization_dict["scale_dtype"] = str(serialization_dict["scale_dtype"]) - # to match the original name + # to match the original name if hasattr(self.model, "_checkpoint_conversion_mapping"): reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} diff --git a/auto_round/inference/convert_model.py b/auto_round/inference/convert_model.py index 48a74a150..88500e4dc 100644 --- a/auto_round/inference/convert_model.py +++ b/auto_round/inference/convert_model.py @@ -278,8 +278,7 @@ def get_layer_config(model, quantization_config): if quant_block_list and isinstance(quant_block_list[0], (list, tuple)): for i in range(len(quant_block_list)): quant_block_list[i] = apply_checkpoint_conversion_mapping( - os.path.commonprefix(quant_block_list[i]).rstrip("."), - checkpoint_conversion_mapping + os.path.commonprefix(quant_block_list[i]).rstrip("."), checkpoint_conversion_mapping ) elif quant_block_list is None: to_quant_block_names = getattr(quantization_config, "block_name_to_quantize", None) # Prioritize this parameter @@ -298,7 +297,9 @@ def get_layer_config(model, quantization_config): for i in range(len(quant_block_list)): quant_block_list[i] = os.path.commonprefix(quant_block_list[i]).rstrip(".") for i in range(len(quant_block_list)): - quant_block_list[i] = apply_checkpoint_conversion_mapping(quant_block_list[i], checkpoint_conversion_mapping) + quant_block_list[i] = apply_checkpoint_conversion_mapping( + quant_block_list[i], checkpoint_conversion_mapping + ) # Get layer names that will be quantized layer_names = [] diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py index cc14d5eb7..2d3ade65a 100644 --- a/auto_round/utils/common.py +++ b/auto_round/utils/common.py @@ -1067,10 +1067,11 @@ def revert_checkpoint_conversion_mapping(name: str, reverse_key_mapping: dict[st break return name + def apply_checkpoint_conversion_mapping(name: str, key_mapping: dict[str, str]) -> str: for pattern, replacement in key_mapping.items(): name, n_replace = re.subn(pattern, replacement, name) # Early exit of the loop if n_replace > 0: break - return name \ 
No newline at end of file + return name From 021133ec71fb0c2882a2b6d88aa1d0c71bf9300f Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Wed, 29 Apr 2026 13:55:04 +0800 Subject: [PATCH 03/11] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- auto_round/autoround.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/auto_round/autoround.py b/auto_round/autoround.py index 66ed99b9b..b1844490f 100644 --- a/auto_round/autoround.py +++ b/auto_round/autoround.py @@ -163,7 +163,8 @@ def __new__( """ local_args = {k: v for k, v in locals().items() if k not in cls.SKIP_ARGS} - local_args.update({k: v for k, v in extra_config.to_dict().items() if k in local_args}) + if extra_config is not None: + local_args.update({k: v for k, v in extra_config.to_dict().items() if k in local_args and v is not None}) if NEW_ARCH: from auto_round.compressors_new.entry import AutoRoundCompatible From 8389c5298d96d19293ed7c1e34f4800d46712929 Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 30 Apr 2026 10:37:33 +0800 Subject: [PATCH 04/11] add ut --- test/test_cuda/integrations/test_sglang.py | 33 ++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/test/test_cuda/integrations/test_sglang.py b/test/test_cuda/integrations/test_sglang.py index 196fa2efa..3467f98ac 100644 --- a/test/test_cuda/integrations/test_sglang.py +++ b/test/test_cuda/integrations/test_sglang.py @@ -121,6 +121,39 @@ def test_mixed_ar_format_sglang(self, dataloader): shutil.rmtree(self.save_dir, ignore_errors=True) + def test_qwen2_5_vl_loading(self, tiny_qwen_2_5_vl_model_path): + from auto_round.utils import mllm_load_model + layer_config = { + "self_attn": {"bits": 8}, + "lm_head": {"bits": 16}, + "mlp": {"bits": 16, "act_bits": 16}, + } + + model, processor, tokenizer, image_processor = mllm_load_model(tiny_qwen_2_5_vl_model_path) + + autoround = AutoRound( + model, + tokenizer, + scheme="W4A16", + iters=1, + nsamples=1, + seqlen=32, + processor=processor, + image_processor=image_processor, + layer_config=layer_config, + ) + + _, quantized_model_path = autoround.quantize_and_save( + output_dir=self.save_dir, + inplace=True, + format="auto_round", + ) + + generated_text = self._run_sglang_inference(quantized_model_path) + print(generated_text) + + assert "!!!" 
not in generated_text + @pytest.mark.skip_ci(reason="Cannot work well in CI env") def test_awq_format_sglang(self, dataloader): autoround = AutoRound( From 0d00b12dc07acaf4393fae60e9d73219b1118468 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 30 Apr 2026 02:37:57 +0000 Subject: [PATCH 05/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- test/test_cuda/integrations/test_sglang.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_cuda/integrations/test_sglang.py b/test/test_cuda/integrations/test_sglang.py index 3467f98ac..a99530d2c 100644 --- a/test/test_cuda/integrations/test_sglang.py +++ b/test/test_cuda/integrations/test_sglang.py @@ -123,6 +123,7 @@ def test_mixed_ar_format_sglang(self, dataloader): def test_qwen2_5_vl_loading(self, tiny_qwen_2_5_vl_model_path): from auto_round.utils import mllm_load_model + layer_config = { "self_attn": {"bits": 8}, "lm_head": {"bits": 16}, From 8b666affb1549d8dfc205be872b2d590db002fbd Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 30 Apr 2026 12:38:57 +0800 Subject: [PATCH 06/11] fix ut --- auto_round/utils/model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/auto_round/utils/model.py b/auto_round/utils/model.py index 04a8b0395..08698d68f 100644 --- a/auto_round/utils/model.py +++ b/auto_round/utils/model.py @@ -602,10 +602,11 @@ def mllm_load_model( processor_load_kwargs = {} if processor_subfolder is not None: processor_load_kwargs["subfolder"] = processor_subfolder + if model_type in FIX_MISTRAL_REGEX_MODEL_TYPE_LIST: + processor_load_kwargs["fix_mistral_regex"] = True tokenizer = AutoTokenizer.from_pretrained( pretrained_model_name_or_path, trust_remote_code=trust_remote_code, - fix_mistral_regex=True if model_type in FIX_MISTRAL_REGEX_MODEL_TYPE_LIST else False, **processor_load_kwargs, ) processor = AutoProcessor.from_pretrained( From 0fb900c5a793f8f58a50fc452bc11dd0b8fdb3f0 Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 30 Apr 2026 16:49:33 +0800 Subject: [PATCH 07/11] adapt to latest transformers Co-authored-by: Copilot --- auto_round/compressors/base.py | 22 ++++---- auto_round/compressors/shard_writer.py | 8 ++- auto_round/compressors_new/base.py | 22 ++++---- auto_round/compressors_new/shard_writer.py | 8 ++- auto_round/inference/convert_model.py | 3 +- auto_round/utils/common.py | 61 +++++++++++++++++----- auto_round/utils/model.py | 3 +- 7 files changed, 79 insertions(+), 48 deletions(-) diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index b62a171c3..6eb1cfd47 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -94,6 +94,7 @@ get_layer_names_in_block, get_lm_head_name, get_module, + get_reverse_checkpoint_conversion_mapping, global_state, hook_ngram_embeddings_on_cpu, htcore, @@ -3606,19 +3607,18 @@ def save_quantized( serialization_dict["scale_dtype"] = str(serialization_dict["scale_dtype"]) # to match the original name - if hasattr(self.model, "_checkpoint_conversion_mapping"): - reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} + reverse_checkpoint_conversion_mapping = get_reverse_checkpoint_conversion_mapping(self.model) - if isinstance(serialization_dict["to_quant_block_names"], str): - serialization_dict["to_quant_block_names"] = revert_checkpoint_conversion_mapping( - serialization_dict["to_quant_block_names"], reverse_key_mapping - ) + if 
isinstance(serialization_dict["to_quant_block_names"], str): + serialization_dict["to_quant_block_names"] = revert_checkpoint_conversion_mapping( + serialization_dict["to_quant_block_names"], reverse_checkpoint_conversion_mapping + ) - elif isinstance(serialization_dict["to_quant_block_names"], list): - for idx in range(len(serialization_dict["to_quant_block_names"])): - serialization_dict["to_quant_block_names"][idx] = revert_checkpoint_conversion_mapping( - serialization_dict["to_quant_block_names"][idx], reverse_key_mapping - ) + elif isinstance(serialization_dict["to_quant_block_names"], list): + for idx in range(len(serialization_dict["to_quant_block_names"])): + serialization_dict["to_quant_block_names"][idx] = revert_checkpoint_conversion_mapping( + serialization_dict["to_quant_block_names"][idx], reverse_checkpoint_conversion_mapping + ) compressed_model = format.save_quantized( save_folder, diff --git a/auto_round/compressors/shard_writer.py b/auto_round/compressors/shard_writer.py index 4a7777ba6..177eac3ae 100644 --- a/auto_round/compressors/shard_writer.py +++ b/auto_round/compressors/shard_writer.py @@ -20,7 +20,7 @@ from torch.nn import Parameter from auto_round.logger import logger -from auto_round.utils import get_lm_head_name, get_module, revert_checkpoint_conversion_mapping +from auto_round.utils import get_lm_head_name, get_module, revert_checkpoint_conversion_mapping, get_reverse_checkpoint_conversion_mapping class ShardWriter: @@ -55,9 +55,7 @@ def __init__(self, rounder): self.shard_meta = [] # List of {tmp_file: str, params: list} self.global_weight_map = {} self.shard_counter = 0 - self.reverse_key_mapping = {} - if hasattr(self.model, "_checkpoint_conversion_mapping"): - self.reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} + self.reverse_checkpoint_conversion_mapping = get_reverse_checkpoint_conversion_mapping(self.model) # Persistent set of all parameter names already flushed to a shard file. # Maintained incrementally in _flush_shard to avoid O(N^2) rebuilds in _add_tensor. 
@@ -110,7 +108,7 @@ def save_module(self, m: torch.nn.Module, name: str = None): def _add_tensor(self, name: str, tensor: torch.Tensor): # transformers will handle _checkpoint_conversion_mapping automatically if is_immediate_saving=False - name = revert_checkpoint_conversion_mapping(name, self.reverse_key_mapping) + name = revert_checkpoint_conversion_mapping(name, self.reverse_checkpoint_conversion_mapping) if isinstance(tensor, torch.Tensor) and tensor.device.type == "meta": self.skipped_meta_tensors.append(name) diff --git a/auto_round/compressors_new/base.py b/auto_round/compressors_new/base.py index b169deb56..f8d57ed18 100644 --- a/auto_round/compressors_new/base.py +++ b/auto_round/compressors_new/base.py @@ -51,6 +51,7 @@ convert_dtype_str2torch, extract_block_names_to_str, find_matching_blocks, + get_reverse_checkpoint_conversion_mapping, get_block_names, is_debug_mode, is_hpex_available, @@ -1150,19 +1151,18 @@ def save_quantized( serialization_dict["scale_dtype"] = str(serialization_dict["scale_dtype"]) # to match the original name - if hasattr(self.model, "_checkpoint_conversion_mapping"): - reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} + reverse_checkpoint_conversion_mapping = get_reverse_checkpoint_conversion_mapping(self.model) - if isinstance(serialization_dict["to_quant_block_names"], str): - serialization_dict["to_quant_block_names"] = revert_checkpoint_conversion_mapping( - serialization_dict["to_quant_block_names"], reverse_key_mapping - ) + if isinstance(serialization_dict["to_quant_block_names"], str): + serialization_dict["to_quant_block_names"] = revert_checkpoint_conversion_mapping( + serialization_dict["to_quant_block_names"], reverse_checkpoint_conversion_mapping + ) - elif isinstance(serialization_dict["to_quant_block_names"], list): - for idx in range(len(serialization_dict["to_quant_block_names"])): - serialization_dict["to_quant_block_names"][idx] = revert_checkpoint_conversion_mapping( - serialization_dict["to_quant_block_names"][idx], reverse_key_mapping - ) + elif isinstance(serialization_dict["to_quant_block_names"], list): + for idx in range(len(serialization_dict["to_quant_block_names"])): + serialization_dict["to_quant_block_names"][idx] = revert_checkpoint_conversion_mapping( + serialization_dict["to_quant_block_names"][idx], reverse_checkpoint_conversion_mapping + ) compressed_model = format.save_quantized( save_folder, diff --git a/auto_round/compressors_new/shard_writer.py b/auto_round/compressors_new/shard_writer.py index beaf3805b..c61c2b529 100644 --- a/auto_round/compressors_new/shard_writer.py +++ b/auto_round/compressors_new/shard_writer.py @@ -22,7 +22,7 @@ from auto_round.context.compress import CompressContext from auto_round.context.model import ModelContext from auto_round.logger import logger -from auto_round.utils import get_lm_head_name, get_module, revert_checkpoint_conversion_mapping +from auto_round.utils import get_lm_head_name, get_module, revert_checkpoint_conversion_mapping, get_reverse_checkpoint_conversion_mapping class ShardWriter: @@ -78,9 +78,7 @@ def __init__( self.shard_meta = [] # List of {tmp_file: str, params: list} self.global_weight_map = {} self.shard_counter = 0 - self.reverse_key_mapping = {} - if hasattr(self.model, "_checkpoint_conversion_mapping"): - self.reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} + self.reverse_checkpoint_conversion_mapping = get_reverse_checkpoint_conversion_mapping(self.model) # Persistent set of 
all parameter names already flushed to a shard file. # Maintained incrementally in _flush_shard to avoid O(N^2) rebuilds in _add_tensor. @@ -159,7 +157,7 @@ def save_module(self, m: torch.nn.Module, name: str = None): def _add_tensor(self, name: str, tensor: torch.Tensor): # transformers will handle _checkpoint_conversion_mapping automatically if is_immediate_saving=False - name = revert_checkpoint_conversion_mapping(name, self.reverse_key_mapping) + name = revert_checkpoint_conversion_mapping(name, self.reverse_checkpoint_conversion_mapping) if isinstance(tensor, torch.Tensor) and tensor.device.type == "meta": self.skipped_meta_tensors.append(name) diff --git a/auto_round/inference/convert_model.py b/auto_round/inference/convert_model.py index 88500e4dc..12cff3984 100644 --- a/auto_round/inference/convert_model.py +++ b/auto_round/inference/convert_model.py @@ -42,6 +42,7 @@ get_block_names, get_module, is_hpex_available, + get_checkpoint_conversion_mapping, is_transformers_version_greater_or_equal_5, set_module, ) @@ -271,7 +272,7 @@ def get_layer_config(model, quantization_config): ) # Determine the quantization block list - checkpoint_conversion_mapping = getattr(model, "_checkpoint_conversion_mapping", {}) + checkpoint_conversion_mapping = get_checkpoint_conversion_mapping(model) quant_block_list = getattr(quantization_config, "quant_block_list", None) if quant_block_list is not None: # Handle nested list format: [[block1, block2, ...], ...] -> [prefix1, ...] diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py index 2d3ade65a..585dcdccb 100644 --- a/auto_round/utils/common.py +++ b/auto_round/utils/common.py @@ -1057,21 +1057,56 @@ def infer_bits_by_data_type(data_type: str): return None -def revert_checkpoint_conversion_mapping(name: str, reverse_key_mapping: dict[str, str]) -> str: - for pattern, replacement in reverse_key_mapping.items(): - replacement = replacement.lstrip("^") # strip off un-needed chars and patterns - replacement = re.sub(r"\(.*\)", "", replacement) - name, n_replace = re.subn(pattern, replacement, name) - # Early exit of the loop - if n_replace > 0: - break +def get_checkpoint_conversion_mapping(model): + """Get the checkpoint conversion mapping for a given model, if it exists.""" + # transformers <= 5.3.0 use _checkpoint_conversion_mapping + checkpoint_conversion_mapping = getattr(model, "_checkpoint_conversion_mapping", {}) + + # transformers > 5.3.0 use get_checkpoint_conversion_mapping + if hasattr(transformers, "conversion_mapping") and (hasattr(model, "config") and hasattr(model.config, "model_type")): + from transformers.conversion_mapping import get_checkpoint_conversion_mapping as transformers_get_checkpoint_conversion_mapping + conversion_mappings = transformers_get_checkpoint_conversion_mapping(model.config.model_type) + for conversion_mapping in conversion_mappings: + for source_pattern in conversion_mapping.source_patterns: + checkpoint_conversion_mapping[source_pattern] = conversion_mapping.target_patterns + return checkpoint_conversion_mapping + + +def get_reverse_checkpoint_conversion_mapping(model): + """Get the reverse checkpoint conversion mapping for a given model, if it exists.""" + reverse_checkpoint_conversion_mapping = {v: k for k, v in getattr(model, "_checkpoint_conversion_mapping", {}).items()} + + if hasattr(model, "_weight_conversions"): + weight_conversions = model._weight_conversions + for weight_conversion in weight_conversions: + reverse_conversion_mapping = weight_conversion.reverse_transform() + for 
source_pattern in reverse_conversion_mapping.source_patterns: + reverse_checkpoint_conversion_mapping[source_pattern] = reverse_conversion_mapping.target_patterns + + return reverse_checkpoint_conversion_mapping + + +def revert_checkpoint_conversion_mapping(name: str, key_mapping: dict[str, str]) -> str: + for source_pattern, target_patterns in key_mapping.items(): + if isinstance(target_patterns, str): + target_patterns = [target_patterns] + for target_pattern in target_patterns: + source_pattern = source_pattern.lstrip("^") # strip off un-needed chars and patterns + source_pattern = re.sub(r"\(.*\)", "", source_pattern) + name, n_replace = re.subn(source_pattern, target_pattern, name) + # Early exit of the loop + if n_replace > 0: + return name return name def apply_checkpoint_conversion_mapping(name: str, key_mapping: dict[str, str]) -> str: - for pattern, replacement in key_mapping.items(): - name, n_replace = re.subn(pattern, replacement, name) - # Early exit of the loop - if n_replace > 0: - break + for source_pattern, target_patterns in key_mapping.items(): + if isinstance(target_patterns, str): + target_patterns = [target_patterns] + for target_pattern in target_patterns: + name, n_replace = re.subn(source_pattern, target_pattern, name) + # Early exit of the loop + if n_replace > 0: + return name return name diff --git a/auto_round/utils/model.py b/auto_round/utils/model.py index 08698d68f..04a8b0395 100644 --- a/auto_round/utils/model.py +++ b/auto_round/utils/model.py @@ -602,11 +602,10 @@ def mllm_load_model( processor_load_kwargs = {} if processor_subfolder is not None: processor_load_kwargs["subfolder"] = processor_subfolder - if model_type in FIX_MISTRAL_REGEX_MODEL_TYPE_LIST: - processor_load_kwargs["fix_mistral_regex"] = True tokenizer = AutoTokenizer.from_pretrained( pretrained_model_name_or_path, trust_remote_code=trust_remote_code, + fix_mistral_regex=True if model_type in FIX_MISTRAL_REGEX_MODEL_TYPE_LIST else False, **processor_load_kwargs, ) processor = AutoProcessor.from_pretrained( From 90ed33f931ccda89bffc780197cd04e84b1badce Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 30 Apr 2026 08:50:41 +0000 Subject: [PATCH 08/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- auto_round/compressors/shard_writer.py | 7 ++++++- auto_round/compressors_new/base.py | 2 +- auto_round/compressors_new/shard_writer.py | 7 ++++++- auto_round/inference/convert_model.py | 2 +- auto_round/utils/common.py | 13 ++++++++++--- 5 files changed, 24 insertions(+), 7 deletions(-) diff --git a/auto_round/compressors/shard_writer.py b/auto_round/compressors/shard_writer.py index 177eac3ae..77b3b7ea4 100644 --- a/auto_round/compressors/shard_writer.py +++ b/auto_round/compressors/shard_writer.py @@ -20,7 +20,12 @@ from torch.nn import Parameter from auto_round.logger import logger -from auto_round.utils import get_lm_head_name, get_module, revert_checkpoint_conversion_mapping, get_reverse_checkpoint_conversion_mapping +from auto_round.utils import ( + get_lm_head_name, + get_module, + get_reverse_checkpoint_conversion_mapping, + revert_checkpoint_conversion_mapping, +) class ShardWriter: diff --git a/auto_round/compressors_new/base.py b/auto_round/compressors_new/base.py index f8d57ed18..1032a87d6 100644 --- a/auto_round/compressors_new/base.py +++ b/auto_round/compressors_new/base.py @@ -51,8 +51,8 @@ convert_dtype_str2torch, extract_block_names_to_str, 
find_matching_blocks, - get_reverse_checkpoint_conversion_mapping, get_block_names, + get_reverse_checkpoint_conversion_mapping, is_debug_mode, is_hpex_available, is_quantized_input_module, diff --git a/auto_round/compressors_new/shard_writer.py b/auto_round/compressors_new/shard_writer.py index c61c2b529..4a446ad07 100644 --- a/auto_round/compressors_new/shard_writer.py +++ b/auto_round/compressors_new/shard_writer.py @@ -22,7 +22,12 @@ from auto_round.context.compress import CompressContext from auto_round.context.model import ModelContext from auto_round.logger import logger -from auto_round.utils import get_lm_head_name, get_module, revert_checkpoint_conversion_mapping, get_reverse_checkpoint_conversion_mapping +from auto_round.utils import ( + get_lm_head_name, + get_module, + get_reverse_checkpoint_conversion_mapping, + revert_checkpoint_conversion_mapping, +) class ShardWriter: diff --git a/auto_round/inference/convert_model.py b/auto_round/inference/convert_model.py index 12cff3984..3f973514d 100644 --- a/auto_round/inference/convert_model.py +++ b/auto_round/inference/convert_model.py @@ -40,9 +40,9 @@ check_to_quantized, find_matching_blocks, get_block_names, + get_checkpoint_conversion_mapping, get_module, is_hpex_available, - get_checkpoint_conversion_mapping, is_transformers_version_greater_or_equal_5, set_module, ) diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py index 585dcdccb..13f86fed1 100644 --- a/auto_round/utils/common.py +++ b/auto_round/utils/common.py @@ -1063,8 +1063,13 @@ def get_checkpoint_conversion_mapping(model): checkpoint_conversion_mapping = getattr(model, "_checkpoint_conversion_mapping", {}) # transformers > 5.3.0 use get_checkpoint_conversion_mapping - if hasattr(transformers, "conversion_mapping") and (hasattr(model, "config") and hasattr(model.config, "model_type")): - from transformers.conversion_mapping import get_checkpoint_conversion_mapping as transformers_get_checkpoint_conversion_mapping + if hasattr(transformers, "conversion_mapping") and ( + hasattr(model, "config") and hasattr(model.config, "model_type") + ): + from transformers.conversion_mapping import ( + get_checkpoint_conversion_mapping as transformers_get_checkpoint_conversion_mapping, + ) + conversion_mappings = transformers_get_checkpoint_conversion_mapping(model.config.model_type) for conversion_mapping in conversion_mappings: for source_pattern in conversion_mapping.source_patterns: @@ -1074,7 +1079,9 @@ def get_checkpoint_conversion_mapping(model): def get_reverse_checkpoint_conversion_mapping(model): """Get the reverse checkpoint conversion mapping for a given model, if it exists.""" - reverse_checkpoint_conversion_mapping = {v: k for k, v in getattr(model, "_checkpoint_conversion_mapping", {}).items()} + reverse_checkpoint_conversion_mapping = { + v: k for k, v in getattr(model, "_checkpoint_conversion_mapping", {}).items() + } if hasattr(model, "_weight_conversions"): weight_conversions = model._weight_conversions From 49f51248243b4f4a1f09956255f80c0c2f846694 Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 30 Apr 2026 17:25:46 +0800 Subject: [PATCH 09/11] add config check in ut --- test/test_cpu/models/test_mllm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_cpu/models/test_mllm.py b/test/test_cpu/models/test_mllm.py index 2c0c71bd4..49b33332a 100644 --- a/test/test_cpu/models/test_mllm.py +++ b/test/test_cpu/models/test_mllm.py @@ -236,6 +236,7 @@ def test_qwen2_5(self, tiny_qwen_2_5_vl_model_path): model = 
Qwen2_5_VLForConditionalGeneration.from_pretrained( quantized_model_path, torch_dtype="auto", device_map="auto" ) + assert model.config.quantization_config.block_name_to_quantize == "model.visual.blocks,model.layers" image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg" processor = AutoProcessor.from_pretrained(quantized_model_path) messages = [ From 26e828cf4e9645363e62a466c76f3076cdb0a8ee Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 30 Apr 2026 17:48:46 +0800 Subject: [PATCH 10/11] bug fix Co-authored-by: Copilot --- auto_round/utils/common.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py index 13f86fed1..d2e36f015 100644 --- a/auto_round/utils/common.py +++ b/auto_round/utils/common.py @@ -1071,9 +1071,10 @@ def get_checkpoint_conversion_mapping(model): ) conversion_mappings = transformers_get_checkpoint_conversion_mapping(model.config.model_type) - for conversion_mapping in conversion_mappings: - for source_pattern in conversion_mapping.source_patterns: - checkpoint_conversion_mapping[source_pattern] = conversion_mapping.target_patterns + if conversion_mappings is not None: + for conversion_mapping in conversion_mappings: + for source_pattern in conversion_mapping.source_patterns: + checkpoint_conversion_mapping[source_pattern] = conversion_mapping.target_patterns return checkpoint_conversion_mapping From 60352685eed85f0884effcb1a72f417c9eb7957a Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 30 Apr 2026 21:52:20 +0800 Subject: [PATCH 11/11] fix CI issue --- auto_round/utils/common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py index d2e36f015..8ccaf646e 100644 --- a/auto_round/utils/common.py +++ b/auto_round/utils/common.py @@ -1059,8 +1059,10 @@ def infer_bits_by_data_type(data_type: str): def get_checkpoint_conversion_mapping(model): """Get the checkpoint conversion mapping for a given model, if it exists.""" + checkpoint_conversion_mapping = {} + # transformers <= 5.3.0 use _checkpoint_conversion_mapping - checkpoint_conversion_mapping = getattr(model, "_checkpoint_conversion_mapping", {}) + checkpoint_conversion_mapping.update(getattr(model, "_checkpoint_conversion_mapping", {})) # transformers > 5.3.0 use get_checkpoint_conversion_mapping if hasattr(transformers, "conversion_mapping") and (
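As a supplementary illustration for this series: the renaming round trip implemented by the helpers added to auto_round/utils/common.py in PATCH 01/11 can be exercised standalone. The sketch below copies the two helper functions from that patch and drives them with a hypothetical checkpoint_conversion_mapping written in the style of transformers' _checkpoint_conversion_mapping for multimodal models; the single pattern used here is an assumption for demonstration only, not the mapping of any particular model class, and the transformers > 5.3.0 paths added in PATCH 07/11 (_weight_conversions / transformers.conversion_mapping) are not covered.

import re

# Hypothetical mapping (checkpoint-name regex -> in-memory module prefix),
# assumed for this sketch only; real model classes define their own patterns.
checkpoint_conversion_mapping = {
    r"^model(?!\.visual)": "model.language_model",
}


def apply_checkpoint_conversion_mapping(name: str, key_mapping: dict[str, str]) -> str:
    """Loading side: map a checkpoint block name to its in-memory name."""
    for pattern, replacement in key_mapping.items():
        name, n_replace = re.subn(pattern, replacement, name)
        # Early exit of the loop
        if n_replace > 0:
            break
    return name


def revert_checkpoint_conversion_mapping(name: str, reverse_key_mapping: dict[str, str]) -> str:
    """Saving side: map an in-memory block name back to its checkpoint name."""
    for pattern, replacement in reverse_key_mapping.items():
        replacement = replacement.lstrip("^")  # drop the anchor ...
        replacement = re.sub(r"\(.*\)", "", replacement)  # ... and regex groups
        name, n_replace = re.subn(pattern, replacement, name)
        # Early exit of the loop
        if n_replace > 0:
            break
    return name


# Saving: revert the runtime name so the serialized config and shard weight
# names match the on-disk checkpoint, as save_quantized and ShardWriter now do.
reverse_key_mapping = {v: k for k, v in checkpoint_conversion_mapping.items()}
assert revert_checkpoint_conversion_mapping("model.language_model.layers", reverse_key_mapping) == "model.layers"

# Loading: convert_model.get_layer_config maps the stored block names back to
# in-memory module names before matching layers for quantization.
assert apply_checkpoint_conversion_mapping("model.layers", checkpoint_conversion_mapping) == "model.language_model.layers"

This round trip is why the new test_qwen2_5 assertion expects block_name_to_quantize to hold checkpoint-style names such as "model.layers" rather than a runtime name like "model.language_model.layers".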