From de23c8afd0c663a82a5be126b74a656cfa48d124 Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Wed, 29 Apr 2026 11:31:04 +0800 Subject: [PATCH 01/11] Fix incompatible weight names Co-authored-by: Copilot --- auto_round/autoround.py | 1 + auto_round/compressors/base.py | 17 +++++++++++++++++ auto_round/compressors/shard_writer.py | 9 ++++++++- auto_round/compressors_new/base.py | 16 ++++++++++++++++ auto_round/compressors_new/shard_writer.py | 9 ++++++++- auto_round/inference/convert_model.py | 9 ++++++++- auto_round/utils/common.py | 19 +++++++++++++++++++ 7 files changed, 77 insertions(+), 3 deletions(-) diff --git a/auto_round/autoround.py b/auto_round/autoround.py index b2bec2651..66ed99b9b 100644 --- a/auto_round/autoround.py +++ b/auto_round/autoround.py @@ -163,6 +163,7 @@ def __new__( """ local_args = {k: v for k, v in locals().items() if k not in cls.SKIP_ARGS} + local_args.update({k: v for k, v in extra_config.to_dict().items() if k in local_args}) if NEW_ARCH: from auto_round.compressors_new.entry import AutoRoundCompatible diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index 0503c8235..ca852ed57 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -111,6 +111,7 @@ set_module, to_device, to_dtype, + revert_checkpoint_conversion_mapping, unsupported_meta_device, ) from auto_round.utils.device import ( @@ -3603,6 +3604,22 @@ def save_quantized( serialization_dict["autoround_version"] = __version__ if "scale_dtype" in serialization_dict.keys(): serialization_dict["scale_dtype"] = str(serialization_dict["scale_dtype"]) + + # to match the original name + if hasattr(self.model, "_checkpoint_conversion_mapping"): + reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} + + if isinstance(serialization_dict["to_quant_block_names"], str): + serialization_dict["to_quant_block_names"] = revert_checkpoint_conversion_mapping( + serialization_dict["to_quant_block_names"], reverse_key_mapping + ) + + elif isinstance(serialization_dict["to_quant_block_names"], list): + for idx in range(len(serialization_dict["to_quant_block_names"])): + serialization_dict["to_quant_block_names"][idx] = revert_checkpoint_conversion_mapping( + serialization_dict["to_quant_block_names"][idx], reverse_key_mapping + ) + compressed_model = format.save_quantized( save_folder, model=self.model, diff --git a/auto_round/compressors/shard_writer.py b/auto_round/compressors/shard_writer.py index af3f510a3..4a7777ba6 100644 --- a/auto_round/compressors/shard_writer.py +++ b/auto_round/compressors/shard_writer.py @@ -20,7 +20,7 @@ from torch.nn import Parameter from auto_round.logger import logger -from auto_round.utils import get_lm_head_name, get_module +from auto_round.utils import get_lm_head_name, get_module, revert_checkpoint_conversion_mapping class ShardWriter: @@ -55,6 +55,9 @@ def __init__(self, rounder): self.shard_meta = [] # List of {tmp_file: str, params: list} self.global_weight_map = {} self.shard_counter = 0 + self.reverse_key_mapping = {} + if hasattr(self.model, "_checkpoint_conversion_mapping"): + self.reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} # Persistent set of all parameter names already flushed to a shard file. # Maintained incrementally in _flush_shard to avoid O(N^2) rebuilds in _add_tensor. 
@@ -105,6 +108,10 @@ def save_module(self, m: torch.nn.Module, name: str = None): self._add_tensor(param_name, v) def _add_tensor(self, name: str, tensor: torch.Tensor): + + # transformers will handle _checkpoint_conversion_mapping automatically if is_immediate_saving=False + name = revert_checkpoint_conversion_mapping(name, self.reverse_key_mapping) + if isinstance(tensor, torch.Tensor) and tensor.device.type == "meta": self.skipped_meta_tensors.append(name) return diff --git a/auto_round/compressors_new/base.py b/auto_round/compressors_new/base.py index 37f5726e8..c827a31a6 100644 --- a/auto_round/compressors_new/base.py +++ b/auto_round/compressors_new/base.py @@ -56,6 +56,7 @@ is_hpex_available, is_quantized_input_module, memory_monitor, + revert_checkpoint_conversion_mapping, ) from auto_round.utils.device import ( _force_trim_malloc, @@ -1148,6 +1149,21 @@ def save_quantized( if "scale_dtype" in serialization_dict.keys(): serialization_dict["scale_dtype"] = str(serialization_dict["scale_dtype"]) + # to match the original name + if hasattr(self.model, "_checkpoint_conversion_mapping"): + reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} + + if isinstance(serialization_dict["to_quant_block_names"], str): + serialization_dict["to_quant_block_names"] = revert_checkpoint_conversion_mapping( + serialization_dict["to_quant_block_names"], reverse_key_mapping + ) + + elif isinstance(serialization_dict["to_quant_block_names"], list): + for idx in range(len(serialization_dict["to_quant_block_names"])): + serialization_dict["to_quant_block_names"][idx] = revert_checkpoint_conversion_mapping( + serialization_dict["to_quant_block_names"][idx], reverse_key_mapping + ) + compressed_model = format.save_quantized( save_folder, model=self.model_context.model, diff --git a/auto_round/compressors_new/shard_writer.py b/auto_round/compressors_new/shard_writer.py index dbdd2cc86..beaf3805b 100644 --- a/auto_round/compressors_new/shard_writer.py +++ b/auto_round/compressors_new/shard_writer.py @@ -22,7 +22,7 @@ from auto_round.context.compress import CompressContext from auto_round.context.model import ModelContext from auto_round.logger import logger -from auto_round.utils import get_lm_head_name, get_module +from auto_round.utils import get_lm_head_name, get_module, revert_checkpoint_conversion_mapping class ShardWriter: @@ -78,6 +78,9 @@ def __init__( self.shard_meta = [] # List of {tmp_file: str, params: list} self.global_weight_map = {} self.shard_counter = 0 + self.reverse_key_mapping = {} + if hasattr(self.model, "_checkpoint_conversion_mapping"): + self.reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} # Persistent set of all parameter names already flushed to a shard file. # Maintained incrementally in _flush_shard to avoid O(N^2) rebuilds in _add_tensor. 
@@ -154,6 +157,10 @@ def save_module(self, m: torch.nn.Module, name: str = None): self._add_tensor(param_name, v) def _add_tensor(self, name: str, tensor: torch.Tensor): + + # transformers will handle _checkpoint_conversion_mapping automatically if is_immediate_saving=False + name = revert_checkpoint_conversion_mapping(name, self.reverse_key_mapping) + if isinstance(tensor, torch.Tensor) and tensor.device.type == "meta": self.skipped_meta_tensors.append(name) return diff --git a/auto_round/inference/convert_model.py b/auto_round/inference/convert_model.py index 70bb311f7..48a74a150 100644 --- a/auto_round/inference/convert_model.py +++ b/auto_round/inference/convert_model.py @@ -35,6 +35,7 @@ from auto_round.special_model_handler import update_module from auto_round.utils import ( SUPPORTED_LAYER_TYPES, + apply_checkpoint_conversion_mapping, check_start_with_block_name, check_to_quantized, find_matching_blocks, @@ -270,12 +271,16 @@ def get_layer_config(model, quantization_config): ) # Determine the quantization block list + checkpoint_conversion_mapping = getattr(model, "_checkpoint_conversion_mapping", {}) quant_block_list = getattr(quantization_config, "quant_block_list", None) if quant_block_list is not None: # Handle nested list format: [[block1, block2, ...], ...] -> [prefix1, ...] if quant_block_list and isinstance(quant_block_list[0], (list, tuple)): for i in range(len(quant_block_list)): - quant_block_list[i] = os.path.commonprefix(quant_block_list[i]).rstrip(".") + quant_block_list[i] = apply_checkpoint_conversion_mapping( + os.path.commonprefix(quant_block_list[i]).rstrip("."), + checkpoint_conversion_mapping + ) elif quant_block_list is None: to_quant_block_names = getattr(quantization_config, "block_name_to_quantize", None) # Prioritize this parameter if to_quant_block_names is None: @@ -292,6 +297,8 @@ def get_layer_config(model, quantization_config): # Speed up the matching for i in range(len(quant_block_list)): quant_block_list[i] = os.path.commonprefix(quant_block_list[i]).rstrip(".") + for i in range(len(quant_block_list)): + quant_block_list[i] = apply_checkpoint_conversion_mapping(quant_block_list[i], checkpoint_conversion_mapping) # Get layer names that will be quantized layer_names = [] diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py index 865134b8b..cc14d5eb7 100644 --- a/auto_round/utils/common.py +++ b/auto_round/utils/common.py @@ -1055,3 +1055,22 @@ def infer_bits_by_data_type(data_type: str): if str.isdigit(data_type[len(supported_dtype)]): return int(data_type[len(supported_dtype)]) return None + + +def revert_checkpoint_conversion_mapping(name: str, reverse_key_mapping: dict[str, str]) -> str: + for pattern, replacement in reverse_key_mapping.items(): + replacement = replacement.lstrip("^") # strip off un-needed chars and patterns + replacement = re.sub(r"\(.*\)", "", replacement) + name, n_replace = re.subn(pattern, replacement, name) + # Early exit of the loop + if n_replace > 0: + break + return name + +def apply_checkpoint_conversion_mapping(name: str, key_mapping: dict[str, str]) -> str: + for pattern, replacement in key_mapping.items(): + name, n_replace = re.subn(pattern, replacement, name) + # Early exit of the loop + if n_replace > 0: + break + return name \ No newline at end of file From c927d44962fa123a7575d81d34527b18eb510c55 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 29 Apr 2026 03:33:25 +0000 Subject: [PATCH 02/11] [pre-commit.ci] auto fixes from 
pre-commit.com hooks for more information, see https://pre-commit.ci --- auto_round/compressors/base.py | 4 ++-- auto_round/compressors_new/base.py | 2 +- auto_round/inference/convert_model.py | 7 ++++--- auto_round/utils/common.py | 3 ++- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index ca852ed57..b62a171c3 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -107,11 +107,11 @@ memory_monitor, mv_module_from_gpu, normalize_no_split_modules, + revert_checkpoint_conversion_mapping, set_amax_for_all_moe_layers, set_module, to_device, to_dtype, - revert_checkpoint_conversion_mapping, unsupported_meta_device, ) from auto_round.utils.device import ( @@ -3605,7 +3605,7 @@ def save_quantized( if "scale_dtype" in serialization_dict.keys(): serialization_dict["scale_dtype"] = str(serialization_dict["scale_dtype"]) - # to match the original name + # to match the original name if hasattr(self.model, "_checkpoint_conversion_mapping"): reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} diff --git a/auto_round/compressors_new/base.py b/auto_round/compressors_new/base.py index c827a31a6..b169deb56 100644 --- a/auto_round/compressors_new/base.py +++ b/auto_round/compressors_new/base.py @@ -1149,7 +1149,7 @@ def save_quantized( if "scale_dtype" in serialization_dict.keys(): serialization_dict["scale_dtype"] = str(serialization_dict["scale_dtype"]) - # to match the original name + # to match the original name if hasattr(self.model, "_checkpoint_conversion_mapping"): reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} diff --git a/auto_round/inference/convert_model.py b/auto_round/inference/convert_model.py index 48a74a150..88500e4dc 100644 --- a/auto_round/inference/convert_model.py +++ b/auto_round/inference/convert_model.py @@ -278,8 +278,7 @@ def get_layer_config(model, quantization_config): if quant_block_list and isinstance(quant_block_list[0], (list, tuple)): for i in range(len(quant_block_list)): quant_block_list[i] = apply_checkpoint_conversion_mapping( - os.path.commonprefix(quant_block_list[i]).rstrip("."), - checkpoint_conversion_mapping + os.path.commonprefix(quant_block_list[i]).rstrip("."), checkpoint_conversion_mapping ) elif quant_block_list is None: to_quant_block_names = getattr(quantization_config, "block_name_to_quantize", None) # Prioritize this parameter @@ -298,7 +297,9 @@ def get_layer_config(model, quantization_config): for i in range(len(quant_block_list)): quant_block_list[i] = os.path.commonprefix(quant_block_list[i]).rstrip(".") for i in range(len(quant_block_list)): - quant_block_list[i] = apply_checkpoint_conversion_mapping(quant_block_list[i], checkpoint_conversion_mapping) + quant_block_list[i] = apply_checkpoint_conversion_mapping( + quant_block_list[i], checkpoint_conversion_mapping + ) # Get layer names that will be quantized layer_names = [] diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py index cc14d5eb7..2d3ade65a 100644 --- a/auto_round/utils/common.py +++ b/auto_round/utils/common.py @@ -1067,10 +1067,11 @@ def revert_checkpoint_conversion_mapping(name: str, reverse_key_mapping: dict[st break return name + def apply_checkpoint_conversion_mapping(name: str, key_mapping: dict[str, str]) -> str: for pattern, replacement in key_mapping.items(): name, n_replace = re.subn(pattern, replacement, name) # Early exit of the loop if n_replace > 0: break - return name \ 
No newline at end of file + return name From 021133ec71fb0c2882a2b6d88aa1d0c71bf9300f Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Wed, 29 Apr 2026 13:55:04 +0800 Subject: [PATCH 03/11] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- auto_round/autoround.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/auto_round/autoround.py b/auto_round/autoround.py index 66ed99b9b..b1844490f 100644 --- a/auto_round/autoround.py +++ b/auto_round/autoround.py @@ -163,7 +163,8 @@ def __new__( """ local_args = {k: v for k, v in locals().items() if k not in cls.SKIP_ARGS} - local_args.update({k: v for k, v in extra_config.to_dict().items() if k in local_args}) + if extra_config is not None: + local_args.update({k: v for k, v in extra_config.to_dict().items() if k in local_args and v is not None}) if NEW_ARCH: from auto_round.compressors_new.entry import AutoRoundCompatible From 8389c5298d96d19293ed7c1e34f4800d46712929 Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 30 Apr 2026 10:37:33 +0800 Subject: [PATCH 04/11] add ut --- test/test_cuda/integrations/test_sglang.py | 33 ++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/test/test_cuda/integrations/test_sglang.py b/test/test_cuda/integrations/test_sglang.py index 196fa2efa..3467f98ac 100644 --- a/test/test_cuda/integrations/test_sglang.py +++ b/test/test_cuda/integrations/test_sglang.py @@ -121,6 +121,39 @@ def test_mixed_ar_format_sglang(self, dataloader): shutil.rmtree(self.save_dir, ignore_errors=True) + def test_qwen2_5_vl_loading(self, tiny_qwen_2_5_vl_model_path): + from auto_round.utils import mllm_load_model + layer_config = { + "self_attn": {"bits": 8}, + "lm_head": {"bits": 16}, + "mlp": {"bits": 16, "act_bits": 16}, + } + + model, processor, tokenizer, image_processor = mllm_load_model(tiny_qwen_2_5_vl_model_path) + + autoround = AutoRound( + model, + tokenizer, + scheme="W4A16", + iters=1, + nsamples=1, + seqlen=32, + processor=processor, + image_processor=image_processor, + layer_config=layer_config, + ) + + _, quantized_model_path = autoround.quantize_and_save( + output_dir=self.save_dir, + inplace=True, + format="auto_round", + ) + + generated_text = self._run_sglang_inference(quantized_model_path) + print(generated_text) + + assert "!!!" 
not in generated_text + @pytest.mark.skip_ci(reason="Cannot work well in CI env") def test_awq_format_sglang(self, dataloader): autoround = AutoRound( From 0d00b12dc07acaf4393fae60e9d73219b1118468 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 30 Apr 2026 02:37:57 +0000 Subject: [PATCH 05/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- test/test_cuda/integrations/test_sglang.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_cuda/integrations/test_sglang.py b/test/test_cuda/integrations/test_sglang.py index 3467f98ac..a99530d2c 100644 --- a/test/test_cuda/integrations/test_sglang.py +++ b/test/test_cuda/integrations/test_sglang.py @@ -123,6 +123,7 @@ def test_mixed_ar_format_sglang(self, dataloader): def test_qwen2_5_vl_loading(self, tiny_qwen_2_5_vl_model_path): from auto_round.utils import mllm_load_model + layer_config = { "self_attn": {"bits": 8}, "lm_head": {"bits": 16}, From 8b666affb1549d8dfc205be872b2d590db002fbd Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 30 Apr 2026 12:38:57 +0800 Subject: [PATCH 06/11] fix ut --- auto_round/utils/model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/auto_round/utils/model.py b/auto_round/utils/model.py index 04a8b0395..08698d68f 100644 --- a/auto_round/utils/model.py +++ b/auto_round/utils/model.py @@ -602,10 +602,11 @@ def mllm_load_model( processor_load_kwargs = {} if processor_subfolder is not None: processor_load_kwargs["subfolder"] = processor_subfolder + if model_type in FIX_MISTRAL_REGEX_MODEL_TYPE_LIST: + processor_load_kwargs["fix_mistral_regex"] = True tokenizer = AutoTokenizer.from_pretrained( pretrained_model_name_or_path, trust_remote_code=trust_remote_code, - fix_mistral_regex=True if model_type in FIX_MISTRAL_REGEX_MODEL_TYPE_LIST else False, **processor_load_kwargs, ) processor = AutoProcessor.from_pretrained( From 0fb900c5a793f8f58a50fc452bc11dd0b8fdb3f0 Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 30 Apr 2026 16:49:33 +0800 Subject: [PATCH 07/11] adapt to latest transformers Co-authored-by: Copilot --- auto_round/compressors/base.py | 22 ++++---- auto_round/compressors/shard_writer.py | 8 ++- auto_round/compressors_new/base.py | 22 ++++---- auto_round/compressors_new/shard_writer.py | 8 ++- auto_round/inference/convert_model.py | 3 +- auto_round/utils/common.py | 61 +++++++++++++++++----- auto_round/utils/model.py | 3 +- 7 files changed, 79 insertions(+), 48 deletions(-) diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index b62a171c3..6eb1cfd47 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -94,6 +94,7 @@ get_layer_names_in_block, get_lm_head_name, get_module, + get_reverse_checkpoint_conversion_mapping, global_state, hook_ngram_embeddings_on_cpu, htcore, @@ -3606,19 +3607,18 @@ def save_quantized( serialization_dict["scale_dtype"] = str(serialization_dict["scale_dtype"]) # to match the original name - if hasattr(self.model, "_checkpoint_conversion_mapping"): - reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} + reverse_checkpoint_conversion_mapping = get_reverse_checkpoint_conversion_mapping(self.model) - if isinstance(serialization_dict["to_quant_block_names"], str): - serialization_dict["to_quant_block_names"] = revert_checkpoint_conversion_mapping( - serialization_dict["to_quant_block_names"], reverse_key_mapping - ) + if 
isinstance(serialization_dict["to_quant_block_names"], str): + serialization_dict["to_quant_block_names"] = revert_checkpoint_conversion_mapping( + serialization_dict["to_quant_block_names"], reverse_checkpoint_conversion_mapping + ) - elif isinstance(serialization_dict["to_quant_block_names"], list): - for idx in range(len(serialization_dict["to_quant_block_names"])): - serialization_dict["to_quant_block_names"][idx] = revert_checkpoint_conversion_mapping( - serialization_dict["to_quant_block_names"][idx], reverse_key_mapping - ) + elif isinstance(serialization_dict["to_quant_block_names"], list): + for idx in range(len(serialization_dict["to_quant_block_names"])): + serialization_dict["to_quant_block_names"][idx] = revert_checkpoint_conversion_mapping( + serialization_dict["to_quant_block_names"][idx], reverse_checkpoint_conversion_mapping + ) compressed_model = format.save_quantized( save_folder, diff --git a/auto_round/compressors/shard_writer.py b/auto_round/compressors/shard_writer.py index 4a7777ba6..177eac3ae 100644 --- a/auto_round/compressors/shard_writer.py +++ b/auto_round/compressors/shard_writer.py @@ -20,7 +20,7 @@ from torch.nn import Parameter from auto_round.logger import logger -from auto_round.utils import get_lm_head_name, get_module, revert_checkpoint_conversion_mapping +from auto_round.utils import get_lm_head_name, get_module, revert_checkpoint_conversion_mapping, get_reverse_checkpoint_conversion_mapping class ShardWriter: @@ -55,9 +55,7 @@ def __init__(self, rounder): self.shard_meta = [] # List of {tmp_file: str, params: list} self.global_weight_map = {} self.shard_counter = 0 - self.reverse_key_mapping = {} - if hasattr(self.model, "_checkpoint_conversion_mapping"): - self.reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} + self.reverse_checkpoint_conversion_mapping = get_reverse_checkpoint_conversion_mapping(self.model) # Persistent set of all parameter names already flushed to a shard file. # Maintained incrementally in _flush_shard to avoid O(N^2) rebuilds in _add_tensor. 
@@ -110,7 +108,7 @@ def save_module(self, m: torch.nn.Module, name: str = None): def _add_tensor(self, name: str, tensor: torch.Tensor): # transformers will handle _checkpoint_conversion_mapping automatically if is_immediate_saving=False - name = revert_checkpoint_conversion_mapping(name, self.reverse_key_mapping) + name = revert_checkpoint_conversion_mapping(name, self.reverse_checkpoint_conversion_mapping) if isinstance(tensor, torch.Tensor) and tensor.device.type == "meta": self.skipped_meta_tensors.append(name) diff --git a/auto_round/compressors_new/base.py b/auto_round/compressors_new/base.py index b169deb56..f8d57ed18 100644 --- a/auto_round/compressors_new/base.py +++ b/auto_round/compressors_new/base.py @@ -51,6 +51,7 @@ convert_dtype_str2torch, extract_block_names_to_str, find_matching_blocks, + get_reverse_checkpoint_conversion_mapping, get_block_names, is_debug_mode, is_hpex_available, @@ -1150,19 +1151,18 @@ def save_quantized( serialization_dict["scale_dtype"] = str(serialization_dict["scale_dtype"]) # to match the original name - if hasattr(self.model, "_checkpoint_conversion_mapping"): - reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} + reverse_checkpoint_conversion_mapping = get_reverse_checkpoint_conversion_mapping(self.model) - if isinstance(serialization_dict["to_quant_block_names"], str): - serialization_dict["to_quant_block_names"] = revert_checkpoint_conversion_mapping( - serialization_dict["to_quant_block_names"], reverse_key_mapping - ) + if isinstance(serialization_dict["to_quant_block_names"], str): + serialization_dict["to_quant_block_names"] = revert_checkpoint_conversion_mapping( + serialization_dict["to_quant_block_names"], reverse_checkpoint_conversion_mapping + ) - elif isinstance(serialization_dict["to_quant_block_names"], list): - for idx in range(len(serialization_dict["to_quant_block_names"])): - serialization_dict["to_quant_block_names"][idx] = revert_checkpoint_conversion_mapping( - serialization_dict["to_quant_block_names"][idx], reverse_key_mapping - ) + elif isinstance(serialization_dict["to_quant_block_names"], list): + for idx in range(len(serialization_dict["to_quant_block_names"])): + serialization_dict["to_quant_block_names"][idx] = revert_checkpoint_conversion_mapping( + serialization_dict["to_quant_block_names"][idx], reverse_checkpoint_conversion_mapping + ) compressed_model = format.save_quantized( save_folder, diff --git a/auto_round/compressors_new/shard_writer.py b/auto_round/compressors_new/shard_writer.py index beaf3805b..c61c2b529 100644 --- a/auto_round/compressors_new/shard_writer.py +++ b/auto_round/compressors_new/shard_writer.py @@ -22,7 +22,7 @@ from auto_round.context.compress import CompressContext from auto_round.context.model import ModelContext from auto_round.logger import logger -from auto_round.utils import get_lm_head_name, get_module, revert_checkpoint_conversion_mapping +from auto_round.utils import get_lm_head_name, get_module, revert_checkpoint_conversion_mapping, get_reverse_checkpoint_conversion_mapping class ShardWriter: @@ -78,9 +78,7 @@ def __init__( self.shard_meta = [] # List of {tmp_file: str, params: list} self.global_weight_map = {} self.shard_counter = 0 - self.reverse_key_mapping = {} - if hasattr(self.model, "_checkpoint_conversion_mapping"): - self.reverse_key_mapping = {v: k for k, v in self.model._checkpoint_conversion_mapping.items()} + self.reverse_checkpoint_conversion_mapping = get_reverse_checkpoint_conversion_mapping(self.model) # Persistent set of 
all parameter names already flushed to a shard file. # Maintained incrementally in _flush_shard to avoid O(N^2) rebuilds in _add_tensor. @@ -159,7 +157,7 @@ def save_module(self, m: torch.nn.Module, name: str = None): def _add_tensor(self, name: str, tensor: torch.Tensor): # transformers will handle _checkpoint_conversion_mapping automatically if is_immediate_saving=False - name = revert_checkpoint_conversion_mapping(name, self.reverse_key_mapping) + name = revert_checkpoint_conversion_mapping(name, self.reverse_checkpoint_conversion_mapping) if isinstance(tensor, torch.Tensor) and tensor.device.type == "meta": self.skipped_meta_tensors.append(name) diff --git a/auto_round/inference/convert_model.py b/auto_round/inference/convert_model.py index 88500e4dc..12cff3984 100644 --- a/auto_round/inference/convert_model.py +++ b/auto_round/inference/convert_model.py @@ -42,6 +42,7 @@ get_block_names, get_module, is_hpex_available, + get_checkpoint_conversion_mapping, is_transformers_version_greater_or_equal_5, set_module, ) @@ -271,7 +272,7 @@ def get_layer_config(model, quantization_config): ) # Determine the quantization block list - checkpoint_conversion_mapping = getattr(model, "_checkpoint_conversion_mapping", {}) + checkpoint_conversion_mapping = get_checkpoint_conversion_mapping(model) quant_block_list = getattr(quantization_config, "quant_block_list", None) if quant_block_list is not None: # Handle nested list format: [[block1, block2, ...], ...] -> [prefix1, ...] diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py index 2d3ade65a..585dcdccb 100644 --- a/auto_round/utils/common.py +++ b/auto_round/utils/common.py @@ -1057,21 +1057,56 @@ def infer_bits_by_data_type(data_type: str): return None -def revert_checkpoint_conversion_mapping(name: str, reverse_key_mapping: dict[str, str]) -> str: - for pattern, replacement in reverse_key_mapping.items(): - replacement = replacement.lstrip("^") # strip off un-needed chars and patterns - replacement = re.sub(r"\(.*\)", "", replacement) - name, n_replace = re.subn(pattern, replacement, name) - # Early exit of the loop - if n_replace > 0: - break +def get_checkpoint_conversion_mapping(model): + """Get the checkpoint conversion mapping for a given model, if it exists.""" + # transformers <= 5.3.0 use _checkpoint_conversion_mapping + checkpoint_conversion_mapping = getattr(model, "_checkpoint_conversion_mapping", {}) + + # transformers > 5.3.0 use get_checkpoint_conversion_mapping + if hasattr(transformers, "conversion_mapping") and (hasattr(model, "config") and hasattr(model.config, "model_type")): + from transformers.conversion_mapping import get_checkpoint_conversion_mapping as transformers_get_checkpoint_conversion_mapping + conversion_mappings = transformers_get_checkpoint_conversion_mapping(model.config.model_type) + for conversion_mapping in conversion_mappings: + for source_pattern in conversion_mapping.source_patterns: + checkpoint_conversion_mapping[source_pattern] = conversion_mapping.target_patterns + return checkpoint_conversion_mapping + + +def get_reverse_checkpoint_conversion_mapping(model): + """Get the reverse checkpoint conversion mapping for a given model, if it exists.""" + reverse_checkpoint_conversion_mapping = {v: k for k, v in getattr(model, "_checkpoint_conversion_mapping", {}).items()} + + if hasattr(model, "_weight_conversions"): + weight_conversions = model._weight_conversions + for weight_conversion in weight_conversions: + reverse_conversion_mapping = weight_conversion.reverse_transform() + for 
source_pattern in reverse_conversion_mapping.source_patterns: + reverse_checkpoint_conversion_mapping[source_pattern] = reverse_conversion_mapping.target_patterns + + return reverse_checkpoint_conversion_mapping + + +def revert_checkpoint_conversion_mapping(name: str, key_mapping: dict[str, str]) -> str: + for source_pattern, target_patterns in key_mapping.items(): + if isinstance(target_patterns, str): + target_patterns = [target_patterns] + for target_pattern in target_patterns: + source_pattern = source_pattern.lstrip("^") # strip off un-needed chars and patterns + source_pattern = re.sub(r"\(.*\)", "", source_pattern) + name, n_replace = re.subn(source_pattern, target_pattern, name) + # Early exit of the loop + if n_replace > 0: + return name return name def apply_checkpoint_conversion_mapping(name: str, key_mapping: dict[str, str]) -> str: - for pattern, replacement in key_mapping.items(): - name, n_replace = re.subn(pattern, replacement, name) - # Early exit of the loop - if n_replace > 0: - break + for source_pattern, target_patterns in key_mapping.items(): + if isinstance(target_patterns, str): + target_patterns = [target_patterns] + for target_pattern in target_patterns: + name, n_replace = re.subn(source_pattern, target_pattern, name) + # Early exit of the loop + if n_replace > 0: + return name return name diff --git a/auto_round/utils/model.py b/auto_round/utils/model.py index 08698d68f..04a8b0395 100644 --- a/auto_round/utils/model.py +++ b/auto_round/utils/model.py @@ -602,11 +602,10 @@ def mllm_load_model( processor_load_kwargs = {} if processor_subfolder is not None: processor_load_kwargs["subfolder"] = processor_subfolder - if model_type in FIX_MISTRAL_REGEX_MODEL_TYPE_LIST: - processor_load_kwargs["fix_mistral_regex"] = True tokenizer = AutoTokenizer.from_pretrained( pretrained_model_name_or_path, trust_remote_code=trust_remote_code, + fix_mistral_regex=True if model_type in FIX_MISTRAL_REGEX_MODEL_TYPE_LIST else False, **processor_load_kwargs, ) processor = AutoProcessor.from_pretrained( From 90ed33f931ccda89bffc780197cd04e84b1badce Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 30 Apr 2026 08:50:41 +0000 Subject: [PATCH 08/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- auto_round/compressors/shard_writer.py | 7 ++++++- auto_round/compressors_new/base.py | 2 +- auto_round/compressors_new/shard_writer.py | 7 ++++++- auto_round/inference/convert_model.py | 2 +- auto_round/utils/common.py | 13 ++++++++++--- 5 files changed, 24 insertions(+), 7 deletions(-) diff --git a/auto_round/compressors/shard_writer.py b/auto_round/compressors/shard_writer.py index 177eac3ae..77b3b7ea4 100644 --- a/auto_round/compressors/shard_writer.py +++ b/auto_round/compressors/shard_writer.py @@ -20,7 +20,12 @@ from torch.nn import Parameter from auto_round.logger import logger -from auto_round.utils import get_lm_head_name, get_module, revert_checkpoint_conversion_mapping, get_reverse_checkpoint_conversion_mapping +from auto_round.utils import ( + get_lm_head_name, + get_module, + get_reverse_checkpoint_conversion_mapping, + revert_checkpoint_conversion_mapping, +) class ShardWriter: diff --git a/auto_round/compressors_new/base.py b/auto_round/compressors_new/base.py index f8d57ed18..1032a87d6 100644 --- a/auto_round/compressors_new/base.py +++ b/auto_round/compressors_new/base.py @@ -51,8 +51,8 @@ convert_dtype_str2torch, extract_block_names_to_str, 
find_matching_blocks, - get_reverse_checkpoint_conversion_mapping, get_block_names, + get_reverse_checkpoint_conversion_mapping, is_debug_mode, is_hpex_available, is_quantized_input_module, diff --git a/auto_round/compressors_new/shard_writer.py b/auto_round/compressors_new/shard_writer.py index c61c2b529..4a446ad07 100644 --- a/auto_round/compressors_new/shard_writer.py +++ b/auto_round/compressors_new/shard_writer.py @@ -22,7 +22,12 @@ from auto_round.context.compress import CompressContext from auto_round.context.model import ModelContext from auto_round.logger import logger -from auto_round.utils import get_lm_head_name, get_module, revert_checkpoint_conversion_mapping, get_reverse_checkpoint_conversion_mapping +from auto_round.utils import ( + get_lm_head_name, + get_module, + get_reverse_checkpoint_conversion_mapping, + revert_checkpoint_conversion_mapping, +) class ShardWriter: diff --git a/auto_round/inference/convert_model.py b/auto_round/inference/convert_model.py index 12cff3984..3f973514d 100644 --- a/auto_round/inference/convert_model.py +++ b/auto_round/inference/convert_model.py @@ -40,9 +40,9 @@ check_to_quantized, find_matching_blocks, get_block_names, + get_checkpoint_conversion_mapping, get_module, is_hpex_available, - get_checkpoint_conversion_mapping, is_transformers_version_greater_or_equal_5, set_module, ) diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py index 585dcdccb..13f86fed1 100644 --- a/auto_round/utils/common.py +++ b/auto_round/utils/common.py @@ -1063,8 +1063,13 @@ def get_checkpoint_conversion_mapping(model): checkpoint_conversion_mapping = getattr(model, "_checkpoint_conversion_mapping", {}) # transformers > 5.3.0 use get_checkpoint_conversion_mapping - if hasattr(transformers, "conversion_mapping") and (hasattr(model, "config") and hasattr(model.config, "model_type")): - from transformers.conversion_mapping import get_checkpoint_conversion_mapping as transformers_get_checkpoint_conversion_mapping + if hasattr(transformers, "conversion_mapping") and ( + hasattr(model, "config") and hasattr(model.config, "model_type") + ): + from transformers.conversion_mapping import ( + get_checkpoint_conversion_mapping as transformers_get_checkpoint_conversion_mapping, + ) + conversion_mappings = transformers_get_checkpoint_conversion_mapping(model.config.model_type) for conversion_mapping in conversion_mappings: for source_pattern in conversion_mapping.source_patterns: @@ -1074,7 +1079,9 @@ def get_checkpoint_conversion_mapping(model): def get_reverse_checkpoint_conversion_mapping(model): """Get the reverse checkpoint conversion mapping for a given model, if it exists.""" - reverse_checkpoint_conversion_mapping = {v: k for k, v in getattr(model, "_checkpoint_conversion_mapping", {}).items()} + reverse_checkpoint_conversion_mapping = { + v: k for k, v in getattr(model, "_checkpoint_conversion_mapping", {}).items() + } if hasattr(model, "_weight_conversions"): weight_conversions = model._weight_conversions From 49f51248243b4f4a1f09956255f80c0c2f846694 Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 30 Apr 2026 17:25:46 +0800 Subject: [PATCH 09/11] add config check in ut --- test/test_cpu/models/test_mllm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_cpu/models/test_mllm.py b/test/test_cpu/models/test_mllm.py index 2c0c71bd4..49b33332a 100644 --- a/test/test_cpu/models/test_mllm.py +++ b/test/test_cpu/models/test_mllm.py @@ -236,6 +236,7 @@ def test_qwen2_5(self, tiny_qwen_2_5_vl_model_path): model = 
Qwen2_5_VLForConditionalGeneration.from_pretrained( quantized_model_path, torch_dtype="auto", device_map="auto" ) + assert model.config.quantization_config.block_name_to_quantize == "model.visual.blocks,model.layers" image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg" processor = AutoProcessor.from_pretrained(quantized_model_path) messages = [ From 26e828cf4e9645363e62a466c76f3076cdb0a8ee Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 30 Apr 2026 17:48:46 +0800 Subject: [PATCH 10/11] bug fix Co-authored-by: Copilot --- auto_round/utils/common.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py index 13f86fed1..d2e36f015 100644 --- a/auto_round/utils/common.py +++ b/auto_round/utils/common.py @@ -1071,9 +1071,10 @@ def get_checkpoint_conversion_mapping(model): ) conversion_mappings = transformers_get_checkpoint_conversion_mapping(model.config.model_type) - for conversion_mapping in conversion_mappings: - for source_pattern in conversion_mapping.source_patterns: - checkpoint_conversion_mapping[source_pattern] = conversion_mapping.target_patterns + if conversion_mappings is not None: + for conversion_mapping in conversion_mappings: + for source_pattern in conversion_mapping.source_patterns: + checkpoint_conversion_mapping[source_pattern] = conversion_mapping.target_patterns return checkpoint_conversion_mapping From 60352685eed85f0884effcb1a72f417c9eb7957a Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 30 Apr 2026 21:52:20 +0800 Subject: [PATCH 11/11] fix CI issue --- auto_round/utils/common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py index d2e36f015..8ccaf646e 100644 --- a/auto_round/utils/common.py +++ b/auto_round/utils/common.py @@ -1059,8 +1059,10 @@ def infer_bits_by_data_type(data_type: str): def get_checkpoint_conversion_mapping(model): """Get the checkpoint conversion mapping for a given model, if it exists.""" + checkpoint_conversion_mapping = {} + # transformers <= 5.3.0 use _checkpoint_conversion_mapping - checkpoint_conversion_mapping = getattr(model, "_checkpoint_conversion_mapping", {}) + checkpoint_conversion_mapping.update(getattr(model, "_checkpoint_conversion_mapping", {})) # transformers > 5.3.0 use get_checkpoint_conversion_mapping if hasattr(transformers, "conversion_mapping") and (
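As a supplementary illustration for this series: the renaming round trip implemented by the helpers added to auto_round/utils/common.py in PATCH 01/11 can be exercised standalone. The sketch below copies the two helper functions from that patch and drives them with a hypothetical checkpoint_conversion_mapping written in the style of transformers' _checkpoint_conversion_mapping for multimodal models; the single pattern used here is an assumption for demonstration only, not the mapping of any particular model class, and the transformers > 5.3.0 paths added in PATCH 07/11 (_weight_conversions / transformers.conversion_mapping) are not covered.

import re

# Hypothetical mapping (checkpoint-name regex -> in-memory module prefix),
# assumed for this sketch only; real model classes define their own patterns.
checkpoint_conversion_mapping = {
    r"^model(?!\.visual)": "model.language_model",
}


def apply_checkpoint_conversion_mapping(name: str, key_mapping: dict[str, str]) -> str:
    """Loading side: map a checkpoint block name to its in-memory name."""
    for pattern, replacement in key_mapping.items():
        name, n_replace = re.subn(pattern, replacement, name)
        # Early exit of the loop
        if n_replace > 0:
            break
    return name


def revert_checkpoint_conversion_mapping(name: str, reverse_key_mapping: dict[str, str]) -> str:
    """Saving side: map an in-memory block name back to its checkpoint name."""
    for pattern, replacement in reverse_key_mapping.items():
        replacement = replacement.lstrip("^")  # drop the anchor ...
        replacement = re.sub(r"\(.*\)", "", replacement)  # ... and regex groups
        name, n_replace = re.subn(pattern, replacement, name)
        # Early exit of the loop
        if n_replace > 0:
            break
    return name


# Saving: revert the runtime name so the serialized config and shard weight
# names match the on-disk checkpoint, as save_quantized and ShardWriter now do.
reverse_key_mapping = {v: k for k, v in checkpoint_conversion_mapping.items()}
assert revert_checkpoint_conversion_mapping("model.language_model.layers", reverse_key_mapping) == "model.layers"

# Loading: convert_model.get_layer_config maps the stored block names back to
# in-memory module names before matching layers for quantization.
assert apply_checkpoint_conversion_mapping("model.layers", checkpoint_conversion_mapping) == "model.language_model.layers"

This round trip is why the new test_qwen2_5 assertion expects block_name_to_quantize to hold checkpoint-style names such as "model.layers" rather than a runtime name like "model.language_model.layers".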