From 14af3be0fc762369187b9e92366000b10891bffc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=87a=C4=9Fr=C4=B1=20Tu=C4=9Frul=20Canbol?=
 <103742287+ctcanbol@users.noreply.github.com>
Date: Thu, 7 Aug 2025 09:28:14 +0200
Subject: [PATCH 1/2] fix qwen3moe gguf architecture

---
 src/transformers/modeling_gguf_pytorch_utils.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/transformers/modeling_gguf_pytorch_utils.py b/src/transformers/modeling_gguf_pytorch_utils.py
index feb6c6c3914f..7ef2725c10b0 100644
--- a/src/transformers/modeling_gguf_pytorch_utils.py
+++ b/src/transformers/modeling_gguf_pytorch_utils.py
@@ -246,6 +246,7 @@ def process(self, weights, name, **kwargs):
 TENSOR_PROCESSORS = {
     "llama": LlamaTensorProcessor,
     "qwen2moe": Qwen2MoeTensorProcessor,
+    "qwen3moe": Qwen2MoeTensorProcessor,
     "bloom": BloomTensorProcessor,
     "t5": T5TensorProcessor,
     "t5encoder": T5TensorProcessor,
@@ -295,6 +296,8 @@ def get_gguf_hf_weights_map(
         model_type = "command-r"
     elif model_type == "qwen2_moe":
         model_type = "qwen2moe"
+    elif model_type == "qwen3_moe":
+        model_type = "qwen3moe"
     elif model_type == "gemma3_text":
         model_type = "gemma3"
     arch = None
@@ -316,8 +319,8 @@ def get_gguf_hf_weights_map(
     gguf_to_hf_name_map = {}
     state_dict = hf_model.state_dict()
     for hf_name in state_dict:
-        # An exception for qwen2moe model, where the expert layers are packed
-        if model_type == "qwen2moe" and "mlp.experts." in hf_name:
+        # An exception for qwen2moe/qwen3moe model, where the expert layers are packed
+        if model_type in ("qwen2moe", "qwen3moe") and "mlp.experts." in hf_name:
             hf_name = re.sub(r"mlp.experts.\d+.", "mlp.experts.", hf_name)
         name, suffix = hf_name, ""
@@ -391,6 +394,8 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo

         if "qwen2moe" in architecture:
             updated_architecture = "qwen2_moe"
+        elif "qwen3moe" in architecture:
+            updated_architecture = "qwen3_moe"

     # For stablelm architecture, we need to set qkv_bias and use_parallel_residual from tensors
     # If `qkv_bias=True`, qkv_proj with bias will be present in the tensors

From f43d297e569998121f5e3fad9b0ce271298574c1 Mon Sep 17 00:00:00 2001
From: Jinuk Kim
Date: Sun, 10 Aug 2025 17:15:22 +0900
Subject: [PATCH 2/2] Fix Qwen3Moe GGUF loading

---
 src/transformers/integrations/ggml.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/transformers/integrations/ggml.py b/src/transformers/integrations/ggml.py
index ecf34bbf5e19..9d89abfaa24d 100644
--- a/src/transformers/integrations/ggml.py
+++ b/src/transformers/integrations/ggml.py
@@ -102,13 +102,14 @@
         "attention.layer_norm_rms_epsilon": "rms_norm_eps",
         "vocab_size": "vocab_size",
     },
-    "qwen3moe": {
+    "qwen3_moe": {
         "context_length": "max_position_embeddings",
         "block_count": "num_hidden_layers",
         "feed_forward_length": "intermediate_size",
         "embedding_length": "hidden_size",
         "rope.dimension_count": None,
         "rope.freq_base": "rope_theta",
+        "attention.key_length": "head_dim",
         "attention.head_count": "num_attention_heads",
         "attention.head_count_kv": "num_key_value_heads",
         "attention.layer_norm_rms_epsilon": "rms_norm_eps",