diff --git a/src/transformers/models/llama4/configuration_llama4.py b/src/transformers/models/llama4/configuration_llama4.py index 76162ee25964..d74107c2610a 100644 --- a/src/transformers/models/llama4/configuration_llama4.py +++ b/src/transformers/models/llama4/configuration_llama4.py @@ -251,9 +251,6 @@ class Llama4TextConfig(PretrainedConfig): "layers.*.self_attn.k_proj": "colwise", "layers.*.self_attn.v_proj": "colwise", "layers.*.self_attn.o_proj": "rowwise", - "layers.*.input_layernorm.weight": "sequence_parallel", - "layers.*.post_attention_layernorm.weight": "sequence_parallel", - "norm.weight": "sequence_parallel", "layers.*.feed_forward.shared_expert.gate_proj": "local_colwise", "layers.*.feed_forward.shared_expert.up_proj": "local_colwise", "layers.*.feed_forward.shared_expert.down_proj": "local_rowwise",