From cbc12b9796e89949efe020f895e54035554b23c1 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 9 May 2025 09:49:50 +0200 Subject: [PATCH 1/3] llama : one-off chat template fix for Mistral-Small-2503 --- src/llama-model.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 3ca265be8dc..13d626bd402 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -13387,6 +13387,14 @@ const char * llama_model_chat_template(const llama_model * model, const char * n : LLM_KV(model->arch)(LLM_KV_TOKENIZER_CHAT_TEMPLATE); const auto & it = model->gguf_kv.find(key); if (it == model->gguf_kv.end()) { + // one-off fix for very popular models (so we are not flooded with issues) + // do not extend this list unless absolutely necessary + // Mistral-Small-2503 does not have built-in chat template + llama_vocab_pre_type pre_type = model->vocab.get_pre_type(); + if (pre_type == LLAMA_VOCAB_PRE_TYPE_TEKKEN && model->layers.size() == 40) { + return "mistral-v7"; + } + return nullptr; } From c6e0f92010f10a1c61f3e2e262d9a6d944beaed0 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 9 May 2025 10:02:38 +0200 Subject: [PATCH 2/3] update readme --- tools/mtmd/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/mtmd/README.md b/tools/mtmd/README.md index b97b9e8c543..20e7696cefd 100644 --- a/tools/mtmd/README.md +++ b/tools/mtmd/README.md @@ -46,7 +46,7 @@ llama-mtmd-cli -hf ggml-org/Qwen2.5-VL-32B-Instruct-GGUF llama-mtmd-cli -hf ggml-org/Qwen2.5-VL-72B-Instruct-GGUF # Mistral Small 3.1 24B (IQ2_M quantization) -llama-mtmd-cli -hf ggml-org/Mistral-Small-3.1-24B-Instruct-2503-GGUF --chat-template mistral-v7 +llama-mtmd-cli -hf ggml-org/Mistral-Small-3.1-24B-Instruct-2503-GGUF ``` ## How it works and what is `mmproj`? 
From 7e0f4f2f6d943456c91781b29722b9fdb35d12d5 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 9 May 2025 10:16:29 +0200 Subject: [PATCH 3/3] add mistral-v7-tekken --- src/llama-chat.cpp | 14 ++++++++------ src/llama-chat.h | 1 + src/llama-model.cpp | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp index 46d43c58ef4..d12743e6b9a 100644 --- a/src/llama-chat.cpp +++ b/src/llama-chat.cpp @@ -35,6 +35,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = { { "mistral-v3", LLM_CHAT_TEMPLATE_MISTRAL_V3 }, { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN }, { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 }, + { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN }, { "phi3", LLM_CHAT_TEMPLATE_PHI_3 }, { "phi4", LLM_CHAT_TEMPLATE_PHI_4 }, { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 }, @@ -202,19 +203,20 @@ int32_t llm_chat_apply_template( if (add_ass) { ss << "<|im_start|>assistant\n"; } - } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7) { + } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) { // Official mistral 'v7' template // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7 + // https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken + const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? 
" " : ""; for (auto message : chat) { std::string role(message->role); std::string content(message->content); if (role == "system") { - ss << "[SYSTEM_PROMPT] " << content << "[/SYSTEM_PROMPT]"; + ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]"; } else if (role == "user") { - ss << "[INST] " << content << "[/INST]"; - } - else { - ss << " " << content << "</s>"; + ss << "[INST]" << trailing_space << content << "[/INST]"; + } else { + ss << trailing_space << content << "</s>"; } } } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 diff --git a/src/llama-chat.h b/src/llama-chat.h index 3f5843466d0..db24ade21e2 100644 --- a/src/llama-chat.h +++ b/src/llama-chat.h @@ -14,6 +14,7 @@ enum llm_chat_template { LLM_CHAT_TEMPLATE_MISTRAL_V3, LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN, LLM_CHAT_TEMPLATE_MISTRAL_V7, + LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN, LLM_CHAT_TEMPLATE_PHI_3, LLM_CHAT_TEMPLATE_PHI_4, LLM_CHAT_TEMPLATE_FALCON_3, diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 13d626bd402..e8b78c1d002 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -13392,7 +13392,7 @@ const char * llama_model_chat_template(const llama_model * model, const char * n // Mistral-Small-2503 does not have built-in chat template llama_vocab_pre_type pre_type = model->vocab.get_pre_type(); if (pre_type == LLAMA_VOCAB_PRE_TYPE_TEKKEN && model->layers.size() == 40) { - return "mistral-v7"; + return "mistral-v7-tekken"; } return nullptr;