From 8d7ea8ec680a148f56a375fec9fd5150fc24f216 Mon Sep 17 00:00:00 2001 From: zhangfuwen Date: Fri, 8 Mar 2024 17:25:18 +0800 Subject: [PATCH 1/2] examples: fix utf8 decoding error some models have a tokenizer that decodes an id into an incomplete utf8 sequence, need to validate and wait for next token one example would be: https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q4_0.gguf and an example of the token is 18137 --- .../app/src/main/cpp/llama-android.cpp | 59 ++++++++++++++++++- .../src/main/java/com/example/llama/Llm.kt | 4 +- 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/examples/llama.android/app/src/main/cpp/llama-android.cpp b/examples/llama.android/app/src/main/cpp/llama-android.cpp index 2beb1e0d532..19a60ac1404 100644 --- a/examples/llama.android/app/src/main/cpp/llama-android.cpp +++ b/examples/llama.android/app/src/main/cpp/llama-android.cpp @@ -33,6 +33,52 @@ jclass la_int_var; jmethodID la_int_var_value; jmethodID la_int_var_inc; +std::string cached_token_chars=""; +bool is_valid_utf8(const char * string) +{ + if (!string) + return true; + + const unsigned char * bytes = (const unsigned char *)string; + int num; + + while (*bytes != 0x00) + { + if ((*bytes & 0x80) == 0x00) + { + // U+0000 to U+007F + num = 1; + } + else if ((*bytes & 0xE0) == 0xC0) + { + // U+0080 to U+07FF + num = 2; + } + else if ((*bytes & 0xF0) == 0xE0) + { + // U+0800 to U+FFFF + num = 3; + } + else if ((*bytes & 0xF8) == 0xF0) + { + // U+10000 to U+10FFFF + num = 4; + } + else + return false; + + bytes += 1; + for (int i = 1; i < num; ++i) + { + if ((*bytes & 0xC0) != 0x80) + return false; + bytes += 1; + } + } + + return true; +} + static void log_callback(ggml_log_level level, const char * fmt, void * data) { if (level == GGML_LOG_LEVEL_ERROR) __android_log_print(ANDROID_LOG_ERROR, TAG, fmt, data); else if (level == GGML_LOG_LEVEL_INFO) __android_log_print(ANDROID_LOG_INFO, TAG, fmt, data); @@ -295,6 +341,8 @@ 
Java_com_example_llama_Llm_completion_1init( jint n_len ) { + cached_token_chars = ""; + const auto text = env->GetStringUTFChars(jtext, 0); const auto context = reinterpret_cast(context_pointer); const auto batch = reinterpret_cast(batch_pointer); @@ -372,8 +420,15 @@ Java_com_example_llama_Llm_completion_1loop( } auto new_token_chars = llama_token_to_piece(context, new_token_id); - LOGi("new_token_chars: `%s`", new_token_chars.c_str()); - auto new_token = env->NewStringUTF(new_token_chars.c_str()); + cached_token_chars += new_token_chars; + jstring new_token = nullptr; + if(is_valid_utf8(cached_token_chars.c_str())) { + new_token = env->NewStringUTF(cached_token_chars.c_str()); + LOGi("cached: %s, new_token_chars: `%s`, id:%d", cached_token_chars.c_str(), new_token_chars.c_str(), new_token_id); + cached_token_chars=""; + } else { + new_token = env->NewStringUTF(""); + } llama_batch_clear(*batch); llama_batch_add(*batch, new_token_id, n_cur, { 0 }, true); diff --git a/examples/llama.android/app/src/main/java/com/example/llama/Llm.kt b/examples/llama.android/app/src/main/java/com/example/llama/Llm.kt index 5f32703724a..d86afee3790 100644 --- a/examples/llama.android/app/src/main/java/com/example/llama/Llm.kt +++ b/examples/llama.android/app/src/main/java/com/example/llama/Llm.kt @@ -71,7 +71,7 @@ class Llm { batch: Long, nLen: Int, ncur: IntVar - ): String + ): String? 
private external fun kv_cache_clear(context: Long) @@ -115,7 +115,7 @@ class Llm { val ncur = IntVar(completion_init(state.context, state.batch, message, nlen)) while (ncur.value <= nlen) { val str = completion_loop(state.context, state.batch, nlen, ncur) - if (str.isEmpty()) { + if (str == null) { break } emit(str) From 13d21fa4bfa9c5b631cd3ab7cb3eab7c92bd99f7 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 10 Mar 2024 22:02:44 +0200 Subject: [PATCH 2/2] android : minor --- .../app/src/main/cpp/llama-android.cpp | 46 ++++++++----------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/examples/llama.android/app/src/main/cpp/llama-android.cpp b/examples/llama.android/app/src/main/cpp/llama-android.cpp index 19a60ac1404..ce8ab3b7094 100644 --- a/examples/llama.android/app/src/main/cpp/llama-android.cpp +++ b/examples/llama.android/app/src/main/cpp/llama-android.cpp @@ -33,45 +33,38 @@ jclass la_int_var; jmethodID la_int_var_value; jmethodID la_int_var_inc; -std::string cached_token_chars=""; -bool is_valid_utf8(const char * string) -{ - if (!string) +std::string cached_token_chars; + +bool is_valid_utf8(const char * string) { + if (!string) { return true; + } const unsigned char * bytes = (const unsigned char *)string; int num; - while (*bytes != 0x00) - { - if ((*bytes & 0x80) == 0x00) - { + while (*bytes != 0x00) { + if ((*bytes & 0x80) == 0x00) { // U+0000 to U+007F num = 1; - } - else if ((*bytes & 0xE0) == 0xC0) - { + } else if ((*bytes & 0xE0) == 0xC0) { // U+0080 to U+07FF num = 2; - } - else if ((*bytes & 0xF0) == 0xE0) - { + } else if ((*bytes & 0xF0) == 0xE0) { // U+0800 to U+FFFF num = 3; - } - else if ((*bytes & 0xF8) == 0xF0) - { + } else if ((*bytes & 0xF8) == 0xF0) { // U+10000 to U+10FFFF num = 4; - } - else + } else { return false; + } bytes += 1; - for (int i = 1; i < num; ++i) - { - if ((*bytes & 0xC0) != 0x80) + for (int i = 1; i < num; ++i) { + if ((*bytes & 0xC0) != 0x80) { return false; + } bytes += 1; } } @@ -341,7 
+334,7 @@ Java_com_example_llama_Llm_completion_1init( jint n_len ) { - cached_token_chars = ""; + cached_token_chars.clear(); const auto text = env->GetStringUTFChars(jtext, 0); const auto context = reinterpret_cast(context_pointer); @@ -421,11 +414,12 @@ Java_com_example_llama_Llm_completion_1loop( auto new_token_chars = llama_token_to_piece(context, new_token_id); cached_token_chars += new_token_chars; + jstring new_token = nullptr; - if(is_valid_utf8(cached_token_chars.c_str())) { + if (is_valid_utf8(cached_token_chars.c_str())) { new_token = env->NewStringUTF(cached_token_chars.c_str()); - LOGi("cached: %s, new_token_chars: `%s`, id:%d", cached_token_chars.c_str(), new_token_chars.c_str(), new_token_id); - cached_token_chars=""; + LOGi("cached: %s, new_token_chars: `%s`, id: %d", cached_token_chars.c_str(), new_token_chars.c_str(), new_token_id); + cached_token_chars.clear(); } else { new_token = env->NewStringUTF(""); }