From 8d7ea8ec680a148f56a375fec9fd5150fc24f216 Mon Sep 17 00:00:00 2001
From: zhangfuwen <zhangfuwen@foxmail.com>
Date: Fri, 8 Mar 2024 17:25:18 +0800
Subject: [PATCH 1/2] examples: fix utf8 decoding error

Some models have a tokenizer that decodes a single token id into an incomplete UTF-8 sequence, so the decoded bytes need to be validated and held back until the following token(s) complete the sequence.
One example is https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q4_0.gguf, where token id 18137 is such a token.
---
 .../app/src/main/cpp/llama-android.cpp        | 59 ++++++++++++++++++-
 .../src/main/java/com/example/llama/Llm.kt    |  4 +-
 2 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/examples/llama.android/app/src/main/cpp/llama-android.cpp b/examples/llama.android/app/src/main/cpp/llama-android.cpp
index 2beb1e0d532..19a60ac1404 100644
--- a/examples/llama.android/app/src/main/cpp/llama-android.cpp
+++ b/examples/llama.android/app/src/main/cpp/llama-android.cpp
@@ -33,6 +33,52 @@ jclass la_int_var;
 jmethodID la_int_var_value;
 jmethodID la_int_var_inc;
 
+std::string cached_token_chars="";
+bool is_valid_utf8(const char * string)
+{
+    if (!string)
+        return true;
+
+    const unsigned char * bytes = (const unsigned char *)string;
+    int num;
+
+    while (*bytes != 0x00)
+    {
+        if ((*bytes & 0x80) == 0x00)
+        {
+            // U+0000 to U+007F
+            num = 1;
+        }
+        else if ((*bytes & 0xE0) == 0xC0)
+        {
+            // U+0080 to U+07FF
+            num = 2;
+        }
+        else if ((*bytes & 0xF0) == 0xE0)
+        {
+            // U+0800 to U+FFFF
+            num = 3;
+        }
+        else if ((*bytes & 0xF8) == 0xF0)
+        {
+            // U+10000 to U+10FFFF
+            num = 4;
+        }
+        else
+            return false;
+
+        bytes += 1;
+        for (int i = 1; i < num; ++i)
+        {
+            if ((*bytes & 0xC0) != 0x80)
+                return false;
+            bytes += 1;
+        }
+    }
+
+    return true;
+}
+
 static void log_callback(ggml_log_level level, const char * fmt, void * data) {
     if (level == GGML_LOG_LEVEL_ERROR)     __android_log_print(ANDROID_LOG_ERROR, TAG, fmt, data);
     else if (level == GGML_LOG_LEVEL_INFO) __android_log_print(ANDROID_LOG_INFO, TAG, fmt, data);
@@ -295,6 +341,8 @@ Java_com_example_llama_Llm_completion_1init(
         jint n_len
     ) {
 
+    cached_token_chars = "";
+
     const auto text = env->GetStringUTFChars(jtext, 0);
     const auto context = reinterpret_cast<llama_context *>(context_pointer);
     const auto batch = reinterpret_cast<llama_batch *>(batch_pointer);
@@ -372,8 +420,15 @@ Java_com_example_llama_Llm_completion_1loop(
     }
 
     auto new_token_chars = llama_token_to_piece(context, new_token_id);
-    LOGi("new_token_chars: `%s`", new_token_chars.c_str());
-    auto new_token = env->NewStringUTF(new_token_chars.c_str());
+    cached_token_chars += new_token_chars;
+    jstring new_token = nullptr;
+    if(is_valid_utf8(cached_token_chars.c_str())) {
+        new_token = env->NewStringUTF(cached_token_chars.c_str());
+        LOGi("cached: %s, new_token_chars: `%s`, id:%d", cached_token_chars.c_str(), new_token_chars.c_str(), new_token_id);
+        cached_token_chars="";
+    } else {
+        new_token = env->NewStringUTF("");
+    }
 
     llama_batch_clear(*batch);
     llama_batch_add(*batch, new_token_id, n_cur, { 0 }, true);
diff --git a/examples/llama.android/app/src/main/java/com/example/llama/Llm.kt b/examples/llama.android/app/src/main/java/com/example/llama/Llm.kt
index 5f32703724a..d86afee3790 100644
--- a/examples/llama.android/app/src/main/java/com/example/llama/Llm.kt
+++ b/examples/llama.android/app/src/main/java/com/example/llama/Llm.kt
@@ -71,7 +71,7 @@ class Llm {
         batch: Long,
         nLen: Int,
         ncur: IntVar
-    ): String
+    ): String?
 
     private external fun kv_cache_clear(context: Long)
 
@@ -115,7 +115,7 @@ class Llm {
                 val ncur = IntVar(completion_init(state.context, state.batch, message, nlen))
                 while (ncur.value <= nlen) {
                     val str = completion_loop(state.context, state.batch, nlen, ncur)
-                    if (str.isEmpty()) {
+                    if (str == null) {
                         break
                     }
                     emit(str)

From 13d21fa4bfa9c5b631cd3ab7cb3eab7c92bd99f7 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Sun, 10 Mar 2024 22:02:44 +0200
Subject: [PATCH 2/2] android : minor

---
 .../app/src/main/cpp/llama-android.cpp        | 46 ++++++++-----------
 1 file changed, 20 insertions(+), 26 deletions(-)

diff --git a/examples/llama.android/app/src/main/cpp/llama-android.cpp b/examples/llama.android/app/src/main/cpp/llama-android.cpp
index 19a60ac1404..ce8ab3b7094 100644
--- a/examples/llama.android/app/src/main/cpp/llama-android.cpp
+++ b/examples/llama.android/app/src/main/cpp/llama-android.cpp
@@ -33,45 +33,38 @@ jclass la_int_var;
 jmethodID la_int_var_value;
 jmethodID la_int_var_inc;
 
-std::string cached_token_chars="";
-bool is_valid_utf8(const char * string)
-{
-    if (!string)
+std::string cached_token_chars;
+
+bool is_valid_utf8(const char * string) {
+    if (!string) {
         return true;
+    }
 
     const unsigned char * bytes = (const unsigned char *)string;
     int num;
 
-    while (*bytes != 0x00)
-    {
-        if ((*bytes & 0x80) == 0x00)
-        {
+    while (*bytes != 0x00) {
+        if ((*bytes & 0x80) == 0x00) {
             // U+0000 to U+007F
             num = 1;
-        }
-        else if ((*bytes & 0xE0) == 0xC0)
-        {
+        } else if ((*bytes & 0xE0) == 0xC0) {
             // U+0080 to U+07FF
             num = 2;
-        }
-        else if ((*bytes & 0xF0) == 0xE0)
-        {
+        } else if ((*bytes & 0xF0) == 0xE0) {
             // U+0800 to U+FFFF
             num = 3;
-        }
-        else if ((*bytes & 0xF8) == 0xF0)
-        {
+        } else if ((*bytes & 0xF8) == 0xF0) {
             // U+10000 to U+10FFFF
             num = 4;
-        }
-        else
+        } else {
             return false;
+        }
 
         bytes += 1;
-        for (int i = 1; i < num; ++i)
-        {
-            if ((*bytes & 0xC0) != 0x80)
+        for (int i = 1; i < num; ++i) {
+            if ((*bytes & 0xC0) != 0x80) {
                 return false;
+            }
             bytes += 1;
         }
     }
@@ -341,7 +334,7 @@ Java_com_example_llama_Llm_completion_1init(
         jint n_len
     ) {
 
-    cached_token_chars = "";
+    cached_token_chars.clear();
 
     const auto text = env->GetStringUTFChars(jtext, 0);
     const auto context = reinterpret_cast<llama_context *>(context_pointer);
@@ -421,11 +414,12 @@ Java_com_example_llama_Llm_completion_1loop(
 
     auto new_token_chars = llama_token_to_piece(context, new_token_id);
     cached_token_chars += new_token_chars;
+
     jstring new_token = nullptr;
-    if(is_valid_utf8(cached_token_chars.c_str())) {
+    if (is_valid_utf8(cached_token_chars.c_str())) {
         new_token = env->NewStringUTF(cached_token_chars.c_str());
-        LOGi("cached: %s, new_token_chars: `%s`, id:%d", cached_token_chars.c_str(), new_token_chars.c_str(), new_token_id);
-        cached_token_chars="";
+        LOGi("cached: %s, new_token_chars: `%s`, id: %d", cached_token_chars.c_str(), new_token_chars.c_str(), new_token_id);
+        cached_token_chars.clear();
     } else {
         new_token = env->NewStringUTF("");
     }