Makefile: 2 changes (1 addition, 1 deletion)
@@ -6,7 +6,7 @@ BINARY_NAME=local-ai
 DETECT_LIBS?=true
 
 # llama.cpp versions
-CPPLLAMA_VERSION?=2c9f833d17bb5b8ea89dec663b072b5420fc5438
+CPPLLAMA_VERSION?=10f2e81809bbb69ecfe64fc8b4686285f84b0c07
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
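Note: this bumps the pinned llama.cpp revision that the C++ backend is built from. Because the Makefile assigns the pin with `?=`, it can be overridden from the environment or the make command line without editing the file; a minimal sketch, assuming the repository's usual `make build` entry point:

    # build against the pinned default from the Makefile
    make build

    # try a different llama.cpp commit without touching the Makefile
    # (the hash below is a placeholder, not a real revision)
    make build CPPLLAMA_VERSION=<other-commit-sha>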
backend/cpp/llama/patches/01-llava.patch: 10 changes (5 additions, 5 deletions)
@@ -1,13 +1,13 @@
 diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
-index 3cd0d2fa..6c5e811a 100644
+index 7f892beb..0517e529 100644
 --- a/examples/llava/clip.cpp
 +++ b/examples/llava/clip.cpp
-@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
- struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
+@@ -2766,7 +2766,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
+ int patch_offset = ctx->has_class_embedding ? 1 : 0;
  int* patches_data = (int*)malloc(ggml_nbytes(patches));
  for (int i = 0; i < num_patches; i++) {
-- patches_data[i] = i + 1;
-+ patches_data[i] = i;
+- patches_data[i] = i + patch_offset;
++ patches_data[i] = i + 1;
  }
  ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
  free(patches_data);
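Note: the patch is rebased here because the newly pinned clip.cpp has moved (the hunk shifts from line 2608 to 2766) and upstream now derives the starting index from ctx->has_class_embedding via patch_offset, so the old hunk context no longer matches. When bumping CPPLLAMA_VERSION it is worth dry-running the rebased patch against the new pin; a sketch of one way to do that (the clone location and the use of git apply are illustrative, not part of the repo's build scripts):

    # from the LocalAI repo root: fetch llama.cpp at the pinned commit
    git clone https://github.com/ggerganov/llama.cpp
    git -C llama.cpp checkout 10f2e81809bbb69ecfe64fc8b4686285f84b0c07
    # --check only validates that the patch applies; nothing is modified
    git -C llama.cpp apply --check ../backend/cpp/llama/patches/01-llava.patch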
backend/cpp/llama/prepare.sh: 2 changes (2 additions, 0 deletions)
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+set -e
+
 ## Patches
 ## Apply patches from the `patches` directory
 for patch in $(ls patches); do
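Note: with set -e the preparation script stops at the first failing command, so a patch that no longer applies against the updated llama.cpp pin aborts the run instead of leaving a half-patched tree. A minimal sketch of the effect; the actual apply command sits below the hunk shown here, so the patch invocation below is an assumption:

    #!/bin/bash
    set -e   # any failing command aborts the script with a non-zero status

    for patch in $(ls patches); do
        echo "Applying patch: $patch"
        # assumed apply step: if a hunk is rejected, `patch` exits non-zero
        # and set -e stops the loop and the script right here
        patch -d llama.cpp -p1 < "patches/$patch"
    done

    echo "All patches applied"   # reached only if every patch applied cleanly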