From cff6645ccb664aeb20e5e906f9b6fd729b01e1f6 Mon Sep 17 00:00:00 2001 From: Oleg Skutte <00.00.oleg.00.00@gmail.com> Date: Wed, 28 Aug 2024 00:17:26 +0400 Subject: [PATCH] sync: update ggml --- ggml | 2 +- ggml_extend.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml b/ggml index 21f9e5c42..21d3a308f 160000 --- a/ggml +++ b/ggml @@ -1 +1 @@ -Subproject commit 21f9e5c426b105841c2e346d8f1aafec398edf15 +Subproject commit 21d3a308fcb7f31cb9beceaeebad4fb622f3c337 diff --git a/ggml_extend.hpp b/ggml_extend.hpp index bc920eef6..810f2b9ef 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -741,7 +741,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context* v = ggml_cont(ctx, ggml_permute(ctx, v, 0, 2, 1, 3)); // [N, n_head, L_k, d_head] v = ggml_reshape_3d(ctx, v, d_head, L_k, n_head * N); // [N * n_head, L_k, d_head] LOG_DEBUG("k->ne[1] == %d", k->ne[1]); - kqv = ggml_flash_attn_ext(ctx, q, k, v, mask, scale, 0); + kqv = ggml_flash_attn_ext(ctx, q, k, v, mask, scale, 0, 0); } else { v = ggml_cont(ctx, ggml_permute(ctx, v, 1, 2, 0, 3)); // [N, n_head, d_head, L_k] v = ggml_reshape_3d(ctx, v, L_k, d_head, n_head * N); // [N * n_head, d_head, L_k]