From 9e591054fe166126a32f51a1739ca55c7bb2ae61 Mon Sep 17 00:00:00 2001 From: Martin Evans Date: Sat, 31 May 2025 23:53:34 +0100 Subject: [PATCH 1/2] Using `-1` for start/end positions in `Conversation`. This eliminates the possibility of KV cache leaks due to bugs with `_end` getting out of sync. --- LLama/Batched/Conversation.cs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/LLama/Batched/Conversation.cs b/LLama/Batched/Conversation.cs index ec1dfdc00..a03473d5c 100644 --- a/LLama/Batched/Conversation.cs +++ b/LLama/Batched/Conversation.cs @@ -84,7 +84,7 @@ public void Dispose() _disposed = true; // Remove this conversation from the KV cache - Executor.Context.NativeHandle.KvCacheRemove(ConversationId, 0, _end); + Executor.Context.NativeHandle.KvCacheRemove(ConversationId, -1, -1); // Prevent finalizer from running GC.SuppressFinalize(this); @@ -419,8 +419,7 @@ public void Remove(LLamaPos start, int count) if (count <= 0) return; - var end = start.Value + count; - _conversation.Executor.Context.NativeHandle.KvCacheRemove(_conversation.ConversationId, start, end); + _conversation.Executor.Context.NativeHandle.KvCacheRemove(_conversation.ConversationId, start, -1); } #endregion From 3bfedda99af560ef48f94a49a7d361fb89d9c87a Mon Sep 17 00:00:00 2001 From: Martin Evans Date: Sun, 1 Jun 2025 00:12:22 +0100 Subject: [PATCH 2/2] Fixed `Remove` comment --- LLama/Batched/Conversation.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/LLama/Batched/Conversation.cs b/LLama/Batched/Conversation.cs index a03473d5c..7dbf1f8c3 100644 --- a/LLama/Batched/Conversation.cs +++ b/LLama/Batched/Conversation.cs @@ -410,7 +410,7 @@ public void Remove(LLamaPos start, LLamaPos end) } /// - /// Removes all tokens starting from the given position + /// Removes tokens starting from /// /// Start position (inclusive) /// Number of tokens @@ -419,7 +419,8 @@ public void Remove(LLamaPos start, int count) if (count <= 0) return; - 
_conversation.Executor.Context.NativeHandle.KvCacheRemove(_conversation.ConversationId, start, -1); + var end = start.Value + count; + _conversation.Executor.Context.NativeHandle.KvCacheRemove(_conversation.ConversationId, start, end); } #endregion