diff --git a/LLama.Unittest/MtmdWeightsTests.cs b/LLama.Unittest/MtmdWeightsTests.cs
index 4a1a59c75..42ba52356 100644
--- a/LLama.Unittest/MtmdWeightsTests.cs
+++ b/LLama.Unittest/MtmdWeightsTests.cs
@@ -81,7 +81,7 @@ public void BasicPropertyChecks()
Assert.True(_mtmdWeights.SupportsVision);
Assert.False(_mtmdWeights.UsesMRope);
Assert.True(_mtmdWeights.UsesNonCausalAttention);
- Assert.Equal(-1, _mtmdWeights.AudioBitrate);
+ Assert.Equal(-1, _mtmdWeights.SampleRate);
}
[Fact,Trait("Category", "NoCI")]
@@ -143,8 +143,8 @@ public void TokenizeProvidesChunkMetadata()
Assert.True(_mtmdWeights.SupportsVision);
Assert.False(_mtmdWeights.SupportsAudio);
- var audioBitrate = _mtmdWeights.AudioBitrate;
- Assert.True(audioBitrate <= 0);
+ var audioSampleRate = _mtmdWeights.SampleRate;
+ Assert.True(audioSampleRate <= 0);
}
}
}
diff --git a/LLama/Batched/BatchedExecutor.cs b/LLama/Batched/BatchedExecutor.cs
index 1a6698b1a..db9be6a7a 100644
--- a/LLama/Batched/BatchedExecutor.cs
+++ b/LLama/Batched/BatchedExecutor.cs
@@ -43,7 +43,12 @@ public sealed class BatchedExecutor
/// The <see cref="LLamaWeights"/> this executor is using
/// </summary>
public LLamaWeights Model { get; }
-
+
+ /// <summary>
+ /// The optional <see cref="MtmdWeights"/> this executor is using
+ /// </summary>
+ public MtmdWeights? ClipModel { get; }
+
/// <summary>
/// Get the number of tokens in the batch, waiting for <see cref="Infer"/> to be called
/// </summary>
@@ -79,12 +84,8 @@ public int BatchedTokenCount
/// </summary>
/// <param name="model">The model to use</param>
/// <param name="contextParams">Parameters to create a new context</param>
- public BatchedExecutor(LLamaWeights model, IContextParams contextParams)
- : this(model, contextParams, null)
- {
- }
-
- public BatchedExecutor(LLamaWeights model, IContextParams contextParams, MtmdWeights? clipModel)
+ /// <param name="clipModel">Clip model to use for multimodal capabilities</param>
+ public BatchedExecutor(LLamaWeights model, IContextParams contextParams, MtmdWeights? clipModel = null)
{
Model = model;
Context = model.CreateContext(contextParams);
@@ -92,8 +93,6 @@ public BatchedExecutor(LLamaWeights model, IContextParams contextParams, MtmdWei
Epoch = 1;
}
- public MtmdWeights? ClipModel { get; }
-
/// <summary>
/// Start a new <see cref="Conversation"/>
/// </summary>
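
Folding the two constructors into one with an optional `clipModel` keeps the text-only call shape intact. A minimal sketch of both shapes (the model path and the MtmdWeights loading step are placeholders, not confirmed API):

```csharp
using LLama;
using LLama.Batched;
using LLama.Common;

// Load the base model (path is a placeholder)
var parameters = new ModelParams(@"path/to/model.gguf");
using var model = await LLamaWeights.LoadFromFileAsync(parameters);

// Text-only executor: clipModel defaults to null, so existing call sites keep compiling
using var textExecutor = new BatchedExecutor(model, parameters);

// Multimodal executor: pass the optional MtmdWeights and read it back via ClipModel
MtmdWeights? mtmd = null; // assumed loaded elsewhere, e.g. from an mmproj file
using var mmExecutor = new BatchedExecutor(model, parameters, mtmd);
```
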
diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs
index eee9a01e9..5f6a2878d 100644
--- a/LLama/LLamaEmbedder.cs
+++ b/LLama/LLamaEmbedder.cs
@@ -1,14 +1,11 @@
using System;
using System.Collections.Generic;
-using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using LLama.Abstractions;
using LLama.Exceptions;
using LLama.Native;
-using Microsoft.Extensions.AI;
using Microsoft.Extensions.Logging;
-using static System.Net.Mime.MediaTypeNames;
namespace LLama;
@@ -79,7 +76,7 @@ public async Task<IReadOnlyList<float[]>> GetEmbeddings(string input, Cancellati
Context.Dispose();
Context = _weights.CreateContext(_params, _logger);
- NativeApi.llama_set_embeddings(Context.NativeHandle, true);
+ Context.NativeHandle.SetEmbeddings(true);
// Add all of the tokens to the batch
var tokens = Context.Tokenize(input, special: true);
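
The same call-site migration applies anywhere `llama_set_embeddings` was used directly; a before/after sketch matching the hunk above:

```csharp
// Before: public static P/Invoke on NativeApi (now removed from the public surface)
// NativeApi.llama_set_embeddings(Context.NativeHandle, true);

// After: wrapper method on the safe context handle
Context.NativeHandle.SetEmbeddings(true);
```
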
diff --git a/LLama/LLamaReranker.cs b/LLama/LLamaReranker.cs
index 16a206c40..58919fd0a 100644
--- a/LLama/LLamaReranker.cs
+++ b/LLama/LLamaReranker.cs
@@ -1,11 +1,7 @@
using System;
using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using System.Text;
using System.Threading;
using System.Threading.Tasks;
-using System.Xml.Linq;
using LLama.Abstractions;
using LLama.Exceptions;
using LLama.Native;
@@ -44,7 +40,7 @@ public LLamaReranker(LLamaWeights weights, IContextParams @params, ILogger? logg
if (@params.PoolingType != LLamaPoolingType.Rank)
throw new NotSupportedException("Computing rank score, PoolingType must be equal to LLamaPoolingType.Rank");
Context = weights.CreateContext(@params, logger);
- NativeApi.llama_set_embeddings(Context.NativeHandle, true);
+ Context.NativeHandle.SetEmbeddings(true);
}
///
diff --git a/LLama/LLamaSharp.csproj b/LLama/LLamaSharp.csproj
index 4bdf9289a..02c3e11c4 100644
--- a/LLama/LLamaSharp.csproj
+++ b/LLama/LLamaSharp.csproj
@@ -57,7 +57,7 @@
- ff4affb4c1aa7eb4_v3
+ 3f7c29d318e317b6
diff --git a/LLama/MtmdWeights.cs b/LLama/MtmdWeights.cs
index 07e739a61..32dd6b572 100644
--- a/LLama/MtmdWeights.cs
+++ b/LLama/MtmdWeights.cs
@@ -137,9 +137,9 @@ public int DecodeImageChunk(IntPtr chunkPtr, SafeLLamaContextHandle llamaContext
public bool UsesMRope => NativeHandle.DecodeUseMRope();
/// <summary>
- /// Gets the audio bitrate advertised by the model.
+ /// Gets the audio sample rate advertised by the model.
/// </summary>
- public int AudioBitrate => NativeHandle.GetAudioBitrate();
+ public int SampleRate => NativeHandle.GetAudioSampleRate();
/// <inheritdoc />
public void Dispose() => NativeHandle.Dispose();
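
Since the native getter reports a sample rate rather than a bitrate, the rename also clarifies how callers should use the value. A sketch of gating audio input on it (`Resample` and the PCM variables are hypothetical):

```csharp
if (mtmdWeights.SupportsAudio)
{
    // Sample rate in Hz (e.g. 16000 for Whisper-style encoders); <= 0 when not applicable
    var rate = mtmdWeights.SampleRate;
    if (rate > 0 && sourceRate != rate)
        pcm = Resample(pcm, sourceRate, rate);
}
```
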
diff --git a/LLama/Native/GPUSplitMode.cs b/LLama/Native/GPUSplitMode.cs
index 27ee7ae49..338131b52 100644
--- a/LLama/Native/GPUSplitMode.cs
+++ b/LLama/Native/GPUSplitMode.cs
@@ -20,4 +20,10 @@ public enum GPUSplitMode
/// split layers and KV across GPUs, use tensor parallelism if supported
/// </summary>
Row = 2,
+
+ // Undocumented in llama.h
+ /// <summary>
+ ///
+ /// </summary>
+ Tensor = 3,
}
\ No newline at end of file
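
A sketch of opting into the new mode through `ModelParams`, assuming the `SplitMode` property LLamaSharp already exposes on model parameters:

```csharp
var parameters = new ModelParams(@"path/to/model.gguf")
{
    // Undocumented in llama.h, so semantics may change upstream
    SplitMode = GPUSplitMode.Tensor,
};
```
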
diff --git a/LLama/Native/LLamaFtype.cs b/LLama/Native/LLamaFtype.cs
index 813bad1ae..78a682130 100644
--- a/LLama/Native/LLamaFtype.cs
+++ b/LLama/Native/LLamaFtype.cs
@@ -206,7 +206,17 @@ public enum LLamaFtype
/// except 1d tensors
/// </summary>
LLAMA_FTYPE_MOSTLY_MXFP4_MOE = 38,
-
+
+ /// <summary>
+ /// Except 1d tensors
+ /// </summary>
+ LLAMA_FTYPE_MOSTLY_NVFP4 = 39,
+
+ /// <summary>
+ /// Except 1d tensors
+ /// </summary>
+ LLAMA_FTYPE_MOSTLY_Q1_0 = 40,
+
/// <summary>
/// File type was not specified
///
diff --git a/LLama/Native/LLamaModelImatrixData.cs b/LLama/Native/LLamaModelImatrixData.cs
new file mode 100644
index 000000000..df0eb3856
--- /dev/null
+++ b/LLama/Native/LLamaModelImatrixData.cs
@@ -0,0 +1,12 @@
+namespace LLama.Native;
+
+/* /// <summary>
+///
+/// </summary>
+/// <remarks>llama_model_imatrix_data</remarks>
+public unsafe struct LLamaModelImatrixData
+{
+ char* name;
+ float* data;
+ nuint size;
+} */
\ No newline at end of file
diff --git a/LLama/Native/LLamaModelQuantizeParams.cs b/LLama/Native/LLamaModelQuantizeParams.cs
index 857f0cfb9..9cd04b3f8 100644
--- a/LLama/Native/LLamaModelQuantizeParams.cs
+++ b/LLama/Native/LLamaModelQuantizeParams.cs
@@ -79,25 +79,35 @@ public bool keep_split
}
private sbyte _keep_split;
+ /// <summary>
+ /// calculate and show the final quantization size without performing quantization
+ /// </summary>
+ public bool dry_run
+ {
+ get => Convert.ToBoolean(_dry_run);
+ set => _dry_run = Convert.ToSByte(value);
+ }
+ private sbyte _dry_run;
+
/// <summary>
/// pointer to importance matrix data
/// </summary>
- public IntPtr imatrix;
+ public IntPtr imatrix; // LLamaModelImatrixData *
/// <summary>
/// pointer to vector containing overrides
/// </summary>
- public IntPtr kv_overrides;
+ public IntPtr kv_overrides; // llama_model_kv_override *
/// <summary>
/// pointer to vector containing tensor types
/// </summary>
- public IntPtr tensor_types;
+ public IntPtr tensor_types; // llama_model_tensor_override *
/// <summary>
/// Pointer to vector containing layer indices to prune
/// </summary>
- public IntPtr prune_layers;
+ public IntPtr prune_layers; // int32 *
/// <summary>
/// Create a LLamaModelQuantizeParams with default values
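
A sketch of using the new flag to preview a quantization's output size without writing anything, assuming `LLamaModelQuantizeParams.Default()` wraps `llama_model_quantize_default_params` as the summary above suggests:

```csharp
var qParams = LLamaModelQuantizeParams.Default();
qParams.ftype = LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_K_M;
qParams.dry_run = true; // calculate and show the final size; no quantization is performed
```
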
diff --git a/LLama/Native/LLamaModelTensorOverride.cs b/LLama/Native/LLamaModelTensorOverride.cs
new file mode 100644
index 000000000..82f44b530
--- /dev/null
+++ b/LLama/Native/LLamaModelTensorOverride.cs
@@ -0,0 +1,9 @@
+namespace LLama.Native;
+
+// Unsupported - we can't handle ggml_type since LlamaSharp doesn't wrap/expose ggml
+/*
+ * struct llama_model_tensor_override {
+ const char * pattern;
+ enum ggml_type type; // GGMLType might work?
+ };
+*/
\ No newline at end of file
diff --git a/LLama/Native/LoraAdapter.cs b/LLama/Native/LoraAdapter.cs
index 8fdd649a3..2aa96eb13 100644
--- a/LLama/Native/LoraAdapter.cs
+++ b/LLama/Native/LoraAdapter.cs
@@ -40,12 +40,24 @@ internal LoraAdapter(SafeLlamaModelHandle model, string path, IntPtr nativePtr)
/// </summary>
public void Unload()
{
+ // Early exit if already unloaded
+ if (!Loaded)
+ return;
+
+ // If the model has been unloaded this handle will have been auto unloaded
+ if (Model.IsClosed)
+ {
+ Loaded = false;
+ return;
+ }
+
+ // Unload
Loaded = false;
llama_adapter_lora_free(Pointer);
- // Manually free a LoRA adapter. loaded adapters will be free when the associated model is deleted
+ // Manually free a LoRA adapter. loaded adapters which have not been
+ // freed will be automatically freed when the associated model is deleted
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
- [Obsolete("adapters are now freed together with the associated model")]
static extern void llama_adapter_lora_free(IntPtr adapter);
}
}
\ No newline at end of file
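
The reworked `Unload` is now idempotent and tolerant of disposal order; a sketch of the call patterns it makes safe:

```csharp
// adapter: a LoraAdapter previously loaded from `model`
adapter.Unload();  // frees the native adapter and clears Loaded
adapter.Unload();  // no-op: early exit because Loaded is already false

// Disposing the model first is also fine: llama.cpp frees any still-loaded
// adapters with the model, and a later Unload() just clears the flag.
```
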
diff --git a/LLama/Native/NativeApi.Mtmd.cs b/LLama/Native/NativeApi.Mtmd.cs
index 827c0e1b0..1bf2b5f9a 100644
--- a/LLama/Native/NativeApi.Mtmd.cs
+++ b/LLama/Native/NativeApi.Mtmd.cs
@@ -41,24 +41,50 @@ internal struct mtmd_context_params
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_context_params_default", CallingConvention = CallingConvention.Cdecl)]
internal static extern mtmd_context_params mtmd_context_params_default();
+ /// <summary>
+ /// whether we need to set non-causal mask before llama_decode
+ /// if chunk is nullptr, we assume the default case where chunk is an image chunk
+ /// </summary>
+ /// <param name="ctx"></param>
+ /// <returns></returns>
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_decode_use_non_causal", CallingConvention = CallingConvention.Cdecl)]
[return: MarshalAs(UnmanagedType.I1)]
internal static extern bool mtmd_decode_use_non_causal(SafeMtmdModelHandle ctx);
+ /// <summary>
+ /// whether the current model uses M-RoPE for llama_decode
+ /// </summary>
+ /// <param name="ctx"></param>
+ /// <returns></returns>
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_decode_use_mrope", CallingConvention = CallingConvention.Cdecl)]
[return: MarshalAs(UnmanagedType.I1)]
internal static extern bool mtmd_decode_use_mrope(SafeMtmdModelHandle ctx);
+ /// <summary>
+ /// whether the current model supports vision input
+ /// </summary>
+ /// <param name="ctx"></param>
+ /// <returns></returns>
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_support_vision", CallingConvention = CallingConvention.Cdecl)]
[return: MarshalAs(UnmanagedType.I1)]
internal static extern bool mtmd_support_vision(SafeMtmdModelHandle ctx);
+ /// <summary>
+ /// whether the current model supports audio input
+ /// </summary>
+ /// <param name="ctx"></param>
+ /// <returns></returns>
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_support_audio", CallingConvention = CallingConvention.Cdecl)]
[return: MarshalAs(UnmanagedType.I1)]
internal static extern bool mtmd_support_audio(SafeMtmdModelHandle ctx);
- [DllImport(mtmdLibraryName, EntryPoint = "mtmd_get_audio_bitrate", CallingConvention = CallingConvention.Cdecl)]
- internal static extern int mtmd_get_audio_bitrate(SafeMtmdModelHandle ctx);
+ /// <summary>
+ /// get audio sample rate in Hz, for example 16000 for Whisper
+ /// </summary>
+ /// <param name="ctx"></param>
+ /// <returns></returns>
+ [DllImport(mtmdLibraryName, EntryPoint = "mtmd_get_audio_sample_rate", CallingConvention = CallingConvention.Cdecl)]
+ internal static extern int mtmd_get_audio_sample_rate(SafeMtmdModelHandle ctx);
// bitmap ------------------------------------------------------------
@@ -153,9 +179,11 @@ internal static unsafe void mtmd_bitmap_set_id(SafeMtmdEmbed bitmap, string? id)
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_image_tokens_get_n_tokens", CallingConvention = CallingConvention.Cdecl)]
internal static extern UIntPtr mtmd_image_tokens_get_n_tokens(IntPtr image_tokens);
+ [Obsolete("use mtmd_image_tokens_get_decoder_pos() instead")]
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_image_tokens_get_nx", CallingConvention = CallingConvention.Cdecl)]
internal static extern UIntPtr mtmd_image_tokens_get_nx(IntPtr image_tokens);
+ [Obsolete("use mtmd_image_tokens_get_decoder_pos() instead")]
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_image_tokens_get_ny", CallingConvention = CallingConvention.Cdecl)]
internal static extern UIntPtr mtmd_image_tokens_get_ny(IntPtr image_tokens);
@@ -165,10 +193,32 @@ internal static unsafe void mtmd_bitmap_set_id(SafeMtmdEmbed bitmap, string? id)
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_image_tokens_get_n_pos", CallingConvention = CallingConvention.Cdecl)]
internal static extern int mtmd_image_tokens_get_n_pos(IntPtr image_tokens);
+ [StructLayout(LayoutKind.Explicit)]
+ internal struct mtmd_decoder_pos
+ {
+ [FieldOffset(0)]
+ uint t;
+
+ [FieldOffset(4)]
+ uint x;
+
+ [FieldOffset(8)]
+ uint y;
+ };
+
+ /// <summary>
+ /// get position for decoder attention, to be used by M-RoPE models
+ /// </summary>
+ /// <param name="image_tokens"></param>
+ /// <param name="i">i is the index of the embedding token, ranging from 0 to mtmd_image_tokens_get_n_tokens() - 1</param>
+ /// <returns>return relative position (for example, embedding 0 will have position (0, 0, 0); remember to adjust it to the current absolute position)</returns>
+ [DllImport(mtmdLibraryName, EntryPoint = "mtmd_image_tokens_get_decoder_pos", CallingConvention = CallingConvention.Cdecl)]
+ internal static extern mtmd_decoder_pos mtmd_image_tokens_get_decoder_pos(IntPtr image_tokens, nuint i);
+
// tokenize ----------------------------------------------------------
/// <summary>
- /// Native text structure consumed by .
+ /// Native text structure consumed by .
/// </summary>
internal unsafe struct mtmd_input_text_native
{
@@ -259,6 +309,11 @@ internal static unsafe IntPtr mtmd_helper_bitmap_init_from_file(SafeMtmdModelHan
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_helper_get_n_pos", CallingConvention = CallingConvention.Cdecl)]
internal static extern int mtmd_helper_get_n_pos(SafeMtmdInputChunks chunks);
+ [DllImport(mtmdLibraryName, EntryPoint = "mtmd_helper_image_get_decoder_pos", CallingConvention = CallingConvention.Cdecl)]
+ // helper to get the list of relative positions corresponding to the embedding tokens, to be used by M-RoPE
+ // out_pos must have length == mtmd_helper_get_n_tokens(image)
+ internal static extern void mtmd_helper_image_get_decoder_pos(IntPtr /* mtmd_image_tokens* */ image, IntPtr /* mtmd_decoder_pos* */ out_pos);
+
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_helper_eval_chunks", CallingConvention = CallingConvention.Cdecl)]
internal static extern int mtmd_helper_eval_chunks(
SafeMtmdModelHandle ctx,
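
A sketch of how the new position getter pairs with the existing token count (internal API; `imageTokens` stands in for a hypothetical `mtmd_image_tokens*` pulled from a tokenized image chunk):

```csharp
var count = (nuint)NativeApi.mtmd_image_tokens_get_n_tokens(imageTokens);
for (nuint i = 0; i < count; i++)
{
    // Relative position: embedding 0 is (t, x, y) = (0, 0, 0);
    // adjust by the current absolute position before decoding
    var pos = NativeApi.mtmd_image_tokens_get_decoder_pos(imageTokens, i);
}
```
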
diff --git a/LLama/Native/NativeApi.cs b/LLama/Native/NativeApi.cs
index ce8c36197..cbdd05a53 100644
--- a/LLama/Native/NativeApi.cs
+++ b/LLama/Native/NativeApi.cs
@@ -132,19 +132,7 @@ public static void llama_empty_call()
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern unsafe nuint llama_state_seq_load_file(SafeLLamaContextHandle ctx, string filepath, LLamaSeqId dest_seq_id, LLamaToken* tokens_out, nuint n_token_capacity, out nuint n_token_count_out);
- /// <summary>
- /// Set whether to use causal attention or not. If set to true, the model will only attend to the past tokens
- /// </summary>
- [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
- public static extern void llama_set_causal_attn(SafeLLamaContextHandle ctx, [MarshalAs(UnmanagedType.U1)] bool causalAttn);
-
- /// <summary>
- /// Set whether the context outputs embeddings or not
- /// </summary>
- /// <param name="ctx"></param>
- /// <param name="embeddings">If true, embeddings will be returned but logits will not</param>
- [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
- public static extern void llama_set_embeddings(SafeLLamaContextHandle ctx, [MarshalAs(UnmanagedType.U1)] bool embeddings);
+
/// <summary>
/// Set abort callback
@@ -152,14 +140,6 @@ public static void llama_empty_call()
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern void llama_set_abort_callback(SafeLLamaContextHandle ctx, IntPtr /* ggml_abort_callback */ abortCallback, IntPtr abortCallbackData);
- /// <summary>
- /// Get the n_seq_max for this context
- /// </summary>
- /// <param name="ctx"></param>
- /// <returns></returns>
- [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
- public static extern uint llama_n_seq_max(SafeLLamaContextHandle ctx);
-
/// <summary>
/// Get all output token embeddings.
/// When pooling_type == LLAMA_POOLING_TYPE_NONE or when using a generative model, the embeddings for which
@@ -515,6 +495,26 @@ public static extern unsafe LLamaParamsFitStatus llama_params_fit(
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern long llama_time_us();
-
+ /* Directly exposes `ggml_tensor` and `gguf_context` which LLamaSharp does not currently support!
+
+ typedef void (* llama_model_set_tensor_data_t) (struct ggml_tensor * tensor, void* userdata);
+
+ // Create a new model from GGUF metadata as well as a function to set the tensor data
+ // - tensors are created as GGML_TYPE_F32 by default,
+ // override by adding a tensor with the same name but a different type to the context
+ LLAMA_API struct llama_model * llama_model_init_from_user(
+ struct gguf_context * metadata,
+ llama_model_set_tensor_data_t set_tensor_data, // function to initialize tensor data with
+ void* set_tensor_data_ud, // userdata for function
+ struct llama_model_params params);
+ */
+
+ /*
+ // Cannot be wrapped, pinvoke doesn't understand FILE
+ // Load a model from an open FILE pointer
+ LLAMA_API struct llama_model * llama_model_load_from_file_ptr(
+ FILE* file,
+ struct llama_model_params params);
+ */
}
}
diff --git a/LLama/Native/SafeLLamaContextHandle.cs b/LLama/Native/SafeLLamaContextHandle.cs
index 5ec78f053..0041fbf5e 100644
--- a/LLama/Native/SafeLLamaContextHandle.cs
+++ b/LLama/Native/SafeLLamaContextHandle.cs
@@ -37,7 +37,7 @@ public sealed class SafeLLamaContextHandle
/// <summary>
/// Get the number of maximum sequences allowed
/// </summary>
- public uint MaxSeq => NativeApi.llama_n_seq_max(this);
+ public uint MaxSeq => llama_n_seq_max(this);
/// <summary>
/// Get or set the number of threads used for generation of a single token.
@@ -355,6 +355,7 @@ static SafeLLamaContextHandle()
/// <param name="buf_size"></param>
/// <returns>The length of the value string (on success) -1 otherwise</returns>
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+ // ReSharper disable once InconsistentNaming
private static extern int llama_adapter_meta_val_str(IntPtr adapter, string key, StringBuilder buf, UIntPtr buf_size);
/// <summary>
@@ -374,6 +375,7 @@ static SafeLLamaContextHandle()
/// <param name="buf_size"></param>
/// <returns>The length of string i.e meta key (on success) -1 otherwise</returns>
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+ // ReSharper disable once InconsistentNaming
private static extern int llama_adapter_meta_key_by_index(IntPtr adapter, int i, StringBuilder buf, UIntPtr buf_size);
/// <summary>
@@ -385,6 +387,7 @@ static SafeLLamaContextHandle()
/// <param name="buf_size"></param>
/// <returns>The length of value string (on success) -1 otherwise</returns>
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+ // ReSharper disable once InconsistentNaming
private static extern int llama_adapter_meta_val_by_index(IntPtr adapter, int i, StringBuilder buf, UIntPtr buf_size);
/// <summary>
@@ -424,6 +427,56 @@ static SafeLLamaContextHandle()
/// </summary>
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
private static extern void llama_set_warmup(SafeLLamaContextHandle ctx, [MarshalAs(UnmanagedType.U1)] bool warmup);
+
+ /// <summary>
+ /// Set whether to use causal attention or not. If set to true, the model will only attend to the past tokens
+ /// </summary>
+ [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+ private static extern void llama_set_causal_attn(SafeLLamaContextHandle ctx, [MarshalAs(UnmanagedType.U1)] bool causalAttn);
+
+ /// <summary>
+ /// Set whether the context outputs embeddings or not
+ /// </summary>
+ /// <param name="ctx"></param>
+ /// <param name="embeddings">If true, embeddings will be returned but logits will not</param>
+ [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+ private static extern void llama_set_embeddings(SafeLLamaContextHandle ctx, [MarshalAs(UnmanagedType.U1)] bool embeddings);
+
+ /// <summary>
+ /// Get the n_seq_max for this context
+ /// </summary>
+ /// <param name="ctx"></param>
+ /// <returns></returns>
+ [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+ private static extern uint llama_n_seq_max(SafeLLamaContextHandle ctx);
+ #endregion
+
+ #region Setters
+ /// <summary>
+ /// Set whether the model is in warmup mode or not.
+ /// If true, all model tensors are activated during <see cref="llama_decode"/> to load and cache their weights.
+ /// </summary>
+ public void SetWarmup(bool value)
+ {
+ llama_set_warmup(this, value);
+ }
+
+ /// <summary>
+ /// Set whether to use causal attention or not. If set to true, the model will only attend to the past tokens
+ /// </summary>
+ public void SetCausalAttention(bool value)
+ {
+ llama_set_causal_attn(this, value);
+ }
+
+ /// <summary>
+ /// Set whether the context outputs embeddings or not
+ /// </summary>
+ /// <param name="value">If true, embeddings will be returned but logits will not</param>
+ public void SetEmbeddings(bool value)
+ {
+ llama_set_embeddings(this, value);
+ }
#endregion
#region LoRA
@@ -434,7 +487,7 @@ static SafeLLamaContextHandle()
///
public void SetLoraAdapters(params Span<(LoraAdapter Adapter, float Scale)> adapters)
{
- // Check adapters are all valid
+ // Check adapters are all valid and attached to this model
foreach (var adapter in adapters)
{
if (adapter.Adapter.Model != ModelHandle)
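
Together the three wrappers replace the `NativeApi` statics removed earlier in this diff; a sketch of the resulting surface on a live handle:

```csharp
var handle = context.NativeHandle;
handle.SetWarmup(false);          // leave warmup mode once weights are primed
handle.SetCausalAttention(true);  // attend to past tokens only
handle.SetEmbeddings(false);      // produce logits rather than embeddings
```
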
diff --git a/LLama/Native/SafeLlamaModelHandle.cs b/LLama/Native/SafeLlamaModelHandle.cs
index 668074090..2a6855741 100644
--- a/LLama/Native/SafeLlamaModelHandle.cs
+++ b/LLama/Native/SafeLlamaModelHandle.cs
@@ -175,7 +175,7 @@ static SafeLlamaModelHandle()
private static extern unsafe byte* llama_model_chat_template(SafeLlamaModelHandle model, string? name);
/// <summary>
- /// Load the model from a file
+ /// Load a model from a file
/// If the file is split into multiple parts, the file name must follow this pattern: {name}-%05d-of-%05d.gguf
/// If the split file name does not follow this pattern, use llama_model_load_from_splits
/// </summary>
@@ -186,7 +186,7 @@ static SafeLlamaModelHandle()
private static extern SafeLlamaModelHandle llama_model_load_from_file(string path, LLamaModelParams @params);
/// <summary>
- /// Load the model from multiple splits (support custom naming scheme)
+ /// Load a model from multiple splits (support custom naming scheme)
/// The paths must be in the correct order
/// </summary>
/// <param name="paths"></param>
@@ -460,7 +460,7 @@ private static int llama_model_meta_val_str(SafeLlamaModelHandle model, string k
///
///
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
- private static extern string? llama_model_cls_label(SafeLlamaModelHandle model, uint i);
+ private static extern IntPtr /* char* */ llama_model_cls_label(SafeLlamaModelHandle model, uint i);
#endregion
#region LoRA
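
Returning `string` straight from a P/Invoke whose native side returns a `const char*` lets the default marshaller try to free a buffer llama.cpp still owns; returning `IntPtr` avoids that. A sketch of the conversion a caller inside the class would now perform (`Marshal.PtrToStringUTF8` assumed available on the target framework):

```csharp
var ptr = llama_model_cls_label(model, i);
var label = ptr == IntPtr.Zero
    ? null
    : Marshal.PtrToStringUTF8(ptr); // copies the string; the native buffer stays with llama.cpp
```
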
diff --git a/LLama/Native/SafeMtmdModelHandle.cs b/LLama/Native/SafeMtmdModelHandle.cs
index 4a356f637..24ccc8ee2 100644
--- a/LLama/Native/SafeMtmdModelHandle.cs
+++ b/LLama/Native/SafeMtmdModelHandle.cs
@@ -348,7 +348,7 @@ public int DecodeImageChunk(IntPtr chunkPtr, SafeLLamaContextHandle llamaContext
/// <summary>
- /// Gets the audio bitrate advertised by the model.
+ /// Gets the audio sample rate advertised by the model.
/// </summary>
- public int GetAudioBitrate() => NativeApi.mtmd_get_audio_bitrate(this);
+ public int GetAudioSampleRate() => NativeApi.mtmd_get_audio_sample_rate(this);
private void EnsureNotDisposed()
{
diff --git a/llama.cpp b/llama.cpp
index ff4affb4c..73c9eb8ce 160000
--- a/llama.cpp
+++ b/llama.cpp
@@ -1 +1 @@
-Subproject commit ff4affb4c1aa7eb4f28a0d9de1b205bd719802f2
+Subproject commit 73c9eb8ceda397b651dbb6661b2935f0283a2b1d