Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions LLama.Unittest/LLamaEmbedderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ private async Task CompareEmbeddings(string modelPath)
Assert.DoesNotContain(float.NaN, spoon);

var generator = (IEmbeddingGenerator<string, Embedding<float>>)embedder;
Assert.NotNull(generator.Metadata);
Assert.Equal(nameof(LLamaEmbedder), generator.Metadata.ProviderName);
Assert.NotNull(generator.Metadata.ModelId);
Assert.NotEmpty(generator.Metadata.ModelId);
Assert.NotNull(generator.GetService<EmbeddingGeneratorMetadata>());
Assert.Equal(nameof(LLamaEmbedder), generator.GetService<EmbeddingGeneratorMetadata>()?.ProviderName);
Assert.NotNull(generator.GetService<EmbeddingGeneratorMetadata>()?.ModelId);
Assert.NotEmpty(generator.GetService<EmbeddingGeneratorMetadata>()?.ModelId!);
Assert.Same(embedder, generator.GetService<LLamaEmbedder>());
Assert.Same(generator, generator.GetService<IEmbeddingGenerator<string, Embedding<float>>>());
Assert.Null(generator.GetService<string>());
Expand Down
21 changes: 10 additions & 11 deletions LLama/Extensions/LLamaExecutorExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ private sealed class LLamaExecutorChatClient(
IHistoryTransform? historyTransform = null,
ITextStreamTransform? outputTransform = null) : IChatClient
{
private static readonly ChatClientMetadata s_metadata = new(nameof(LLamaExecutorChatClient));
private static readonly InferenceParams s_defaultParams = new();
private static readonly DefaultSamplingPipeline s_defaultPipeline = new();
private static readonly string[] s_antiPrompts = ["User:", "Assistant:", "System:"];
Expand All @@ -47,21 +48,19 @@ private sealed class LLamaExecutorChatClient(
private readonly ITextStreamTransform _outputTransform = outputTransform ??
new LLamaTransforms.KeywordTextOutputStreamTransform(s_antiPrompts);

/// <inheritdoc/>
public ChatClientMetadata Metadata { get; } = new(nameof(LLamaExecutorChatClient));

/// <inheritdoc/>
public void Dispose() { }

/// <inheritdoc/>
public object? GetService(Type serviceType, object? key = null) =>
key is not null ? null :
public object? GetService(Type serviceType, object? serviceKey = null) =>
serviceKey is not null ? null :
serviceType == typeof(ChatClientMetadata) ? s_metadata :
serviceType?.IsInstanceOfType(_executor) is true ? _executor :
serviceType?.IsInstanceOfType(this) is true ? this :
null;

/// <inheritdoc/>
public async Task<ChatCompletion> CompleteAsync(
public async Task<ChatResponse> GetResponseAsync(
IList<ChatMessage> chatMessages, ChatOptions? options = null, CancellationToken cancellationToken = default)
{
var result = _executor.InferAsync(CreatePrompt(chatMessages), CreateInferenceParams(options), cancellationToken);
Expand All @@ -79,7 +78,7 @@ public async Task<ChatCompletion> CompleteAsync(
}

/// <inheritdoc/>
public async IAsyncEnumerable<StreamingChatCompletionUpdate> CompleteStreamingAsync(
public async IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(
IList<ChatMessage> chatMessages, ChatOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
{
var result = _executor.InferAsync(CreatePrompt(chatMessages), CreateInferenceParams(options), cancellationToken);
Expand Down Expand Up @@ -142,8 +141,8 @@ private string CreatePrompt(IList<ChatMessage> messages)
MaxTokens = options?.MaxOutputTokens ?? 256, // arbitrary upper limit
SamplingPipeline = new DefaultSamplingPipeline()
{
FrequencyPenalty = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.FrequencyPenalty), out float af) is true ? af : s_defaultPipeline.FrequencyPenalty,
PresencePenalty = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.PresencePenalty), out float ap) is true ? ap : s_defaultPipeline.PresencePenalty,
FrequencyPenalty = options?.FrequencyPenalty ?? s_defaultPipeline.FrequencyPenalty,
PresencePenalty = options?.PresencePenalty ?? s_defaultPipeline.PresencePenalty,
PreventEOS = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.PreventEOS), out bool eos) is true ? eos : s_defaultPipeline.PreventEOS,
PenalizeNewline = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.PenalizeNewline), out bool pnl) is true ? pnl : s_defaultPipeline.PenalizeNewline,
RepeatPenalty = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.RepeatPenalty), out float rp) is true ? rp : s_defaultPipeline.RepeatPenalty,
Expand All @@ -152,8 +151,8 @@ private string CreatePrompt(IList<ChatMessage> messages)
MinKeep = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.MinKeep), out int mk) is true ? mk : s_defaultPipeline.MinKeep,
MinP = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.MinP), out float mp) is true ? mp : s_defaultPipeline.MinP,
Seed = options?.Seed is long seed ? (uint)seed : (uint)(t_random ??= new()).Next(),
Temperature = options?.Temperature ?? 0,
TopP = options?.TopP ?? 0,
Temperature = options?.Temperature ?? s_defaultPipeline.Temperature,
TopP = options?.TopP ?? s_defaultPipeline.TopP,
TopK = options?.TopK ?? s_defaultPipeline.TopK,
TypicalP = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.TypicalP), out float tp) is true ? tp : s_defaultPipeline.TypicalP,
},
Expand Down
35 changes: 24 additions & 11 deletions LLama/LLamaEmbedder.EmbeddingGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,31 @@ public partial class LLamaEmbedder
private EmbeddingGeneratorMetadata? _metadata;

/// <inheritdoc />
EmbeddingGeneratorMetadata IEmbeddingGenerator<string, Embedding<float>>.Metadata =>
_metadata ??= new(
nameof(LLamaEmbedder),
modelId: Context.NativeHandle.ModelHandle.ReadMetadata().TryGetValue("general.name", out var name) ? name : null,
dimensions: EmbeddingSize);
object? IEmbeddingGenerator<string, Embedding<float>>.GetService(Type serviceType, object? serviceKey)
{
if (serviceKey is null)
{
if (serviceType == typeof(EmbeddingGeneratorMetadata))
{
return _metadata ??= new(
nameof(LLamaEmbedder),
modelId: Context.NativeHandle.ModelHandle.ReadMetadata().TryGetValue("general.name", out var name) ? name : null,
dimensions: EmbeddingSize);
}

/// <inheritdoc />
object? IEmbeddingGenerator<string, Embedding<float>>.GetService(Type serviceType, object? key) =>
key is not null ? null :
serviceType?.IsInstanceOfType(Context) is true ? Context :
serviceType?.IsInstanceOfType(this) is true ? this :
null;
if (serviceType?.IsInstanceOfType(Context) is true)
{
return Context;
}

if (serviceType?.IsInstanceOfType(this) is true)
{
return this;
}
}

return null;
}

/// <inheritdoc />
async Task<GeneratedEmbeddings<Embedding<float>>> IEmbeddingGenerator<string, Embedding<float>>.GenerateAsync(IEnumerable<string> values, EmbeddingGenerationOptions? options, CancellationToken cancellationToken)
Expand Down
2 changes: 1 addition & 1 deletion LLama/LLamaSharp.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@

<ItemGroup>
<PackageReference Include="Microsoft.Bcl.AsyncInterfaces" Version="9.0.0" />
<PackageReference Include="Microsoft.Extensions.AI.Abstractions" Version="9.1.0-preview.1.25064.3" />
<PackageReference Include="Microsoft.Extensions.AI.Abstractions" Version="9.3.0-preview.1.25114.11" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.0" />
<PackageReference Include="System.Numerics.Tensors" Version="9.0.0" />
</ItemGroup>
Expand Down