Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 11 additions & 19 deletions LLama.Unittest/BasicTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public sealed class BasicTest
public BasicTest(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;
_params = new ModelParams(Constants.GenerativeModelPath)
_params = new ModelParams(Constants.GenerativeModelPath2)
{
ContextSize = 128,
GpuLayerCount = Constants.CIGpuLayerCount
Expand All @@ -26,38 +26,30 @@ public void Dispose()
_model.Dispose();
}

/// <summary>
/// Checks the vocabulary count, context size and embedding size reported by <c>_model</c>
/// against hard-coded expected values.
/// </summary>
[Fact]
public void BasicModelProperties()
{
    var vocabularyCount = _model.Vocab.Count;
    Assert.Equal(128256, vocabularyCount);

    var contextSize = _model.ContextSize;
    Assert.Equal(131072, contextSize);

    var embeddingSize = _model.EmbeddingSize;
    Assert.Equal(2048, embeddingSize);
}

[Fact]
public void AdvancedModelProperties()
{
// These are the metadata keys and values expected from the generative test model. They will
// need changing if the tests are switched to use a different model!
var expected = new Dictionary<string, string>
{
{ "general.name", "Llama 3.2 1B Instruct" },
{ "general.name", "SmolLM 360M" },
{ "general.architecture", "llama" },
{ "general.quantization_version", "2" },
{ "general.file_type", "2" },
{ "general.file_type", "7" },

{ "llama.context_length", "131072" },
{ "llama.context_length", "2048" },
{ "llama.rope.dimension_count", "64" },
{ "llama.embedding_length", "2048" },
{ "llama.block_count", "16" },
{ "llama.feed_forward_length", "8192" },
{ "llama.attention.head_count", "32" },
{ "llama.attention.head_count_kv", "8" },
{ "llama.embedding_length", "960" },
{ "llama.block_count", "32" },
{ "llama.feed_forward_length", "2560" },
{ "llama.attention.head_count", "15" },
{ "llama.attention.head_count_kv", "5" },
{ "llama.attention.layer_norm_rms_epsilon", "0.000010" },

{ "tokenizer.ggml.eos_token_id", "128009" },
{ "tokenizer.ggml.eos_token_id", "2" },
{ "tokenizer.ggml.model", "gpt2" },
{ "tokenizer.ggml.bos_token_id", "128000" },
{ "tokenizer.ggml.bos_token_id", "1" },
};

// Print all keys
Expand Down
1 change: 1 addition & 0 deletions LLama.Unittest/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ namespace LLama.Unittest
internal static class Constants
{
public static readonly string GenerativeModelPath = "Models/Llama-3.2-1B-Instruct-Q4_0.gguf";
public static readonly string GenerativeModelPath2 = "Models/smollm-360m-instruct-add-basics-q8_0.gguf";
public static readonly string EmbeddingModelPath = "Models/all-MiniLM-L12-v2.Q8_0.gguf";

public static readonly string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";
Expand Down
8 changes: 4 additions & 4 deletions LLama.Unittest/KernelMemory/ITextTokenizerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ public abstract class ITextTokenizerTests
protected ITextTokenizer? _generator;
#pragma warning restore KMEXP00 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.

protected InferenceParams _infParams;
protected LLamaSharpConfig _lsConfig;
protected readonly InferenceParams _infParams;
protected readonly LLamaSharpConfig _lsConfig;

public ITextTokenizerTests(ITestOutputHelper testOutputHelper)
{
Expand All @@ -34,7 +34,7 @@ public ITextTokenizerTests(ITestOutputHelper testOutputHelper)
[InlineData("...___---")]
[InlineData("15 + 6 = 21 && 68 * 75 = 5100")]
[InlineData(" \n \r\n \t ")]
public void GetTokens_ShouldReturnListOfTokensForInputString(string? text)
public void GetTokens_ShouldReturnListOfTokensForInputString(string text)
{
var tokens = _generator!.GetTokens(text);
var tokensCount = _generator.CountTokens(text);
Expand Down Expand Up @@ -74,7 +74,7 @@ public void GetTokens_ShouldReturnListOfTokensForInputString(string? text)
[Theory]
[InlineData("And a little bit of unicode για να κρατήσουμε τα πράγματα ενδιαφέροντα")]
[InlineData("猫坐在垫子上 😀🤨🤐😏")]
public void GetTokens_Unicode_ShouldReturnListOfTokensForInputString(string? text)
public void GetTokens_Unicode_ShouldReturnListOfTokensForInputString(string text)
{
var tokens = _generator!.GetTokens(text);
var tokensCount = _generator.CountTokens(text);
Expand Down
6 changes: 6 additions & 0 deletions LLama.Unittest/LLama.Unittest.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@

<DownloadFile SourceUrl="https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf" DestinationFolder="Models" DestinationFileName="Llama-3.2-1B-Instruct-Q4_0.gguf" SkipUnchangedFiles="true">
</DownloadFile>

<DownloadFile SourceUrl="https://huggingface.co/HuggingFaceTB/smollm-360M-instruct-v0.2-Q8_0-GGUF/resolve/main/smollm-360m-instruct-add-basics-q8_0.gguf" DestinationFolder="Models" DestinationFileName="smollm-360m-instruct-add-basics-q8_0.gguf" SkipUnchangedFiles="true">
</DownloadFile>

<DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf" DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf" SkipUnchangedFiles="true">
</DownloadFile>
Expand Down Expand Up @@ -60,6 +63,9 @@
<None Update="Models\Llama-3.2-1B-Instruct-Q4_0.gguf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Models\smollm-360m-instruct-add-basics-q8_0.gguf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Models\llava-v1.6-mistral-7b.Q3_K_XS.gguf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
Expand Down
18 changes: 5 additions & 13 deletions LLama.Unittest/LLamaContextTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public sealed class LLamaContextTests

public LLamaContextTests()
{
var @params = new ModelParams(Constants.GenerativeModelPath)
var @params = new ModelParams(Constants.GenerativeModelPath2)
{
ContextSize = 128,
GpuLayerCount = Constants.CIGpuLayerCount,
Expand All @@ -30,16 +30,16 @@ public void Dispose()
// Verifies the context size, embedding size and vocabulary count reported by _context.
// NOTE(review): EmbeddingSize and Vocab.Count are each asserted against two different values
// below; this looks like the old and new lines of a diff shown together - confirm which pair
// is current.
public void CheckProperties()
{
Assert.Equal(128u, _context.ContextSize);
Assert.Equal(2048, _context.EmbeddingSize);
Assert.Equal(128256, _context.Vocab.Count);
Assert.Equal(960, _context.EmbeddingSize);
Assert.Equal(49152, _context.Vocab.Count);
}

// Tokenizes a fixed phrase with _context.Tokenize and compares the result with a hard-coded
// list of token ids.
// NOTE(review): `tokens` is declared twice and two expectations are asserted; this looks like
// the old and new lines of a diff shown together - confirm which call/expectation is current.
[Fact]
public void Tokenize()
{
var tokens = _context.Tokenize("The quick brown fox", true);
var tokens = _context.Tokenize("The quick brown fox");

Assert.Equal(new LLamaToken[] { 128000, 791, 4062, 14198, 39935 }, tokens);
Assert.Equal(new LLamaToken[] { 504, 2365, 6354, 16438 }, tokens);
}

[Fact]
Expand Down Expand Up @@ -73,14 +73,6 @@ public void TokenizeRoundtripSpecialStrings()
}
}

/// <summary>
/// Tokenizes a fixed phrase with the second <c>Tokenize</c> argument set to <c>false</c>
/// and checks that the exact expected token ids come back.
/// </summary>
[Fact]
public void TokenizeWithoutBOS()
{
    LLamaToken[] expectedTokens = { 791, 4062, 14198, 39935 };

    var actualTokens = _context.Tokenize("The quick brown fox", false);

    Assert.Equal(expectedTokens, actualTokens);
}

[Fact]
public void TokenizeEmpty()
{
Expand Down
6 changes: 3 additions & 3 deletions LLama.Unittest/LLamaContextWithCustomLoggerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public void Log<TState>(

public LLamaContextWithCustomLoggerTests()
{
var @params = new ModelParams(Constants.GenerativeModelPath)
var @params = new ModelParams(Constants.GenerativeModelPath2)
{
ContextSize = 128,
GpuLayerCount = Constants.CIGpuLayerCount,
Expand Down Expand Up @@ -56,8 +56,8 @@ public void Dispose()
// Verifies the context size, embedding size and vocabulary count reported by _context.
// NOTE(review): EmbeddingSize and Vocab.Count are each asserted against two different values
// below; this looks like the old and new lines of a diff shown together - confirm which pair
// is current.
public void CheckProperties()
{
Assert.Equal(128u, _context.ContextSize);
Assert.Equal(2048, _context.EmbeddingSize);
Assert.Equal(128256, _context.Vocab.Count);
Assert.Equal(960, _context.EmbeddingSize);
Assert.Equal(49152, _context.Vocab.Count);
}
}
}
8 changes: 3 additions & 5 deletions LLama.Unittest/LLamaEmbedderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@ private async Task CompareEmbeddings(string modelPath)
Assert.All(cat.Zip(embeddings[0].Vector.Span.EuclideanNormalization()), e => Assert.Equal(e.First, e.Second, 0.001));
Assert.All(kitten.Zip(embeddings[1].Vector.Span.EuclideanNormalization()), e => Assert.Equal(e.First, e.Second, 0.001));
Assert.All(spoon.Zip(embeddings[2].Vector.Span.EuclideanNormalization()), e => Assert.Equal(e.First, e.Second, 0.001));
Assert.True(embeddings.Usage?.InputTokenCount is 16 or 19);
Assert.True(embeddings.Usage?.TotalTokenCount is 16 or 19);

_testOutputHelper.WriteLine($"Cat = [{string.Join(",", cat.AsMemory().Slice(0, 7).ToArray())}...]");
_testOutputHelper.WriteLine($"Kitten = [{string.Join(",", kitten.AsMemory().Slice(0, 7).ToArray())}...]");
Expand All @@ -84,9 +82,9 @@ public async Task EmbedCompareEmbeddingModel()
}

// Runs the CompareEmbeddings helper against a generative model path taken from Constants.
// NOTE(review): two method signatures and two CompareEmbeddings calls appear below; this looks
// like the old and new lines of a diff shown together - confirm which name/path is current.
[Fact]
public async Task EmbedCompareGenerateModel()
public async Task EmbedCompareGenerativeModel()
{
await CompareEmbeddings(Constants.GenerativeModelPath);
await CompareEmbeddings(Constants.GenerativeModelPath2);
}

private async Task NonPooledEmbeddings(string modelPath)
Expand Down Expand Up @@ -115,6 +113,6 @@ public async Task EmbeddingModelNonPooledEmbeddings()
// Runs the NonPooledEmbeddings helper against a generative model path taken from Constants.
// NOTE(review): the helper is awaited with two different paths below; this looks like the old
// and new lines of a diff shown together - confirm which path is current.
[Fact]
public async Task GenerativeModelNonPooledEmbeddings()
{
await NonPooledEmbeddings(Constants.GenerativeModelPath);
await NonPooledEmbeddings(Constants.GenerativeModelPath2);
}
}
4 changes: 2 additions & 2 deletions LLama.Unittest/MemoryDisposalTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ public class MemoryDisposalTests
[Fact]
public void ModelDisposal()
{
var @params = new ModelParams(Constants.GenerativeModelPath)
var @params = new ModelParams(Constants.GenerativeModelPath2)
{
ContextSize = 2048,
GpuLayerCount = 0,
Expand All @@ -22,7 +22,7 @@ public void ModelDisposal()
[Fact]
public void ContextDisposal()
{
var @params = new ModelParams(Constants.GenerativeModelPath)
var @params = new ModelParams(Constants.GenerativeModelPath2)
{
ContextSize = 128,
GpuLayerCount = 0,
Expand Down
8 changes: 2 additions & 6 deletions LLama.Unittest/Native/SafeLlamaModelHandleTests.cs
Original file line number Diff line number Diff line change
@@ -1,25 +1,21 @@
using System.Text;
using LLama.Common;
using LLama.Native;
using LLama.Extensions;

namespace LLama.Unittest.Native;

public class SafeLlamaModelHandleTests
{
private readonly LLamaWeights _model;
private readonly SafeLlamaModelHandle TestableHandle;

// Test setup: builds ModelParams for a generative model path with a context size of 1, loads
// the weights into _model and stores the model's native handle in TestableHandle.
// NOTE(review): `@params` is declared twice below; this looks like the old and new lines of a
// diff shown together - confirm which model path is current.
public SafeLlamaModelHandleTests()
{
var @params = new ModelParams(Constants.GenerativeModelPath)
var @params = new ModelParams(Constants.GenerativeModelPath2)
{
ContextSize = 1,
GpuLayerCount = Constants.CIGpuLayerCount
};
_model = LLamaWeights.LoadFromFile(@params);

TestableHandle = _model.NativeHandle;
}

[Fact]
Expand All @@ -29,7 +25,7 @@ public void MetadataValByKey_ReturnsCorrectly()
var template = _model.NativeHandle.MetadataValueByKey(key);
var name = Encoding.UTF8.GetStringFromSpan(template!.Value.Span);

const string expected = "Llama 3.2 1B Instruct";
const string expected = "SmolLM 360M";
Assert.Equal(expected, name);

var metadataLookup = _model.Metadata[key];
Expand Down
2 changes: 1 addition & 1 deletion LLama.Unittest/SamplingTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public class SamplingTests
public SamplingTests(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;
_params = new ModelParams(Constants.GenerativeModelPath) {
_params = new ModelParams(Constants.GenerativeModelPath2) {
ContextSize = 200,
BatchSize = 200,
GpuLayerCount = Constants.CIGpuLayerCount,
Expand Down
2 changes: 1 addition & 1 deletion LLama.Unittest/StatelessExecutorTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public class StatelessExecutorTest
public StatelessExecutorTest(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;
_params = new ModelParams(Constants.GenerativeModelPath)
_params = new ModelParams(Constants.GenerativeModelPath2)
{
ContextSize = 60,
BatchSize = 2,
Expand Down
4 changes: 2 additions & 2 deletions LLama.Unittest/StreamingTextDecoderTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using System.Text;
using System.Text;
using LLama.Common;
using Xunit.Abstractions;

Expand All @@ -14,7 +14,7 @@ public class StreamingTextDecoderTests
public StreamingTextDecoderTests(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;
_params = new ModelParams(Constants.GenerativeModelPath);
_params = new ModelParams(Constants.GenerativeModelPath2);
_model = LLamaWeights.LoadFromFile(_params);
}

Expand Down
72 changes: 29 additions & 43 deletions LLama.Unittest/TemplateTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public sealed class TemplateTests
public TemplateTests(ITestOutputHelper output)
{
_output = output;
var @params = new ModelParams(Constants.GenerativeModelPath)
var @params = new ModelParams(Constants.GenerativeModelPath2)
{
ContextSize = 1,
GpuLayerCount = Constants.CIGpuLayerCount
Expand Down Expand Up @@ -55,14 +55,16 @@ public void BasicTemplate()
Assert.Equal(8, templater.Count);

var templateResult = Encoding.UTF8.GetString(dest);
const string expected = "<|start_header_id|>assistant<|end_header_id|>\n\nhello<|eot_id|>"
+ "<|start_header_id|>user<|end_header_id|>\n\nworld<|eot_id|>"
+ "<|start_header_id|>assistant<|end_header_id|>\n\n111<|eot_id|>"
+ "<|start_header_id|>user<|end_header_id|>\n\naaa<|eot_id|>"
+ "<|start_header_id|>assistant<|end_header_id|>\n\n222<|eot_id|>"
+ "<|start_header_id|>user<|end_header_id|>\n\nbbb<|eot_id|>"
+ "<|start_header_id|>assistant<|end_header_id|>\n\n333<|eot_id|>"
+ "<|start_header_id|>user<|end_header_id|>\n\nccc<|eot_id|>";
const string expected = "<|im_start|>assistant\nhello<|im_end|>\n" +
"<|im_start|>user\nworld<|im_end|>\n" +
"<|im_start|>assistant\n111<|im_end|>\n" +
"<|im_start|>user\naaa<|im_end|>\n" +
"<|im_start|>assistant\n222<|im_end|>\n" +
"<|im_start|>user\nbbb<|im_end|>\n" +
"<|im_start|>assistant\n" +
"333<|im_end|>\n" +
"<|im_start|>user\n" +
"ccc<|im_end|>\n";

var eq = expected == templateResult;
Assert.Equal(expected, templateResult);
Expand Down Expand Up @@ -131,15 +133,23 @@ public void BasicTemplateWithAddAssistant()
Assert.Equal(8, templater.Count);

var templateResult = Encoding.UTF8.GetString(dest);
const string expected = "<|start_header_id|>assistant<|end_header_id|>\n\nhello<|eot_id|>"
+ "<|start_header_id|>user<|end_header_id|>\n\nworld<|eot_id|>"
+ "<|start_header_id|>assistant<|end_header_id|>\n\n111<|eot_id|>"
+ "<|start_header_id|>user<|end_header_id|>\n\naaa<|eot_id|>"
+ "<|start_header_id|>assistant<|end_header_id|>\n\n222<|eot_id|>"
+ "<|start_header_id|>user<|end_header_id|>\n\nbbb<|eot_id|>"
+ "<|start_header_id|>assistant<|end_header_id|>\n\n333<|eot_id|>"
+ "<|start_header_id|>user<|end_header_id|>\n\nccc<|eot_id|>"
+ "<|start_header_id|>assistant<|end_header_id|>\n\n";
const string expected = "<|im_start|>assistant\n" +
"hello<|im_end|>\n" +
"<|im_start|>user\n" +
"world<|im_end|>\n" +
"<|im_start|>assistant\n" +
"111<|im_end|>\n" +
"<|im_start|>user\n" +
"aaa<|im_end|>\n" +
"<|im_start|>assistant\n" +
"222<|im_end|>\n" +
"<|im_start|>user\n" +
"bbb<|im_end|>\n" +
"<|im_start|>assistant\n" +
"333<|im_end|>\n" +
"<|im_start|>user\n" +
"ccc<|im_end|>\n" +
"<|im_start|>assistant\n";

Assert.Equal(expected, templateResult);
}
Expand Down Expand Up @@ -241,31 +251,7 @@ public void Clear_ResetsTemplateState()
var dest = templater.Apply();
var templateResult = Encoding.UTF8.GetString(dest);

const string expectedTemplate = $"<|start_header_id|>user<|end_header_id|>\n\n{userData}<|eot_id|>";
const string expectedTemplate = $"<|im_start|>user\n{userData}<|im_end|>\n";
Assert.Equal(expectedTemplate, templateResult);
}

/// <summary>
/// Decodes a single token into a string through the model's native handle, writing each
/// step to the test output.
/// </summary>
/// <param name="token">The token to decode.</param>
/// <returns>
/// The UTF-8 decoded text of the token, or <c>null</c> when the native call reports a
/// length of zero or less.
/// </returns>
private string? ConvertTokenToString(LLamaToken token)
{
    _output.WriteLine($"ConvertTokenToString: {token}");

    // Start with a small stack buffer and try to decode into it.
    const int initialCapacity = 32;
    Span<byte> bytes = stackalloc byte[initialCapacity];
    var length = _model.NativeHandle.TokenToSpan(token, bytes, 0, true);

    _output.WriteLine($"tokenLength = {length}");
    if (length <= 0)
    {
        return null;
    }

    _output.WriteLine($"tokenLength = {length}, buffSize = {initialCapacity}");
    if (length > initialCapacity)
    {
        // The reported length exceeds the first buffer: allocate one of exactly that
        // size and decode again.
        bytes = stackalloc byte[(int)length];
        _ = _model.NativeHandle.TokenToSpan(token, bytes, 0, true);
    }

    return Encoding.UTF8.GetStringFromSpan(bytes.Slice(0, (int)length));
}
}
Loading