SciSharp · martindevans · Feb 18, 2025 · Feb 18, 2025
diff --git a/LLama.Unittest/BasicTest.cs b/LLama.Unittest/BasicTest.cs
@@ -13,7 +13,7 @@ public sealed class BasicTest
         public BasicTest(ITestOutputHelper testOutputHelper)
         {
             _testOutputHelper = testOutputHelper;
-            _params = new ModelParams(Constants.GenerativeModelPath)
+            _params = new ModelParams(Constants.GenerativeModelPath2)
             {
                 ContextSize = 128,
                 GpuLayerCount = Constants.CIGpuLayerCount
@@ -26,38 +26,30 @@ public void Dispose()
             _model.Dispose();
         }
 
-        [Fact]
-        public void BasicModelProperties()
-        {
-            Assert.Equal(128256, _model.Vocab.Count);
-            Assert.Equal(131072, _model.ContextSize);
-            Assert.Equal(2048, _model.EmbeddingSize);
-        }
-
         [Fact]
         public void AdvancedModelProperties()
         {
-            // These are the keys in the llama 7B test model. This will need changing if
+            // These are the keys in the SmolLM 360M test model. This will need changing if
             // tests are switched to use a new model!
             var expected = new Dictionary<string, string>
             {
-                { "general.name", "Llama 3.2 1B Instruct" },
+                { "general.name", "SmolLM 360M" },
                 { "general.architecture", "llama" },
                 { "general.quantization_version", "2" },
-                { "general.file_type", "2" },
+                { "general.file_type", "7" },
 
-                { "llama.context_length", "131072" },
+                { "llama.context_length", "2048" },
                 { "llama.rope.dimension_count", "64" },
-                { "llama.embedding_length", "2048" },
-                { "llama.block_count", "16" },
-                { "llama.feed_forward_length", "8192" },
-                { "llama.attention.head_count", "32" },
-                { "llama.attention.head_count_kv", "8" },
+                { "llama.embedding_length", "960" },
+                { "llama.block_count", "32" },
+                { "llama.feed_forward_length", "2560" },
+                { "llama.attention.head_count", "15" },
+                { "llama.attention.head_count_kv", "5" },
                 { "llama.attention.layer_norm_rms_epsilon", "0.000010" },
 
-                { "tokenizer.ggml.eos_token_id", "128009" },
+                { "tokenizer.ggml.eos_token_id", "2" },
                 { "tokenizer.ggml.model", "gpt2" },
-                { "tokenizer.ggml.bos_token_id", "128000" },
+                { "tokenizer.ggml.bos_token_id", "1" },
             };
 
             // Print all keys

diff --git a/LLama.Unittest/Constants.cs b/LLama.Unittest/Constants.cs
@@ -5,6 +5,7 @@ namespace LLama.Unittest
     internal static class Constants
     {
         public static readonly string GenerativeModelPath = "Models/Llama-3.2-1B-Instruct-Q4_0.gguf";
+        public static readonly string GenerativeModelPath2 = "Models/smollm-360m-instruct-add-basics-q8_0.gguf";
         public static readonly string EmbeddingModelPath = "Models/all-MiniLM-L12-v2.Q8_0.gguf";
 
         public static readonly string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";

diff --git a/LLama.Unittest/KernelMemory/ITextTokenizerTests.cs b/LLama.Unittest/KernelMemory/ITextTokenizerTests.cs
@@ -14,8 +14,8 @@ public abstract class ITextTokenizerTests
         protected ITextTokenizer? _generator;
 #pragma warning restore KMEXP00 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
 
-        protected InferenceParams _infParams;
-        protected LLamaSharpConfig _lsConfig;
+        protected readonly InferenceParams _infParams;
+        protected readonly LLamaSharpConfig _lsConfig;
 
         public ITextTokenizerTests(ITestOutputHelper testOutputHelper)
         {
@@ -34,7 +34,7 @@ public ITextTokenizerTests(ITestOutputHelper testOutputHelper)
         [InlineData("...___---")]
         [InlineData("15 + 6 = 21 && 68 * 75 = 5100")]
         [InlineData("  \n  \r\n  \t   ")]
-        public void GetTokens_ShouldReturnListOfTokensForInputString(string? text)
+        public void GetTokens_ShouldReturnListOfTokensForInputString(string text)
         {
             var tokens = _generator!.GetTokens(text);
             var tokensCount = _generator.CountTokens(text);
@@ -74,7 +74,7 @@ public void GetTokens_ShouldReturnListOfTokensForInputString(string? text)
         [Theory]
         [InlineData("And a little bit of unicode για να κρατήσουμε τα πράγματα ενδιαφέροντα")]
         [InlineData("猫坐在垫子上 😀🤨🤐😏")]
-        public void GetTokens_Unicode_ShouldReturnListOfTokensForInputString(string? text)
+        public void GetTokens_Unicode_ShouldReturnListOfTokensForInputString(string text)
         {
             var tokens = _generator!.GetTokens(text);
             var tokensCount = _generator.CountTokens(text);

diff --git a/LLama.Unittest/LLama.Unittest.csproj b/LLama.Unittest/LLama.Unittest.csproj
@@ -31,6 +31,9 @@
 
     <DownloadFile SourceUrl="https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf" DestinationFolder="Models" DestinationFileName="Llama-3.2-1B-Instruct-Q4_0.gguf" SkipUnchangedFiles="true">
 	</DownloadFile>
+
+    <DownloadFile SourceUrl="https://huggingface.co/HuggingFaceTB/smollm-360M-instruct-v0.2-Q8_0-GGUF/resolve/main/smollm-360m-instruct-add-basics-q8_0.gguf" DestinationFolder="Models" DestinationFileName="smollm-360m-instruct-add-basics-q8_0.gguf" SkipUnchangedFiles="true">
+    </DownloadFile>
 
 	<DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf" DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf" SkipUnchangedFiles="true">
 	</DownloadFile>
@@ -60,6 +63,9 @@
     <None Update="Models\Llama-3.2-1B-Instruct-Q4_0.gguf">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </None>
+    <None Update="Models\smollm-360m-instruct-add-basics-q8_0.gguf">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
     <None Update="Models\llava-v1.6-mistral-7b.Q3_K_XS.gguf">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </None>

diff --git a/LLama.Unittest/LLamaContextTests.cs b/LLama.Unittest/LLamaContextTests.cs
@@ -11,7 +11,7 @@ public sealed class LLamaContextTests
 
         public LLamaContextTests()
         {
-            var @params = new ModelParams(Constants.GenerativeModelPath)
+            var @params = new ModelParams(Constants.GenerativeModelPath2)
             {
                 ContextSize = 128,
                 GpuLayerCount = Constants.CIGpuLayerCount,
@@ -30,16 +30,16 @@ public void Dispose()
         public void CheckProperties()
         {
             Assert.Equal(128u, _context.ContextSize);
-            Assert.Equal(2048, _context.EmbeddingSize);
-            Assert.Equal(128256, _context.Vocab.Count);
+            Assert.Equal(960, _context.EmbeddingSize);
+            Assert.Equal(49152, _context.Vocab.Count);
         }
 
         [Fact]
         public void Tokenize()
         {
-            var tokens = _context.Tokenize("The quick brown fox", true);
+            var tokens = _context.Tokenize("The quick brown fox");
 
-            Assert.Equal(new LLamaToken[] { 128000, 791, 4062, 14198, 39935 }, tokens);
+            Assert.Equal(new LLamaToken[] { 504, 2365, 6354, 16438 }, tokens);
         }
 
         [Fact]
@@ -73,14 +73,6 @@ public void TokenizeRoundtripSpecialStrings()
             }
         }
 
-        [Fact]
-        public void TokenizeWithoutBOS()
-        {
-            var tokens = _context.Tokenize("The quick brown fox", false);
-
-            Assert.Equal(new LLamaToken[] { 791, 4062, 14198, 39935 }, tokens);
-        }
-
         [Fact]
         public void TokenizeEmpty()
         {

diff --git a/LLama.Unittest/LLamaContextWithCustomLoggerTests.cs b/LLama.Unittest/LLamaContextWithCustomLoggerTests.cs
@@ -28,7 +28,7 @@ public void Log<TState>(
 
         public LLamaContextWithCustomLoggerTests()
         {
-            var @params = new ModelParams(Constants.GenerativeModelPath)
+            var @params = new ModelParams(Constants.GenerativeModelPath2)
             {
                 ContextSize = 128,
                 GpuLayerCount = Constants.CIGpuLayerCount,
@@ -56,8 +56,8 @@ public void Dispose()
         public void CheckProperties()
         {
             Assert.Equal(128u, _context.ContextSize);
-            Assert.Equal(2048, _context.EmbeddingSize);
-            Assert.Equal(128256, _context.Vocab.Count);
+            Assert.Equal(960, _context.EmbeddingSize);
+            Assert.Equal(49152, _context.Vocab.Count);
         }
     }
 }
diff --git a/LLama.Unittest/LLamaEmbedderTests.cs b/LLama.Unittest/LLamaEmbedderTests.cs
@@ -60,8 +60,6 @@ private async Task CompareEmbeddings(string modelPath)
         Assert.All(cat.Zip(embeddings[0].Vector.Span.EuclideanNormalization()), e => Assert.Equal(e.First, e.Second, 0.001));
         Assert.All(kitten.Zip(embeddings[1].Vector.Span.EuclideanNormalization()), e => Assert.Equal(e.First, e.Second, 0.001));
         Assert.All(spoon.Zip(embeddings[2].Vector.Span.EuclideanNormalization()), e => Assert.Equal(e.First, e.Second, 0.001));
-        Assert.True(embeddings.Usage?.InputTokenCount is 16 or 19);
-        Assert.True(embeddings.Usage?.TotalTokenCount is 16 or 19);
 
         _testOutputHelper.WriteLine($"Cat    = [{string.Join(",", cat.AsMemory().Slice(0, 7).ToArray())}...]");
         _testOutputHelper.WriteLine($"Kitten = [{string.Join(",", kitten.AsMemory().Slice(0, 7).ToArray())}...]");
@@ -84,9 +82,9 @@ public async Task EmbedCompareEmbeddingModel()
     }
 
     [Fact]
-    public async Task EmbedCompareGenerateModel()
+    public async Task EmbedCompareGenerativeModel()
     {
-        await CompareEmbeddings(Constants.GenerativeModelPath);
+        await CompareEmbeddings(Constants.GenerativeModelPath2);
     }
 
     private async Task NonPooledEmbeddings(string modelPath)
@@ -115,6 +113,6 @@ public async Task EmbeddingModelNonPooledEmbeddings()
     [Fact]
     public async Task GenerativeModelNonPooledEmbeddings()
     {
-        await NonPooledEmbeddings(Constants.GenerativeModelPath);
+        await NonPooledEmbeddings(Constants.GenerativeModelPath2);
     }
 }
diff --git a/LLama.Unittest/MemoryDisposalTests.cs b/LLama.Unittest/MemoryDisposalTests.cs
@@ -7,7 +7,7 @@ public class MemoryDisposalTests
     [Fact]
     public void ModelDisposal()
     {
-        var @params = new ModelParams(Constants.GenerativeModelPath)
+        var @params = new ModelParams(Constants.GenerativeModelPath2)
         {
             ContextSize = 2048,
             GpuLayerCount = 0,
@@ -22,7 +22,7 @@ public void ModelDisposal()
     [Fact]
     public void ContextDisposal()
     {
-        var @params = new ModelParams(Constants.GenerativeModelPath)
+        var @params = new ModelParams(Constants.GenerativeModelPath2)
         {
             ContextSize = 128,
             GpuLayerCount = 0,            

diff --git a/LLama.Unittest/Native/SafeLlamaModelHandleTests.cs b/LLama.Unittest/Native/SafeLlamaModelHandleTests.cs
@@ -1,25 +1,21 @@
 using System.Text;
 using LLama.Common;
-using LLama.Native;
 using LLama.Extensions;
 
 namespace LLama.Unittest.Native;
 
 public class SafeLlamaModelHandleTests
 {
     private readonly LLamaWeights _model;
-    private readonly SafeLlamaModelHandle TestableHandle;
 
     public SafeLlamaModelHandleTests()
     {
-        var @params = new ModelParams(Constants.GenerativeModelPath)
+        var @params = new ModelParams(Constants.GenerativeModelPath2)
         {
             ContextSize = 1,
             GpuLayerCount = Constants.CIGpuLayerCount
         };
         _model = LLamaWeights.LoadFromFile(@params);
-
-        TestableHandle = _model.NativeHandle;
     }
 
     [Fact]
@@ -29,7 +25,7 @@ public void MetadataValByKey_ReturnsCorrectly()
         var template = _model.NativeHandle.MetadataValueByKey(key);
         var name = Encoding.UTF8.GetStringFromSpan(template!.Value.Span);
 
-        const string expected = "Llama 3.2 1B Instruct";
+        const string expected = "SmolLM 360M";
         Assert.Equal(expected, name);
 
         var metadataLookup = _model.Metadata[key];

diff --git a/LLama.Unittest/SamplingTests.cs b/LLama.Unittest/SamplingTests.cs
@@ -22,7 +22,7 @@ public class SamplingTests
         public SamplingTests(ITestOutputHelper testOutputHelper)
         {
             _testOutputHelper = testOutputHelper;
-            _params = new ModelParams(Constants.GenerativeModelPath) {
+            _params = new ModelParams(Constants.GenerativeModelPath2) {
                 ContextSize = 200,
                 BatchSize = 200,
                 GpuLayerCount = Constants.CIGpuLayerCount,

diff --git a/LLama.Unittest/StatelessExecutorTest.cs b/LLama.Unittest/StatelessExecutorTest.cs
@@ -15,7 +15,7 @@ public class StatelessExecutorTest
         public StatelessExecutorTest(ITestOutputHelper testOutputHelper)
         {
             _testOutputHelper = testOutputHelper;
-            _params = new ModelParams(Constants.GenerativeModelPath)
+            _params = new ModelParams(Constants.GenerativeModelPath2)
             {
                 ContextSize = 60,
                 BatchSize = 2,

diff --git a/LLama.Unittest/StreamingTextDecoderTests.cs b/LLama.Unittest/StreamingTextDecoderTests.cs
@@ -1,4 +1,4 @@
-using System.Text;
+using System.Text;
 using LLama.Common;
 using Xunit.Abstractions;
 
@@ -14,7 +14,7 @@ public class StreamingTextDecoderTests
     public StreamingTextDecoderTests(ITestOutputHelper testOutputHelper)
     {
         _testOutputHelper = testOutputHelper;
-        _params = new ModelParams(Constants.GenerativeModelPath);
+        _params = new ModelParams(Constants.GenerativeModelPath2);
         _model = LLamaWeights.LoadFromFile(_params);
     }
 

diff --git a/LLama.Unittest/TemplateTests.cs b/LLama.Unittest/TemplateTests.cs
@@ -15,7 +15,7 @@ public sealed class TemplateTests
     public TemplateTests(ITestOutputHelper output)
     {
         _output = output;
-        var @params = new ModelParams(Constants.GenerativeModelPath)
+        var @params = new ModelParams(Constants.GenerativeModelPath2)
         {
             ContextSize = 1,
             GpuLayerCount = Constants.CIGpuLayerCount
@@ -55,14 +55,16 @@ public void BasicTemplate()
         Assert.Equal(8, templater.Count);
 
         var templateResult = Encoding.UTF8.GetString(dest);
-        const string expected = "<|start_header_id|>assistant<|end_header_id|>\n\nhello<|eot_id|>"
-                                    + "<|start_header_id|>user<|end_header_id|>\n\nworld<|eot_id|>"
-                                    + "<|start_header_id|>assistant<|end_header_id|>\n\n111<|eot_id|>"
-                                    + "<|start_header_id|>user<|end_header_id|>\n\naaa<|eot_id|>"
-                                    + "<|start_header_id|>assistant<|end_header_id|>\n\n222<|eot_id|>"
-                                    + "<|start_header_id|>user<|end_header_id|>\n\nbbb<|eot_id|>"
-                                    + "<|start_header_id|>assistant<|end_header_id|>\n\n333<|eot_id|>"
-                                    + "<|start_header_id|>user<|end_header_id|>\n\nccc<|eot_id|>";
+        const string expected = "<|im_start|>assistant\nhello<|im_end|>\n" +
+                                "<|im_start|>user\nworld<|im_end|>\n" +
+                                "<|im_start|>assistant\n111<|im_end|>\n" +
+                                "<|im_start|>user\naaa<|im_end|>\n" +
+                                "<|im_start|>assistant\n222<|im_end|>\n" +
+                                "<|im_start|>user\nbbb<|im_end|>\n" +
+                                "<|im_start|>assistant\n" +
+                                "333<|im_end|>\n" +
+                                "<|im_start|>user\n" +
+                                "ccc<|im_end|>\n";
 
         var eq = expected == templateResult;
         Assert.Equal(expected, templateResult);
@@ -131,15 +133,23 @@ public void BasicTemplateWithAddAssistant()
         Assert.Equal(8, templater.Count);
 
         var templateResult = Encoding.UTF8.GetString(dest);
-        const string expected = "<|start_header_id|>assistant<|end_header_id|>\n\nhello<|eot_id|>"
-                                    + "<|start_header_id|>user<|end_header_id|>\n\nworld<|eot_id|>"
-                                    + "<|start_header_id|>assistant<|end_header_id|>\n\n111<|eot_id|>"
-                                    + "<|start_header_id|>user<|end_header_id|>\n\naaa<|eot_id|>"
-                                    + "<|start_header_id|>assistant<|end_header_id|>\n\n222<|eot_id|>"
-                                    + "<|start_header_id|>user<|end_header_id|>\n\nbbb<|eot_id|>"
-                                    + "<|start_header_id|>assistant<|end_header_id|>\n\n333<|eot_id|>"
-                                    + "<|start_header_id|>user<|end_header_id|>\n\nccc<|eot_id|>"
-                                    + "<|start_header_id|>assistant<|end_header_id|>\n\n";
+        const string expected = "<|im_start|>assistant\n" +
+                                "hello<|im_end|>\n" +
+                                "<|im_start|>user\n" +
+                                "world<|im_end|>\n" +
+                                "<|im_start|>assistant\n" +
+                                "111<|im_end|>\n" +
+                                "<|im_start|>user\n" +
+                                "aaa<|im_end|>\n" +
+                                "<|im_start|>assistant\n" +
+                                "222<|im_end|>\n" +
+                                "<|im_start|>user\n" +
+                                "bbb<|im_end|>\n" +
+                                "<|im_start|>assistant\n" +
+                                "333<|im_end|>\n" +
+                                "<|im_start|>user\n" +
+                                "ccc<|im_end|>\n" +
+                                "<|im_start|>assistant\n";
 
         Assert.Equal(expected, templateResult);
     }
@@ -241,31 +251,7 @@ public void Clear_ResetsTemplateState()
         var dest = templater.Apply();
         var templateResult = Encoding.UTF8.GetString(dest);
 
-        const string expectedTemplate = $"<|start_header_id|>user<|end_header_id|>\n\n{userData}<|eot_id|>";
+        const string expectedTemplate = $"<|im_start|>user\n{userData}<|im_end|>\n";
         Assert.Equal(expectedTemplate, templateResult);
     }
-
-    private string? ConvertTokenToString(LLamaToken token)
-    {
-        _output.WriteLine($"ConvertTokenToString: {token}");
-
-        const int buffSize = 32;
-        Span<byte> buff = stackalloc byte[buffSize];
-        var tokenLength = _model.NativeHandle.TokenToSpan(token, buff, 0, true);
-
-        _output.WriteLine($"tokenLength = {tokenLength}");
-        if (tokenLength <= 0)
-            return null;
-
-        // if the original buffer wasn't large enough, create a new one
-        _output.WriteLine($"tokenLength = {tokenLength}, buffSize = {buffSize}");
-        if (tokenLength > buffSize)
-        {
-            buff = stackalloc byte[(int)tokenLength];
-            _ = _model.NativeHandle.TokenToSpan(token, buff, 0, true);
-        }
-
-        var slice = buff.Slice(0, (int)tokenLength);
-        return Encoding.UTF8.GetStringFromSpan(slice);
-    }
 }