diff --git a/Directory.Build.props b/Directory.Build.props
index 5cde406ee..3737938e1 100644
--- a/Directory.Build.props
+++ b/Directory.Build.props
@@ -2,7 +2,7 @@
- 0.93.0
+ 0.94.0
12
diff --git a/Directory.Packages.props b/Directory.Packages.props
index 9947848b6..1c9cf7265 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -24,7 +24,9 @@
-
+
+
+
diff --git a/KernelMemory.sln b/KernelMemory.sln
index 6bbcc4566..8c2a0c66a 100644
--- a/KernelMemory.sln
+++ b/KernelMemory.sln
@@ -265,7 +265,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Service.AspNetCore", "servi
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "111-dotnet-azure-ai-hybrid-search", "examples\111-dotnet-azure-ai-hybrid-search\111-dotnet-azure-ai-hybrid-search.csproj", "{28534545-CB39-446A-9EB9-A5ABBFE0CFD3}"
EndProject
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "OpenAI.UnitTests", "extensions\OpenAI\OpenAI.UnitTests\OpenAI.UnitTests.csproj", "{8ADA17CD-B779-4817-B10A-E9D7B019088D}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tiktoken.UnitTests", "extensions\Tiktoken\Tiktoken.UnitTests\Tiktoken.UnitTests.csproj", "{8ADA17CD-B779-4817-B10A-E9D7B019088D}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SQLServer", "extensions\SQLServer\SQLServer\SQLServer.csproj", "{B9BE1099-F78F-4A5F-A897-BF2C75E19C57}"
EndProject
@@ -335,6 +335,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AzureOpenAI.FunctionalTests
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "007-dotnet-serverless-azure", "examples\007-dotnet-serverless-azure\007-dotnet-serverless-azure.csproj", "{AF1E12A9-D8A1-4815-995E-C6F7B2022016}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Tiktoken", "extensions\Tiktoken\Tiktoken\Tiktoken.csproj", "{830C91B5-6F8D-4DAD-B1BD-3C2F9DEEC8F6}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -623,6 +625,10 @@ Global
{AF1E12A9-D8A1-4815-995E-C6F7B2022016}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{AF1E12A9-D8A1-4815-995E-C6F7B2022016}.Debug|Any CPU.Build.0 = Debug|Any CPU
{AF1E12A9-D8A1-4815-995E-C6F7B2022016}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {830C91B5-6F8D-4DAD-B1BD-3C2F9DEEC8F6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {830C91B5-6F8D-4DAD-B1BD-3C2F9DEEC8F6}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {830C91B5-6F8D-4DAD-B1BD-3C2F9DEEC8F6}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {830C91B5-6F8D-4DAD-B1BD-3C2F9DEEC8F6}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -723,6 +729,7 @@ Global
{AB097B62-5A0B-4D74-9F8B-A41FE8241447} = {155DA079-E267-49AF-973A-D1D44681970F}
{8E907766-4A7D-46E2-B5E3-EB2994B1AA54} = {3C17F42B-CFC8-4900-8CFB-88936311E919}
{AF1E12A9-D8A1-4815-995E-C6F7B2022016} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841}
+ {830C91B5-6F8D-4DAD-B1BD-3C2F9DEEC8F6} = {155DA079-E267-49AF-973A-D1D44681970F}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {CC136C62-115C-41D1-B414-F9473EFF6EA8}
diff --git a/KernelMemory.sln.DotSettings b/KernelMemory.sln.DotSettings
index e0cd1b828..4d9e859e9 100644
--- a/KernelMemory.sln.DotSettings
+++ b/KernelMemory.sln.DotSettings
@@ -94,6 +94,7 @@
AMQP
API
BOM
+ CL
CORS
DB
DI
diff --git a/examples/002-dotnet-Serverless/Program.cs b/examples/002-dotnet-Serverless/Program.cs
index f0e7d5fbe..ee5f680d6 100644
--- a/examples/002-dotnet-Serverless/Program.cs
+++ b/examples/002-dotnet-Serverless/Program.cs
@@ -57,7 +57,7 @@ public static async Task Main()
var builder = new KernelMemoryBuilder()
.Configure(builder => builder.Services.AddLogging(l =>
{
- l.SetMinimumLevel(LogLevel.Warning);
+ l.SetMinimumLevel(LogLevel.Error);
l.AddSimpleConsole(c => c.SingleLine = true);
}))
.AddSingleton(memoryConfiguration)
diff --git a/examples/002-dotnet-Serverless/appsettings.json b/examples/002-dotnet-Serverless/appsettings.json
index 09477c1be..a8cf55665 100644
--- a/examples/002-dotnet-Serverless/appsettings.json
+++ b/examples/002-dotnet-Serverless/appsettings.json
@@ -1,17 +1,4 @@
{
- "Logging": {
- "LogLevel": {
- "Default": "Warning",
- // Examples: how to handle logs differently by class
- // "Microsoft.KernelMemory.Handlers.TextExtractionHandler": "Information",
- // "Microsoft.KernelMemory.Handlers.TextPartitioningHandler": "Information",
- // "Microsoft.KernelMemory.Handlers.GenerateEmbeddingsHandler": "Information",
- // "Microsoft.KernelMemory.Handlers.SaveEmbeddingsHandler": "Information",
- // "Microsoft.KernelMemory.DocumentStorage.AzureBlobs": "Information",
- // "Microsoft.KernelMemory.Pipeline.Queue.AzureQueues": "Information",
- "Microsoft.AspNetCore": "Warning"
- }
- },
"KernelMemory": {
"Services": {
"AzureAIContentSafety": {
@@ -78,20 +65,30 @@
"Auth": "AzureIdentity",
"Endpoint": "https://<...>.openai.azure.com/",
"APIKey": "",
+ // Your Azure Deployment name
"Deployment": "",
// The max number of tokens supported by model deployed
// See https://learn.microsoft.com/azure/ai-services/openai/concepts/models
"MaxTokenTotal": 8191,
+ // Which tokenizer to use to correctly measure the size of chunks.
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
+ // - Use p50k for the old text-davinci-003 models
+ // - Use cl100k for the old gpt-3.5 and gpt-4 family, and for text embedding models
+ // - Use o200k for the most recent gpt-4o family
+ "Tokenizer": "cl100k",
// The number of dimensions output embeddings should have.
// Only supported in "text-embedding-3" and later models developed with
// MRL, see https://arxiv.org/abs/2205.13147
"EmbeddingDimensions": null,
// How many embeddings to calculate in parallel. The max value depends on
// the model and deployment in use.
- // See also hhttps://learn.microsoft.com/azure/ai-services/openai/reference#embeddings
- "MaxEmbeddingBatchSize": 10,
+ // See https://learn.microsoft.com/azure/ai-services/openai/reference#embeddings
+ "MaxEmbeddingBatchSize": 1,
// How many times to retry in case of throttling.
- "MaxRetries": 10
+ "MaxRetries": 10,
+ // Thumbprints of certificates that should be trusted for HTTPS requests when SSL policy errors are detected.
+ // This should only be used for local development when using a proxy to call the OpenAI endpoints.
+ "TrustedCertificateThumbprints": []
},
"AzureOpenAIText": {
// "ApiKey" or "AzureIdentity"
@@ -104,16 +101,27 @@
// The max number of tokens supported by model deployed
// See https://learn.microsoft.com/azure/ai-services/openai/concepts/models
"MaxTokenTotal": 16384,
+ // Which tokenizer to use to correctly measure the size of chunks.
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
+ // - Use p50k for the old text-davinci-003 models
+ // - Use cl100k for the old gpt-3.5 and gpt-4 family, and for text embedding models
+ // - Use o200k for the most recent gpt-4o family
+ "Tokenizer": "o200k",
// "ChatCompletion" or "TextCompletion"
"APIType": "ChatCompletion",
// How many times to retry in case of throttling.
- "MaxRetries": 10
+ "MaxRetries": 10,
+ // Thumbprints of certificates that should be trusted for HTTPS requests when SSL policy errors are detected.
+ // This should only be used for local development when using a proxy to call the OpenAI endpoints.
+ "TrustedCertificateThumbprints": []
},
"OpenAI": {
// Name of the model used to generate text (text completion or chat completion)
"TextModel": "gpt-4o-mini",
// The max number of tokens supported by the text model.
"TextModelMaxTokenTotal": 16384,
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty for autodetect.
+ "TextModelTokenizer": "",
// What type of text generation, by default autodetect using the model name.
// Possible values: "Auto", "TextCompletion", "Chat"
"TextGenerationType": "Auto",
@@ -122,6 +130,8 @@
// The max number of tokens supported by the embedding model
// See https://platform.openai.com/docs/guides/embeddings/what-are-embeddings
"EmbeddingModelMaxTokenTotal": 8191,
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty for autodetect.
+ "EmbeddingModelTokenizer": "",
// OpenAI API Key
"APIKey": "",
// OpenAI Organization ID (usually empty, unless you have multiple accounts on different orgs)
diff --git a/examples/007-dotnet-serverless-azure/007-dotnet-serverless-azure.csproj b/examples/007-dotnet-serverless-azure/007-dotnet-serverless-azure.csproj
index fca07cf1e..7f9ccf1b6 100644
--- a/examples/007-dotnet-serverless-azure/007-dotnet-serverless-azure.csproj
+++ b/examples/007-dotnet-serverless-azure/007-dotnet-serverless-azure.csproj
@@ -3,6 +3,7 @@
net8.0
enable
+ CS0162;CA2007;CA1303;IDE0058;IDE0008;CA1050;CA1515;
diff --git a/examples/007-dotnet-serverless-azure/Program.cs b/examples/007-dotnet-serverless-azure/Program.cs
index d99e2c41f..b8c356b82 100644
--- a/examples/007-dotnet-serverless-azure/Program.cs
+++ b/examples/007-dotnet-serverless-azure/Program.cs
@@ -15,8 +15,13 @@
public static class Program
{
private static MemoryServerless? s_memory;
+
private const string IndexName = "example006";
+ // Set these booleans to false if you don't want to use these Azure services
+ private const bool UseAzureAIDocIntelligence = true;
+ private const bool UseAzureAIContentSafety = true;
+
public static async Task Main()
{
var memoryConfiguration = new KernelMemoryConfig();
@@ -43,37 +48,49 @@ public static async Task Main()
var builder = new KernelMemoryBuilder()
.WithAzureBlobsDocumentStorage(azureBlobConfig)
- .WithAzureAIDocIntel(azureAIDocIntelConfig)
.WithAzureOpenAITextEmbeddingGeneration(azureOpenAIEmbeddingConfig)
.WithAzureOpenAITextGeneration(azureOpenAITextConfig)
.WithAzureAISearchMemoryDb(azureAISearchConfig)
- .WithAzureAIContentSafetyModeration(azureAIContentSafetyConfig)
+ // .WithAzureAIDocIntel(azureAIDocIntelConfig) // see below
+ // .WithAzureAIContentSafetyModeration(azureAIContentSafetyConfig) // see below
.Configure(builder => builder.Services.AddLogging(l =>
{
- l.SetMinimumLevel(LogLevel.Warning);
+ l.SetMinimumLevel(LogLevel.Error);
l.AddSimpleConsole(c => c.SingleLine = true);
}));
+ // We split this builder code out in case you don't have these Azure services
+ if (UseAzureAIDocIntelligence) { builder.WithAzureAIDocIntel(azureAIDocIntelConfig); }
+
+ if (UseAzureAIContentSafety) { builder.WithAzureAIContentSafetyModeration(azureAIContentSafetyConfig); }
+
s_memory = builder.Build();
- await StoreWebPage();
- await StoreImage();
+ // ====== Store some data ======
- // Test 1
+ await StoreWebPageAsync(); // Works with Azure AI Search and Azure OpenAI
+ await StoreImageAsync(); // Works only if Azure AI Document Intelligence is used
+
+ // ====== Answer some questions ======
+
+ // When using hybrid search, relevance is much lower than cosine similarity
+ var minRelevance = azureAISearchConfig.UseHybridSearch ? 0 : 0.5;
+
+ // Test 1 (answer from the web page)
var question = "What's Kernel Memory?";
Console.WriteLine($"Question: {question}");
- var answer = await s_memory.AskAsync(question, minRelevance: 0.5, index: IndexName);
+ var answer = await s_memory.AskAsync(question, minRelevance: minRelevance, index: IndexName);
Console.WriteLine($"Answer: {answer.Result}\n\n");
- // Test 2
+ // Test 2 (requires Azure AI Document Intelligence to have parsed the image)
question = "Which conference is Microsoft sponsoring?";
Console.WriteLine($"Question: {question}");
- answer = await s_memory.AskAsync(question, minRelevance: 0.5, index: IndexName);
+ answer = await s_memory.AskAsync(question, minRelevance: minRelevance, index: IndexName);
Console.WriteLine($"Answer: {answer.Result}\n\n");
}
// Downloading web pages
- private static async Task StoreWebPage()
+ private static async Task StoreWebPageAsync()
{
const string DocId = "webPage1";
if (!await s_memory!.IsDocumentReadyAsync(DocId, index: IndexName))
@@ -87,9 +104,11 @@ private static async Task StoreWebPage()
}
}
- // Extract memory from images (OCR required)
- private static async Task StoreImage()
+ // Extract memory from images (requires Azure AI Document Intelligence)
+ private static async Task StoreImageAsync()
{
+ if (!UseAzureAIDocIntelligence) { return; }
+
const string DocId = "img001";
if (!await s_memory!.IsDocumentReadyAsync(DocId, index: IndexName))
{
diff --git a/examples/210-KM-without-builder/Program.cs b/examples/210-KM-without-builder/Program.cs
index 888446b9b..db0c3a829 100644
--- a/examples/210-KM-without-builder/Program.cs
+++ b/examples/210-KM-without-builder/Program.cs
@@ -4,7 +4,6 @@
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.AI;
using Microsoft.KernelMemory.AI.AzureOpenAI;
-using Microsoft.KernelMemory.AI.OpenAI;
using Microsoft.KernelMemory.Configuration;
using Microsoft.KernelMemory.Context;
using Microsoft.KernelMemory.DataFormats;
@@ -79,11 +78,12 @@ public static async Task Main()
var promptProvider = new EmbeddedPromptProvider();
// AI dependencies
- var tokenizer = new GPT4oTokenizer();
+ var tokenizerForChat = new O200KTokenizer();
+ var tokenizerForEmbeddings = new CL100KTokenizer();
var embeddingGeneratorHttpClient = new HttpClient();
- var embeddingGenerator = new AzureOpenAITextEmbeddingGenerator(azureOpenAIEmbeddingConfig, tokenizer, loggerFactory, embeddingGeneratorHttpClient);
+ var embeddingGenerator = new AzureOpenAITextEmbeddingGenerator(azureOpenAIEmbeddingConfig, tokenizerForEmbeddings, loggerFactory, embeddingGeneratorHttpClient);
var textGeneratorHttpClient = new HttpClient();
- var textGenerator = new AzureOpenAITextGenerator(azureOpenAITextConfig, tokenizer, loggerFactory, textGeneratorHttpClient);
+ var textGenerator = new AzureOpenAITextGenerator(azureOpenAITextConfig, tokenizerForChat, loggerFactory, textGeneratorHttpClient);
var contentModeration = new AzureAIContentSafetyModeration(azureAIContentSafetyModerationConfig, loggerFactory);
// Storage
diff --git a/examples/210-KM-without-builder/appsettings.json b/examples/210-KM-without-builder/appsettings.json
index 331025b97..e283025ed 100644
--- a/examples/210-KM-without-builder/appsettings.json
+++ b/examples/210-KM-without-builder/appsettings.json
@@ -217,7 +217,7 @@
// the same line verbatim.
"FrequencyPenalty": 0,
// Sequences where the completion will stop generating further tokens.
- "StopSequences": []
+ "StopSequences": [],
// Modify the likelihood of specified tokens appearing in the completion.
//"TokenSelectionBiases": { }
// Whether to check is the generated answers are safe.
@@ -232,6 +232,8 @@
"ApiKey": "",
// See https://docs.anthropic.com/claude/docs/models-overview for list of models and details
"TextModelName": "claude-3-haiku-20240307",
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
+ "Tokenizer": "cl100k",
// How many tokens the model can receive in input and generate in output
// See https://docs.anthropic.com/claude/docs/models-overview
"MaxTokenIn": 200000,
@@ -297,6 +299,12 @@
// The max number of tokens supported by model deployed
// See https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
"MaxTokenTotal": 8191,
+ // Which tokenizer to use to correctly measure the size of chunks.
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
+ // - Use p50k for the old text-davinci-003 models
- // - Use cl100k for the old gpt-3.5 and gpt-4 family, and for text embedding models
+ // - Use o200k for the most recent gpt-4o family
+ "Tokenizer": "cl100k",
// The number of dimensions output embeddings should have.
// Only supported in "text-embedding-3" and later models developed with
// MRL, see https://arxiv.org/abs/2205.13147
@@ -319,6 +327,12 @@
// The max number of tokens supported by model deployed
// See https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
"MaxTokenTotal": 16384,
+ // Which tokenizer to use to correctly measure the size of chunks.
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
+ // - Use p50k for the old text-davinci-003 models
- // - Use cl100k for the old gpt-3.5 and gpt-4 family, and for text embedding models
+ // - Use o200k for the most recent gpt-4o family
+ "Tokenizer": "o200k",
// "ChatCompletion" or "TextCompletion"
"APIType": "ChatCompletion",
// How many times to retry in case of throttling.
@@ -389,6 +403,8 @@
"TextModel": "gpt-4o-mini",
// The max number of tokens supported by the text model.
"TextModelMaxTokenTotal": 16384,
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty for autodetect.
+ "TextModelTokenizer": "",
// What type of text generation, by default autodetect using the model name.
// Possible values: "Auto", "TextCompletion", "Chat"
"TextGenerationType": "Auto",
@@ -397,6 +413,8 @@
// The max number of tokens supported by the embedding model
// See https://platform.openai.com/docs/guides/embeddings/what-are-embeddings
"EmbeddingModelMaxTokenTotal": 8191,
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty for autodetect.
+ "EmbeddingModelTokenizer": "",
// OpenAI API Key
"APIKey": "",
// OpenAI Organization ID (usually empty, unless you have multiple accounts on different orgs)
diff --git a/examples/212-dotnet-ollama/Program.cs b/examples/212-dotnet-ollama/Program.cs
index c7492564c..bb6ae8fda 100644
--- a/examples/212-dotnet-ollama/Program.cs
+++ b/examples/212-dotnet-ollama/Program.cs
@@ -1,8 +1,8 @@
// Copyright (c) Microsoft. All rights reserved.
using Microsoft.KernelMemory;
+using Microsoft.KernelMemory.AI;
using Microsoft.KernelMemory.AI.Ollama;
-using Microsoft.KernelMemory.AI.OpenAI;
using Microsoft.KernelMemory.Context;
using Microsoft.KernelMemory.Diagnostics;
@@ -36,8 +36,8 @@ public static async Task Main()
};
var memory = new KernelMemoryBuilder()
- .WithOllamaTextGeneration(config, new GPT4oTokenizer())
- .WithOllamaTextEmbeddingGeneration(config, new GPT4oTokenizer())
+ .WithOllamaTextGeneration(config, new CL100KTokenizer())
+ .WithOllamaTextEmbeddingGeneration(config, new CL100KTokenizer())
.Configure(builder => builder.Services.AddLogging(l =>
{
l.SetMinimumLevel(logLevel);
diff --git a/extensions/Anthropic/Anthropic.csproj b/extensions/Anthropic/Anthropic.csproj
index 7e34304d0..c8d6ae45c 100644
--- a/extensions/Anthropic/Anthropic.csproj
+++ b/extensions/Anthropic/Anthropic.csproj
@@ -10,7 +10,7 @@
-
+
diff --git a/extensions/Anthropic/AnthropicConfig.cs b/extensions/Anthropic/AnthropicConfig.cs
index da6a1baf1..111de13a0 100644
--- a/extensions/Anthropic/AnthropicConfig.cs
+++ b/extensions/Anthropic/AnthropicConfig.cs
@@ -48,6 +48,12 @@ public class AnthropicConfig
///
public int MaxTokenOut { get; set; } = 4096;
+ ///
+ /// Name of the tokenizer used to count tokens.
+ /// Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
+ ///
+ public string Tokenizer { get; set; } = "cl100k";
+
///
/// System prompt used when generating text
///
diff --git a/extensions/Anthropic/AnthropicTextGeneration.cs b/extensions/Anthropic/AnthropicTextGeneration.cs
index df0252ce4..9257853bd 100644
--- a/extensions/Anthropic/AnthropicTextGeneration.cs
+++ b/extensions/Anthropic/AnthropicTextGeneration.cs
@@ -8,7 +8,6 @@
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.KernelMemory.AI.Anthropic.Client;
-using Microsoft.KernelMemory.AI.OpenAI;
using Microsoft.KernelMemory.Context;
using Microsoft.KernelMemory.Diagnostics;
@@ -70,12 +69,13 @@ public AnthropicTextGeneration(
var endpointVersion = string.IsNullOrWhiteSpace(config.Endpoint) ? DefaultEndpointVersion : config.EndpointVersion;
this._client = new RawAnthropicClient(this._httpClient, endpoint, endpointVersion, config.ApiKey);
+ textTokenizer ??= TokenizerFactory.GetTokenizerForEncoding(config.Tokenizer);
if (textTokenizer == null)
{
+ textTokenizer = new CL100KTokenizer();
this._log.LogWarning(
"Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors",
- nameof(GPT4oTokenizer));
- textTokenizer = new GPT4oTokenizer();
+ textTokenizer.GetType().FullName);
}
this._textTokenizer = textTokenizer;
diff --git a/extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs b/extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs
index 72eed149d..fb838e862 100644
--- a/extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs
+++ b/extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs
@@ -372,7 +372,9 @@ private async Task DoesIndexExistAsync(string index, CancellationToken can
private SearchClient GetSearchClient(string index)
{
var normalIndexName = this.NormalizeIndexName(index);
- this._log.LogTrace("Preparing search client, index name '{0}' normalized to '{1}'", index, normalIndexName);
+
+ if (index != normalIndexName) { this._log.LogTrace("Preparing search client, index name '{0}' normalized to '{1}'", index, normalIndexName); }
+ else { this._log.LogTrace("Preparing search client, index name '{0}'", normalIndexName); }
// Search an available client from the local cache
if (!this._clientsByIndex.TryGetValue(normalIndexName, out SearchClient? client))
diff --git a/extensions/AzureOpenAI/AzureOpenAI.FunctionalTests/Issue855Test.cs b/extensions/AzureOpenAI/AzureOpenAI.FunctionalTests/Issue855Test.cs
index f2a3ab768..e0740d134 100644
--- a/extensions/AzureOpenAI/AzureOpenAI.FunctionalTests/Issue855Test.cs
+++ b/extensions/AzureOpenAI/AzureOpenAI.FunctionalTests/Issue855Test.cs
@@ -22,10 +22,11 @@ public Issue855Test(IConfiguration cfg, ITestOutputHelper output) : base(cfg, ou
this._target = new AzureOpenAITextEmbeddingGenerator(this.AzureOpenAIEmbeddingConfiguration);
}
+ // [Fact] // Enable manually on a need basis
[Fact(Skip = "Enable and run manually")]
[Trait("Category", "Manual")]
[Trait("Category", "BugFix")]
- public async Task ItDoesntWhenThrottling()
+ public async Task ItDoesntFailWhenThrottling()
{
for (int i = 0; i < 50; i++)
{
diff --git a/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAI.csproj b/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAI.csproj
index a2c44bc35..9bf95c22c 100644
--- a/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAI.csproj
+++ b/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAI.csproj
@@ -10,7 +10,7 @@
-
+
diff --git a/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAIConfig.cs b/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAIConfig.cs
index a50938d59..f86a53083 100644
--- a/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAIConfig.cs
+++ b/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAIConfig.cs
@@ -63,6 +63,12 @@ public enum APITypes
///
public int MaxTokenTotal { get; set; } = 8191;
+ ///
+ /// Name of the tokenizer used to count tokens.
+ /// Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
+ ///
+ public string Tokenizer { get; set; } = "cl100k";
+
///
/// The number of dimensions output embeddings should have.
/// Only supported in "text-embedding-3" and later models developed with
diff --git a/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAITextEmbeddingGenerator.cs b/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAITextEmbeddingGenerator.cs
index 7aa954d36..0d7411f9b 100644
--- a/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAITextEmbeddingGenerator.cs
+++ b/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAITextEmbeddingGenerator.cs
@@ -10,7 +10,6 @@
using Azure.AI.OpenAI;
using Microsoft.Extensions.Logging;
using Microsoft.KernelMemory.AI.AzureOpenAI.Internals;
-using Microsoft.KernelMemory.AI.OpenAI;
using Microsoft.KernelMemory.Diagnostics;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.AI.Embeddings;
@@ -95,12 +94,13 @@ public AzureOpenAITextEmbeddingGenerator(
this.MaxTokens = config.MaxTokenTotal;
this.MaxBatchSize = config.MaxEmbeddingBatchSize;
+ textTokenizer ??= TokenizerFactory.GetTokenizerForEncoding(config.Tokenizer);
if (textTokenizer == null)
{
+ textTokenizer = new CL100KTokenizer();
this._log.LogWarning(
"Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors",
- nameof(GPT4oTokenizer));
- textTokenizer = new GPT4oTokenizer();
+ textTokenizer.GetType().FullName);
}
this._textTokenizer = textTokenizer;
diff --git a/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAITextGenerator.cs b/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAITextGenerator.cs
index a95e48c45..94375aa2b 100644
--- a/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAITextGenerator.cs
+++ b/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAITextGenerator.cs
@@ -9,7 +9,6 @@
using Azure.AI.OpenAI;
using Microsoft.Extensions.Logging;
using Microsoft.KernelMemory.AI.AzureOpenAI.Internals;
-using Microsoft.KernelMemory.AI.OpenAI;
using Microsoft.KernelMemory.Diagnostics;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
@@ -90,12 +89,13 @@ public AzureOpenAITextGenerator(
this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger();
this.MaxTokenTotal = config.MaxTokenTotal;
+ textTokenizer ??= TokenizerFactory.GetTokenizerForEncoding(config.Tokenizer);
if (textTokenizer == null)
{
+ textTokenizer = new O200KTokenizer();
this._log.LogWarning(
"Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors",
- nameof(GPT4oTokenizer));
- textTokenizer = new GPT4oTokenizer();
+ textTokenizer.GetType().FullName);
}
this._textTokenizer = textTokenizer;
diff --git a/extensions/KM/KernelMemory/Internals/KernelMemoryComposer.cs b/extensions/KM/KernelMemory/Internals/KernelMemoryComposer.cs
index a850099cb..9068c9e77 100644
--- a/extensions/KM/KernelMemory/Internals/KernelMemoryComposer.cs
+++ b/extensions/KM/KernelMemory/Internals/KernelMemoryComposer.cs
@@ -7,7 +7,6 @@
using Microsoft.KernelMemory.AI;
using Microsoft.KernelMemory.AI.Anthropic;
using Microsoft.KernelMemory.AI.Ollama;
-using Microsoft.KernelMemory.AI.OpenAI;
using Microsoft.KernelMemory.DocumentStorage.DevTools;
using Microsoft.KernelMemory.MemoryDb.SQLServer;
using Microsoft.KernelMemory.MemoryStorage;
@@ -201,8 +200,7 @@ private void ConfigureIngestionEmbeddingGenerators()
{
var instance = this.GetServiceInstance(
s => s.AddAzureOpenAIEmbeddingGeneration(
- config: this.GetServiceConfig("AzureOpenAIEmbedding"),
- textTokenizer: new GPT4oTokenizer()));
+ config: this.GetServiceConfig("AzureOpenAIEmbedding")));
this._builder.AddIngestionEmbeddingGenerator(instance);
break;
}
@@ -211,8 +209,7 @@ private void ConfigureIngestionEmbeddingGenerators()
{
var instance = this.GetServiceInstance(
s => s.AddOpenAITextEmbeddingGeneration(
- config: this.GetServiceConfig("OpenAI"),
- textTokenizer: new GPT4oTokenizer()));
+ config: this.GetServiceConfig("OpenAI")));
this._builder.AddIngestionEmbeddingGenerator(instance);
break;
}
@@ -221,8 +218,7 @@ private void ConfigureIngestionEmbeddingGenerators()
{
var instance = this.GetServiceInstance(
s => s.AddOllamaTextEmbeddingGeneration(
- config: this.GetServiceConfig("Ollama"),
- textTokenizer: new GPT4oTokenizer()));
+ config: this.GetServiceConfig("Ollama")));
this._builder.AddIngestionEmbeddingGenerator(instance);
break;
}
@@ -371,20 +367,17 @@ private void ConfigureRetrievalEmbeddingGenerator()
case string x when x.Equals("AzureOpenAI", StringComparison.OrdinalIgnoreCase):
case string y when y.Equals("AzureOpenAIEmbedding", StringComparison.OrdinalIgnoreCase):
this._builder.Services.AddAzureOpenAIEmbeddingGeneration(
- config: this.GetServiceConfig("AzureOpenAIEmbedding"),
- textTokenizer: new GPT4oTokenizer());
+ config: this.GetServiceConfig("AzureOpenAIEmbedding"));
break;
case string x when x.Equals("OpenAI", StringComparison.OrdinalIgnoreCase):
this._builder.Services.AddOpenAITextEmbeddingGeneration(
- config: this.GetServiceConfig("OpenAI"),
- textTokenizer: new GPT4oTokenizer());
+ config: this.GetServiceConfig("OpenAI"));
break;
case string x when x.Equals("Ollama", StringComparison.OrdinalIgnoreCase):
this._builder.Services.AddOllamaTextEmbeddingGeneration(
- config: this.GetServiceConfig("Ollama"),
- textTokenizer: new GPT4oTokenizer());
+ config: this.GetServiceConfig("Ollama"));
break;
case string x when x.Equals("LlamaSharp", StringComparison.OrdinalIgnoreCase):
@@ -453,26 +446,22 @@ private void ConfigureTextGenerator()
case string x when x.Equals("AzureOpenAI", StringComparison.OrdinalIgnoreCase):
case string y when y.Equals("AzureOpenAIText", StringComparison.OrdinalIgnoreCase):
this._builder.Services.AddAzureOpenAITextGeneration(
- config: this.GetServiceConfig("AzureOpenAIText"),
- textTokenizer: new GPT4oTokenizer());
+ config: this.GetServiceConfig("AzureOpenAIText"));
break;
case string x when x.Equals("OpenAI", StringComparison.OrdinalIgnoreCase):
this._builder.Services.AddOpenAITextGeneration(
- config: this.GetServiceConfig("OpenAI"),
- textTokenizer: new GPT4oTokenizer());
+ config: this.GetServiceConfig("OpenAI"));
break;
case string x when x.Equals("Anthropic", StringComparison.OrdinalIgnoreCase):
this._builder.Services.AddAnthropicTextGeneration(
- config: this.GetServiceConfig("Anthropic"),
- textTokenizer: new GPT4oTokenizer());
+ config: this.GetServiceConfig("Anthropic"));
break;
case string x when x.Equals("Ollama", StringComparison.OrdinalIgnoreCase):
this._builder.Services.AddOllamaTextGeneration(
- config: this.GetServiceConfig("Ollama"),
- textTokenizer: new GPT4oTokenizer());
+ config: this.GetServiceConfig("Ollama"));
break;
case string x when x.Equals("LlamaSharp", StringComparison.OrdinalIgnoreCase):
diff --git a/extensions/KM/KernelMemory/KernelMemory.csproj b/extensions/KM/KernelMemory/KernelMemory.csproj
index ea33c0c6b..26a5c99df 100644
--- a/extensions/KM/KernelMemory/KernelMemory.csproj
+++ b/extensions/KM/KernelMemory/KernelMemory.csproj
@@ -9,12 +9,12 @@
-
-
-
+
+
+
@@ -24,8 +24,9 @@
-
+
+
diff --git a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextGeneratorTest.cs b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextGeneratorTest.cs
index 3bca592e4..285ff1425 100644
--- a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextGeneratorTest.cs
+++ b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextGeneratorTest.cs
@@ -40,7 +40,7 @@ public void ItCountsTokens()
// Assert
Console.WriteLine("Phi3 token count: " + tokenCount);
- Console.WriteLine("GPT4 token count: " + DefaultGPTTokenizer.StaticCountTokens(text));
+ Console.WriteLine("GPT4 token count: " + (new CL100KTokenizer()).CountTokens(text));
Console.WriteLine($"Time: {this._timer.ElapsedMilliseconds / 1000} secs");
// Expected result with Phi-3-mini-4k-instruct-q4.gguf, without BoS (https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf)
diff --git a/extensions/ONNX/Onnx.FunctionalTests/appsettings.json b/extensions/ONNX/Onnx.FunctionalTests/appsettings.json
index ec84442d4..56b9181c5 100644
--- a/extensions/ONNX/Onnx.FunctionalTests/appsettings.json
+++ b/extensions/ONNX/Onnx.FunctionalTests/appsettings.json
@@ -7,7 +7,9 @@
"Services": {
"Onnx": {
// Path to directory containing ONNX Model, e.g. "C:\\....\\Phi-3-mini-128k-instruct-onnx\\....\\cpu-int4-rtn-block-32"
- "TextModelDir": "Z:\\tools\\LocalModels\\Phi-3-mini-128k-instruct-onnx\\cpu_and_mobile\\cpu-int4-rtn-block-32"
+ "TextModelDir": "Z:\\tools\\LocalModels\\Phi-3-mini-128k-instruct-onnx\\cpu_and_mobile\\cpu-int4-rtn-block-32",
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
+ "Tokenizer": "o200k"
},
"SimpleVectorDb": {
// Options: "Disk" or "Volatile". Volatile data is lost after each execution.
diff --git a/extensions/ONNX/Onnx/Onnx.csproj b/extensions/ONNX/Onnx/Onnx.csproj
index 96ec73dff..ecafdfed3 100644
--- a/extensions/ONNX/Onnx/Onnx.csproj
+++ b/extensions/ONNX/Onnx/Onnx.csproj
@@ -8,6 +8,17 @@
$(NoWarn);KMEXP00;KMEXP01;CA1724;
+
+
+
+
+
+
+
+
+
+
+
true
Microsoft.KernelMemory.AI.Onnx
@@ -21,12 +32,4 @@
-
-
-
-
-
-
-
-
diff --git a/extensions/ONNX/Onnx/OnnxConfig.cs b/extensions/ONNX/Onnx/OnnxConfig.cs
index 4a0ce66fe..3a54540ac 100644
--- a/extensions/ONNX/Onnx/OnnxConfig.cs
+++ b/extensions/ONNX/Onnx/OnnxConfig.cs
@@ -50,6 +50,12 @@ public enum OnnxSearchType
///
public int MaxTokens { get; set; } = 2048;
+ ///
+ /// Name of the tokenizer used to count tokens.
+ /// Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
+ ///
+ public string Tokenizer { get; set; } = "o200k";
+
///
/// The minimum length of the response that the model will generate. See https://onnxruntime.ai/docs/genai/reference/config.html
///
diff --git a/extensions/ONNX/Onnx/OnnxTextGenerator.cs b/extensions/ONNX/Onnx/OnnxTextGenerator.cs
index adfa0c8cc..7f31e49a7 100644
--- a/extensions/ONNX/Onnx/OnnxTextGenerator.cs
+++ b/extensions/ONNX/Onnx/OnnxTextGenerator.cs
@@ -9,7 +9,6 @@
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
-using Microsoft.KernelMemory.AI.OpenAI;
using Microsoft.KernelMemory.Diagnostics;
using Microsoft.ML.OnnxRuntimeGenAI;
using static Microsoft.KernelMemory.OnnxConfig;
@@ -60,12 +59,14 @@ public OnnxTextGenerator(
ILoggerFactory? loggerFactory = null)
{
this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger();
+
+ textTokenizer ??= TokenizerFactory.GetTokenizerForEncoding(config.Tokenizer);
if (textTokenizer == null)
{
+ textTokenizer = new O200KTokenizer();
this._log.LogWarning(
"Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors",
- nameof(GPT4oTokenizer));
- textTokenizer = new GPT4oTokenizer();
+ textTokenizer.GetType().FullName);
}
config.Validate();
diff --git a/extensions/Ollama/Ollama/Ollama.csproj b/extensions/Ollama/Ollama/Ollama.csproj
index e26c65d47..6a943c693 100644
--- a/extensions/Ollama/Ollama/Ollama.csproj
+++ b/extensions/Ollama/Ollama/Ollama.csproj
@@ -8,6 +8,15 @@
$(NoWarn);KMEXP00;KMEXP01;CA1724;
+
+
+
+
+
+
+
+
+
true
Microsoft.KernelMemory.AI.Ollama
@@ -21,13 +30,4 @@
-
-
-
-
-
-
-
-
-
diff --git a/extensions/Ollama/Ollama/OllamaModelConfig.cs b/extensions/Ollama/Ollama/OllamaModelConfig.cs
index c1c3af561..6f14a9306 100644
--- a/extensions/Ollama/Ollama/OllamaModelConfig.cs
+++ b/extensions/Ollama/Ollama/OllamaModelConfig.cs
@@ -15,6 +15,12 @@ public class OllamaModelConfig
///
public int? MaxTokenTotal { get; set; }
+ ///
+ /// Name of the tokenizer used to count tokens.
+ /// Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
+ ///
+ public string Tokenizer { get; set; } = "cl100k";
+
///
/// Enable Mirostat sampling for controlling perplexity.
/// (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)
diff --git a/extensions/Ollama/Ollama/OllamaTextEmbeddingGenerator.cs b/extensions/Ollama/Ollama/OllamaTextEmbeddingGenerator.cs
index 7e4a5ae9b..a2d093c98 100644
--- a/extensions/Ollama/Ollama/OllamaTextEmbeddingGenerator.cs
+++ b/extensions/Ollama/Ollama/OllamaTextEmbeddingGenerator.cs
@@ -7,7 +7,6 @@
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
-using Microsoft.KernelMemory.AI.OpenAI;
using Microsoft.KernelMemory.Context;
using Microsoft.KernelMemory.Diagnostics;
using OllamaSharp;
@@ -41,12 +40,13 @@ public OllamaTextEmbeddingGenerator(
this.MaxBatchSize = modelConfig.MaxBatchSize;
this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger();
+ textTokenizer ??= TokenizerFactory.GetTokenizerForEncoding(modelConfig.Tokenizer);
if (textTokenizer == null)
{
+ textTokenizer = new CL100KTokenizer();
this._log.LogWarning(
"Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors",
- nameof(GPT4oTokenizer));
- textTokenizer = new GPT4oTokenizer();
+ textTokenizer.GetType().FullName);
}
this._textTokenizer = textTokenizer;
diff --git a/extensions/Ollama/Ollama/OllamaTextGenerator.cs b/extensions/Ollama/Ollama/OllamaTextGenerator.cs
index 5f15d7428..34900713c 100644
--- a/extensions/Ollama/Ollama/OllamaTextGenerator.cs
+++ b/extensions/Ollama/Ollama/OllamaTextGenerator.cs
@@ -7,7 +7,6 @@
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
-using Microsoft.KernelMemory.AI.OpenAI;
using Microsoft.KernelMemory.Context;
using Microsoft.KernelMemory.Diagnostics;
using OllamaSharp;
@@ -38,12 +37,13 @@ public OllamaTextGenerator(
this._modelConfig = modelConfig;
this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger();
+ textTokenizer ??= TokenizerFactory.GetTokenizerForEncoding(modelConfig.Tokenizer);
if (textTokenizer == null)
{
+ textTokenizer = new O200KTokenizer();
this._log.LogWarning(
"Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors",
- nameof(GPT4oTokenizer));
- textTokenizer = new GPT4oTokenizer();
+ textTokenizer.GetType().FullName);
}
this._textTokenizer = textTokenizer;
diff --git a/extensions/OpenAI/OpenAI/DependencyInjection.cs b/extensions/OpenAI/OpenAI/DependencyInjection.cs
index d9c64984b..11ee2a92e 100644
--- a/extensions/OpenAI/OpenAI/DependencyInjection.cs
+++ b/extensions/OpenAI/OpenAI/DependencyInjection.cs
@@ -92,7 +92,7 @@ public static IKernelMemoryBuilder WithOpenAI(
{
config.Validate();
builder.WithOpenAITextEmbeddingGeneration(config, textEmbeddingTokenizer, onlyForRetrieval, httpClient);
- builder.WithOpenAITextGeneration(config, textGenerationTokenizer);
+ builder.WithOpenAITextGeneration(config, textGenerationTokenizer, httpClient);
return builder;
}
@@ -137,7 +137,7 @@ public static IKernelMemoryBuilder WithOpenAITextEmbeddingGeneration(
HttpClient? httpClient = null)
{
config.Validate();
- builder.Services.AddOpenAITextEmbeddingGeneration(config, httpClient: httpClient);
+ builder.Services.AddOpenAITextEmbeddingGeneration(config, textTokenizer, httpClient: httpClient);
if (!onlyForRetrieval)
{
builder.AddIngestionEmbeddingGenerator(
@@ -164,7 +164,7 @@ public static IKernelMemoryBuilder WithOpenAITextEmbeddingGeneration(
bool onlyForRetrieval = false)
{
config.Validate();
- builder.Services.AddOpenAITextEmbeddingGeneration(config, openAIClient);
+ builder.Services.AddOpenAITextEmbeddingGeneration(config, openAIClient, textTokenizer);
if (!onlyForRetrieval)
{
builder.AddIngestionEmbeddingGenerator(
diff --git a/extensions/OpenAI/OpenAI/OpenAI.csproj b/extensions/OpenAI/OpenAI/OpenAI.csproj
index f1879abed..145d086fd 100644
--- a/extensions/OpenAI/OpenAI/OpenAI.csproj
+++ b/extensions/OpenAI/OpenAI/OpenAI.csproj
@@ -10,6 +10,7 @@
+
diff --git a/extensions/OpenAI/OpenAI/OpenAIConfig.cs b/extensions/OpenAI/OpenAI/OpenAIConfig.cs
index 2b8e234ff..8e5eee5dc 100644
--- a/extensions/OpenAI/OpenAI/OpenAIConfig.cs
+++ b/extensions/OpenAI/OpenAI/OpenAIConfig.cs
@@ -53,7 +53,13 @@ public enum TextGenerationTypes
public int TextModelMaxTokenTotal { get; set; } = 8192;
///
- /// Model used to embedding generation/
+ /// Name of the tokenizer used to count tokens.
+ /// Supported values: "p50k", "cl100k", "o200k". Leave it empty for autodetect.
+ ///
+ public string TextModelTokenizer { get; set; } = string.Empty;
+
+ ///
+ /// Model used for embedding generation.
///
public string EmbeddingModel { get; set; } = string.Empty;
@@ -63,6 +69,12 @@ public enum TextGenerationTypes
///
public int EmbeddingModelMaxTokenTotal { get; set; } = 8191;
+ ///
+ /// Name of the tokenizer used to count tokens.
+ /// Supported values: "p50k", "cl100k", "o200k". Leave it empty for autodetect.
+ ///
+ public string EmbeddingModelTokenizer { get; set; } = string.Empty;
+
///
/// The number of dimensions output embeddings should have.
/// Only supported in "text-embedding-3" and later models developed with
diff --git a/extensions/OpenAI/OpenAI/OpenAITextEmbeddingGenerator.cs b/extensions/OpenAI/OpenAI/OpenAITextEmbeddingGenerator.cs
index d9582d70c..58b049796 100644
--- a/extensions/OpenAI/OpenAI/OpenAITextEmbeddingGenerator.cs
+++ b/extensions/OpenAI/OpenAI/OpenAITextEmbeddingGenerator.cs
@@ -96,12 +96,18 @@ public OpenAITextEmbeddingGenerator(
this.MaxTokens = config.EmbeddingModelMaxTokenTotal;
this.MaxBatchSize = config.MaxEmbeddingBatchSize;
+ if (textTokenizer == null && !string.IsNullOrEmpty(config.EmbeddingModelTokenizer))
+ {
+ textTokenizer = TokenizerFactory.GetTokenizerForEncoding(config.EmbeddingModelTokenizer);
+ }
+
+ textTokenizer ??= TokenizerFactory.GetTokenizerForModel(config.EmbeddingModel);
if (textTokenizer == null)
{
+ textTokenizer = new CL100KTokenizer();
this._log.LogWarning(
"Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors",
- nameof(GPT4oTokenizer));
- textTokenizer = new GPT4oTokenizer();
+ textTokenizer.GetType().FullName);
}
this._textTokenizer = textTokenizer;
diff --git a/extensions/OpenAI/OpenAI/OpenAITextGenerator.cs b/extensions/OpenAI/OpenAI/OpenAITextGenerator.cs
index c0cd6b0b7..dbc9cb857 100644
--- a/extensions/OpenAI/OpenAI/OpenAITextGenerator.cs
+++ b/extensions/OpenAI/OpenAI/OpenAITextGenerator.cs
@@ -89,12 +89,18 @@ public OpenAITextGenerator(
this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger();
this.MaxTokenTotal = config.TextModelMaxTokenTotal;
+ if (textTokenizer == null && !string.IsNullOrEmpty(config.TextModelTokenizer))
+ {
+ textTokenizer = TokenizerFactory.GetTokenizerForEncoding(config.TextModelTokenizer);
+ }
+
+ textTokenizer ??= TokenizerFactory.GetTokenizerForModel(config.TextModel);
if (textTokenizer == null)
{
+ textTokenizer = new O200KTokenizer();
this._log.LogWarning(
"Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors",
- nameof(GPT4oTokenizer));
- textTokenizer = new GPT4oTokenizer();
+ textTokenizer.GetType().FullName);
}
this._textTokenizer = textTokenizer;
diff --git a/extensions/OpenAI/OpenAI/Tokenizers/GPT3Tokenizer.cs b/extensions/OpenAI/OpenAI/Tokenizers/GPT3Tokenizer.cs
deleted file mode 100644
index e7d03d721..000000000
--- a/extensions/OpenAI/OpenAI/Tokenizers/GPT3Tokenizer.cs
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright (c) Microsoft. All rights reserved.
-
-using System.Collections.Generic;
-using System.Linq;
-using Microsoft.ML.Tokenizers;
-
-#pragma warning disable IDE0130 // reduce number of "using" statements
-// ReSharper disable once CheckNamespace
-namespace Microsoft.KernelMemory.AI.OpenAI;
-
-///
-/// TikToken GPT3 tokenizer (p50k_base.tiktoken)
-///
-public sealed class GPT3Tokenizer : ITextTokenizer
-{
- private static readonly Tokenizer s_tokenizer = TiktokenTokenizer.CreateForModel("text-davinci-003");
-
- ///
- public int CountTokens(string text)
- {
- return s_tokenizer.CountTokens(text);
- }
-
- ///
- public IReadOnlyList GetTokens(string text)
- {
- return s_tokenizer.EncodeToTokens(text, out string? _).Select(t => t.Value).ToList();
- }
-}
diff --git a/extensions/OpenAI/OpenAI/Tokenizers/GPT4oTokenizer.cs b/extensions/OpenAI/OpenAI/Tokenizers/GPT4oTokenizer.cs
deleted file mode 100644
index a0052c803..000000000
--- a/extensions/OpenAI/OpenAI/Tokenizers/GPT4oTokenizer.cs
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright (c) Microsoft. All rights reserved.
-
-using System.Collections.Generic;
-using System.Linq;
-using Microsoft.ML.Tokenizers;
-
-#pragma warning disable IDE0130 // reduce number of "using" statements
-// ReSharper disable once CheckNamespace
-namespace Microsoft.KernelMemory.AI.OpenAI;
-
-///
-/// GPT 4o / 4o mini tokenizer (cl200k_base.tiktoken + special tokens)
-///
-// ReSharper disable once InconsistentNaming
-public sealed class GPT4oTokenizer : ITextTokenizer
-{
- private static readonly Tokenizer s_tokenizer = TiktokenTokenizer.CreateForModel("gpt-4o",
- new Dictionary { { "<|im_start|>", 100264 }, { "<|im_end|>", 100265 } });
-
- ///
- public int CountTokens(string text)
- {
- return s_tokenizer.CountTokens(text);
- }
-
- ///
- public IReadOnlyList GetTokens(string text)
- {
- return s_tokenizer.EncodeToTokens(text, out string? _).Select(t => t.Value).ToList();
- }
-}
diff --git a/extensions/Tiktoken/README.md b/extensions/Tiktoken/README.md
new file mode 100644
index 000000000..adb6e0306
--- /dev/null
+++ b/extensions/Tiktoken/README.md
@@ -0,0 +1,6 @@
+# Kernel Memory with Tiktoken tokenizers
+
+[](https://www.nuget.org/packages/Microsoft.KernelMemory.AI.Tiktoken/)
+[](https://aka.ms/KMdiscord)
+
+This project contains the Tiktoken tokenizers for Kernel Memory.
diff --git a/extensions/OpenAI/OpenAI.UnitTests/Startup.cs b/extensions/Tiktoken/Tiktoken.UnitTests/Startup.cs
similarity index 89%
rename from extensions/OpenAI/OpenAI.UnitTests/Startup.cs
rename to extensions/Tiktoken/Tiktoken.UnitTests/Startup.cs
index b489661b8..c840e11ef 100644
--- a/extensions/OpenAI/OpenAI.UnitTests/Startup.cs
+++ b/extensions/Tiktoken/Tiktoken.UnitTests/Startup.cs
@@ -5,7 +5,7 @@
using Microsoft.Extensions.Hosting;
-namespace Microsoft.OpenAI.UnitTests;
+namespace Microsoft.Tiktoken.UnitTests;
public class Startup
{
diff --git a/extensions/OpenAI/OpenAI.UnitTests/OpenAI.UnitTests.csproj b/extensions/Tiktoken/Tiktoken.UnitTests/Tiktoken.UnitTests.csproj
similarity index 91%
rename from extensions/OpenAI/OpenAI.UnitTests/OpenAI.UnitTests.csproj
rename to extensions/Tiktoken/Tiktoken.UnitTests/Tiktoken.UnitTests.csproj
index bfd523257..4b8557bf2 100644
--- a/extensions/OpenAI/OpenAI.UnitTests/OpenAI.UnitTests.csproj
+++ b/extensions/Tiktoken/Tiktoken.UnitTests/Tiktoken.UnitTests.csproj
@@ -1,8 +1,8 @@
- Microsoft.OpenAI.UnitTests
- Microsoft.OpenAI.UnitTests
+ Microsoft.Tiktoken.UnitTests
+ Microsoft.Tiktoken.UnitTests
net8.0
LatestMajor
true
diff --git a/extensions/OpenAI/OpenAI.UnitTests/GPTTokenizersTests.cs b/extensions/Tiktoken/Tiktoken.UnitTests/TokenizersTests.cs
similarity index 72%
rename from extensions/OpenAI/OpenAI.UnitTests/GPTTokenizersTests.cs
rename to extensions/Tiktoken/Tiktoken.UnitTests/TokenizersTests.cs
index 0d93d1336..270e4ba63 100644
--- a/extensions/OpenAI/OpenAI.UnitTests/GPTTokenizersTests.cs
+++ b/extensions/Tiktoken/Tiktoken.UnitTests/TokenizersTests.cs
@@ -1,13 +1,13 @@
// Copyright (c) Microsoft. All rights reserved.
-using Microsoft.KernelMemory.AI.OpenAI;
+using Microsoft.KernelMemory.AI;
using Microsoft.KM.TestHelpers;
using Xunit;
using Xunit.Abstractions;
-namespace Microsoft.OpenAI.UnitTests;
+namespace Microsoft.Tiktoken.UnitTests;
-public class GPTTokenizersTests(ITestOutputHelper output) : BaseUnitTestCase(output)
+public class TokenizersTests(ITestOutputHelper output) : BaseUnitTestCase(output)
{
[Fact]
[Trait("Category", "UnitTest")]
@@ -15,12 +15,9 @@ public class GPTTokenizersTests(ITestOutputHelper output) : BaseUnitTestCase(out
public void CanTokenize()
{
const string helloWorld = "hello world";
- var gpt2 = new GPT2Tokenizer();
- var tokens = gpt2.GetTokens(helloWorld);
- Assert.Equal(["hello", " world"], tokens);
var gpt3 = new GPT3Tokenizer();
- tokens = gpt3.GetTokens(helloWorld);
+ var tokens = gpt3.GetTokens(helloWorld);
Assert.Equal(["hello", " world"], tokens);
var gpt4 = new GPT4Tokenizer();
@@ -39,7 +36,6 @@ public void TheyCountTokens()
{
const string text = "{'bos_token': '<|endoftext|>',\n 'eos_token': '<|endoftext|>',\n 'unk_token': '<|endoftext|>'}";
- Assert.Equal(29, new GPT2Tokenizer().CountTokens(text));
Assert.Equal(29, new GPT3Tokenizer().CountTokens(text));
Assert.Equal(21, new GPT4Tokenizer().CountTokens(text));
Assert.Equal(22, new GPT4oTokenizer().CountTokens(text));
diff --git a/extensions/OpenAI/OpenAI/Tokenizers/GPT4Tokenizer.cs b/extensions/Tiktoken/Tiktoken/CL100KTokenizer.cs
similarity index 57%
rename from extensions/OpenAI/OpenAI/Tokenizers/GPT4Tokenizer.cs
rename to extensions/Tiktoken/Tiktoken/CL100KTokenizer.cs
index 5cef0f5cc..30c401864 100644
--- a/extensions/OpenAI/OpenAI/Tokenizers/GPT4Tokenizer.cs
+++ b/extensions/Tiktoken/Tiktoken/CL100KTokenizer.cs
@@ -4,16 +4,11 @@
using System.Linq;
using Microsoft.ML.Tokenizers;
-#pragma warning disable IDE0130 // reduce number of "using" statements
-// ReSharper disable once CheckNamespace
-namespace Microsoft.KernelMemory.AI.OpenAI;
+namespace Microsoft.KernelMemory.AI;
-///
-/// GPT 3.5 and GPT 4 tokenizer (cl100k_base.tiktoken + special tokens)
-///
-public sealed class GPT4Tokenizer : ITextTokenizer
+public class CL100KTokenizer : ITextTokenizer
{
- private static readonly Tokenizer s_tokenizer = TiktokenTokenizer.CreateForModel("gpt-4",
+ private static readonly Tokenizer s_tokenizer = ML.Tokenizers.TiktokenTokenizer.CreateForEncoding("cl100k_base",
new Dictionary { { "<|im_start|>", 100264 }, { "<|im_end|>", 100265 } });
///
diff --git a/extensions/Tiktoken/Tiktoken/GPTTokenizers.cs b/extensions/Tiktoken/Tiktoken/GPTTokenizers.cs
new file mode 100644
index 000000000..109cca358
--- /dev/null
+++ b/extensions/Tiktoken/Tiktoken/GPTTokenizers.cs
@@ -0,0 +1,32 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+namespace Microsoft.KernelMemory.AI;
+
+///
+/// GPT3 tokenizer
+///
+public sealed class GPT3Tokenizer : P50KTokenizer
+{
+}
+
+///
+/// gpt-3.5-turbo
+/// gpt-3.5-turbo-*
+/// gpt-4
+/// text-embedding-ada-002
+/// text-embedding-3-small
+/// text-embedding-3-large
+///
+public sealed class GPT4Tokenizer : CL100KTokenizer
+{
+}
+
+///
+/// GPT 4o / 4o mini tokenizer
+/// gpt-4o
+/// gpt-4o-*
+///
+// ReSharper disable once InconsistentNaming
+public sealed class GPT4oTokenizer : O200KTokenizer
+{
+}
diff --git a/service/Core/AI/DefaultGPTTokenizer.cs b/extensions/Tiktoken/Tiktoken/O200KTokenizer.cs
similarity index 55%
rename from service/Core/AI/DefaultGPTTokenizer.cs
rename to extensions/Tiktoken/Tiktoken/O200KTokenizer.cs
index 9a3adb619..3a33979c5 100644
--- a/service/Core/AI/DefaultGPTTokenizer.cs
+++ b/extensions/Tiktoken/Tiktoken/O200KTokenizer.cs
@@ -6,21 +6,18 @@
namespace Microsoft.KernelMemory.AI;
-public class DefaultGPTTokenizer : ITextTokenizer
+public class O200KTokenizer : ITextTokenizer
{
- private static readonly Tokenizer s_tokenizer = TiktokenTokenizer.CreateForModel(
- "gpt-4", new Dictionary { { "<|im_start|>", 100264 }, { "<|im_end|>", 100265 } });
-
- public static int StaticCountTokens(string text)
- {
- return s_tokenizer.CountTokens(text);
- }
+ private static readonly Tokenizer s_tokenizer = ML.Tokenizers.TiktokenTokenizer.CreateForEncoding("o200k_base",
+ new Dictionary { { "<|im_start|>", 100264 }, { "<|im_end|>", 100265 } });
+ ///
public int CountTokens(string text)
{
return s_tokenizer.CountTokens(text);
}
+ ///
public IReadOnlyList GetTokens(string text)
{
return s_tokenizer.EncodeToTokens(text, out string? _).Select(t => t.Value).ToList();
diff --git a/extensions/OpenAI/OpenAI/Tokenizers/GPT2Tokenizer.cs b/extensions/Tiktoken/Tiktoken/P50KTokenizer.cs
similarity index 54%
rename from extensions/OpenAI/OpenAI/Tokenizers/GPT2Tokenizer.cs
rename to extensions/Tiktoken/Tiktoken/P50KTokenizer.cs
index 8b3df3559..6a059f42f 100644
--- a/extensions/OpenAI/OpenAI/Tokenizers/GPT2Tokenizer.cs
+++ b/extensions/Tiktoken/Tiktoken/P50KTokenizer.cs
@@ -4,16 +4,11 @@
using System.Linq;
using Microsoft.ML.Tokenizers;
-#pragma warning disable IDE0130 // reduce number of "using" statements
-// ReSharper disable once CheckNamespace
-namespace Microsoft.KernelMemory.AI.OpenAI;
+namespace Microsoft.KernelMemory.AI;
-///
-/// TikToken GPT2 tokenizer (gpt2.tiktoken)
-///
-public sealed class GPT2Tokenizer : ITextTokenizer
+public class P50KTokenizer : ITextTokenizer
{
- private static readonly Tokenizer s_tokenizer = TiktokenTokenizer.CreateForModel("gpt2");
+ private static readonly Tokenizer s_tokenizer = ML.Tokenizers.TiktokenTokenizer.CreateForEncoding("p50k_base");
///
public int CountTokens(string text)
diff --git a/extensions/Tiktoken/Tiktoken/Tiktoken.csproj b/extensions/Tiktoken/Tiktoken/Tiktoken.csproj
new file mode 100644
index 000000000..839e109bf
--- /dev/null
+++ b/extensions/Tiktoken/Tiktoken/Tiktoken.csproj
@@ -0,0 +1,32 @@
+
+
+
+ net8.0
+ LatestMajor
+ Microsoft.KernelMemory.AI.Tiktoken
+ Microsoft.KernelMemory.AI
+ $(NoWarn);KMEXP00;CA1308;NU5104;
+
+
+
+
+
+
+
+
+
+
+
+ true
+ Microsoft.KernelMemory.AI.Tiktoken
+ Tiktoken tokenizers for Kernel Memory
+ Provide tokenizers to allow counting content tokens for text and embeddings
+ Tiktoken, Tokenizer, RAG, Kernel Memory, AI, Artificial Intelligence, Embeddings, Vector DB, Vector Search, Memory DB
+ bin/$(Configuration)/$(TargetFramework)/$(AssemblyName).xml
+
+
+
+
+
+
+
diff --git a/extensions/Tiktoken/Tiktoken/TiktokenTokenizer.cs b/extensions/Tiktoken/Tiktoken/TiktokenTokenizer.cs
new file mode 100644
index 000000000..9d441893f
--- /dev/null
+++ b/extensions/Tiktoken/Tiktoken/TiktokenTokenizer.cs
@@ -0,0 +1,39 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.ML.Tokenizers;
+
+namespace Microsoft.KernelMemory.AI;
+
+public class TiktokenTokenizer : ITextTokenizer
+{
+ private readonly Tokenizer _tokenizer;
+
+ public TiktokenTokenizer(string modelId)
+ {
+ try
+ {
+ this._tokenizer = Microsoft.ML.Tokenizers.TiktokenTokenizer.CreateForModel(modelId);
+ }
+ catch (NotSupportedException)
+ {
+ throw new KernelMemoryException("Autodetect failed");
+ }
+ catch (ArgumentNullException)
+ {
+ throw new KernelMemoryException("Autodetect failed");
+ }
+ }
+
+ public int CountTokens(string text)
+ {
+ return this._tokenizer.CountTokens(text);
+ }
+
+ public IReadOnlyList GetTokens(string text)
+ {
+ return this._tokenizer.EncodeToTokens(text, out string? _).Select(t => t.Value).ToList();
+ }
+}
diff --git a/extensions/Tiktoken/Tiktoken/TokenizerFactory.cs b/extensions/Tiktoken/Tiktoken/TokenizerFactory.cs
new file mode 100644
index 000000000..2db88d151
--- /dev/null
+++ b/extensions/Tiktoken/Tiktoken/TokenizerFactory.cs
@@ -0,0 +1,71 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+
+namespace Microsoft.KernelMemory.AI;
+
+public static class TokenizerFactory
+{
+ public static ITextTokenizer? GetTokenizerForEncoding(string encodingId)
+ {
+ encodingId = encodingId.ToLowerInvariant();
+
+ switch (encodingId.ToLowerInvariant())
+ {
+ case "p50k":
+ return new P50KTokenizer();
+
+ case "cl100k":
+ return new CL100KTokenizer();
+
+ case "o200k":
+ return new O200KTokenizer();
+ }
+
+ return null;
+ }
+
+ public static ITextTokenizer? GetTokenizerForModel(string modelId)
+ {
+ try
+ {
+ return new TiktokenTokenizer(modelId);
+ }
+ catch (KernelMemoryException)
+ {
+ // ignore
+ }
+
+ modelId = modelId.ToLowerInvariant();
+
+ if (modelId.StartsWith("text-embedding-", StringComparison.Ordinal)
+ || modelId.StartsWith("gpt-3.5-", StringComparison.Ordinal)
+ || modelId.StartsWith("gpt-4-", StringComparison.Ordinal))
+ {
+ return new CL100KTokenizer();
+ }
+
+ if (modelId.StartsWith("gpt-4o-", StringComparison.Ordinal))
+ {
+ return new O200KTokenizer();
+ }
+
+ switch (modelId.ToLowerInvariant())
+ {
+ case "code-davinci-001":
+ case "code-davinci-002":
+ case "text-davinci-002":
+ case "text-davinci-003":
+ return new P50KTokenizer();
+
+ case "gpt-3.5-turbo":
+ case "gpt-4":
+ return new CL100KTokenizer();
+
+ case "gpt-4o":
+ return new O200KTokenizer();
+ }
+
+ return null;
+ }
+}
diff --git a/service/Core/Core.csproj b/service/Core/Core.csproj
index 26aca1d01..1c5c69eaf 100644
--- a/service/Core/Core.csproj
+++ b/service/Core/Core.csproj
@@ -9,6 +9,7 @@
+
diff --git a/service/Core/DataFormats/Text/TextChunker.cs b/service/Core/DataFormats/Text/TextChunker.cs
index 374a4eb5f..a89100492 100644
--- a/service/Core/DataFormats/Text/TextChunker.cs
+++ b/service/Core/DataFormats/Text/TextChunker.cs
@@ -25,6 +25,9 @@ public static class TextChunker
/// The number of tokens in the input string.
public delegate int TokenCounter(string input);
+ // Fallback when TokenCounter is not set
+ private static readonly TokenCounter s_defaultTokenCounter = (new CL100KTokenizer()).CountTokens;
+
private static readonly char[] s_spaceChar = [' '];
private static readonly string?[] s_plaintextSplitOptions = ["\n\r", ".", "?!", ";", ":", ",", ")]}", " ", "-", null];
private static readonly string?[] s_markdownSplitOptions = [".", "?!", ";", ":", ",", ")]}", " ", "-", "\n\r", null];
@@ -47,7 +50,7 @@ public static List SplitPlainTextLines(
s_plaintextSplitOptions, tokenCounter);
///
- /// Split markdown text into lines.
+ /// Split Markdown text into lines.
///
/// Text to split
/// Maximum number of tokens per line.
@@ -93,7 +96,7 @@ public static List SplitPlainTextParagraphs(
tokenCounter);
///
- /// Split markdown text into paragraphs.
+ /// Split Markdown text into paragraphs.
///
/// Lines of text.
/// Maximum number of tokens per paragraph.
@@ -399,6 +402,6 @@ private static (List, bool) Split(
private static int GetTokenCount(string input, TokenCounter? tokenCounter)
{
// Fall back to GPT tokenizer if none configured
- return tokenCounter?.Invoke(input) ?? DefaultGPTTokenizer.StaticCountTokens(input);
+ return tokenCounter?.Invoke(input) ?? s_defaultTokenCounter(input);
}
}
diff --git a/service/Core/Handlers/TextPartitioningHandler.cs b/service/Core/Handlers/TextPartitioningHandler.cs
index 489ffbfb0..34cc47ee2 100644
--- a/service/Core/Handlers/TextPartitioningHandler.cs
+++ b/service/Core/Handlers/TextPartitioningHandler.cs
@@ -49,7 +49,7 @@ public TextPartitioningHandler(
this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger();
this._log.LogInformation("Handler '{0}' ready", stepName);
- this._tokenCounter = DefaultGPTTokenizer.StaticCountTokens;
+ this._tokenCounter = (new CL100KTokenizer()).CountTokens;
if (orchestrator.EmbeddingGenerationEnabled)
{
foreach (var gen in orchestrator.GetEmbeddingGenerators())
diff --git a/service/Core/Search/SearchClient.cs b/service/Core/Search/SearchClient.cs
index 26eb0d76c..cf410ad54 100644
--- a/service/Core/Search/SearchClient.cs
+++ b/service/Core/Search/SearchClient.cs
@@ -102,6 +102,8 @@ public async Task SearchAsync(
if (result.State == SearchState.Stop) { break; }
}
+ this._log.LogTrace("{Count} records processed", result.RecordCount);
+
if (result.SearchResult.Results.Count == 0)
{
this._log.LogDebug("No memories found");
@@ -167,6 +169,8 @@ public async Task AskAsync(
if (result.State == SearchState.Stop) { break; }
}
+ this._log.LogTrace("{Count} records processed", result.RecordCount);
+
return await this._answerGenerator.GenerateAnswerAsync(question, result, context, cancellationToken).ConfigureAwait(false);
}
@@ -189,6 +193,9 @@ private SearchClientResult ProcessMemoryRecord(
return result.SkipRecord();
}
+ // Keep track of how many records have been processed
+ result.RecordCount++;
+
// Note: a document can be composed by multiple files
string documentId = record.GetDocumentId(this._log);
diff --git a/service/Core/Search/SearchClientResult.cs b/service/Core/Search/SearchClientResult.cs
index 605055970..c509809a7 100644
--- a/service/Core/Search/SearchClientResult.cs
+++ b/service/Core/Search/SearchClientResult.cs
@@ -21,6 +21,7 @@ internal class SearchClientResult
{
public SearchMode Mode { get; private init; }
public SearchState State { get; set; }
+ public int RecordCount { get; set; }
// Use by in Search and Ask mode
public MemoryAnswer AskResult { get; private init; } = new();
diff --git a/service/Core/SemanticKernel/SemanticKernelTextEmbeddingGenerator.cs b/service/Core/SemanticKernel/SemanticKernelTextEmbeddingGenerator.cs
index 07c487581..5f4a61cfc 100644
--- a/service/Core/SemanticKernel/SemanticKernelTextEmbeddingGenerator.cs
+++ b/service/Core/SemanticKernel/SemanticKernelTextEmbeddingGenerator.cs
@@ -41,10 +41,10 @@ public SemanticKernelTextEmbeddingGenerator(
if (textTokenizer == null)
{
+ textTokenizer = new CL100KTokenizer();
this._log.LogWarning(
"Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors",
- nameof(DefaultGPTTokenizer));
- textTokenizer = new DefaultGPTTokenizer();
+ textTokenizer.GetType().FullName);
}
this._tokenizer = textTokenizer;
diff --git a/service/Core/SemanticKernel/SemanticKernelTextGenerator.cs b/service/Core/SemanticKernel/SemanticKernelTextGenerator.cs
index 5d592db33..06c4844d3 100644
--- a/service/Core/SemanticKernel/SemanticKernelTextGenerator.cs
+++ b/service/Core/SemanticKernel/SemanticKernelTextGenerator.cs
@@ -42,10 +42,10 @@ public SemanticKernelTextGenerator(
if (textTokenizer == null)
{
+ textTokenizer = new O200KTokenizer();
this._log.LogWarning(
"Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors",
- nameof(DefaultGPTTokenizer));
- textTokenizer = new DefaultGPTTokenizer();
+ textTokenizer.GetType().FullName);
}
this._tokenizer = textTokenizer;
diff --git a/service/Service/appsettings.json b/service/Service/appsettings.json
index a9940b9b5..506f52c2d 100644
--- a/service/Service/appsettings.json
+++ b/service/Service/appsettings.json
@@ -238,6 +238,8 @@
"ApiKey": "",
// See https://docs.anthropic.com/claude/docs/models-overview for list of models and details
"TextModelName": "claude-3-haiku-20240307",
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
+ "Tokenizer": "cl100k",
// How many tokens the model can receive in input and generate in output
// See https://docs.anthropic.com/claude/docs/models-overview
"MaxTokenIn": 200000,
@@ -326,10 +328,17 @@
"Auth": "AzureIdentity",
"Endpoint": "https://<...>.openai.azure.com/",
"APIKey": "",
+ // Your Azure Deployment name
"Deployment": "",
// The max number of tokens supported by model deployed
// See https://learn.microsoft.com/azure/ai-services/openai/concepts/models
"MaxTokenTotal": 8191,
+ // Which tokenizer to use to correctly measure the size of chunks.
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
+ // - Use p50k for the old text-davinci-003 models
+ // - Use cl100k for the old gpt-3.5 and gpt-4 family, and for text embedding models
+ // - Use o200k for the most recent gpt-4o family
+ "Tokenizer": "cl100k",
// The number of dimensions output embeddings should have.
// Only supported in "text-embedding-3" and later models developed with
// MRL, see https://arxiv.org/abs/2205.13147
@@ -355,6 +364,12 @@
// The max number of tokens supported by model deployed
// See https://learn.microsoft.com/azure/ai-services/openai/concepts/models
"MaxTokenTotal": 16384,
+ // Which tokenizer to use to correctly measure the size of chunks.
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
+ // - Use p50k for the old text-davinci-003 models
+ // - Use cl100k for the old gpt-3.5 and gpt-4 family, and for text embedding models
+ // - Use o200k for the most recent gpt-4o family
+ "Tokenizer": "o200k",
// "ChatCompletion" or "TextCompletion"
"APIType": "ChatCompletion",
// How many times to retry in case of throttling.
@@ -427,6 +442,8 @@
"Endpoint": "http://localhost:11434",
"TextModel": {
"ModelName": "phi3:medium-128k",
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
+ "Tokenizer": "cl100k",
"MaxTokenTotal": 131072,
// How many requests can be processed in parallel
"MaxBatchSize": 1
@@ -482,6 +499,8 @@
},
"EmbeddingModel": {
"ModelName": "nomic-embed-text",
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
+ "Tokenizer": "cl100k",
"MaxTokenTotal": 2048,
// How many requests can be processed in parallel
"MaxBatchSize": 1
@@ -541,6 +560,8 @@
"TextModel": "gpt-4o-mini",
// The max number of tokens supported by the text model.
"TextModelMaxTokenTotal": 16384,
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty for autodetect.
+ "TextModelTokenizer": "",
// What type of text generation, by default autodetect using the model name.
// Possible values: "Auto", "TextCompletion", "Chat"
"TextGenerationType": "Auto",
@@ -549,6 +570,8 @@
// The max number of tokens supported by the embedding model
// See https://platform.openai.com/docs/guides/embeddings/what-are-embeddings
"EmbeddingModelMaxTokenTotal": 8191,
+ // Supported values: "p50k", "cl100k", "o200k". Leave it empty for autodetect.
+ "EmbeddingModelTokenizer": "",
// OpenAI API Key
"APIKey": "",
// OpenAI Organization ID (usually empty, unless you have multiple accounts on different orgs)
diff --git a/service/tests/Core.FunctionalTests/DefaultTestCases/DocumentUploadTest.cs b/service/tests/Core.FunctionalTests/DefaultTestCases/DocumentUploadTest.cs
index 7848f36d5..7123609f1 100644
--- a/service/tests/Core.FunctionalTests/DefaultTestCases/DocumentUploadTest.cs
+++ b/service/tests/Core.FunctionalTests/DefaultTestCases/DocumentUploadTest.cs
@@ -53,7 +53,7 @@ await memory.ImportDocumentAsync(
var count = 0;
while (!await memory.IsDocumentReadyAsync(documentId: Id))
{
- Assert.True(count++ <= 30, "Document import timed out");
+ Assert.True(count++ <= 60, "Document import timed out");
log("Waiting for memory ingestion to complete...");
await Task.Delay(TimeSpan.FromSeconds(1));
}
diff --git a/tools/InteractiveSetup/Services/AzureOpenAIEmbedding.cs b/tools/InteractiveSetup/Services/AzureOpenAIEmbedding.cs
index 9635e371d..0d07c531f 100644
--- a/tools/InteractiveSetup/Services/AzureOpenAIEmbedding.cs
+++ b/tools/InteractiveSetup/Services/AzureOpenAIEmbedding.cs
@@ -21,6 +21,7 @@ public static void Setup(Context ctx, bool force = false)
{ "APIType", "EmbeddingGeneration" },
{ "Endpoint", "" },
{ "Deployment", "" },
+ { "Tokenizer", "cl100k" },
{ "Auth", "ApiKey" },
{ "APIKey", "" },
};
@@ -47,5 +48,6 @@ public static void Setup(Context ctx, bool force = false)
AppSettings.Change(x => x.Services[ServiceName]["APIType"] = "EmbeddingGeneration");
AppSettings.Change(x => x.Services[ServiceName]["Endpoint"] = SetupUI.AskOpenQuestion("Azure OpenAI ", config["Endpoint"].ToString()));
AppSettings.Change(x => x.Services[ServiceName]["Deployment"] = SetupUI.AskOpenQuestion("Azure OpenAI ", config["Deployment"].ToString()));
+ AppSettings.Change(x => x.Services[ServiceName]["Tokenizer"] = SetupUI.AskOpenQuestion("Tokenizer (p50k/cl100k/o200k)", config["Tokenizer"].ToString()));
}
}
diff --git a/tools/InteractiveSetup/Services/AzureOpenAIText.cs b/tools/InteractiveSetup/Services/AzureOpenAIText.cs
index 00b9dec09..cdc3e035a 100644
--- a/tools/InteractiveSetup/Services/AzureOpenAIText.cs
+++ b/tools/InteractiveSetup/Services/AzureOpenAIText.cs
@@ -21,6 +21,7 @@ public static void Setup(Context ctx, bool force = false)
{ "APIType", "ChatCompletion" },
{ "Endpoint", "" },
{ "Deployment", "" },
+ { "Tokenizer", "o200k" },
{ "Auth", "ApiKey" },
{ "APIKey", "" },
};
@@ -47,5 +48,6 @@ public static void Setup(Context ctx, bool force = false)
AppSettings.Change(x => x.Services[ServiceName]["APIType"] = "ChatCompletion");
AppSettings.Change(x => x.Services[ServiceName]["Endpoint"] = SetupUI.AskOpenQuestion("Azure OpenAI ", config["Endpoint"].ToString()));
AppSettings.Change(x => x.Services[ServiceName]["Deployment"] = SetupUI.AskOpenQuestion("Azure OpenAI ", config["Deployment"].ToString()));
+ AppSettings.Change(x => x.Services[ServiceName]["Tokenizer"] = SetupUI.AskOpenQuestion("Tokenizer (p50k/cl100k/o200k)", config["Tokenizer"].ToString()));
}
}