From 0e4c5ee95e83545ec10d717578a6870595e59802 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Thu, 4 Jul 2024 14:53:42 +0100 Subject: [PATCH] Allow custom serialization options to be passed to TextMemoryPlugin to allow non-latin script to be serialized without escaping. --- ...ugin_RecallJsonSerializationWithOptions.cs | 80 +++++++++++++++++++ dotnet/samples/Concepts/README.md | 1 + .../Plugins.Memory/TextMemoryPlugin.cs | 12 ++- 3 files changed, 90 insertions(+), 3 deletions(-) create mode 100644 dotnet/samples/Concepts/Memory/TextMemoryPlugin_RecallJsonSerializationWithOptions.cs diff --git a/dotnet/samples/Concepts/Memory/TextMemoryPlugin_RecallJsonSerializationWithOptions.cs b/dotnet/samples/Concepts/Memory/TextMemoryPlugin_RecallJsonSerializationWithOptions.cs new file mode 100644 index 000000000000..fbc313adebf4 --- /dev/null +++ b/dotnet/samples/Concepts/Memory/TextMemoryPlugin_RecallJsonSerializationWithOptions.cs @@ -0,0 +1,80 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Text.Encodings.Web; +using System.Text.Json; +using System.Text.Unicode; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Connectors.OpenAI; +using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Plugins.Memory; + +namespace Memory; + +/// <summary> +/// This example shows how to use custom <see cref="JsonSerializerOptions"/> when serializing multiple results during recall using <see cref="TextMemoryPlugin"/>. +/// </summary> +/// <remarks> +/// When multiple results are returned during recall, <see cref="TextMemoryPlugin"/> has to turn these results into a string to pass back to the kernel. +/// The <see cref="TextMemoryPlugin"/> uses <see cref="JsonSerializer"/> to turn the results into a string. +/// In some cases though, the default serialization options may not work, e.g. if the memories contain non-latin text, +/// <see cref="JsonSerializer"/> will escape these characters by default. In this case, you can provide custom <see cref="JsonSerializerOptions"/> to the <see cref="TextMemoryPlugin"/> to control how the memories are serialized. 
+/// </remarks> +public class TextMemoryPlugin_RecallJsonSerializationWithOptions(ITestOutputHelper output) : BaseTest(output) +{ + [Fact] + public async Task RunAsync() + { + // Create a Kernel. + var kernelWithoutOptions = Kernel.CreateBuilder() + .Build(); + + // Create an embedding generator to use for semantic memory. + var embeddingGenerator = new AzureOpenAITextEmbeddingGenerationService(TestConfiguration.AzureOpenAIEmbeddings.DeploymentName, TestConfiguration.AzureOpenAIEmbeddings.Endpoint, TestConfiguration.AzureOpenAIEmbeddings.ApiKey); + + // Using an in memory store for this example. + var memoryStore = new VolatileMemoryStore(); + + // The combination of the text embedding generator and the memory store makes up the 'SemanticTextMemory' object used to + // store and retrieve memories. + SemanticTextMemory textMemory = new(memoryStore, embeddingGenerator); + await textMemory.SaveInformationAsync("samples", "First example of some text in Thai and Bengali: วรรณยุกต์ চলিতভাষা", "test-record-1"); + await textMemory.SaveInformationAsync("samples", "Second example of some text in Thai and Bengali: วรรณยุกต์ চলিতভাষা", "test-record-2"); + + // Import the TextMemoryPlugin into the Kernel without any custom JsonSerializerOptions. + var memoryPluginWithoutOptions = kernelWithoutOptions.ImportPluginFromObject(new TextMemoryPlugin(textMemory)); + + // Retrieve the memories using the TextMemoryPlugin. 
+ var resultWithoutOptions = await kernelWithoutOptions.InvokeAsync(memoryPluginWithoutOptions["Recall"], new() + { + [TextMemoryPlugin.InputParam] = "Text examples", + [TextMemoryPlugin.CollectionParam] = "samples", + [TextMemoryPlugin.LimitParam] = "2", + [TextMemoryPlugin.RelevanceParam] = "0.79", + }); + + // The recall operation returned the following text, where the Thai and Bengali text was escaped: + // ["Second example of some text in Thai and Bengali: \u0E27\u0E23\u0E23\u0E13\u0E22\u0E38\u0E01\u0E15\u0E4C \u099A\u09B2\u09BF\u09A4\u09AD\u09BE\u09B7\u09BE","First example of some text in Thai and Bengali: \u0E27\u0E23\u0E23\u0E13\u0E22\u0E38\u0E01\u0E15\u0E4C \u099A\u09B2\u09BF\u09A4\u09AD\u09BE\u09B7\u09BE"] + Console.WriteLine(resultWithoutOptions.GetValue<string>()); + + // Create a Kernel. + var kernelWithOptions = Kernel.CreateBuilder() + .Build(); + + // Import the TextMemoryPlugin into the Kernel with custom JsonSerializerOptions that allow Thai and Bengali script to be serialized unescaped. + var options = new JsonSerializerOptions { Encoder = JavaScriptEncoder.Create(UnicodeRanges.BasicLatin, UnicodeRanges.Thai, UnicodeRanges.Bengali) }; + var memoryPluginWithOptions = kernelWithOptions.ImportPluginFromObject(new TextMemoryPlugin(textMemory, jsonSerializerOptions: options)); + + // Retrieve the memories using the TextMemoryPlugin. 
+ var result = await kernelWithOptions.InvokeAsync(memoryPluginWithOptions["Recall"], new() + { + [TextMemoryPlugin.InputParam] = "Text examples", + [TextMemoryPlugin.CollectionParam] = "samples", + [TextMemoryPlugin.LimitParam] = "2", + [TextMemoryPlugin.RelevanceParam] = "0.79", + }); + + // The recall operation returned the following text, where the Thai and Bengali text was not escaped: + // ["Second example of some text in Thai and Bengali: วรรณยุกต์ চলিতভাষা","First example of some text in Thai and Bengali: วรรณยุกต์ চলিতভাষা"] + Console.WriteLine(result.GetValue<string>()); + } +} diff --git a/dotnet/samples/Concepts/README.md b/dotnet/samples/Concepts/README.md index afb151337576..4bdfbd064d39 100644 --- a/dotnet/samples/Concepts/README.md +++ b/dotnet/samples/Concepts/README.md @@ -102,6 +102,7 @@ Down below you can find the code snippets that demonstrate the usage of many Sem - [TextChunkingAndEmbedding](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/TextChunkingAndEmbedding.cs) - [TextMemoryPlugin_GeminiEmbeddingGeneration](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/TextMemoryPlugin_GeminiEmbeddingGeneration.cs) - [TextMemoryPlugin_MultipleMemoryStore](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/TextMemoryPlugin_MultipleMemoryStore.cs) +- [TextMemoryPlugin_RecallJsonSerializationWithOptions](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/TextMemoryPlugin_RecallJsonSerializationWithOptions.cs) ## Optimization - Examples of different cost and performance optimization techniques diff --git a/dotnet/src/Plugins/Plugins.Memory/TextMemoryPlugin.cs b/dotnet/src/Plugins/Plugins.Memory/TextMemoryPlugin.cs index 18a64bc3c4c8..946aea828692 100644 --- a/dotnet/src/Plugins/Plugins.Memory/TextMemoryPlugin.cs +++ b/dotnet/src/Plugins/Plugins.Memory/TextMemoryPlugin.cs @@ -49,16 +49,22 @@ public sealed class 
TextMemoryPlugin private readonly ISemanticTextMemory _memory; private readonly ILogger _logger; + private readonly JsonSerializerOptions? _jsonSerializerOptions; /// <summary> - /// Creates a new instance of the TextMemoryPlugin + /// Initializes a new instance of the <see cref="TextMemoryPlugin"/> class. /// </summary> + /// <param name="memory">The <see cref="ISemanticTextMemory"/> instance to use for retrieving and saving memories to and from storage.</param> + /// <param name="loggerFactory">The <see cref="ILoggerFactory"/> to use for logging. If null, no logging will be performed.</param> + /// <param name="jsonSerializerOptions">An optional <see cref="JsonSerializerOptions"/> to use when turning multiple memories into json text. If null, <see cref="JsonSerializerOptions.Default"/> is used.</param> public TextMemoryPlugin( ISemanticTextMemory memory, - ILoggerFactory? loggerFactory = null) + ILoggerFactory? loggerFactory = null, + JsonSerializerOptions? jsonSerializerOptions = null) { this._memory = memory; this._logger = loggerFactory?.CreateLogger(typeof(TextMemoryPlugin)) ?? NullLogger.Instance; + this._jsonSerializerOptions = jsonSerializerOptions ?? JsonSerializerOptions.Default; } /// <summary> @@ -128,7 +134,7 @@ public async Task<string> RecallAsync( return string.Empty; } - return limit == 1 ? memories[0].Metadata.Text : JsonSerializer.Serialize(memories.Select(x => x.Metadata.Text)); + return limit == 1 ? memories[0].Metadata.Text : JsonSerializer.Serialize(memories.Select(x => x.Metadata.Text), this._jsonSerializerOptions); } /// <summary>