diff --git a/.gitignore b/.gitignore index 663e4f27f037..43c769a2c0f3 100644 --- a/.gitignore +++ b/.gitignore @@ -446,3 +446,12 @@ _site # Yarn .yarn .yarnrc.yml + +# Python Environments +.env +.venv +.myenv +env/ +venv/ +myvenv/ +ENV/ diff --git a/dotnet/src/Connectors/Connectors.UnitTests/Connectors.UnitTests.csproj b/dotnet/src/Connectors/Connectors.UnitTests/Connectors.UnitTests.csproj index 408c2da4a266..ad5180c8bbd9 100644 --- a/dotnet/src/Connectors/Connectors.UnitTests/Connectors.UnitTests.csproj +++ b/dotnet/src/Connectors/Connectors.UnitTests/Connectors.UnitTests.csproj @@ -24,4 +24,13 @@ + + + Always + + + Always + + + diff --git a/dotnet/src/Connectors/Connectors.UnitTests/HuggingFace/HuggingFaceTestHelper.cs b/dotnet/src/Connectors/Connectors.UnitTests/HuggingFace/HuggingFaceTestHelper.cs new file mode 100644 index 000000000000..70d494b78f01 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.UnitTests/HuggingFace/HuggingFaceTestHelper.cs @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.IO; +using System.Net.Http; +using System.Threading; +using System.Threading.Tasks; +using Moq; +using Moq.Protected; + +namespace SemanticKernel.Connectors.UnitTests.HuggingFace; + +/// <summary> +/// Helper for HuggingFace test purposes. +/// </summary> +internal static class HuggingFaceTestHelper +{ + /// <summary> + /// Reads test response from file for mocking purposes. + /// </summary> + /// <param name="fileName">Name of the file with test response, looked up under the relative path ./HuggingFace/TestData/.</param> + internal static string GetTestResponse(string fileName) + { + return File.ReadAllText($"./HuggingFace/TestData/{fileName}"); + } + + /// <summary> + /// Returns mocked instance of <see cref="HttpClientHandler"/> whose protected SendAsync returns the given message for any request. + /// </summary> + /// <param name="httpResponseMessage">Message to return for mocked <see cref="HttpClientHandler"/>.</param> 
+ internal static HttpClientHandler GetHttpClientHandlerMock(HttpResponseMessage httpResponseMessage) + { + var httpClientHandler = new Mock<HttpClientHandler>(); + + httpClientHandler + .Protected() + .Setup<Task<HttpResponseMessage>>( + "SendAsync", + ItExpr.IsAny<HttpRequestMessage>(), + ItExpr.IsAny<CancellationToken>()) + .ReturnsAsync(httpResponseMessage); + + return httpClientHandler.Object; + } +} diff --git a/dotnet/src/Connectors/Connectors.UnitTests/HuggingFace/TestData/completion_test_response.json b/dotnet/src/Connectors/Connectors.UnitTests/HuggingFace/TestData/completion_test_response.json new file mode 100644 index 000000000000..e6c7a94a93a3 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.UnitTests/HuggingFace/TestData/completion_test_response.json @@ -0,0 +1,5 @@ +[ + { + "generated_text": "This is test completion response" + } +] \ No newline at end of file diff --git a/dotnet/src/Connectors/Connectors.UnitTests/HuggingFace/TestData/embeddings_test_response.json b/dotnet/src/Connectors/Connectors.UnitTests/HuggingFace/TestData/embeddings_test_response.json new file mode 100644 index 000000000000..3ccb846673b3 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.UnitTests/HuggingFace/TestData/embeddings_test_response.json @@ -0,0 +1,23 @@ +{ + "data": [ + { + "embedding": [ + -0.08541165292263031, + 0.08639130741357803, + -0.12805694341659546, + -0.2877824902534485, + 0.2114177942276001, + -0.29374566674232483, + -0.10496602207422256, + 0.009402364492416382 + ], + "index": 0, + "object": "embedding" + } + ], + "object": "list", + "usage": { + "prompt_tokens": 15, + "total_tokens": 15 + } +} \ No newline at end of file diff --git a/dotnet/src/Connectors/Connectors.UnitTests/HuggingFace/TextCompletion/HuggingFaceTextCompletionTests.cs b/dotnet/src/Connectors/Connectors.UnitTests/HuggingFace/TextCompletion/HuggingFaceTextCompletionTests.cs new file mode 100644 index 000000000000..cec57a896a87 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.UnitTests/HuggingFace/TextCompletion/HuggingFaceTextCompletionTests.cs @@ -0,0 +1,73 
@@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Net; +using System.Net.Http; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.AI.TextCompletion; +using Microsoft.SemanticKernel.Connectors.HuggingFace.TextCompletion; +using Xunit; + +namespace SemanticKernel.Connectors.UnitTests.HuggingFace.TextCompletion; + +/// +/// Unit tests for class. +/// +public class HuggingFaceTextCompletionTests : IDisposable +{ + private const string Endpoint = "http://localhost:5000/completions"; + private const string Model = "gpt2"; + + private readonly HttpResponseMessage _response = new() + { + StatusCode = HttpStatusCode.OK, + }; + + /// + /// Verifies that + /// returns expected completed text without errors. + /// + [Fact] + public async Task ItReturnsCompletionCorrectlyAsync() + { + // Arrange + const string prompt = "This is test"; + CompleteRequestSettings requestSettings = new(); + + using var service = this.CreateService(HuggingFaceTestHelper.GetTestResponse("completion_test_response.json")); + + // Act + var completion = await service.CompleteAsync(prompt, requestSettings); + + // Assert + Assert.Equal("This is test completion response", completion); + } + + /// + /// Initializes with mocked . + /// + /// Test response for to return. 
+ private HuggingFaceTextCompletion CreateService(string testResponse) + { + this._response.Content = new StringContent(testResponse); + + var httpClientHandler = HuggingFaceTestHelper.GetHttpClientHandlerMock(this._response); + + return new HuggingFaceTextCompletion(new Uri(Endpoint), Model, httpClientHandler); + } + + public void Dispose() + { + this.Dispose(true); + GC.SuppressFinalize(this); + } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + this._response.Dispose(); + } + } +} diff --git a/dotnet/src/Connectors/Connectors.UnitTests/HuggingFace/TextEmbedding/HuggingFaceEmbeddingGenerationTests.cs b/dotnet/src/Connectors/Connectors.UnitTests/HuggingFace/TextEmbedding/HuggingFaceEmbeddingGenerationTests.cs new file mode 100644 index 000000000000..f66021336b71 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.UnitTests/HuggingFace/TextEmbedding/HuggingFaceEmbeddingGenerationTests.cs @@ -0,0 +1,76 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Net; +using System.Net.Http; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Connectors.HuggingFace.TextEmbedding; +using Xunit; + +namespace SemanticKernel.Connectors.UnitTests.HuggingFace.TextEmbedding; + +/// +/// Unit tests for class. +/// +public class HuggingFaceEmbeddingGenerationTests : IDisposable +{ + private const string Endpoint = "http://localhost:5000/embeddings"; + private const string Model = "gpt2"; + + private readonly HttpResponseMessage _response = new() + { + StatusCode = HttpStatusCode.OK, + }; + + /// + /// Verifies that + /// returns expected list of generated embeddings without errors. 
+ /// + [Fact] + public async Task ItReturnsEmbeddingsCorrectlyAsync() + { + // Arrange + const int expectedEmbeddingCount = 1; + const int expectedVectorCount = 8; + List data = new() { "test_string_1", "test_string_2", "test_string_3" }; + + using var service = this.CreateService(HuggingFaceTestHelper.GetTestResponse("embeddings_test_response.json")); + + // Act + var embeddings = await service.GenerateEmbeddingsAsync(data); + + // Assert + Assert.NotNull(embeddings); + Assert.Equal(expectedEmbeddingCount, embeddings.Count); + Assert.Equal(expectedVectorCount, embeddings.First().Count); + } + + /// + /// Initializes with mocked . + /// + /// Test response for to return. + private HuggingFaceTextEmbeddingGeneration CreateService(string testResponse) + { + this._response.Content = new StringContent(testResponse); + + var httpClientHandler = HuggingFaceTestHelper.GetHttpClientHandlerMock(this._response); + + return new HuggingFaceTextEmbeddingGeneration(new Uri(Endpoint), Model, httpClientHandler); + } + + public void Dispose() + { + this.Dispose(true); + GC.SuppressFinalize(this); + } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + this._response.Dispose(); + } + } +} diff --git a/dotnet/src/SemanticKernel.IntegrationTests/Connectors/HuggingFace/TextCompletion/HuggingFaceTextCompletionTests.cs b/dotnet/src/SemanticKernel.IntegrationTests/Connectors/HuggingFace/TextCompletion/HuggingFaceTextCompletionTests.cs new file mode 100644 index 000000000000..c63e83160a13 --- /dev/null +++ b/dotnet/src/SemanticKernel.IntegrationTests/Connectors/HuggingFace/TextCompletion/HuggingFaceTextCompletionTests.cs @@ -0,0 +1,57 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Threading.Tasks; +using Microsoft.Extensions.Configuration; +using Microsoft.SemanticKernel.AI.TextCompletion; +using Microsoft.SemanticKernel.Connectors.HuggingFace.TextCompletion; +using Xunit; + +namespace SemanticKernel.IntegrationTests.Connectors.HuggingFace.TextCompletion; + +/// +/// Integration tests for . +/// +public sealed class HuggingFaceTextCompletionTests +{ + private const string Endpoint = "http://localhost:5000/completions"; + private const string Model = "gpt2"; + + private readonly IConfigurationRoot _configuration; + + public HuggingFaceTextCompletionTests() + { + // Load configuration + this._configuration = new ConfigurationBuilder() + .AddJsonFile(path: "testsettings.json", optional: false, reloadOnChange: true) + .AddJsonFile(path: "testsettings.development.json", optional: true, reloadOnChange: true) + .AddEnvironmentVariables() + .Build(); + } + + [Fact(Skip = "This test is for manual verification.")] + public async Task HuggingFaceLocalAndRemoteTextCompletionAsync() + { + // Arrange + const string input = "This is test"; + + using var huggingFaceLocal = new HuggingFaceTextCompletion(new Uri(Endpoint), Model); + using var huggingFaceRemote = new HuggingFaceTextCompletion(this.GetApiKey(), Model); + + // Act + var localResponse = await huggingFaceLocal.CompleteAsync(input, new CompleteRequestSettings()).ConfigureAwait(false); + var remoteResponse = await huggingFaceRemote.CompleteAsync(input, new CompleteRequestSettings()).ConfigureAwait(false); + + // Assert + Assert.NotNull(localResponse); + Assert.NotNull(remoteResponse); + + Assert.StartsWith(input, localResponse, StringComparison.InvariantCulture); + Assert.StartsWith(input, remoteResponse, StringComparison.InvariantCulture); + } + + private string GetApiKey() + { + return this._configuration.GetSection("HuggingFace:ApiKey").Get()!; + } +} diff --git a/dotnet/src/SemanticKernel.IntegrationTests/README.md 
b/dotnet/src/SemanticKernel.IntegrationTests/README.md index ba4b0ed94ef3..2b404118e507 100644 --- a/dotnet/src/SemanticKernel.IntegrationTests/README.md +++ b/dotnet/src/SemanticKernel.IntegrationTests/README.md @@ -5,7 +5,8 @@ 1. **Azure OpenAI**: go to the [Azure OpenAI Quickstart](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/quickstart) and deploy an instance of Azure OpenAI, deploy a model like "text-davinci-003" find your Endpoint and API key. 2. **OpenAI**: go to [OpenAI](https://openai.com/api/) to register and procure your API key. -3. **Azure Bing Web Search API**: go to [Bing Web Seach API](https://www.microsoft.com/en-us/bing/apis/bing-web-search-api) +3. **HuggingFace API key**: see https://huggingface.co/docs/huggingface_hub/guides/inference for details. +4. **Azure Bing Web Search API**: go to [Bing Web Search API](https://www.microsoft.com/en-us/bing/apis/bing-web-search-api) and select `Try Now` to get started. ## Setup @@ -42,6 +43,9 @@ For example: "Endpoint": "https://contoso.openai.azure.com/", "ApiKey": "...." }, + "HuggingFace": { + "ApiKey": "" + }, "Bing": { "ApiKey": "...." } @@ -58,6 +62,7 @@ For example: export AzureOpenAI__DeploymentName="azure-text-davinci-003" export AzureOpenAIEmbeddings__DeploymentName="azure-text-embedding-ada-002" export AzureOpenAI__Endpoint="https://contoso.openai.azure.com/" + export HuggingFace__ApiKey="...." export Bing__ApiKey="...." ``` @@ -69,5 +74,6 @@ For example: $env:AzureOpenAI__DeploymentName = "azure-text-davinci-003" $env:AzureOpenAIEmbeddings__DeploymentName = "azure-text-embedding-ada-002" $env:AzureOpenAI__Endpoint = "https://contoso.openai.azure.com/" + $env:HuggingFace__ApiKey = "...." $env:Bing__ApiKey = "...." 
``` diff --git a/dotnet/src/SemanticKernel.IntegrationTests/SemanticKernel.IntegrationTests.csproj b/dotnet/src/SemanticKernel.IntegrationTests/SemanticKernel.IntegrationTests.csproj index 3ab7d55d937e..189f0be52f4f 100644 --- a/dotnet/src/SemanticKernel.IntegrationTests/SemanticKernel.IntegrationTests.csproj +++ b/dotnet/src/SemanticKernel.IntegrationTests/SemanticKernel.IntegrationTests.csproj @@ -42,4 +42,8 @@ + + + + diff --git a/dotnet/src/SemanticKernel.IntegrationTests/testsettings.json b/dotnet/src/SemanticKernel.IntegrationTests/testsettings.json index 2954fa8de7d1..3e80c2f596ad 100644 --- a/dotnet/src/SemanticKernel.IntegrationTests/testsettings.json +++ b/dotnet/src/SemanticKernel.IntegrationTests/testsettings.json @@ -21,6 +21,9 @@ "Endpoint": "", "ApiKey": "" }, + "HuggingFace": { + "ApiKey": "" + }, "Bing": { "ApiKey": "" } diff --git a/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextCompletion/HuggingFaceTextCompletion.cs b/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextCompletion/HuggingFaceTextCompletion.cs new file mode 100644 index 000000000000..d16d93021165 --- /dev/null +++ b/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextCompletion/HuggingFaceTextCompletion.cs @@ -0,0 +1,154 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Net.Http; +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.AI; +using Microsoft.SemanticKernel.AI.TextCompletion; +using Microsoft.SemanticKernel.Diagnostics; + +namespace Microsoft.SemanticKernel.Connectors.HuggingFace.TextCompletion; + +/// +/// HuggingFace text completion service. 
+/// +public sealed class HuggingFaceTextCompletion : ITextCompletion, IDisposable +{ + private const string HttpUserAgent = "Microsoft Semantic Kernel"; + private const string HuggingFaceApiEndpoint = "https://api-inference.huggingface.co/models"; + + private readonly string _model; + private readonly Uri _endpoint; + private readonly HttpClient _httpClient; + private readonly HttpClientHandler? _httpClientHandler; + + /// + /// Initializes a new instance of the class. + /// + /// Endpoint for service API call. + /// Model to use for service API call. + /// Instance of to setup specific scenarios. + public HuggingFaceTextCompletion(Uri endpoint, string model, HttpClientHandler httpClientHandler) + { + Verify.NotNull(endpoint, "Endpoint cannot be null."); + Verify.NotEmpty(model, "Model cannot be empty."); + + this._endpoint = endpoint; + this._model = model; + + this._httpClient = new(httpClientHandler); + + this._httpClient.DefaultRequestHeaders.Add("User-Agent", HttpUserAgent); + } + + /// + /// Initializes a new instance of the class. + /// Using default implementation. + /// + /// Endpoint for service API call. + /// Model to use for service API call. + public HuggingFaceTextCompletion(Uri endpoint, string model) + { + Verify.NotNull(endpoint, "Endpoint cannot be null."); + Verify.NotEmpty(model, "Model cannot be empty."); + + this._endpoint = endpoint; + this._model = model; + + this._httpClientHandler = new() { CheckCertificateRevocationList = true }; + this._httpClient = new(this._httpClientHandler); + + this._httpClient.DefaultRequestHeaders.Add("User-Agent", HttpUserAgent); + } + + /// + /// Initializes a new instance of the class. + /// Using HuggingFace API for service call, see https://huggingface.co/docs/api-inference/index. + /// + /// HuggingFace API key, see https://huggingface.co/docs/api-inference/quicktour#running-inference-with-api-requests. + /// Model to use for service API call. + /// Instance of to setup specific scenarios. 
+ /// Endpoint for service API call. + public HuggingFaceTextCompletion(string apiKey, string model, HttpClientHandler httpClientHandler, string endpoint = HuggingFaceApiEndpoint) + : this(new Uri(endpoint), model, httpClientHandler) + { + Verify.NotEmpty(apiKey, "HuggingFace API key cannot be empty."); + + this._httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {apiKey}"); + } + + /// + /// Initializes a new instance of the class. + /// Using HuggingFace API for service call, see https://huggingface.co/docs/api-inference/index. + /// Using default implementation. + /// + /// HuggingFace API key, see https://huggingface.co/docs/api-inference/quicktour#running-inference-with-api-requests. + /// Model to use for service API call. + /// Endpoint for service API call. + public HuggingFaceTextCompletion(string apiKey, string model, string endpoint = HuggingFaceApiEndpoint) + : this(new Uri(endpoint), model) + { + Verify.NotEmpty(apiKey, "HuggingFace API key cannot be empty."); + + this._httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {apiKey}"); + } + + /// + public async Task CompleteAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default) + { + return await this.ExecuteCompleteRequestAsync(text, cancellationToken); + } + + /// + public void Dispose() + { + this._httpClient.Dispose(); + this._httpClientHandler?.Dispose(); + } + + #region private ================================================================================ + + /// + /// Performs HTTP request to given endpoint for text completion. + /// + /// Text to complete. + /// Cancellation token. + /// Completed text. + /// Exception when backend didn't respond with completed text. 
+ private async Task ExecuteCompleteRequestAsync(string text, CancellationToken cancellationToken = default) + { + try + { + var completionRequest = new TextCompletionRequest + { + Input = text + }; + + using var httpRequestMessage = new HttpRequestMessage() + { + Method = HttpMethod.Post, + RequestUri = new Uri($"{this._endpoint}/{this._model}"), + Content = new StringContent(JsonSerializer.Serialize(completionRequest)) + }; + + var response = await this._httpClient.SendAsync(httpRequestMessage, cancellationToken).ConfigureAwait(false); + var body = await response.Content.ReadAsStringAsync().ConfigureAwait(false); + + var completionResponse = JsonSerializer.Deserialize>(body); + + return completionResponse.First().Text!; + } + catch (Exception e) when (e is not AIException && !e.IsCriticalException()) + { + throw new AIException( + AIException.ErrorCodes.UnknownError, + $"Something went wrong: {e.Message}", e); + } + } + + #endregion +} diff --git a/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextCompletion/TextCompletionRequest.cs b/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextCompletion/TextCompletionRequest.cs new file mode 100644 index 000000000000..b2a656744266 --- /dev/null +++ b/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextCompletion/TextCompletionRequest.cs @@ -0,0 +1,19 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Text.Json.Serialization; + +namespace Microsoft.SemanticKernel.Connectors.HuggingFace.TextCompletion; + +/// +/// HTTP schema to perform completion request. +/// +[Serializable] +public sealed class TextCompletionRequest +{ + /// + /// Prompt to complete. 
+ /// + [JsonPropertyName("inputs")] + public string Input { get; set; } = string.Empty; +} diff --git a/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextCompletion/TextCompletionResponse.cs b/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextCompletion/TextCompletionResponse.cs new file mode 100644 index 000000000000..2678ec93808e --- /dev/null +++ b/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextCompletion/TextCompletionResponse.cs @@ -0,0 +1,17 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Text.Json.Serialization; + +namespace Microsoft.SemanticKernel.Connectors.HuggingFace.TextCompletion; + +/// +/// HTTP Schema for completion response. +/// +public sealed class TextCompletionResponse +{ + /// + /// Completed text. + /// + [JsonPropertyName("generated_text")] + public string? Text { get; set; } +} diff --git a/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextEmbedding/HuggingFaceTextEmbeddingGeneration.cs b/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextEmbedding/HuggingFaceTextEmbeddingGeneration.cs new file mode 100644 index 000000000000..4cc928d55066 --- /dev/null +++ b/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextEmbedding/HuggingFaceTextEmbeddingGeneration.cs @@ -0,0 +1,119 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Net.Http; +using System.Text.Json; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.AI; +using Microsoft.SemanticKernel.AI.Embeddings; +using Microsoft.SemanticKernel.Diagnostics; + +namespace Microsoft.SemanticKernel.Connectors.HuggingFace.TextEmbedding; + +/// +/// HuggingFace embedding generation service. 
+/// +public sealed class HuggingFaceTextEmbeddingGeneration : IEmbeddingGeneration, IDisposable +{ + private const string HttpUserAgent = "Microsoft Semantic Kernel"; + + private readonly string _model; + private readonly Uri _endpoint; + private readonly HttpClient _httpClient; + private readonly HttpClientHandler? _httpClientHandler; + + /// + /// Initializes a new instance of the class. + /// + /// Endpoint for service API call. + /// Model to use for service API call. + /// Instance of to setup specific scenarios. + public HuggingFaceTextEmbeddingGeneration(Uri endpoint, string model, HttpClientHandler httpClientHandler) + { + Verify.NotNull(endpoint, "Endpoint cannot be null."); + Verify.NotEmpty(model, "Model cannot be empty."); + + this._endpoint = endpoint; + this._model = model; + + this._httpClient = new(httpClientHandler); + + this._httpClient.DefaultRequestHeaders.Add("User-Agent", HttpUserAgent); + } + + /// + /// Initializes a new instance of the class. + /// Using default implementation. + /// + /// Endpoint for service API call. + /// Model to use for service API call. + public HuggingFaceTextEmbeddingGeneration(Uri endpoint, string model) + { + Verify.NotNull(endpoint, "Endpoint cannot be null."); + Verify.NotEmpty(model, "Model cannot be empty."); + + this._endpoint = endpoint; + this._model = model; + + this._httpClientHandler = new() { CheckCertificateRevocationList = true }; + this._httpClient = new(this._httpClientHandler); + + this._httpClient.DefaultRequestHeaders.Add("User-Agent", HttpUserAgent); + } + + /// + public async Task>> GenerateEmbeddingsAsync(IList data) + { + return await this.ExecuteEmbeddingRequestAsync(data); + } + + /// + public void Dispose() + { + this._httpClient.Dispose(); + this._httpClientHandler?.Dispose(); + } + + #region private ================================================================================ + + /// + /// Performs HTTP request to given endpoint for embedding generation. + /// + /// Data to embed. 
+ /// List of generated embeddings. + /// Exception when backend didn't respond with generated embeddings. + private async Task>> ExecuteEmbeddingRequestAsync(IList data) + { + try + { + var embeddingRequest = new TextEmbeddingRequest + { + Input = data + }; + + using var httpRequestMessage = new HttpRequestMessage() + { + Method = HttpMethod.Post, + RequestUri = new Uri($"{this._endpoint}/{this._model}"), + Content = new StringContent(JsonSerializer.Serialize(embeddingRequest)), + }; + + var response = await this._httpClient.SendAsync(httpRequestMessage).ConfigureAwait(false); + var body = await response.Content.ReadAsStringAsync().ConfigureAwait(false); + + var embeddingResponse = JsonSerializer.Deserialize(body); + + return embeddingResponse?.Embeddings?.Select(l => new Embedding(l.Embedding.ToArray())).ToList()!; + } + catch (Exception e) when (e is not AIException && !e.IsCriticalException()) + { + throw new AIException( + AIException.ErrorCodes.UnknownError, + $"Something went wrong: {e.Message}", e); + } + } + + #endregion +} diff --git a/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextEmbedding/TextEmbeddingRequest.cs b/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextEmbedding/TextEmbeddingRequest.cs new file mode 100644 index 000000000000..2b8cd227e179 --- /dev/null +++ b/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextEmbedding/TextEmbeddingRequest.cs @@ -0,0 +1,20 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Text.Json.Serialization; + +namespace Microsoft.SemanticKernel.Connectors.HuggingFace.TextEmbedding; + +/// +/// HTTP schema to perform embedding request. +/// +[Serializable] +public sealed class TextEmbeddingRequest +{ + /// + /// Data to embed. 
+ /// + [JsonPropertyName("inputs")] + public IList Input { get; set; } = new List(); +} diff --git a/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextEmbedding/TextEmbeddingResponse.cs b/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextEmbedding/TextEmbeddingResponse.cs new file mode 100644 index 000000000000..ee25f69ad9b4 --- /dev/null +++ b/dotnet/src/SemanticKernel/Connectors/HuggingFace/TextEmbedding/TextEmbeddingResponse.cs @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Collections.Generic; +using System.Text.Json.Serialization; + +namespace Microsoft.SemanticKernel.Connectors.HuggingFace.TextEmbedding; + +/// +/// HTTP Schema for embedding response. +/// +public sealed class TextEmbeddingResponse +{ + /// + /// Model containing embedding. + /// + public sealed class EmbeddingVector + { + [JsonPropertyName("embedding")] + public IList? Embedding { get; set; } + } + + /// + /// List of embeddings. + /// + [JsonPropertyName("data")] + public IList? Embeddings { get; set; } +} diff --git a/dotnet/src/SemanticKernel/SemanticKernel.csproj b/dotnet/src/SemanticKernel/SemanticKernel.csproj index c784a28f5bcf..6dcfdcc3616f 100644 --- a/dotnet/src/SemanticKernel/SemanticKernel.csproj +++ b/dotnet/src/SemanticKernel/SemanticKernel.csproj @@ -39,9 +39,6 @@ <_Parameter1>SemanticKernel.UnitTests - - <_Parameter1>Microsoft.SemanticKernel.Connectors.OpenAI - <_Parameter1>DynamicProxyGenAssembly2 diff --git a/samples/apps/hugging-face-http-server/Dockerfile b/samples/apps/hugging-face-http-server/Dockerfile new file mode 100644 index 000000000000..28a07b9365c4 --- /dev/null +++ b/samples/apps/hugging-face-http-server/Dockerfile @@ -0,0 +1,19 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +FROM huggingface/transformers-pytorch-gpu + +COPY ./requirements.txt /app/requirements.txt + +# switch working directory +WORKDIR /app + +# install the dependencies and packages in the requirements file +RUN pip install -r requirements.txt + +# copy every content from the local file to the image +COPY . /app + +# configure the container to run in an executed manner +ENTRYPOINT [ "python3" ] + +CMD ["inference_app.py" ] \ No newline at end of file diff --git a/samples/apps/hugging-face-http-server/README.md b/samples/apps/hugging-face-http-server/README.md new file mode 100644 index 000000000000..ababcc8e248d --- /dev/null +++ b/samples/apps/hugging-face-http-server/README.md @@ -0,0 +1,51 @@ +# Local Hugging Face Model Inference Server + +> [!IMPORTANT] +> This learning sample is for educational purposes only and should not be used in any production +> use case. It is intended to make Semantic Kernel features more accessible for scenarios that +> do not require an OpenAI or Azure OpenAI endpoint. + +This application provides an API service for interacting with models available +through [Hugging Face](https://huggingface.co/). The request bodies and responses +are modeled after OpenAI and Azure OpenAI for smooth transition to more capable LLMs. + +## Building the Sample Container + +`docker image build -t hf_model_server .` + +This step will take some minutes to download Docker image dependencies. + +## Running the Sample Container + +`docker run -p 5000:5000 -d hf_model_server` + +This will run the service at **`http://localhost:5000`**. Navigating to +**`http://localhost:5000`** in a browser window will provide instructions on how +to construct requests to the service. + +> [!IMPORTANT] +> If the model has not been cached (ex: first time calling it) the response can +> take some time due to the model being downloaded. +> Using this service to generate images can also take a very long time - a factor +> that scales with your hardware. 
+ +## Alternative: Bare-Metal + +Alternatively, the service can be started on bare-metal. To do this, you will +need to have Python 3.9 installed. + +Before proceeding, it is highly recommended that you create a Python 3.9 virtual +environment. + +Example: `python -m venv myvenv` or `python3 -m venv myvenv`. + +Make sure your environment is activated: + +For Windows, run in PowerShell: `./myvenv/Scripts/Activate`. +For Linux/macOS, run: `source myvenv/bin/activate`. + +Then, run `pip install -r requirements.txt`. + +Once all the required dependencies have been installed, you can run the service +using `python inference_app.py`. Navigating to **`http://localhost:5000`** in a +browser window will provide instructions on how to construct requests to the service. diff --git a/samples/apps/hugging-face-http-server/inference_app.py b/samples/apps/hugging-face-http-server/inference_app.py new file mode 100644 index 000000000000..9b140c000211 --- /dev/null +++ b/samples/apps/hugging-face-http-server/inference_app.py @@ -0,0 +1,128 @@ +# Copyright (c) Microsoft. All rights reserved. + +# Importing flask module in the project is mandatory +# An object of Flask class is our WSGI application. +from flask import Flask, request, json, redirect, url_for, render_template, jsonify +from utils import create_responses, CompletionGenerator, EmbeddingGenerator, ImageGenerator +import argparse + +# Flask constructor takes the name of +# current module (__name__) as argument. 
+app = Flask(__name__) + +@app.route('/') +def home(): + return render_template('home.html') + +@app.route('/docs') +def docs(): + return render_template('documentation.html') + +@app.route('/docs/completions') +def completions_docs(): + return render_template('completions.html') + +@app.route('/docs/embeddings') +def embeddings_docs(): + return render_template('embeddings.html') + +@app.route('/docs/images') +def images_docs(): + return render_template('images.html') + +@app.route('/completions/', methods=['POST']) +def receive_completion_by_model(model): + return process_completion_request(request, model) + +@app.route('/completions//', methods=['POST']) +def receive_completion_by_organization_model(organization, model): + return process_completion_request(request, f'{organization}/{model}') + +@app.route('/embeddings/', methods=['POST']) +def receive_embedding_by_model(model): + return process_embedding_request(request, model) + +@app.route('/embeddings//', methods=['POST']) +def receive_embedding_by_organization_model(organization, model): + return process_embedding_request(request, f'{organization}/{model}') + +@app.route('/images/generations/', methods=['POST']) +def receive_image_generation_by_model(model): + return process_image_generation_request(request, model) + +@app.route('/images/generations//', methods=['POST']) +def receive_image_generation_by_organization_model(organization, model): + return process_image_generation_request(request, f'{organization}/{model}') + +def process_completion_request(request, model): + request_data = request.data + json_data = json.loads(request_data) + try: + prompt = json_data["inputs"] + if "context" in json_data: + context = json_data["context"] + else: + context = "" + + if "max_tokens" in json_data: + max_tokens = json_data["max_tokens"] + else: + max_tokens = 32 + + inference_generator = CompletionGenerator.CompletionGenerator(model) + result, num_prompt_tokens, num_result_tokens = 
inference_generator.perform_inference(prompt, context, max_tokens) + return jsonify(create_responses.create_completion_response( + result, + model, + num_prompt_tokens, + num_result_tokens)) + except Exception as e: + print(e) + return ("Sorry, unable to perform sentence completion with model {}".format(model)) + +def process_embedding_request(request, model): + request_data = request.data + json_data = json.loads(request_data) + try: + sentences = json_data["inputs"] + inference_generator = EmbeddingGenerator.EmbeddingGenerator(model) + embeddings, num_prompt_tokens = inference_generator.perform_inference(sentences) + return jsonify(create_responses.create_embedding_response( + embeddings, + num_prompt_tokens)) + except Exception as e: + print(e) + return ("Sorry, unable to generate embeddings with model {}".format(model)) + +def process_image_generation_request(request, model): + request_data = request.data + json_data = json.loads(request_data) + num_images = json_data["n"] + prompt = json_data["inputs"] + image_size = json_data["size"] + try: + image_generator = ImageGenerator.ImageGenerator(model) + image_data = image_generator.perform_inference(prompt, num_images, image_size) + return jsonify(create_responses.create_image_gen_response(image_data)) + except Exception as e: + print(e) + return ("Sorry, unable to generate images with model {}".format(model)) + +# main driver function +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('-i', '--ip', + default='0.0.0.0', + help='ip address for flask server endpoint' + ) + parser.add_argument('-p', '--port', + default=5000, + help='port for flask server endpoint', + type=int, + ) + args = parser.parse_args() + + host_ip = args.ip + port = args.port + + app.run(host=host_ip, debug=True, port=port) diff --git a/samples/apps/hugging-face-http-server/requirements.txt b/samples/apps/hugging-face-http-server/requirements.txt new file mode 100644 index 000000000000..ac5b62201ab9 Binary 
files /dev/null and b/samples/apps/hugging-face-http-server/requirements.txt differ diff --git a/samples/apps/hugging-face-http-server/static/css/styles.css b/samples/apps/hugging-face-http-server/static/css/styles.css new file mode 100644 index 000000000000..eea906c30f2d --- /dev/null +++ b/samples/apps/hugging-face-http-server/static/css/styles.css @@ -0,0 +1,11 @@ +/* Copyright (c) Microsoft. All rights reserved. */ + +.code { + background-color: black; + height: auto; + width: auto; + border-style: solid; + font-family:'Courier New', Courier, monospace; + color: white; + font-size: 14px; +} diff --git a/samples/apps/hugging-face-http-server/templates/completions.html b/samples/apps/hugging-face-http-server/templates/completions.html new file mode 100644 index 000000000000..bf88cc859abd --- /dev/null +++ b/samples/apps/hugging-face-http-server/templates/completions.html @@ -0,0 +1,24 @@ + + +{% extends 'documentation.html' %} + +{% block body %} + +

Completions

+ +

Example Request

+
+

curl http://localhost:5000/completions/{model} \

+

-X POST \

+

-H "Content-Type: application/json" \

+

-d '{"inputs": "this is a test"}'

+
+ +

+ HF Text Generation Models +

+

+ HF Text Summarization Models +

+ +{% endblock %} diff --git a/samples/apps/hugging-face-http-server/templates/documentation.html b/samples/apps/hugging-face-http-server/templates/documentation.html new file mode 100644 index 000000000000..84d1384067a0 --- /dev/null +++ b/samples/apps/hugging-face-http-server/templates/documentation.html @@ -0,0 +1,27 @@ + + + + + + + + + Flask Docker + + +

API Documentation

+ Home + Completions + Embeddings + Images + + {% block body %} + + +

Documentation

+ + + {% endblock %} + + + \ No newline at end of file diff --git a/samples/apps/hugging-face-http-server/templates/embeddings.html b/samples/apps/hugging-face-http-server/templates/embeddings.html new file mode 100644 index 000000000000..15d81cf0d3b4 --- /dev/null +++ b/samples/apps/hugging-face-http-server/templates/embeddings.html @@ -0,0 +1,21 @@ + + +{% extends 'documentation.html' %} + +{% block body %} + +

Embeddings

+ +

Example Request

+
+

curl http://localhost:5000/embeddings/{model} \

+

-X POST \

+

-H "Content-Type: application/json" \

+

-d '{"inputs": ["test string 1", "test string 2", ...]}'

+
+ +

+ HF Text Embedding Models +

+ +{% endblock %} diff --git a/samples/apps/hugging-face-http-server/templates/home.html b/samples/apps/hugging-face-http-server/templates/home.html new file mode 100644 index 000000000000..e71ec5a53878 --- /dev/null +++ b/samples/apps/hugging-face-http-server/templates/home.html @@ -0,0 +1,14 @@ + + + + + + + + Flask Docker + + +

Your Hugging Face model server is running

+ Documentation + + \ No newline at end of file diff --git a/samples/apps/hugging-face-http-server/templates/images.html b/samples/apps/hugging-face-http-server/templates/images.html new file mode 100644 index 000000000000..eec18e540c35 --- /dev/null +++ b/samples/apps/hugging-face-http-server/templates/images.html @@ -0,0 +1,21 @@ + + +{% extends 'documentation.html' %} + +{% block body %} + +

Images

+ +

Example Request

+
+

curl http://localhost:5000/images/generations/{model} \

+

-X POST \

+

-H "Content-Type: application/json" \

+

-d '{"inputs": "a test image", "n": 1, "size": "256x256"}'

+
+ +

+ HF Text-to-Image Models +

+ +{% endblock %} diff --git a/samples/apps/hugging-face-http-server/utils/CompletionGenerator.py b/samples/apps/hugging-face-http-server/utils/CompletionGenerator.py new file mode 100644 index 000000000000..42369d1a0e0a --- /dev/null +++ b/samples/apps/hugging-face-http-server/utils/CompletionGenerator.py @@ -0,0 +1,33 @@ +# Copyright (c) Microsoft. All rights reserved. + +from . import InferenceGenerator +from transformers import AutoTokenizer, AutoModelForCausalLM + +# The model used to get the tokenizer can be a little arbitrary +# since the tokenizers are common within the same model type + +class CompletionGenerator(InferenceGenerator.InferenceGenerator): + def __init__(self, model_name): + super().__init__(model_name) + self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) + self.tokenizer.pad_token = self.tokenizer.eos_token + + def perform_inference(self, prompt, context, max_tokens): + model = AutoModelForCausalLM.from_pretrained(self.model_name, is_decoder=True) + model.to(self.device) + + encodings = self.tokenizer.encode_plus( + text = prompt, + text_pair = context, + truncation = True, + return_tensors= 'pt') + + generated_ids = model.generate( + encodings.input_ids, + max_length = max_tokens, + # num_beams = 5, + # temperature = 0.8, + no_repeat_ngram_size=4, + early_stopping=True) + + return self.tokenizer.decode(generated_ids[0]), encodings.input_ids.numel(), len(generated_ids[0]) diff --git a/samples/apps/hugging-face-http-server/utils/EmbeddingGenerator.py b/samples/apps/hugging-face-http-server/utils/EmbeddingGenerator.py new file mode 100644 index 000000000000..47495ca56fe5 --- /dev/null +++ b/samples/apps/hugging-face-http-server/utils/EmbeddingGenerator.py @@ -0,0 +1,32 @@ +# Copyright (c) Microsoft. All rights reserved. + +import torch +from . 
import InferenceGenerator +from transformers import AutoModel, AutoTokenizer + +class EmbeddingGenerator(InferenceGenerator.InferenceGenerator): + def __init__(self, model_name): + super().__init__(model_name) + self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) + self.tokenizer.pad_token = self.tokenizer.eos_token + + def _mean_pooling(self, model_output, attention_mask): + token_embeddings = model_output[0] #First element of model_output contains all token embeddings + input_mask_expanded = attention_mask.unsqueeze(-1).float() + x = torch.sum(token_embeddings * input_mask_expanded, 1) + y = torch.clamp(input_mask_expanded.sum(1), min=1e-9) + return (x / y) + + def perform_inference(self, sentences): + model = AutoModel.from_pretrained(self.model_name) + model.to(self.device) + + encodings = self.tokenizer( + sentences, + padding = True, + truncation = True, + return_tensors= 'pt') + + model_output = model(**encodings) + embeddings = self._mean_pooling(model_output, encodings['attention_mask']) + return embeddings, encodings.input_ids.numel() diff --git a/samples/apps/hugging-face-http-server/utils/ImageGenerator.py b/samples/apps/hugging-face-http-server/utils/ImageGenerator.py new file mode 100644 index 000000000000..3a6027ae1bff --- /dev/null +++ b/samples/apps/hugging-face-http-server/utils/ImageGenerator.py @@ -0,0 +1,35 @@ +# Copyright (c) Microsoft. All rights reserved. + +from diffusers import DiffusionPipeline +import base64 +from . 
import InferenceGenerator +from io import BytesIO +# The model used to get the tokenizer can be a little arbitrary +# since the tokenizers are common within the same model type + +class ImageGenerator(InferenceGenerator.InferenceGenerator): + def __init__(self, model_name): + super().__init__(model_name) + self.default_size = 512 + + def perform_inference(self, prompt, num_images, size): + generator = DiffusionPipeline.from_pretrained(self.model_name) + generator.to(self.device) + + height = self.default_size + width = self.default_size + + if size is not None: + tmp = size.split("x") + height = int(tmp[0]) + width = int(tmp[1]) + + images = generator([prompt] * num_images, height=height, width=width).images + + b64_images = [] + for image in images: + buffered = BytesIO() + image.save(buffered, format="PNG") + base64_image = base64.b64encode(buffered.getvalue()) + b64_images.append({"b64_json": base64_image.decode()}) + return b64_images diff --git a/samples/apps/hugging-face-http-server/utils/InferenceGenerator.py b/samples/apps/hugging-face-http-server/utils/InferenceGenerator.py new file mode 100644 index 000000000000..db5ba93eb9d4 --- /dev/null +++ b/samples/apps/hugging-face-http-server/utils/InferenceGenerator.py @@ -0,0 +1,14 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +import os +import torch +from transformers import AutoTokenizer, AutoModelForCausalLM + +# The model used to get the tokenizer can be a little arbitrary +# since the tokenizers are common within the same model type + +class InferenceGenerator(): + def __init__(self, model_name): + os.environ['TOKENIZERS_PARALLELISM'] = "false" + self.model_name = model_name + self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') diff --git a/samples/apps/hugging-face-http-server/utils/create_responses.py b/samples/apps/hugging-face-http-server/utils/create_responses.py new file mode 100644 index 000000000000..d92043e54761 --- /dev/null +++ b/samples/apps/hugging-face-http-server/utils/create_responses.py @@ -0,0 +1,43 @@ +# Copyright (c) Microsoft. All rights reserved. + +from datetime import datetime + +# These responses are modeled after the OpenAI REST API + +def create_completion_response(completion_text, model, num_prompt_tokens, num_completion_tokens): + data = [{ + "generated_text": completion_text + }] + return data + +def create_embedding_indices(embeddings): + index = 0 + data_entries = [] + for embedding in embeddings: + data_entries.append({ + "object": "embedding", + "index": index, + "embedding": embedding.tolist() + }) + index = index + 1 + return data_entries + +def create_embedding_response(embeddings, num_prompt_tokens): + data_entries = create_embedding_indices(embeddings) + data = { + "object": "list", + "data": data_entries, + "usage": { + "prompt_tokens": num_prompt_tokens, + "total_tokens": num_prompt_tokens + } + } + return data + + +def create_image_gen_response(image_data): + data = { + "created": datetime.now(), + "data": image_data + } + return data diff --git a/samples/dotnet/kernel-syntax-examples/Example20_HuggingFace.cs b/samples/dotnet/kernel-syntax-examples/Example20_HuggingFace.cs new file mode 100644 index 000000000000..68fca8c88557 --- /dev/null +++ b/samples/dotnet/kernel-syntax-examples/Example20_HuggingFace.cs @@ -0,0 
+1,33 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Threading.Tasks; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Connectors.HuggingFace.TextCompletion; +using RepoUtils; + +/** + * The following example shows how to use Semantic Kernel with HuggingFace API. + */ + +// ReSharper disable once InconsistentNaming +public static class Example20_HuggingFace +{ + public static async Task RunAsync() + { + Console.WriteLine("======== HuggingFace text completion AI ========"); + + IKernel kernel = new KernelBuilder().WithLogger(ConsoleLogger.Log).Build(); + + // Add HuggingFace text completion service + kernel.Config.AddTextCompletionService("hf-text-completion", (kernel) => new HuggingFaceTextCompletion(Env.Var("HF_API_KEY"), "gpt2")); + + const string FUNCTION_DEFINITION = "Question: {{$input}}; Answer:"; + + var questionAnswerFunction = kernel.CreateSemanticFunction(FUNCTION_DEFINITION); + + var result = await questionAnswerFunction.InvokeAsync("What is New York?"); + + Console.WriteLine(result); + } +} diff --git a/samples/dotnet/kernel-syntax-examples/Program.cs b/samples/dotnet/kernel-syntax-examples/Program.cs index 8799c3c1507e..4e0c993d56ea 100644 --- a/samples/dotnet/kernel-syntax-examples/Program.cs +++ b/samples/dotnet/kernel-syntax-examples/Program.cs @@ -65,6 +65,9 @@ public static async Task Main() await Example19_Qdrant.RunAsync(); Console.WriteLine("== DONE =="); + + await Example20_HuggingFace.RunAsync(); + Console.WriteLine("== DONE =="); } } #pragma warning restore CS1591