From 45681fb5e180bd9cd6e171d8f03882ea7a16e201 Mon Sep 17 00:00:00 2001 From: Craig Presti <146438+craigomatic@users.noreply.github.com> Date: Mon, 20 Mar 2023 07:01:48 -0700 Subject: [PATCH 1/2] Several changes to resolve PR feedback --- .../apps/github-qna-webapp-react/README.md | 26 +++----- .../src/components/GitHubRepoSelection.tsx | 12 ++++ .../src/components/QnA.tsx | 2 +- samples/dotnet/KernelHttpServer/Extensions.cs | 6 +- .../SemanticKernelEndpoint.cs | 1 + samples/dotnet/github-skills/GitHubSkill.cs | 63 +++++++++++++------ .../config.json | 10 +-- .../QASkill/GitHubMemoryQuery/skprompt.txt | 6 ++ .../skills/QASkill/MemoryQuery/skprompt.txt | 7 --- 9 files changed, 75 insertions(+), 58 deletions(-) rename samples/skills/QASkill/{MemoryQuery => GitHubMemoryQuery}/config.json (57%) create mode 100644 samples/skills/QASkill/GitHubMemoryQuery/skprompt.txt delete mode 100644 samples/skills/QASkill/MemoryQuery/skprompt.txt diff --git a/samples/apps/github-qna-webapp-react/README.md b/samples/apps/github-qna-webapp-react/README.md index eb832925ae24..b590b80489f0 100644 --- a/samples/apps/github-qna-webapp-react/README.md +++ b/samples/apps/github-qna-webapp-react/README.md @@ -8,34 +8,22 @@ ## Running the sample 1. You will need an [Open AI Key](https://openai.com/api/) or - [Azure Open AI Service key](https://learn.microsoft.com/azure/cognitive-services/openai/quickstart) - for this sample. -2. Ensure the service API is already running `http://localhost:7071`. If not learn - how to start it [here](../../dotnet/KernelHttpServer/README.md). -3. **Run** the following command `yarn install` (if you have never run the sample before) - and/or `yarn start` from the command line. + [Azure Open AI Service key](https://learn.microsoft.com/azure/cognitive-services/openai/quickstart) for this sample. +2. Ensure the service API is already running `http://localhost:7071`. If not learn how to start it [here](../../dotnet/KernelHttpServer/README.md). +3. 
**Run** the following command `yarn install` (if you have never run the sample before) and/or `yarn start` from the command line. 4. A browser will open or you can navigate to `http://localhost:3000` to use the sample. ## About the GitHub Repo Q&A Bot Sample -The GitHub Repo Q&A Bot sample allows you to pull in data from a public GitHub repo -into a local memory store in order to ask questions about the project and to get -answers about it. The sample highlights how [memory](https://aka.ms/sk/memories) -and [embeddings](https://aka.ms/sk/embeddings) work along with SK Functions when -the size of the data is larger than the allowed token limited. Each SK function -will call Open AI to perform the tasks you ask about.​ +The GitHub Repo Q&A Bot sample allows you to pull in data from a public GitHub repo into a local memory store in order to ask questions about the project and to get answers about it. The sample highlights how [memory](https://aka.ms/sk/memories) and [embeddings](https://aka.ms/sk/embeddings) work along with the SemanticTextPartitioner when the size of the data is larger than the allowed token limit. Each SK function will call Open AI to perform the tasks you ask about.​ > [!CAUTION] > Each function will call Open AI which will use tokens that you will be billed for. ## Next Steps -Create Skills and SK functions: Check out the [documentation](https://aka.ms/sk/learn) -for how to create Skills. +Create Skills and SK functions: Check out the [documentation](https://aka.ms/sk/learn) for how to create Skills. -Join the community: Join our [Discord community](https://aka.ms/SKDiscord) to share -ideas and get help​. +Join the community: Join our [Discord community](https://aka.ms/SKDiscord) to share ideas and get help​. -Contribute: We need your help to make this the best it can be. 
Learn how you -can [contribute](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) -to this project.​ \ No newline at end of file +Contribute: We need your help to make this the best it can be. Learn how you can [contribute](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) to this project.​ \ No newline at end of file diff --git a/samples/apps/github-qna-webapp-react/src/components/GitHubRepoSelection.tsx b/samples/apps/github-qna-webapp-react/src/components/GitHubRepoSelection.tsx index d757c5f34694..7807426b9705 100644 --- a/samples/apps/github-qna-webapp-react/src/components/GitHubRepoSelection.tsx +++ b/samples/apps/github-qna-webapp-react/src/components/GitHubRepoSelection.tsx @@ -122,6 +122,18 @@ const GitHubProjectSelection: FC = ({ uri, keyConfig, prevProject, prevBr onClick={() => download()} /> + +
+ +
{downloadState === DownloadState.Loading ? ( <> diff --git a/samples/apps/github-qna-webapp-react/src/components/QnA.tsx b/samples/apps/github-qna-webapp-react/src/components/QnA.tsx index 33d1f74b5b99..07e715c04702 100644 --- a/samples/apps/github-qna-webapp-react/src/components/QnA.tsx +++ b/samples/apps/github-qna-webapp-react/src/components/QnA.tsx @@ -60,7 +60,7 @@ const QnA: FC = ({ uri, project, branch, keyConfig, onBack }) => { ], }, 'QASkill', - 'MemoryQuery', + 'GitHubMemoryQuery', ); response.content = result.value; response.fetchState = FetchState.Fetched; diff --git a/samples/dotnet/KernelHttpServer/Extensions.cs b/samples/dotnet/KernelHttpServer/Extensions.cs index 6b4abde4d996..e11dbe8bdbb4 100644 --- a/samples/dotnet/KernelHttpServer/Extensions.cs +++ b/samples/dotnet/KernelHttpServer/Extensions.cs @@ -8,7 +8,6 @@ using System.Net; using System.Net.Http; using System.Threading.Tasks; -using GitHubSkillsExample; using KernelHttpServer.Config; using Microsoft.Azure.Functions.Worker.Http; using Microsoft.Extensions.Logging; @@ -151,7 +150,7 @@ internal static void RegisterTextMemory(this IKernel kernel) } [SuppressMessage("Reliability", "CA2000:Dispose objects before losing scope", - Justification = "The caller invokes native skills during a request and the skill instances must remain alive for those requests to be successful.")] + Justification = "The caller invokes native skills during a request and the skill instances must remain alive for those requests to be successful.")] internal static void RegisterNativeSkills(this IKernel kernel, IEnumerable? 
skillsToLoad = null) { if (_ShouldLoad(nameof(DocumentSkill), skillsToLoad)) @@ -174,9 +173,8 @@ internal static void RegisterNativeSkills(this IKernel kernel, IEnumerable ExecutePlanAsync( await r.WriteAsJsonAsync(new AskResult { Value = result.Variables.ToPlan().Result }); return r; } + } diff --git a/samples/dotnet/github-skills/GitHubSkill.cs b/samples/dotnet/github-skills/GitHubSkill.cs index e37c173d22b1..623e11c04198 100644 --- a/samples/dotnet/github-skills/GitHubSkill.cs +++ b/samples/dotnet/github-skills/GitHubSkill.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. using System; +using System.Collections.Generic; using System.IO; using System.IO.Compression; using System.Threading.Tasks; @@ -10,12 +11,10 @@ using Microsoft.SemanticKernel.KernelExtensions; using Microsoft.SemanticKernel.Memory; using Microsoft.SemanticKernel.Orchestration; +using Microsoft.SemanticKernel.SemanticFunctions.Partitioning; using Microsoft.SemanticKernel.SkillDefinition; -using Microsoft.SemanticKernel.Skills.Document; using Microsoft.SemanticKernel.Skills.Web; -namespace GitHubSkillsExample; - /// /// Skill for interacting with a GitHub repository. /// @@ -66,11 +65,10 @@ public static class Parameters private readonly ISKFunction _summarizeCodeFunction; private readonly IKernel _kernel; private readonly WebFileDownloadSkill _downloadSkill; - private readonly DocumentSkill _documentSkill; private readonly ILogger _logger; internal const string SummarizeCodeSnippetDefinition = - @"BEGIN CONTENT TO SUMMARIZE: + @"BEGIN CONTENT TO SUMMARIZE: {{$INPUT}} END CONTENT TO SUMMARIZE. @@ -88,11 +86,10 @@ Do not incorporate other general knowledge. /// Instance of WebFileDownloadSkill used to download web files /// Instance of DocumentSkill used to read files /// Optional logger - public GitHubSkill(IKernel kernel, WebFileDownloadSkill downloadSkill, DocumentSkill documentSkill, ILogger? 
logger = null) + public GitHubSkill(IKernel kernel, WebFileDownloadSkill downloadSkill, ILogger? logger = null) { this._kernel = kernel; this._downloadSkill = downloadSkill; - this._documentSkill = documentSkill; this._logger = logger ?? NullLogger.Instance; this._summarizeCodeFunction = kernel.CreateSemanticFunction( @@ -107,14 +104,13 @@ public GitHubSkill(IKernel kernel, WebFileDownloadSkill downloadSkill, DocumentS /// /// Summarize the code downloaded from the specified URI. /// - /// URI to download the repository content to be summarized - /// Semantic kernel context + /// URI to download the respository content to be summarized + /// Semantic kernal context /// Task [SKFunction("Downloads a repository and summarizes the content")] [SKFunctionName("SummarizeRepository")] [SKFunctionInput(Description = "URL of the GitHub repository to summarize")] - [SKFunctionContextParameter(Name = Parameters.RepositoryBranch, - Description = "Name of the repository branch which will be downloaded and summarized")] + [SKFunctionContextParameter(Name = Parameters.RepositoryBranch, Description = "Name of the repository repositoryBranch which will be downloaded and summarized")] [SKFunctionContextParameter(Name = Parameters.SearchPattern, Description = "The search string to match against the names of files in the repository")] public async Task SummarizeRepositoryAsync(string source, SKContext context) { @@ -122,7 +118,6 @@ public async Task SummarizeRepositoryAsync(string source, SKContext context) { repositoryBranch = "main"; } - if (!context.Variables.Get(Parameters.SearchPattern, out string searchPattern) || string.IsNullOrEmpty(searchPattern)) { searchPattern = "*.md"; @@ -152,7 +147,6 @@ public async Task SummarizeRepositoryAsync(string source, SKContext context) { File.Delete(filePath); } - if (Directory.Exists(directoryPath)) { Directory.Delete(directoryPath, true); @@ -171,13 +165,44 @@ private async Task SummarizeCodeFileAsync(string filePath, string repositoryUri, { 
if (code.Length > MaxFileSize) { - this._logger.LogWarning("File with path {0} is longer than the maximum number of tokens", filePath); - return; + var extension = new FileInfo(filePath).Extension; + + List lines; + List paragraphs; + + switch (extension) + { + case ".md": + { + lines = SemanticTextPartitioner.SplitMarkDownLines(code, MaxTokens); + paragraphs = SemanticTextPartitioner.SplitMarkdownParagraphs(lines, MaxTokens); + + break; + } + default: + { + lines = SemanticTextPartitioner.SplitPlainTextLines(code, MaxTokens); + paragraphs = SemanticTextPartitioner.SplitPlainTextParagraphs(lines, MaxTokens); + + break; + } + } + + foreach (var paragraph in paragraphs) + { + await this._kernel.Memory.SaveInformationAsync( + $"{repositoryUri}-{repositoryBranch}", + text: $"{paragraph} File:{repositoryUri}/blob/{repositoryBranch}/{fileUri}", + id: fileUri); + } } - - string text = $"{code} File:{repositoryUri}/blob/{repositoryBranch}/{fileUri}"; - - await this._kernel.Memory.SaveInformationAsync($"{repositoryUri}-{repositoryBranch}", text: text, id: fileUri); + else + { + await this._kernel.Memory.SaveInformationAsync( + $"{repositoryUri}-{repositoryBranch}", + text: $"{code} File:{repositoryUri}/blob/{repositoryBranch}/{fileUri}", + id: fileUri); + } } } diff --git a/samples/skills/QASkill/MemoryQuery/config.json b/samples/skills/QASkill/GitHubMemoryQuery/config.json similarity index 57% rename from samples/skills/QASkill/MemoryQuery/config.json rename to samples/skills/QASkill/GitHubMemoryQuery/config.json index ff8a628236f7..2044f277d0bb 100644 --- a/samples/skills/QASkill/MemoryQuery/config.json +++ b/samples/skills/QASkill/GitHubMemoryQuery/config.json @@ -7,12 +7,6 @@ "temperature": 0.8, "top_p": 0.0, "presence_penalty": 0.0, - "frequency_penalty": 0.0, - "stop_sequences": [ - "[done]" - ] - }, - "default_backends": [ - "text-davinci-003" - ] + "frequency_penalty": 0.0 + } } \ No newline at end of file diff --git 
a/samples/skills/QASkill/GitHubMemoryQuery/skprompt.txt b/samples/skills/QASkill/GitHubMemoryQuery/skprompt.txt new file mode 100644 index 000000000000..117b8d950610 --- /dev/null +++ b/samples/skills/QASkill/GitHubMemoryQuery/skprompt.txt @@ -0,0 +1,6 @@ +{{textmemoryskill.recall $input}} +--- +Considering only the information above, which has been loaded from a GitHub repository, answer the following. +Question: {{$input}} + +Answer: \ No newline at end of file diff --git a/samples/skills/QASkill/MemoryQuery/skprompt.txt b/samples/skills/QASkill/MemoryQuery/skprompt.txt deleted file mode 100644 index c9cd2006cfc8..000000000000 --- a/samples/skills/QASkill/MemoryQuery/skprompt.txt +++ /dev/null @@ -1,7 +0,0 @@ -Someone wants to know the following about a GitHub repository: -{{$input}} - -Please answer the above question given the following info: -{{textmemoryskill.recall $input}} - -Answer: \ No newline at end of file From a269f8c01e5ae7e5d1dfb8d3477148af595a2483 Mon Sep 17 00:00:00 2001 From: Craig Presti <146438+craigomatic@users.noreply.github.com> Date: Mon, 20 Mar 2023 07:18:52 -0700 Subject: [PATCH 2/2] Formatting/typos --- samples/dotnet/github-skills/GitHubSkill.cs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/samples/dotnet/github-skills/GitHubSkill.cs b/samples/dotnet/github-skills/GitHubSkill.cs index 623e11c04198..1ad1b7ab5c0b 100644 --- a/samples/dotnet/github-skills/GitHubSkill.cs +++ b/samples/dotnet/github-skills/GitHubSkill.cs @@ -84,7 +84,6 @@ Do not incorporate other general knowledge. /// /// Kernel instance /// Instance of WebFileDownloadSkill used to download web files - /// Instance of DocumentSkill used to read files /// Optional logger public GitHubSkill(IKernel kernel, WebFileDownloadSkill downloadSkill, ILogger? logger = null) { @@ -104,8 +103,8 @@ public GitHubSkill(IKernel kernel, WebFileDownloadSkill downloadSkill, ILogger /// Summarize the code downloaded from the specified URI. 
/// - /// URI to download the respository content to be summarized - /// Semantic kernal context + /// URI to download the repository content to be summarized + /// Semantic kernel context /// Task [SKFunction("Downloads a repository and summarizes the content")] [SKFunctionName("SummarizeRepository")] @@ -169,9 +168,9 @@ private async Task SummarizeCodeFileAsync(string filePath, string repositoryUri, List lines; List paragraphs; - + switch (extension) - { + { case ".md": { lines = SemanticTextPartitioner.SplitMarkDownLines(code, MaxTokens); @@ -189,7 +188,7 @@ private async Task SummarizeCodeFileAsync(string filePath, string repositoryUri, } foreach (var paragraph in paragraphs) - { + { await this._kernel.Memory.SaveInformationAsync( $"{repositoryUri}-{repositoryBranch}", text: $"{paragraph} File:{repositoryUri}/blob/{repositoryBranch}/{fileUri}", @@ -202,7 +201,7 @@ await this._kernel.Memory.SaveInformationAsync( $"{repositoryUri}-{repositoryBranch}", text: $"{code} File:{repositoryUri}/blob/{repositoryBranch}/{fileUri}", id: fileUri); - } + } } }