diff --git a/samples/apps/github-qna-webapp-react/README.md b/samples/apps/github-qna-webapp-react/README.md index eb832925ae24..b590b80489f0 100644 --- a/samples/apps/github-qna-webapp-react/README.md +++ b/samples/apps/github-qna-webapp-react/README.md @@ -8,34 +8,22 @@ ## Running the sample 1. You will need an [Open AI Key](https://openai.com/api/) or - [Azure Open AI Service key](https://learn.microsoft.com/azure/cognitive-services/openai/quickstart) - for this sample. -2. Ensure the service API is already running `http://localhost:7071`. If not learn - how to start it [here](../../dotnet/KernelHttpServer/README.md). -3. **Run** the following command `yarn install` (if you have never run the sample before) - and/or `yarn start` from the command line. + [Azure Open AI Service key](https://learn.microsoft.com/azure/cognitive-services/openai/quickstart) for this sample. +2. Ensure the service API is already running at `http://localhost:7071`. If not, learn how to start it [here](../../dotnet/KernelHttpServer/README.md). +3. **Run** the following command `yarn install` (if you have never run the sample before) and/or `yarn start` from the command line. 4. A browser will open or you can navigate to `http://localhost:3000` to use the sample. ## About the GitHub Repo Q&A Bot Sample -The GitHub Repo Q&A Bot sample allows you to pull in data from a public GitHub repo -into a local memory store in order to ask questions about the project and to get -answers about it. The sample highlights how [memory](https://aka.ms/sk/memories) -and [embeddings](https://aka.ms/sk/embeddings) work along with SK Functions when -the size of the data is larger than the allowed token limited. Each SK function -will call Open AI to perform the tasks you ask about.​ +The GitHub Repo Q&A Bot sample allows you to pull in data from a public GitHub repo into a local memory store in order to ask questions about the project and to get answers about it. 
The sample highlights how [memory](https://aka.ms/sk/memories) and [embeddings](https://aka.ms/sk/embeddings) work along with the SemanticTextPartitioner when the size of the data is larger than the allowed token limit. Each SK function will call Open AI to perform the tasks you ask about.​ > [!CAUTION] > Each function will call Open AI which will use tokens that you will be billed for. ## Next Steps -Create Skills and SK functions: Check out the [documentation](https://aka.ms/sk/learn) -for how to create Skills. +Create Skills and SK functions: Check out the [documentation](https://aka.ms/sk/learn) for how to create Skills. -Join the community: Join our [Discord community](https://aka.ms/SKDiscord) to share -ideas and get help​. +Join the community: Join our [Discord community](https://aka.ms/SKDiscord) to share ideas and get help​. -Contribute: We need your help to make this the best it can be. Learn how you -can [contribute](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) -to this project.​ \ No newline at end of file +Contribute: We need your help to make this the best it can be. Learn how you can [contribute](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) to this project.​ \ No newline at end of file diff --git a/samples/apps/github-qna-webapp-react/src/components/GitHubRepoSelection.tsx b/samples/apps/github-qna-webapp-react/src/components/GitHubRepoSelection.tsx index d757c5f34694..7807426b9705 100644 --- a/samples/apps/github-qna-webapp-react/src/components/GitHubRepoSelection.tsx +++ b/samples/apps/github-qna-webapp-react/src/components/GitHubRepoSelection.tsx @@ -122,6 +122,18 @@ const GitHubProjectSelection: FC = ({ uri, keyConfig, prevProject, prevBr onClick={() => download()} /> + +
+ +
{downloadState === DownloadState.Loading ? ( <> diff --git a/samples/apps/github-qna-webapp-react/src/components/QnA.tsx b/samples/apps/github-qna-webapp-react/src/components/QnA.tsx index 33d1f74b5b99..07e715c04702 100644 --- a/samples/apps/github-qna-webapp-react/src/components/QnA.tsx +++ b/samples/apps/github-qna-webapp-react/src/components/QnA.tsx @@ -60,7 +60,7 @@ const QnA: FC = ({ uri, project, branch, keyConfig, onBack }) => { ], }, 'QASkill', - 'MemoryQuery', + 'GitHubMemoryQuery', ); response.content = result.value; response.fetchState = FetchState.Fetched; diff --git a/samples/dotnet/KernelHttpServer/Extensions.cs b/samples/dotnet/KernelHttpServer/Extensions.cs index 6b4abde4d996..e11dbe8bdbb4 100644 --- a/samples/dotnet/KernelHttpServer/Extensions.cs +++ b/samples/dotnet/KernelHttpServer/Extensions.cs @@ -8,7 +8,6 @@ using System.Net; using System.Net.Http; using System.Threading.Tasks; -using GitHubSkillsExample; using KernelHttpServer.Config; using Microsoft.Azure.Functions.Worker.Http; using Microsoft.Extensions.Logging; @@ -151,7 +150,7 @@ internal static void RegisterTextMemory(this IKernel kernel) } [SuppressMessage("Reliability", "CA2000:Dispose objects before losing scope", - Justification = "The caller invokes native skills during a request and the skill instances must remain alive for those requests to be successful.")] + Justification = "The caller invokes native skills during a request and the skill instances must remain alive for those requests to be successful.")] internal static void RegisterNativeSkills(this IKernel kernel, IEnumerable? 
skillsToLoad = null) { if (_ShouldLoad(nameof(DocumentSkill), skillsToLoad)) @@ -174,9 +173,8 @@ internal static void RegisterNativeSkills(this IKernel kernel, IEnumerable ExecutePlanAsync( await r.WriteAsJsonAsync(new AskResult { Value = result.Variables.ToPlan().Result }); return r; } + } diff --git a/samples/dotnet/github-skills/GitHubSkill.cs b/samples/dotnet/github-skills/GitHubSkill.cs index e37c173d22b1..1ad1b7ab5c0b 100644 --- a/samples/dotnet/github-skills/GitHubSkill.cs +++ b/samples/dotnet/github-skills/GitHubSkill.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. using System; +using System.Collections.Generic; using System.IO; using System.IO.Compression; using System.Threading.Tasks; @@ -10,12 +11,10 @@ using Microsoft.SemanticKernel.KernelExtensions; using Microsoft.SemanticKernel.Memory; using Microsoft.SemanticKernel.Orchestration; +using Microsoft.SemanticKernel.SemanticFunctions.Partitioning; using Microsoft.SemanticKernel.SkillDefinition; -using Microsoft.SemanticKernel.Skills.Document; using Microsoft.SemanticKernel.Skills.Web; -namespace GitHubSkillsExample; - /// /// Skill for interacting with a GitHub repository. /// @@ -66,11 +65,10 @@ public static class Parameters private readonly ISKFunction _summarizeCodeFunction; private readonly IKernel _kernel; private readonly WebFileDownloadSkill _downloadSkill; - private readonly DocumentSkill _documentSkill; private readonly ILogger _logger; internal const string SummarizeCodeSnippetDefinition = - @"BEGIN CONTENT TO SUMMARIZE: + @"BEGIN CONTENT TO SUMMARIZE: {{$INPUT}} END CONTENT TO SUMMARIZE. @@ -86,13 +84,11 @@ Do not incorporate other general knowledge. /// /// Kernel instance /// Instance of WebFileDownloadSkill used to download web files - /// Instance of DocumentSkill used to read files /// Optional logger - public GitHubSkill(IKernel kernel, WebFileDownloadSkill downloadSkill, DocumentSkill documentSkill, ILogger? 
logger = null) + public GitHubSkill(IKernel kernel, WebFileDownloadSkill downloadSkill, ILogger? logger = null) { this._kernel = kernel; this._downloadSkill = downloadSkill; - this._documentSkill = documentSkill; this._logger = logger ?? NullLogger.Instance; this._summarizeCodeFunction = kernel.CreateSemanticFunction( @@ -113,8 +109,7 @@ public GitHubSkill(IKernel kernel, WebFileDownloadSkill downloadSkill, DocumentS [SKFunction("Downloads a repository and summarizes the content")] [SKFunctionName("SummarizeRepository")] [SKFunctionInput(Description = "URL of the GitHub repository to summarize")] - [SKFunctionContextParameter(Name = Parameters.RepositoryBranch, - Description = "Name of the repository branch which will be downloaded and summarized")] + [SKFunctionContextParameter(Name = Parameters.RepositoryBranch, Description = "Name of the repository repositoryBranch which will be downloaded and summarized")] [SKFunctionContextParameter(Name = Parameters.SearchPattern, Description = "The search string to match against the names of files in the repository")] public async Task SummarizeRepositoryAsync(string source, SKContext context) { @@ -122,7 +117,6 @@ public async Task SummarizeRepositoryAsync(string source, SKContext context) { repositoryBranch = "main"; } - if (!context.Variables.Get(Parameters.SearchPattern, out string searchPattern) || string.IsNullOrEmpty(searchPattern)) { searchPattern = "*.md"; @@ -152,7 +146,6 @@ public async Task SummarizeRepositoryAsync(string source, SKContext context) { File.Delete(filePath); } - if (Directory.Exists(directoryPath)) { Directory.Delete(directoryPath, true); @@ -171,13 +164,44 @@ private async Task SummarizeCodeFileAsync(string filePath, string repositoryUri, { if (code.Length > MaxFileSize) { - this._logger.LogWarning("File with path {0} is longer than the maximum number of tokens", filePath); - return; + var extension = new FileInfo(filePath).Extension; + + List lines; + List paragraphs; + + switch (extension) + { + 
case ".md": + { + lines = SemanticTextPartitioner.SplitMarkDownLines(code, MaxTokens); + paragraphs = SemanticTextPartitioner.SplitMarkdownParagraphs(lines, MaxTokens); + + break; + } + default: + { + lines = SemanticTextPartitioner.SplitPlainTextLines(code, MaxTokens); + paragraphs = SemanticTextPartitioner.SplitPlainTextParagraphs(lines, MaxTokens); + + break; + } + } + + foreach (var paragraph in paragraphs) + { + await this._kernel.Memory.SaveInformationAsync( + $"{repositoryUri}-{repositoryBranch}", + text: $"{paragraph} File:{repositoryUri}/blob/{repositoryBranch}/{fileUri}", + id: fileUri); + } + } + else + { + await this._kernel.Memory.SaveInformationAsync( + $"{repositoryUri}-{repositoryBranch}", + text: $"{code} File:{repositoryUri}/blob/{repositoryBranch}/{fileUri}", + id: fileUri); } - - string text = $"{code} File:{repositoryUri}/blob/{repositoryBranch}/{fileUri}"; - - await this._kernel.Memory.SaveInformationAsync($"{repositoryUri}-{repositoryBranch}", text: text, id: fileUri); } } diff --git a/samples/skills/QASkill/MemoryQuery/config.json b/samples/skills/QASkill/GitHubMemoryQuery/config.json similarity index 57% rename from samples/skills/QASkill/MemoryQuery/config.json rename to samples/skills/QASkill/GitHubMemoryQuery/config.json index ff8a628236f7..2044f277d0bb 100644 --- a/samples/skills/QASkill/MemoryQuery/config.json +++ b/samples/skills/QASkill/GitHubMemoryQuery/config.json @@ -7,12 +7,6 @@ "temperature": 0.8, "top_p": 0.0, "presence_penalty": 0.0, - "frequency_penalty": 0.0, - "stop_sequences": [ - "[done]" - ] - }, - "default_backends": [ - "text-davinci-003" - ] + "frequency_penalty": 0.0 + } } \ No newline at end of file diff --git a/samples/skills/QASkill/GitHubMemoryQuery/skprompt.txt b/samples/skills/QASkill/GitHubMemoryQuery/skprompt.txt new file mode 100644 index 000000000000..117b8d950610 --- /dev/null +++ b/samples/skills/QASkill/GitHubMemoryQuery/skprompt.txt @@ -0,0 +1,6 @@ +{{textmemoryskill.recall $input}} +--- +Considering 
only the information above, which has been loaded from a GitHub repository, answer the following. +Question: {{$input}} + +Answer: \ No newline at end of file diff --git a/samples/skills/QASkill/MemoryQuery/skprompt.txt b/samples/skills/QASkill/MemoryQuery/skprompt.txt deleted file mode 100644 index c9cd2006cfc8..000000000000 --- a/samples/skills/QASkill/MemoryQuery/skprompt.txt +++ /dev/null @@ -1,7 +0,0 @@ -Someone wants to know the following about a GitHub repository: -{{$input}} - -Please answer the above question given the following info: -{{textmemoryskill.recall $input}} - -Answer: \ No newline at end of file