From 40f58cf15d91ac90e636055434889228a48d4329 Mon Sep 17 00:00:00 2001 From: markwallace-microsoft Date: Thu, 16 Mar 2023 17:56:33 +0000 Subject: [PATCH 1/5] Refactor and address TODO's 1. Based on some feedback Shawn posted in another PR I think we should make CodeSkill into a sample project (GitHubExampleSkill) rather than publish it as a nuget package. I have the changes made for this. 2. Also change the messages in the UI to reflect what the sample is doing. 3. Fixes: - Spinner working - Disable download button while downloading - Add a useEffect to add the response to the ChatHistory 4. One experimental change which only summarises files above a certain file size. This saves us some API calls so the .md files can be summarised in ~30 secs (as opposed to ~60 secs) --- dotnet/SK-dotnet.sln | 7 + .../Skills.Code - Copy/CodeSkill.cs | 187 ++++++++++++++++ .../Skills.Code - Copy/Skills.Code.csproj | 25 +++ .../src/components/GitHubRepoSelection.tsx | 35 +-- .../src/components/QnA.tsx | 13 +- samples/dotnet/KernelHttpServer/Extensions.cs | 7 +- .../KernelHttpServer/KernelHttpServer.csproj | 2 +- samples/dotnet/github-skills/GitHubSkill.cs | 211 ++++++++++++++++++ .../github-skills/GitHubSkillsExample.csproj | 13 ++ 9 files changed, 480 insertions(+), 20 deletions(-) create mode 100644 dotnet/src/SemanticKernel.Skills/Skills.Code - Copy/CodeSkill.cs create mode 100644 dotnet/src/SemanticKernel.Skills/Skills.Code - Copy/Skills.Code.csproj create mode 100644 samples/dotnet/github-skills/GitHubSkill.cs create mode 100644 samples/dotnet/github-skills/GitHubSkillsExample.csproj diff --git a/dotnet/SK-dotnet.sln b/dotnet/SK-dotnet.sln index 6223b71768a6..35a79bde0bf6 100644 --- a/dotnet/SK-dotnet.sln +++ b/dotnet/SK-dotnet.sln @@ -54,6 +54,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "KernelBuilder", "..\samples EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Skills.Code", "src\SemanticKernel.Skills\Skills.Code\Skills.Code.csproj", 
"{0EE82492-0176-43D5-A8E0-F2E2A766B5D5}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "GitHubSkillsExample", "..\samples\dotnet\github-skills\GitHubSkillsExample.csproj", "{39E5F0F6-8B36-4ECA-A5F6-FC7522DC2ECF}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -116,6 +118,10 @@ Global {0EE82492-0176-43D5-A8E0-F2E2A766B5D5}.Debug|Any CPU.Build.0 = Debug|Any CPU {0EE82492-0176-43D5-A8E0-F2E2A766B5D5}.Release|Any CPU.ActiveCfg = Release|Any CPU {0EE82492-0176-43D5-A8E0-F2E2A766B5D5}.Release|Any CPU.Build.0 = Release|Any CPU + {39E5F0F6-8B36-4ECA-A5F6-FC7522DC2ECF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {39E5F0F6-8B36-4ECA-A5F6-FC7522DC2ECF}.Debug|Any CPU.Build.0 = Debug|Any CPU + {39E5F0F6-8B36-4ECA-A5F6-FC7522DC2ECF}.Release|Any CPU.ActiveCfg = Release|Any CPU + {39E5F0F6-8B36-4ECA-A5F6-FC7522DC2ECF}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -137,6 +143,7 @@ Global {F4243136-252A-4459-A7C4-EE8C056D6B0B} = {158A4E5E-AEE0-4D60-83C7-8E089B2D881D} {A52818AC-57FB-495F-818F-9E1E7BC5618C} = {FA3720F1-C99A-49B2-9577-A940257098BF} {0EE82492-0176-43D5-A8E0-F2E2A766B5D5} = {9ECD1AA0-75B3-4E25-B0B5-9F0945B64974} + {39E5F0F6-8B36-4ECA-A5F6-FC7522DC2ECF} = {FA3720F1-C99A-49B2-9577-A940257098BF} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {FBDC56A3-86AD-4323-AA0F-201E59123B83} diff --git a/dotnet/src/SemanticKernel.Skills/Skills.Code - Copy/CodeSkill.cs b/dotnet/src/SemanticKernel.Skills/Skills.Code - Copy/CodeSkill.cs new file mode 100644 index 000000000000..5652f2cf67df --- /dev/null +++ b/dotnet/src/SemanticKernel.Skills/Skills.Code - Copy/CodeSkill.cs @@ -0,0 +1,187 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.IO; +using System.IO.Compression; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; +using Microsoft.SemanticKernel.KernelExtensions; +using Microsoft.SemanticKernel.Orchestration; +using Microsoft.SemanticKernel.SkillDefinition; +using Microsoft.SemanticKernel.Skills.Document; +using Microsoft.SemanticKernel.Skills.Web; + +namespace Microsoft.SemanticKernel.Skills.Code; + +/// +/// Skill for interacting with code files (e.g. C#) +/// +public class CodeSkill +{ + /// + /// Parameter names. + /// + /// + public static class Parameters + { + /// + /// Document file path. + /// + public const string FilePath = "filePath"; + + /// + /// Directory to which to extract compressed file's data. + /// + public const string DestinationDirectoryPath = "destinationDirectoryPath"; + + /// + /// Name of the memory collection used to store the code summaries. + /// + public const string MemoryCollectionName = "memoryCollectionName"; + } + + /// + /// The max tokens to process in a single semantic function call. + /// + private const int MaxTokens = 1024; + + private readonly ISKFunction _summarizeCodeFunction; + private readonly IKernel _kernel; + private readonly WebFileDownloadSkill _downloadSkill; + private readonly DocumentSkill _documentSkill; + private readonly ILogger _logger; + + internal const string SummarizeCodeSnippetDefinition = + @"BEGIN CONTENT TO SUMMARIZE: +{{$INPUT}} +END CONTENT TO SUMMARIZE. + +Summarize the content in 'CONTENT TO SUMMARIZE', identifying main points. +Do not incorporate other general knowledge. +Summary is in plain text, in complete sentences, with no markup or tags. + +BEGIN SUMMARY: +"; + + internal const string MemoryCollectionName = "CodeSkillMemory"; // TODO Should this be configurable + + + /// + /// Initializes a new instance of the class. 
+ /// + /// Kernel instance + /// Instance of WebFileDownloadSkill used to download web files + /// Instance of DocumentSkill used to read files + /// Optional logger + public CodeSkill(IKernel kernel, WebFileDownloadSkill downloadSkill, DocumentSkill documentSkill, ILogger? logger = null) + { + this._kernel = kernel; + this._downloadSkill = downloadSkill; + this._documentSkill = documentSkill; + this._logger = logger ?? NullLogger.Instance; + + this._summarizeCodeFunction = kernel.CreateSemanticFunction( + SummarizeCodeSnippetDefinition, + skillName: nameof(CodeSkill), + description: "Given a snippet of code, summarize the part of the file.", + maxTokens: MaxTokens, + temperature: 0.1, + topP: 0.5); + } + + /// + /// Summarize a code file into an embedding + /// + /// Path of file to summarize + /// Semantic kernel context + /// Task + public async Task SummarizeCodeFileAsync(string filePath, SKContext context) + { + // TODO do we need to extend the DocumentSkill to read raw content? + // string code = await this._documentSkill.ReadTextAsync(filePath, context); + string code = File.ReadAllText(filePath); + + if (code != null && code.Length > 0) + { + // TODO should we create a new SKContext here? 
+ context.Variables.Update(code); + await this._summarizeCodeFunction.InvokeAsync(context); + + var result = context.Variables.ToString(); + // TODO Include the file URI in the text + await this._kernel.Memory.SaveInformationAsync(MemoryCollectionName, text: result, id: filePath); + } + } + + /// + /// Summarize the code found under a directory into embeddings (one per file) + /// + /// Path of directory to summarize + /// Semantic kernel context + /// Task + public async Task SummarizeCodeDirectoryAsync(string directoryPath, SKContext context) + { + // TODO Use the document skill for recursion + // TODO Allow the wildcard match to be configurable + string[] filePaths = await Task.FromResult(Directory.GetFiles(directoryPath, "*.md", SearchOption.AllDirectories)); + + if (filePaths != null && filePaths.Length > 0) + { + this._logger.LogDebug("Found {0} files to summarize", filePaths.Length); + + foreach (string filePath in filePaths) + { + await this.SummarizeCodeFileAsync(filePath, context); + } + + _ = context.Variables.Update($"Found {filePaths.Length} files to summarize"); + context.Variables.Set(Parameters.MemoryCollectionName, MemoryCollectionName); + } + } + + /// + /// Summarize the code downloaded from the specified URI. + /// + /// URI to download the repository content to be summarized + /// Semantic kernel context + /// Task + [SKFunction("Downloads a repository and summarizes the content")] + [SKFunctionName("SummarizeRepository")] + [SKFunctionInput(Description = "URL of repository to summarize")] + public async Task SummarizeRepositoryAsync(string source, SKContext context) + { + // TODO Accept the repo uri and branch as separate parameters + // 1. Download URI would be calculated in the Skill rather than the client + // 2. 
We can compute file URIs so the responses can include a link to the relevant file + string filePath = Environment.ExpandEnvironmentVariables($"%temp%\\SK-{Guid.NewGuid()}.zip"); + string directoryPath = Environment.ExpandEnvironmentVariables($"%temp%\\SK-{Guid.NewGuid()}"); + + try + { + // TODO should we create a new SKContext here? + context.Variables.Set(Parameters.FilePath, filePath); + await this._downloadSkill.DownloadToFileAsync(source, context); + context.Variables.Set(Parameters.FilePath, null); + + filePath = Environment.ExpandEnvironmentVariables(filePath); + // TODO Use the file compression skill + ZipFile.ExtractToDirectory(filePath, directoryPath); + + await this.SummarizeCodeDirectoryAsync(directoryPath, context); + } + finally + { + // Cleanup downloaded file and also unzipped content + if (File.Exists(filePath)) + { + File.Delete(filePath); + } + if (Directory.Exists(directoryPath)) + { + Directory.Delete(directoryPath, true); + } + } + } +} diff --git a/dotnet/src/SemanticKernel.Skills/Skills.Code - Copy/Skills.Code.csproj b/dotnet/src/SemanticKernel.Skills/Skills.Code - Copy/Skills.Code.csproj new file mode 100644 index 000000000000..ba4d1f5c4c3f --- /dev/null +++ b/dotnet/src/SemanticKernel.Skills/Skills.Code - Copy/Skills.Code.csproj @@ -0,0 +1,25 @@ + + + + $([System.IO.Path]::GetDirectoryName($([MSBuild]::GetPathOfFileAbove('.gitignore', '$(MSBuildThisFileDirectory)')))) + + + + + Microsoft.SemanticKernel.Skills.Code + Microsoft.SemanticKernel.Skills.Code + netstandard2.1 + + + + + Microsoft.SemanticKernel.Skills.Code + Semantic Kernel - Code Skill + + + + + + + + diff --git a/samples/apps/github-qna-webapp-react/src/components/GitHubRepoSelection.tsx b/samples/apps/github-qna-webapp-react/src/components/GitHubRepoSelection.tsx index 0cda2b7b2762..52cc6b616c6e 100644 --- a/samples/apps/github-qna-webapp-react/src/components/GitHubRepoSelection.tsx +++ b/samples/apps/github-qna-webapp-react/src/components/GitHubRepoSelection.tsx @@ -23,18 
+23,20 @@ const GitHubProjectSelection: FC = ({ uri, keyConfig, onLoadProject, onBa const sk = useSemanticKernel(uri); const download = async () => { - let cleanProjectUri = project?.trim(); - - if (!cleanProjectUri?.endsWith('/')) { - cleanProjectUri = `${cleanProjectUri}/`; - } - - const url = `${cleanProjectUri}archive/refs/heads/${branch}.zip`; try { + setIsLoading(true); + setIsLoaded(false); + setIsLoadError(false); var result = await sk.invokeAsync( keyConfig, - { value: url, inputs: [] }, - 'CodeSkill', + { + value: project || '', + inputs: [ + { key: 'repositoryBranch', value: branch || '' }, + { key: 'searchPattern', value: '*.md' }, + ], + }, + 'GitHubSkill', 'SummarizeRepository', ); setIsLoaded(true); @@ -42,6 +44,8 @@ const GitHubProjectSelection: FC = ({ uri, keyConfig, onLoadProject, onBa } catch { setIsLoadError(true); alert('Something went wrong. Please check that the function is running and accessible from this location.'); + } finally { + setIsLoading(false); } }; @@ -77,7 +81,7 @@ const GitHubProjectSelection: FC = ({ uri, keyConfig, onLoadProject, onBa placeholder="main" />