From 09449e81e7624ed050553a8460ef8cc1be0919cb Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Thu, 22 Jan 2026 22:51:39 -0700 Subject: [PATCH 01/14] refactor: replace fetch_instructions tool with built-in skills - Add built-in skills (create-mcp-server, create-mode) following slash commands pattern - Update SkillsManager to merge built-in skills with user skills - Remove FetchInstructionsTool and related infrastructure - Remove enableMcpServerCreation setting and UI checkbox - Remove auto-approval logic for fetchInstructions - Update system prompt sections to remove fetch_instructions references Built-in skills are now available through the skills system with override precedence: project > global > built-in --- packages/types/src/global-settings.ts | 6 +- packages/types/src/tool.ts | 2 +- packages/types/src/vscode-extension-host.ts | 4 +- .../assistant-message/NativeToolCallParser.ts | 34 +- .../presentAssistantMessage.ts | 21 +- src/core/auto-approval/index.ts | 10 - .../__tests__/add-custom-instructions.spec.ts | 4 - .../__tests__/custom-system-prompt.spec.ts | 3 - .../prompts/__tests__/system-prompt.spec.ts | 103 +++++- src/core/prompts/instructions/create-mode.ts | 62 ---- src/core/prompts/instructions/instructions.ts | 25 -- src/core/prompts/sections/modes.ts | 20 +- src/core/prompts/sections/skills.ts | 14 +- src/core/prompts/system.ts | 14 +- .../tools/native-tools/fetch_instructions.ts | 26 -- src/core/prompts/tools/native-tools/index.ts | 4 +- src/core/prompts/tools/native-tools/skill.ts | 33 ++ src/core/task/Task.ts | 2 - src/core/tools/FetchInstructionsTool.ts | 75 ---- src/core/tools/SkillTool.ts | 112 ++++++ src/core/tools/__tests__/skillTool.spec.ts | 345 ++++++++++++++++++ src/core/webview/ClineProvider.ts | 6 +- .../webview/__tests__/ClineProvider.spec.ts | 57 ++- ...ateSystemPrompt.browser-capability.spec.ts | 1 - src/core/webview/generateSystemPrompt.ts | 2 - src/core/webview/webviewMessageHandler.ts | 4 - src/services/skills/SkillsManager.ts | 38 +- .../skills/__tests__/SkillsManager.spec.ts | 8 + .../skills/built-in-skills.ts} | 140 +++++-- src/shared/skills.ts | 4 +- src/shared/tools.ts | 18 +- webview-ui/src/components/mcp/McpView.tsx | 46 +-- .../src/context/ExtensionStateContext.tsx | 5 - .../__tests__/ExtensionStateContext.spec.tsx | 1 - 34 files changed, 861 insertions(+), 388 deletions(-) delete mode 100644 src/core/prompts/instructions/create-mode.ts delete mode 100644 src/core/prompts/instructions/instructions.ts delete mode 100644 src/core/prompts/tools/native-tools/fetch_instructions.ts create mode 100644 src/core/prompts/tools/native-tools/skill.ts delete mode 100644 src/core/tools/FetchInstructionsTool.ts create mode 100644 src/core/tools/SkillTool.ts create mode 100644 src/core/tools/__tests__/skillTool.spec.ts rename src/{core/prompts/instructions/create-mcp-server.ts => services/skills/built-in-skills.ts} (62%) diff --git a/packages/types/src/global-settings.ts b/packages/types/src/global-settings.ts index d57ec616ff4..75a39b41e65 100644 --- a/packages/types/src/global-settings.ts +++ b/packages/types/src/global-settings.ts @@ -100,6 +100,11 @@ export const globalSettingsSchema = z.object({ alwaysAllowWrite: z.boolean().optional(), alwaysAllowWriteOutsideWorkspace: z.boolean().optional(), alwaysAllowWriteProtected: z.boolean().optional(), + /** + * Whether diff-based editing tools are enabled. + * When disabled, the extension should avoid providing diff strategies / diff-based tooling. 
+ */ + diffEnabled: z.boolean().optional(), writeDelayMs: z.number().min(0).optional(), alwaysAllowBrowser: z.boolean().optional(), requestDelaySeconds: z.number().optional(), @@ -199,7 +204,6 @@ export const globalSettingsSchema = z.object({ telemetrySetting: telemetrySettingsSchema.optional(), mcpEnabled: z.boolean().optional(), - enableMcpServerCreation: z.boolean().optional(), mode: z.string().optional(), modeApiConfigs: z.record(z.string(), z.string()).optional(), diff --git a/packages/types/src/tool.ts b/packages/types/src/tool.ts index f90ef42ede4..03144055c9a 100644 --- a/packages/types/src/tool.ts +++ b/packages/types/src/tool.ts @@ -33,10 +33,10 @@ export const toolNames = [ "attempt_completion", "switch_mode", "new_task", - "fetch_instructions", "codebase_search", "update_todo_list", "run_slash_command", + "skill", "generate_image", "custom_tool", ] as const diff --git a/packages/types/src/vscode-extension-host.ts b/packages/types/src/vscode-extension-host.ts index 7ae89e8777d..4765d05397d 100644 --- a/packages/types/src/vscode-extension-host.ts +++ b/packages/types/src/vscode-extension-host.ts @@ -280,6 +280,7 @@ export type ExtensionState = Pick< | "alwaysAllowWrite" | "alwaysAllowWriteOutsideWorkspace" | "alwaysAllowWriteProtected" + | "diffEnabled" | "alwaysAllowBrowser" | "alwaysAllowMcp" | "alwaysAllowModeSwitch" @@ -359,7 +360,6 @@ export type ExtensionState = Pick< experiments: Experiments // Map of experiment IDs to their enabled state mcpEnabled: boolean - enableMcpServerCreation: boolean mode: string customModes: ModeConfig[] @@ -499,7 +499,6 @@ export interface WebviewMessage { | "deleteMessageConfirm" | "submitEditedMessage" | "editMessageConfirm" - | "enableMcpServerCreation" | "remoteControlEnabled" | "taskSyncEnabled" | "searchCommits" @@ -779,7 +778,6 @@ export interface ClineSayTool { | "codebaseSearch" | "readFile" | "readCommandOutput" - | "fetchInstructions" | "listFilesTopLevel" | "listFilesRecursive" | "searchFiles" diff --git a/src/core/assistant-message/NativeToolCallParser.ts b/src/core/assistant-message/NativeToolCallParser.ts index 8aa369f74da..ecf649e2734 100644 --- a/src/core/assistant-message/NativeToolCallParser.ts +++ b/src/core/assistant-message/NativeToolCallParser.ts @@ -449,14 +449,6 @@ export class NativeToolCallParser { } break - case "fetch_instructions": - if (partialArgs.task !== undefined) { - nativeArgs = { - task: partialArgs.task, - } - } - break - case "generate_image": if (partialArgs.prompt !== undefined || partialArgs.path !== undefined) { nativeArgs = { @@ -476,6 +468,15 @@ export class NativeToolCallParser { } break + case "skill": + if (partialArgs.skill !== undefined) { + nativeArgs = { + skill: partialArgs.skill, + args: partialArgs.args, + } + } + break + case "search_files": if (partialArgs.path !== undefined || partialArgs.regex !== undefined) { nativeArgs = { @@ -736,14 +737,6 @@ export class NativeToolCallParser { } break - case "fetch_instructions": - if (args.task !== undefined) { - nativeArgs = { - task: args.task, - } as NativeArgsFor - } - break - case "generate_image": if (args.prompt !== undefined && args.path !== undefined) { nativeArgs = { @@ -763,6 +756,15 @@ export class NativeToolCallParser { } break + case "skill": + if (args.skill !== undefined) { + nativeArgs = { + skill: args.skill, + args: args.args, + } as NativeArgsFor + } + break + case "search_files": if (args.path !== undefined && args.regex !== undefined) { nativeArgs = { diff --git a/src/core/assistant-message/presentAssistantMessage.ts 
b/src/core/assistant-message/presentAssistantMessage.ts index db17bb97046..c0c7bf0eeb6 100644 --- a/src/core/assistant-message/presentAssistantMessage.ts +++ b/src/core/assistant-message/presentAssistantMessage.ts @@ -14,7 +14,6 @@ import type { ToolParamName, ToolResponse, ToolUse, McpToolUse } from "../../sha import { AskIgnoredError } from "../task/AskIgnoredError" import { Task } from "../task/Task" -import { fetchInstructionsTool } from "../tools/FetchInstructionsTool" import { listFilesTool } from "../tools/ListFilesTool" import { readFileTool } from "../tools/ReadFileTool" import { readCommandOutputTool } from "../tools/ReadCommandOutputTool" @@ -34,6 +33,7 @@ import { attemptCompletionTool, AttemptCompletionCallbacks } from "../tools/Atte import { newTaskTool } from "../tools/NewTaskTool" import { updateTodoListTool } from "../tools/UpdateTodoListTool" import { runSlashCommandTool } from "../tools/RunSlashCommandTool" +import { skillTool } from "../tools/SkillTool" import { generateImageTool } from "../tools/GenerateImageTool" import { applyDiffTool as applyDiffToolClass } from "../tools/ApplyDiffTool" import { isValidToolName, validateToolUse } from "../tools/validateToolUse" @@ -370,8 +370,6 @@ export async function presentAssistantMessage(cline: Task) { return readFileTool.getReadFileToolDescription(block.name, block.nativeArgs) } return readFileTool.getReadFileToolDescription(block.name, block.params) - case "fetch_instructions": - return `[${block.name} for '${block.params.task}']` case "write_to_file": return `[${block.name} for '${block.params.path}']` case "apply_diff": @@ -417,6 +415,8 @@ export async function presentAssistantMessage(cline: Task) { } case "run_slash_command": return `[${block.name} for '${block.params.command}'${block.params.args ? ` with args: ${block.params.args}` : ""}]` + case "skill": + return `[${block.name} for '${block.params.skill}'${block.params.args ? ` with args: ${block.params.args}` : ""}]` case "generate_image": return `[${block.name} for '${block.params.path}']` default: @@ -805,13 +805,6 @@ export async function presentAssistantMessage(cline: Task) { pushToolResult, }) break - case "fetch_instructions": - await fetchInstructionsTool.handle(cline, block as ToolUse<"fetch_instructions">, { - askApproval, - handleError, - pushToolResult, - }) - break case "list_files": await listFilesTool.handle(cline, block as ToolUse<"list_files">, { askApproval, @@ -915,6 +908,13 @@ export async function presentAssistantMessage(cline: Task) { pushToolResult, }) break + case "skill": + await skillTool.handle(cline, block as ToolUse<"skill">, { + askApproval, + handleError, + pushToolResult, + }) + break case "generate_image": await checkpointSaveAndMark(cline) await generateImageTool.handle(cline, block as ToolUse<"generate_image">, { @@ -1094,7 +1094,6 @@ function containsXmlToolMarkup(text: string): boolean { "codebase_search", "edit_file", "execute_command", - "fetch_instructions", "generate_image", "list_files", "new_task", diff --git a/src/core/auto-approval/index.ts b/src/core/auto-approval/index.ts index f2951405010..72d567a96ce 100644 --- a/src/core/auto-approval/index.ts +++ b/src/core/auto-approval/index.ts @@ -151,16 +151,6 @@ export async function checkAutoApproval({ return { decision: "approve" } } - if (tool?.tool === "fetchInstructions") { - if (tool.content === "create_mode") { - return state.alwaysAllowModeSwitch === true ? 
{ decision: "approve" } : { decision: "ask" } - } - - if (tool.content === "create_mcp_server") { - return state.alwaysAllowMcp === true ? { decision: "approve" } : { decision: "ask" } - } - } - if (tool?.tool === "switchMode") { return state.alwaysAllowModeSwitch === true ? { decision: "approve" } : { decision: "ask" } } diff --git a/src/core/prompts/__tests__/add-custom-instructions.spec.ts b/src/core/prompts/__tests__/add-custom-instructions.spec.ts index 79399f40b2b..b7813d0f5b8 100644 --- a/src/core/prompts/__tests__/add-custom-instructions.spec.ts +++ b/src/core/prompts/__tests__/add-custom-instructions.spec.ts @@ -211,7 +211,6 @@ describe("addCustomInstructions", () => { undefined, // customModes undefined, // globalCustomInstructions undefined, // experiments - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -233,7 +232,6 @@ describe("addCustomInstructions", () => { undefined, // customModes undefined, // globalCustomInstructions undefined, // experiments - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -257,7 +255,6 @@ describe("addCustomInstructions", () => { undefined, // customModes, undefined, // globalCustomInstructions undefined, // experiments - false, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -280,7 +277,6 @@ describe("addCustomInstructions", () => { undefined, // customModes, undefined, // globalCustomInstructions undefined, // experiments - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions true, // partialReadsEnabled diff --git a/src/core/prompts/__tests__/custom-system-prompt.spec.ts b/src/core/prompts/__tests__/custom-system-prompt.spec.ts index 5399b92c651..0ec2956b317 100644 --- a/src/core/prompts/__tests__/custom-system-prompt.spec.ts +++ b/src/core/prompts/__tests__/custom-system-prompt.spec.ts @@ -105,7 +105,6 @@ describe("File-Based Custom System Prompt", () => { undefined, // customModes undefined, // globalCustomInstructions undefined, // experiments - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -142,7 +141,6 @@ describe("File-Based Custom System Prompt", () => { undefined, // customModes undefined, // globalCustomInstructions undefined, // experiments - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -187,7 +185,6 @@ describe("File-Based Custom System Prompt", () => { undefined, // customModes undefined, // globalCustomInstructions undefined, // experiments - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled diff --git a/src/core/prompts/__tests__/system-prompt.spec.ts b/src/core/prompts/__tests__/system-prompt.spec.ts index d171e135077..bc782930b10 100644 --- a/src/core/prompts/__tests__/system-prompt.spec.ts +++ b/src/core/prompts/__tests__/system-prompt.spec.ts @@ -225,8 +225,8 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions + undefined, // diffEnabled experiments, - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -247,8 +247,8 @@ describe("SYSTEM_PROMPT", () => { undefined, 
// customModePrompts undefined, // customModes, undefined, // globalCustomInstructions + undefined, // diffEnabled experiments, - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -271,8 +271,8 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes, undefined, // globalCustomInstructions + undefined, // diffEnabled experiments, - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -293,8 +293,8 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes, undefined, // globalCustomInstructions + undefined, // diffEnabled experiments, - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -315,8 +315,8 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes, undefined, // globalCustomInstructions + undefined, // diffEnabled experiments, - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -324,6 +324,82 @@ describe("SYSTEM_PROMPT", () => { expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-different-viewport-size.snap") }) + + it("should include diff strategy tool description when diffEnabled is true", async () => { + const prompt = await SYSTEM_PROMPT( + mockContext, + "/test/path", + false, + undefined, // mcpHub + new MultiSearchReplaceDiffStrategy(), // Use actual diff strategy from the codebase + undefined, // browserViewportSize + defaultModeSlug, // mode + undefined, // customModePrompts + undefined, // customModes + undefined, // globalCustomInstructions + true, // diffEnabled + experiments, + undefined, // language + undefined, // rooIgnoreInstructions + undefined, // partialReadsEnabled + ) + + // Native-only: tool catalog isn't embedded in the system prompt anymore. + expect(prompt).not.toContain("# Tools") + expect(prompt).not.toContain("apply_diff") + expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-diff-enabled-true.snap") + }) + + it("should exclude diff strategy tool description when diffEnabled is false", async () => { + const prompt = await SYSTEM_PROMPT( + mockContext, + "/test/path", + false, // supportsImages + undefined, // mcpHub + new MultiSearchReplaceDiffStrategy(), // Use actual diff strategy from the codebase + undefined, // browserViewportSize + defaultModeSlug, // mode + undefined, // customModePrompts + undefined, // customModes + undefined, // globalCustomInstructions + false, // diffEnabled + experiments, + undefined, // language + undefined, // rooIgnoreInstructions + undefined, // partialReadsEnabled + ) + + // Native-only: tool catalog isn't embedded in the system prompt anymore. 
+ expect(prompt).not.toContain("# Tools") + expect(prompt).not.toContain("apply_diff") + expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-diff-enabled-false.snap") + }) + + it("should exclude diff strategy tool description when diffEnabled is undefined", async () => { + const prompt = await SYSTEM_PROMPT( + mockContext, + "/test/path", + false, + undefined, // mcpHub + new MultiSearchReplaceDiffStrategy(), // Use actual diff strategy from the codebase + undefined, // browserViewportSize + defaultModeSlug, // mode + undefined, // customModePrompts + undefined, // customModes + undefined, // globalCustomInstructions + undefined, // diffEnabled + experiments, + undefined, // language + undefined, // rooIgnoreInstructions + undefined, // partialReadsEnabled + ) + + // Native-only: tool catalog isn't embedded in the system prompt anymore. + expect(prompt).not.toContain("# Tools") + expect(prompt).not.toContain("apply_diff") + expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-diff-enabled-undefined.snap") + }) + it("should include vscode language in custom instructions", async () => { // Mock vscode.env.language const vscode = vi.mocked(await import("vscode")) as any @@ -363,8 +439,8 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions + undefined, // diffEnabled undefined, // experiments - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -423,8 +499,8 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts customModes, // customModes "Global instructions", // globalCustomInstructions + undefined, // diffEnabled experiments, - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -460,8 +536,8 @@ describe("SYSTEM_PROMPT", () => { customModePrompts, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions + undefined, // diffEnabled undefined, // experiments - false, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -492,8 +568,8 @@ describe("SYSTEM_PROMPT", () => { customModePrompts, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions + undefined, // diffEnabled undefined, // experiments - false, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -522,8 +598,8 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions + undefined, // diffEnabled experiments, - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -554,8 +630,8 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions + undefined, // diffEnabled experiments, - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -586,8 +662,8 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions + undefined, // diffEnabled experiments, - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -618,8 +694,8 @@ 
describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions + undefined, // diffEnabled experiments, - true, // enableMcpServerCreation undefined, // language undefined, // rooIgnoreInstructions undefined, // partialReadsEnabled @@ -654,6 +730,7 @@ describe("SYSTEM_PROMPT", () => { expect(prompt).toContain("SYSTEM INFORMATION") expect(prompt).toContain("OBJECTIVE") }) + afterAll(() => { vi.restoreAllMocks() }) diff --git a/src/core/prompts/instructions/create-mode.ts b/src/core/prompts/instructions/create-mode.ts deleted file mode 100644 index 9623aae0cd1..00000000000 --- a/src/core/prompts/instructions/create-mode.ts +++ /dev/null @@ -1,62 +0,0 @@ -import * as path from "path" -import * as vscode from "vscode" - -import { GlobalFileNames } from "../../../shared/globalFileNames" -import { getSettingsDirectoryPath } from "../../../utils/storage" - -export async function createModeInstructions(context: vscode.ExtensionContext | undefined): Promise { - if (!context) throw new Error("Missing VSCode Extension Context") - - const settingsDir = await getSettingsDirectoryPath(context.globalStorageUri.fsPath) - const customModesPath = path.join(settingsDir, GlobalFileNames.customModes) - - return ` -Custom modes can be configured in two ways: - 1. Globally via '${customModesPath}' (created automatically on startup) - 2. Per-workspace via '.roomodes' in the workspace root directory - -When modes with the same slug exist in both files, the workspace-specific .roomodes version takes precedence. This allows projects to override global modes or define project-specific modes. - -If asked to create a project mode, create it in .roomodes in the workspace root. If asked to create a global mode, use the global custom modes file. - -- The following fields are required and must not be empty: - * slug: A valid slug (lowercase letters, numbers, and hyphens). Must be unique, and shorter is better. - * name: The display name for the mode - * roleDefinition: A detailed description of the mode's role and capabilities - * groups: Array of allowed tool groups (can be empty). Each group can be specified either as a string (e.g., "edit" to allow editing any file) or with file restrictions (e.g., ["edit", { fileRegex: "\\.md$", description: "Markdown files only" }] to only allow editing markdown files) - -- The following fields are optional but highly recommended: - * description: A short, human-readable description of what this mode does (5 words) - * whenToUse: A clear description of when this mode should be selected and what types of tasks it's best suited for. This helps the Orchestrator mode make better decisions. - * customInstructions: Additional instructions for how the mode should operate - -- For multi-line text, include newline characters in the string like "This is the first line.\\nThis is the next line.\\n\\nThis is a double line break." - -Both files should follow this structure (in YAML format): - -customModes: - - slug: designer # Required: unique slug with lowercase letters, numbers, and hyphens - name: Designer # Required: mode display name - description: UI/UX design systems expert # Optional but recommended: short description (5 words) - roleDefinition: >- - You are Roo, a UI/UX expert specializing in design systems and frontend development. 
Your expertise includes: - - Creating and maintaining design systems - - Implementing responsive and accessible web interfaces - - Working with CSS, HTML, and modern frontend frameworks - - Ensuring consistent user experiences across platforms # Required: non-empty - whenToUse: >- - Use this mode when creating or modifying UI components, implementing design systems, - or ensuring responsive web interfaces. This mode is especially effective with CSS, - HTML, and modern frontend frameworks. # Optional but recommended - groups: # Required: array of tool groups (can be empty) - - read # Read files group (read_file, fetch_instructions, search_files, list_files) - - edit # Edit files group (apply_diff, write_to_file) - allows editing any file - # Or with file restrictions: - # - - edit - # - fileRegex: \\.md$ - # description: Markdown files only # Edit group that only allows editing markdown files - - browser # Browser group (browser_action) - - command # Command group (execute_command) - - mcp # MCP group (use_mcp_tool, access_mcp_resource) - customInstructions: Additional instructions for the Designer mode # Optional` -} diff --git a/src/core/prompts/instructions/instructions.ts b/src/core/prompts/instructions/instructions.ts deleted file mode 100644 index c1ff2a1899e..00000000000 --- a/src/core/prompts/instructions/instructions.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { createMCPServerInstructions } from "./create-mcp-server" -import { createModeInstructions } from "./create-mode" -import { McpHub } from "../../../services/mcp/McpHub" -import { DiffStrategy } from "../../../shared/tools" -import * as vscode from "vscode" - -interface InstructionsDetail { - mcpHub?: McpHub - diffStrategy?: DiffStrategy - context?: vscode.ExtensionContext -} - -export async function fetchInstructions(text: string, detail: InstructionsDetail): Promise { - switch (text) { - case "create_mcp_server": { - return await createMCPServerInstructions(detail.mcpHub, detail.diffStrategy) - } - case "create_mode": { - return await createModeInstructions(detail.context) - } - default: { - return "" - } - } -} diff --git a/src/core/prompts/sections/modes.ts b/src/core/prompts/sections/modes.ts index 1925405aa87..5c4ea2cf53a 100644 --- a/src/core/prompts/sections/modes.ts +++ b/src/core/prompts/sections/modes.ts @@ -5,17 +5,14 @@ import type { ModeConfig } from "@roo-code/types" import { getAllModesWithPrompts } from "../../../shared/modes" import { ensureSettingsDirectoryExists } from "../../../utils/globalContext" -export async function getModesSection( - context: vscode.ExtensionContext, - skipXmlExamples: boolean = false, -): Promise { +export async function getModesSection(context: vscode.ExtensionContext): Promise { // Make sure path gets created await ensureSettingsDirectoryExists(context) // Get all modes with their overrides from extension state const allModes = await getAllModesWithPrompts(context) - let modesContent = `==== + const modesContent = `==== MODES @@ -34,18 +31,5 @@ ${allModes }) .join("\n")}` - if (!skipXmlExamples) { - modesContent += ` -If the user asks you to create or edit a new mode for this project, you should read the instructions by using the fetch_instructions tool, like this: - -create_mode - -` - } else { - modesContent += ` -If the user asks you to create or edit a new mode for this project, you should read the instructions by using the fetch_instructions tool. 
-` - } - return modesContent } diff --git a/src/core/prompts/sections/skills.ts b/src/core/prompts/sections/skills.ts index 53ba8b95f19..4530c13f87b 100644 --- a/src/core/prompts/sections/skills.ts +++ b/src/core/prompts/sections/skills.ts @@ -62,9 +62,9 @@ Step 2: Branching Decision - Select EXACTLY ONE skill. - Prefer the most specific skill when multiple skills match. -- Read the full SKILL.md file at the skill's . -- Load the SKILL.md contents fully into context BEFORE continuing. -- Follow the SKILL.md instructions precisely. +- Use the skill tool to load the skill by name. +- Load the skill's instructions fully into context BEFORE continuing. +- Follow the skill instructions precisely. - Do NOT respond outside the skill-defined flow. @@ -74,15 +74,15 @@ Step 2: Branching Decision CONSTRAINTS: -- Do NOT load every SKILL.md up front. -- Load SKILL.md ONLY after a skill is selected. +- Do NOT load every skill up front. +- Load skills ONLY after a skill is selected. - Do NOT skip this check. - FAILURE to perform this check is an error. -- When a SKILL.md is loaded, ONLY the contents of SKILL.md are present. -- Files linked from SKILL.md are NOT loaded automatically. +- When a skill is loaded, ONLY the skill instructions are present. +- Files linked from the skill are NOT loaded automatically. - The model MUST explicitly decide to read a linked file based on task relevance. - Do NOT assume the contents of linked files unless they have been explicitly read. - Prefer reading the minimum necessary linked file. diff --git a/src/core/prompts/system.ts b/src/core/prompts/system.ts index b79f845d442..6e978c2ece4 100644 --- a/src/core/prompts/system.ts +++ b/src/core/prompts/system.ts @@ -52,8 +52,8 @@ async function generatePrompt( promptComponent?: PromptComponent, customModeConfigs?: ModeConfig[], globalCustomInstructions?: string, + diffEnabled?: boolean, experiments?: Record, - enableMcpServerCreation?: boolean, language?: string, rooIgnoreInstructions?: string, partialReadsEnabled?: boolean, @@ -66,6 +66,9 @@ async function generatePrompt( throw new Error("Extension context is required for generating system prompt") } + // If diff is disabled, don't pass the diffStrategy + const effectiveDiffStrategy = diffEnabled ? diffStrategy : undefined + // Get the full mode config to ensure we have the role definition (used for groups, etc.) const modeConfig = getModeBySlug(mode, customModeConfigs) || modes.find((m) => m.slug === mode) || modes[0] const { roleDefinition, baseInstructions } = getModeSelection(mode, promptComponent, customModeConfigs) @@ -126,8 +129,8 @@ export const SYSTEM_PROMPT = async ( customModePrompts?: CustomModePrompts, customModes?: ModeConfig[], globalCustomInstructions?: string, + diffEnabled?: boolean, experiments?: Record, - enableMcpServerCreation?: boolean, language?: string, rooIgnoreInstructions?: string, partialReadsEnabled?: boolean, @@ -184,19 +187,22 @@ ${fileCustomSystemPrompt} ${customInstructions}` } + // If diff is disabled, don't pass the diffStrategy + const effectiveDiffStrategy = diffEnabled ? 
diffStrategy : undefined + return generatePrompt( context, cwd, supportsComputerUse, currentMode.slug, mcpHub, - diffStrategy, + effectiveDiffStrategy, browserViewportSize, promptComponent, customModes, globalCustomInstructions, + diffEnabled, experiments, - enableMcpServerCreation, language, rooIgnoreInstructions, partialReadsEnabled, diff --git a/src/core/prompts/tools/native-tools/fetch_instructions.ts b/src/core/prompts/tools/native-tools/fetch_instructions.ts deleted file mode 100644 index 86ab184c58d..00000000000 --- a/src/core/prompts/tools/native-tools/fetch_instructions.ts +++ /dev/null @@ -1,26 +0,0 @@ -import type OpenAI from "openai" - -const FETCH_INSTRUCTIONS_DESCRIPTION = `Retrieve detailed instructions for performing a predefined task, such as creating an MCP server or creating a mode.` - -const TASK_PARAMETER_DESCRIPTION = `Task identifier to fetch instructions for` - -export default { - type: "function", - function: { - name: "fetch_instructions", - description: FETCH_INSTRUCTIONS_DESCRIPTION, - strict: true, - parameters: { - type: "object", - properties: { - task: { - type: "string", - description: TASK_PARAMETER_DESCRIPTION, - enum: ["create_mcp_server", "create_mode"], - }, - }, - required: ["task"], - additionalProperties: false, - }, - }, -} satisfies OpenAI.Chat.ChatCompletionTool diff --git a/src/core/prompts/tools/native-tools/index.ts b/src/core/prompts/tools/native-tools/index.ts index b6af18fa154..f23a7b2f28f 100644 --- a/src/core/prompts/tools/native-tools/index.ts +++ b/src/core/prompts/tools/native-tools/index.ts @@ -7,13 +7,13 @@ import attemptCompletion from "./attempt_completion" import browserAction from "./browser_action" import codebaseSearch from "./codebase_search" import executeCommand from "./execute_command" -import fetchInstructions from "./fetch_instructions" import generateImage from "./generate_image" import listFiles from "./list_files" import newTask from "./new_task" import readCommandOutput from "./read_command_output" import { createReadFileTool, type ReadFileToolOptions } from "./read_file" import runSlashCommand from "./run_slash_command" +import skill from "./skill" import searchAndReplace from "./search_and_replace" import searchReplace from "./search_replace" import edit_file from "./edit_file" @@ -62,13 +62,13 @@ export function getNativeTools(options: NativeToolsOptions = {}): OpenAI.Chat.Ch browserAction, codebaseSearch, executeCommand, - fetchInstructions, generateImage, listFiles, newTask, readCommandOutput, createReadFileTool(readFileOptions), runSlashCommand, + skill, searchAndReplace, searchReplace, edit_file, diff --git a/src/core/prompts/tools/native-tools/skill.ts b/src/core/prompts/tools/native-tools/skill.ts new file mode 100644 index 00000000000..98a2d98cc8d --- /dev/null +++ b/src/core/prompts/tools/native-tools/skill.ts @@ -0,0 +1,33 @@ +import type OpenAI from "openai" + +const SKILL_DESCRIPTION = `Load and execute a skill by name. Skills provide specialized instructions for common tasks like creating MCP servers or custom modes. + +Use this tool when you need to follow specific procedures documented in a skill. Available skills are listed in the AVAILABLE SKILLS section of the system prompt.` + +const SKILL_PARAMETER_DESCRIPTION = `Name of the skill to load (e.g., create-mcp-server, create-mode). 
Must match a skill name from the available skills list.` + +const ARGS_PARAMETER_DESCRIPTION = `Optional context or arguments to pass to the skill` + +export default { + type: "function", + function: { + name: "skill", + description: SKILL_DESCRIPTION, + strict: true, + parameters: { + type: "object", + properties: { + skill: { + type: "string", + description: SKILL_PARAMETER_DESCRIPTION, + }, + args: { + type: ["string", "null"], + description: ARGS_PARAMETER_DESCRIPTION, + }, + }, + required: ["skill", "args"], + additionalProperties: false, + }, + }, +} satisfies OpenAI.Chat.ChatCompletionTool diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index ff697d77a88..0c54e560b89 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -3758,7 +3758,6 @@ export class Task extends EventEmitter implements TaskLike { customModePrompts, customInstructions, experiments, - enableMcpServerCreation, browserToolEnabled, language, maxConcurrentFileReads, @@ -3797,7 +3796,6 @@ export class Task extends EventEmitter implements TaskLike { customModes, customInstructions, experiments, - enableMcpServerCreation, language, rooIgnoreInstructions, maxReadFileLine !== -1, diff --git a/src/core/tools/FetchInstructionsTool.ts b/src/core/tools/FetchInstructionsTool.ts deleted file mode 100644 index f800e57fc4b..00000000000 --- a/src/core/tools/FetchInstructionsTool.ts +++ /dev/null @@ -1,75 +0,0 @@ -import { type ClineSayTool } from "@roo-code/types" - -import { Task } from "../task/Task" -import { fetchInstructions } from "../prompts/instructions/instructions" -import { formatResponse } from "../prompts/responses" -import type { ToolUse } from "../../shared/tools" - -import { BaseTool, ToolCallbacks } from "./BaseTool" - -interface FetchInstructionsParams { - task: string -} - -export class FetchInstructionsTool extends BaseTool<"fetch_instructions"> { - readonly name = "fetch_instructions" as const - - async execute(params: FetchInstructionsParams, task: Task, callbacks: ToolCallbacks): Promise { - const { handleError, pushToolResult, askApproval } = callbacks - const { task: taskParam } = params - - try { - if (!taskParam) { - task.consecutiveMistakeCount++ - task.recordToolError("fetch_instructions") - task.didToolFailInCurrentTurn = true - pushToolResult(await task.sayAndCreateMissingParamError("fetch_instructions", "task")) - return - } - - task.consecutiveMistakeCount = 0 - - const completeMessage = JSON.stringify({ - tool: "fetchInstructions", - content: taskParam, - } satisfies ClineSayTool) - - const didApprove = await askApproval("tool", completeMessage) - - if (!didApprove) { - return - } - - // Now fetch the content and provide it to the agent. 
- const provider = task.providerRef.deref() - const mcpHub = provider?.getMcpHub() - - if (!mcpHub) { - throw new Error("MCP hub not available") - } - - const diffStrategy = task.diffStrategy - const context = provider?.context - const content = await fetchInstructions(taskParam, { mcpHub, diffStrategy, context }) - - if (!content) { - pushToolResult(formatResponse.toolError(`Invalid instructions request: ${taskParam}`)) - return - } - - pushToolResult(content) - } catch (error) { - await handleError("fetch instructions", error as Error) - } - } - - override async handlePartial(task: Task, block: ToolUse<"fetch_instructions">): Promise { - const taskParam: string | undefined = block.params.task - const sharedMessageProps: ClineSayTool = { tool: "fetchInstructions", content: taskParam } - - const partialMessage = JSON.stringify({ ...sharedMessageProps, content: undefined } satisfies ClineSayTool) - await task.ask("tool", partialMessage, block.partial).catch(() => {}) - } -} - -export const fetchInstructionsTool = new FetchInstructionsTool() diff --git a/src/core/tools/SkillTool.ts b/src/core/tools/SkillTool.ts new file mode 100644 index 00000000000..213cfd91ee8 --- /dev/null +++ b/src/core/tools/SkillTool.ts @@ -0,0 +1,112 @@ +import { Task } from "../task/Task" +import { formatResponse } from "../prompts/responses" +import { BaseTool, ToolCallbacks } from "./BaseTool" +import type { ToolUse } from "../../shared/tools" + +interface SkillParams { + skill: string + args?: string +} + +export class SkillTool extends BaseTool<"skill"> { + readonly name = "skill" as const + + async execute(params: SkillParams, task: Task, callbacks: ToolCallbacks): Promise { + const { skill: skillName, args } = params + const { askApproval, handleError, pushToolResult } = callbacks + + try { + // Validate skill name parameter + if (!skillName) { + task.consecutiveMistakeCount++ + task.recordToolError("skill") + task.didToolFailInCurrentTurn = true + pushToolResult(await task.sayAndCreateMissingParamError("skill", "skill")) + return + } + + task.consecutiveMistakeCount = 0 + + // Get SkillsManager from provider + const provider = task.providerRef.deref() + const skillsManager = provider?.getSkillsManager() + + if (!skillsManager) { + task.recordToolError("skill") + task.didToolFailInCurrentTurn = true + pushToolResult(formatResponse.toolError("Skills Manager not available")) + return + } + + // Get current mode for skill resolution + const state = await provider?.getState() + const currentMode = state?.mode ?? "code" + + // Fetch skill content + const skillContent = await skillsManager.getSkillContent(skillName, currentMode) + + if (!skillContent) { + // Get available skills for error message + const availableSkills = skillsManager.getSkillsForMode(currentMode) + const skillNames = availableSkills.map((s) => s.name) + + task.recordToolError("skill") + task.didToolFailInCurrentTurn = true + pushToolResult( + formatResponse.toolError( + `Skill '${skillName}' not found. 
Available skills: ${skillNames.join(", ") || "(none)"}`, + ), + ) + return + } + + // Build approval message + const toolMessage = JSON.stringify({ + tool: "skill", + skill: skillName, + args: args, + source: skillContent.source, + description: skillContent.description, + }) + + const didApprove = await askApproval("tool", toolMessage) + + if (!didApprove) { + return + } + + // Build the result message + let result = `Skill: ${skillName}` + + if (skillContent.description) { + result += `\nDescription: ${skillContent.description}` + } + + if (args) { + result += `\nProvided arguments: ${args}` + } + + result += `\nSource: ${skillContent.source}` + result += `\n\n--- Skill Instructions ---\n\n${skillContent.instructions}` + + pushToolResult(result) + } catch (error) { + await handleError("executing skill", error as Error) + } + } + + override async handlePartial(task: Task, block: ToolUse<"skill">): Promise { + const skillName: string | undefined = block.params.skill + const args: string | undefined = block.params.args + + const partialMessage = JSON.stringify({ + tool: "skill", + skill: skillName, + args: args, + }) + + await task.ask("tool", partialMessage, block.partial).catch(() => {}) + } +} + +export const skillTool = new SkillTool() diff --git a/src/core/tools/__tests__/skillTool.spec.ts b/src/core/tools/__tests__/skillTool.spec.ts new file mode 100644 index 00000000000..fc1b3396e50 --- /dev/null +++ b/src/core/tools/__tests__/skillTool.spec.ts @@ -0,0 +1,345 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { skillTool } from "../SkillTool" +import { Task } from "../../task/Task" +import { formatResponse } from "../../prompts/responses" +import type { ToolUse } from "../../../shared/tools" + +describe("skillTool", () => { + let mockTask: any + let mockCallbacks: any + let mockSkillsManager: any + + beforeEach(() => { + vi.clearAllMocks() + + mockSkillsManager = { + getSkillContent: vi.fn(), + getSkillsForMode: vi.fn().mockReturnValue([]), + } + + mockTask = { + consecutiveMistakeCount: 0, + recordToolError: vi.fn(), + didToolFailInCurrentTurn: false, + sayAndCreateMissingParamError: vi.fn().mockResolvedValue("Missing parameter error"), + ask: vi.fn().mockResolvedValue({}), + providerRef: { + deref: vi.fn().mockReturnValue({ + getState: vi.fn().mockResolvedValue({ mode: "code" }), + getSkillsManager: vi.fn().mockReturnValue(mockSkillsManager), + }), + }, + } + + mockCallbacks = { + askApproval: vi.fn().mockResolvedValue(true), + handleError: vi.fn(), + pushToolResult: vi.fn(), + } + }) + + it("should handle missing skill parameter", async () => { + const block: ToolUse<"skill"> = { + type: "tool_use" as const, + name: "skill" as const, + params: {}, + partial: false, + nativeArgs: { + skill: "", + }, + } + + await skillTool.handle(mockTask as Task, block, mockCallbacks) + + expect(mockTask.consecutiveMistakeCount).toBe(1) + expect(mockTask.recordToolError).toHaveBeenCalledWith("skill") + expect(mockTask.sayAndCreateMissingParamError).toHaveBeenCalledWith("skill", "skill") + expect(mockCallbacks.pushToolResult).toHaveBeenCalledWith("Missing parameter error") + }) + + it("should handle skill not found", async () => { + const block: ToolUse<"skill"> = { + type: "tool_use" as const, + name: "skill" as const, + params: {}, + partial: false, + nativeArgs: { + skill: "non-existent", + }, + } + + mockSkillsManager.getSkillContent.mockResolvedValue(null) + mockSkillsManager.getSkillsForMode.mockReturnValue([{ name: "create-mcp-server" }]) + + await 
skillTool.handle(mockTask as Task, block, mockCallbacks) + + expect(mockCallbacks.pushToolResult).toHaveBeenCalledWith( + formatResponse.toolError("Skill 'non-existent' not found. Available skills: create-mcp-server"), + ) + }) + + it("should handle empty available skills list", async () => { + const block: ToolUse<"skill"> = { + type: "tool_use" as const, + name: "skill" as const, + params: {}, + partial: false, + nativeArgs: { + skill: "non-existent", + }, + } + + mockSkillsManager.getSkillContent.mockResolvedValue(null) + mockSkillsManager.getSkillsForMode.mockReturnValue([]) + + await skillTool.handle(mockTask as Task, block, mockCallbacks) + + expect(mockCallbacks.pushToolResult).toHaveBeenCalledWith( + formatResponse.toolError("Skill 'non-existent' not found. Available skills: (none)"), + ) + }) + + it("should successfully load built-in skill", async () => { + const block: ToolUse<"skill"> = { + type: "tool_use" as const, + name: "skill" as const, + params: {}, + partial: false, + nativeArgs: { + skill: "create-mcp-server", + }, + } + + const mockSkillContent = { + name: "create-mcp-server", + description: "Instructions for creating MCP servers", + source: "built-in", + instructions: "Step 1: Create the server...", + } + + mockSkillsManager.getSkillContent.mockResolvedValue(mockSkillContent) + + await skillTool.handle(mockTask as Task, block, mockCallbacks) + + expect(mockCallbacks.askApproval).toHaveBeenCalledWith( + "tool", + JSON.stringify({ + tool: "skill", + skill: "create-mcp-server", + args: undefined, + source: "built-in", + description: "Instructions for creating MCP servers", + }), + ) + + expect(mockCallbacks.pushToolResult).toHaveBeenCalledWith( + `Skill: create-mcp-server +Description: Instructions for creating MCP servers +Source: built-in + +--- Skill Instructions --- + +Step 1: Create the server...`, + ) + }) + + it("should successfully load skill with arguments", async () => { + const block: ToolUse<"skill"> = { + type: "tool_use" as const, + name: "skill" as const, + params: {}, + partial: false, + nativeArgs: { + skill: "create-mcp-server", + args: "weather API server", + }, + } + + const mockSkillContent = { + name: "create-mcp-server", + description: "Instructions for creating MCP servers", + source: "built-in", + instructions: "Step 1: Create the server...", + } + + mockSkillsManager.getSkillContent.mockResolvedValue(mockSkillContent) + + await skillTool.handle(mockTask as Task, block, mockCallbacks) + + expect(mockCallbacks.pushToolResult).toHaveBeenCalledWith( + `Skill: create-mcp-server +Description: Instructions for creating MCP servers +Provided arguments: weather API server +Source: built-in + +--- Skill Instructions --- + +Step 1: Create the server...`, + ) + }) + + it("should handle user rejection", async () => { + const block: ToolUse<"skill"> = { + type: "tool_use" as const, + name: "skill" as const, + params: {}, + partial: false, + nativeArgs: { + skill: "create-mcp-server", + }, + } + + mockSkillsManager.getSkillContent.mockResolvedValue({ + name: "create-mcp-server", + description: "Test", + source: "built-in", + instructions: "Test instructions", + }) + + mockCallbacks.askApproval.mockResolvedValue(false) + + await skillTool.handle(mockTask as Task, block, mockCallbacks) + + expect(mockCallbacks.pushToolResult).not.toHaveBeenCalled() + }) + + it("should handle partial block", async () => { + const block: ToolUse<"skill"> = { + type: "tool_use" as const, + name: "skill" as const, + params: { + skill: "create-mcp-server", + args: "", + }, + partial: 
true, + } + + await skillTool.handle(mockTask as Task, block, mockCallbacks) + + expect(mockTask.ask).toHaveBeenCalledWith( + "tool", + JSON.stringify({ + tool: "skill", + skill: "create-mcp-server", + args: "", + }), + true, + ) + + expect(mockCallbacks.pushToolResult).not.toHaveBeenCalled() + }) + + it("should handle errors during execution", async () => { + const block: ToolUse<"skill"> = { + type: "tool_use" as const, + name: "skill" as const, + params: {}, + partial: false, + nativeArgs: { + skill: "create-mcp-server", + }, + } + + const error = new Error("Test error") + mockSkillsManager.getSkillContent.mockRejectedValue(error) + + await skillTool.handle(mockTask as Task, block, mockCallbacks) + + expect(mockCallbacks.handleError).toHaveBeenCalledWith("executing skill", error) + }) + + it("should reset consecutive mistake count on valid skill", async () => { + const block: ToolUse<"skill"> = { + type: "tool_use" as const, + name: "skill" as const, + params: {}, + partial: false, + nativeArgs: { + skill: "create-mcp-server", + }, + } + + mockTask.consecutiveMistakeCount = 5 + + const mockSkillContent = { + name: "create-mcp-server", + description: "Test", + source: "built-in", + instructions: "Test instructions", + } + + mockSkillsManager.getSkillContent.mockResolvedValue(mockSkillContent) + + await skillTool.handle(mockTask as Task, block, mockCallbacks) + + expect(mockTask.consecutiveMistakeCount).toBe(0) + }) + + it("should handle Skills Manager not available", async () => { + const block: ToolUse<"skill"> = { + type: "tool_use" as const, + name: "skill" as const, + params: {}, + partial: false, + nativeArgs: { + skill: "create-mcp-server", + }, + } + + mockTask.providerRef.deref = vi.fn().mockReturnValue({ + getState: vi.fn().mockResolvedValue({ mode: "code" }), + getSkillsManager: vi.fn().mockReturnValue(undefined), + }) + + await skillTool.handle(mockTask as Task, block, mockCallbacks) + + expect(mockTask.recordToolError).toHaveBeenCalledWith("skill") + expect(mockCallbacks.pushToolResult).toHaveBeenCalledWith( + formatResponse.toolError("Skills Manager not available"), + ) + }) + + it("should load project skill", async () => { + const block: ToolUse<"skill"> = { + type: "tool_use" as const, + name: "skill" as const, + params: {}, + partial: false, + nativeArgs: { + skill: "my-project-skill", + }, + } + + const mockSkillContent = { + name: "my-project-skill", + description: "A custom project skill", + source: "project", + instructions: "Follow these project-specific instructions...", + } + + mockSkillsManager.getSkillContent.mockResolvedValue(mockSkillContent) + + await skillTool.handle(mockTask as Task, block, mockCallbacks) + + expect(mockCallbacks.askApproval).toHaveBeenCalledWith( + "tool", + JSON.stringify({ + tool: "skill", + skill: "my-project-skill", + args: undefined, + source: "project", + description: "A custom project skill", + }), + ) + + expect(mockCallbacks.pushToolResult).toHaveBeenCalledWith( + `Skill: my-project-skill +Description: A custom project skill +Source: project + +--- Skill Instructions --- + +Follow these project-specific instructions...`, + ) + }) +}) diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index b101bee7d29..0fa8528913a 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -1991,6 +1991,7 @@ export class ClineProvider alwaysAllowWrite, alwaysAllowWriteOutsideWorkspace, alwaysAllowWriteProtected, + diffEnabled, alwaysAllowExecute, allowedCommands, deniedCommands, @@ 
-2024,7 +2025,6 @@ export class ClineProvider terminalZshP10k, terminalZdotdir, mcpEnabled, - enableMcpServerCreation, currentApiConfigName, listApiConfigMeta, pinnedApiConfigs, @@ -2117,6 +2117,7 @@ export class ClineProvider alwaysAllowWrite: alwaysAllowWrite ?? false, alwaysAllowWriteOutsideWorkspace: alwaysAllowWriteOutsideWorkspace ?? false, alwaysAllowWriteProtected: alwaysAllowWriteProtected ?? false, + diffEnabled: diffEnabled ?? true, alwaysAllowExecute: alwaysAllowExecute ?? false, alwaysAllowBrowser: alwaysAllowBrowser ?? false, alwaysAllowMcp: alwaysAllowMcp ?? false, @@ -2162,7 +2163,6 @@ export class ClineProvider terminalZshP10k: terminalZshP10k ?? false, terminalZdotdir: terminalZdotdir ?? false, mcpEnabled: mcpEnabled ?? true, - enableMcpServerCreation: enableMcpServerCreation ?? true, currentApiConfigName: currentApiConfigName ?? "default", listApiConfigMeta: listApiConfigMeta ?? [], pinnedApiConfigs: pinnedApiConfigs ?? {}, @@ -2368,6 +2368,7 @@ export class ClineProvider alwaysAllowWrite: stateValues.alwaysAllowWrite ?? false, alwaysAllowWriteOutsideWorkspace: stateValues.alwaysAllowWriteOutsideWorkspace ?? false, alwaysAllowWriteProtected: stateValues.alwaysAllowWriteProtected ?? false, + diffEnabled: stateValues.diffEnabled ?? true, alwaysAllowExecute: stateValues.alwaysAllowExecute ?? false, alwaysAllowBrowser: stateValues.alwaysAllowBrowser ?? false, alwaysAllowMcp: stateValues.alwaysAllowMcp ?? false, @@ -2408,7 +2409,6 @@ export class ClineProvider mode: stateValues.mode ?? defaultModeSlug, language: stateValues.language ?? formatLanguage(vscode.env.language), mcpEnabled: stateValues.mcpEnabled ?? true, - enableMcpServerCreation: stateValues.enableMcpServerCreation ?? true, mcpServers: this.mcpHub?.getAllServers() ?? [], currentApiConfigName: stateValues.currentApiConfigName ?? "default", listApiConfigMeta: stateValues.listApiConfigMeta ?? 
[], diff --git a/src/core/webview/__tests__/ClineProvider.spec.ts b/src/core/webview/__tests__/ClineProvider.spec.ts index cacaf26004d..dcc4e20d6c0 100644 --- a/src/core/webview/__tests__/ClineProvider.spec.ts +++ b/src/core/webview/__tests__/ClineProvider.spec.ts @@ -315,6 +315,7 @@ vi.mock("../../../api/providers/fetchers/modelCache", () => ({ vi.mock("../diff/strategies/multi-search-replace", () => ({ MultiSearchReplaceDiffStrategy: vi.fn().mockImplementation(() => ({ + getToolDescription: () => "test", getName: () => "test-strategy", applyDiff: vi.fn(), })), @@ -553,11 +554,12 @@ describe("ClineProvider", () => { uriScheme: "vscode", soundEnabled: false, ttsEnabled: false, + diffEnabled: false, enableCheckpoints: false, writeDelayMs: 1000, browserViewportSize: "900x600", + fuzzyMatchThreshold: 1.0, mcpEnabled: true, - enableMcpServerCreation: false, mode: defaultModeSlug, customModes: [], experiments: experimentDefault, @@ -764,6 +766,7 @@ describe("ClineProvider", () => { expect(state).toHaveProperty("taskHistory") expect(state).toHaveProperty("soundEnabled") expect(state).toHaveProperty("ttsEnabled") + expect(state).toHaveProperty("diffEnabled") expect(state).toHaveProperty("writeDelayMs") }) @@ -775,6 +778,15 @@ describe("ClineProvider", () => { expect(state.language).toBe("pt-BR") }) + test("diffEnabled defaults to true when not set", async () => { + // Mock globalState.get to return undefined for diffEnabled + ;(mockContext.globalState.get as any).mockReturnValue(undefined) + + const state = await provider.getState() + + expect(state.diffEnabled).toBe(true) + }) + test("writeDelayMs defaults to 1000ms", async () => { // Mock globalState.get to return undefined for writeDelayMs ;(mockContext.globalState.get as any).mockImplementation((key: string) => @@ -1349,7 +1361,6 @@ describe("ClineProvider", () => { apiProvider: "openrouter" as const, }, mcpEnabled: true, - enableMcpServerCreation: false, mode: "code" as const, experiments: experimentDefault, } as any) @@ -1374,7 +1385,6 @@ describe("ClineProvider", () => { apiProvider: "openrouter" as const, }, mcpEnabled: false, - enableMcpServerCreation: false, mode: "code" as const, experiments: experimentDefault, } as any) @@ -1431,10 +1441,10 @@ describe("ClineProvider", () => { ) }) - test("generates system prompt with various configurations", async () => { + test("generates system prompt with diff enabled", async () => { await provider.resolveWebviewView(mockWebviewView) - // Mock getState with typical configuration + // Mock getState to return diffEnabled: true vi.spyOn(provider, "getState").mockResolvedValue({ apiConfiguration: { apiProvider: "openrouter", @@ -1442,9 +1452,10 @@ describe("ClineProvider", () => { }, customModePrompts: {}, mode: "code", - enableMcpServerCreation: true, mcpEnabled: false, browserViewportSize: "900x600", + diffEnabled: true, + fuzzyMatchThreshold: 0.8, experiments: experimentDefault, browserToolEnabled: true, } as any) @@ -1463,6 +1474,39 @@ describe("ClineProvider", () => { ) }) + test("generates system prompt with diff disabled", async () => { + await provider.resolveWebviewView(mockWebviewView) + + // Mock getState to return diffEnabled: false + vi.spyOn(provider, "getState").mockResolvedValue({ + apiConfiguration: { + apiProvider: "openrouter", + apiModelId: "test-model", + }, + customModePrompts: {}, + mode: "code", + mcpEnabled: false, + browserViewportSize: "900x600", + diffEnabled: false, + fuzzyMatchThreshold: 0.8, + experiments: experimentDefault, + browserToolEnabled: false, + } as any) + + 
// Trigger getSystemPrompt + const handler = getMessageHandler() + await handler({ type: "getSystemPrompt", mode: "code" }) + + // Verify system prompt was generated and sent + expect(mockPostMessage).toHaveBeenCalledWith( + expect.objectContaining({ + type: "systemPrompt", + text: expect.any(String), + mode: "code", + }), + ) + }) + test("uses correct mode-specific instructions when mode is specified", async () => { await provider.resolveWebviewView(mockWebviewView) @@ -1475,7 +1519,6 @@ describe("ClineProvider", () => { architect: { customInstructions: "Architect mode instructions" }, }, mode: "architect", - enableMcpServerCreation: false, mcpEnabled: false, browserViewportSize: "900x600", experiments: experimentDefault, diff --git a/src/core/webview/__tests__/generateSystemPrompt.browser-capability.spec.ts b/src/core/webview/__tests__/generateSystemPrompt.browser-capability.spec.ts index 702c932fd5d..3b521c0f14b 100644 --- a/src/core/webview/__tests__/generateSystemPrompt.browser-capability.spec.ts +++ b/src/core/webview/__tests__/generateSystemPrompt.browser-capability.spec.ts @@ -60,7 +60,6 @@ function makeProviderStub() { browserViewportSize: "900x600", mcpEnabled: false, experiments: {}, - enableMcpServerCreation: false, browserToolEnabled: true, // critical: enabled in settings language: "en", maxReadFileLine: -1, diff --git a/src/core/webview/generateSystemPrompt.ts b/src/core/webview/generateSystemPrompt.ts index ed1ab9e2726..b6f77d3842c 100644 --- a/src/core/webview/generateSystemPrompt.ts +++ b/src/core/webview/generateSystemPrompt.ts @@ -17,7 +17,6 @@ export const generateSystemPrompt = async (provider: ClineProvider, message: Web browserViewportSize, mcpEnabled, experiments, - enableMcpServerCreation, browserToolEnabled, language, maxReadFileLine, @@ -69,7 +68,6 @@ export const generateSystemPrompt = async (provider: ClineProvider, message: Web customModes, customInstructions, experiments, - enableMcpServerCreation, language, rooIgnoreInstructions, maxReadFileLine !== -1, diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index 6a0224b42f8..cc4b1a27e9e 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -1443,10 +1443,6 @@ export const webviewMessageHandler = async ( } break } - case "enableMcpServerCreation": - await updateGlobalState("enableMcpServerCreation", message.bool ?? true) - await provider.postStateToWebview() - break case "remoteControlEnabled": try { await CloudService.instance.updateUserSettings({ extensionBridgeEnabled: message.bool ?? false }) diff --git a/src/services/skills/SkillsManager.ts b/src/services/skills/SkillsManager.ts index 59b50cf1713..1c61b5b1761 100644 --- a/src/services/skills/SkillsManager.ts +++ b/src/services/skills/SkillsManager.ts @@ -8,6 +8,7 @@ import { getGlobalRooDirectory } from "../roo-config" import { directoryExists, fileExists } from "../roo-config" import { SkillMetadata, SkillContent } from "../../shared/skills" import { modes, getAllModes } from "../../shared/modes" +import { getBuiltInSkills, getBuiltInSkillContent } from "./built-in-skills" // Re-export for convenience export type { SkillMetadata, SkillContent } @@ -164,13 +165,19 @@ export class SkillsManager { /** * Get skills available for the current mode. - * Resolves overrides: project > global, mode-specific > generic. + * Resolves overrides: project > global > built-in, mode-specific > generic. 
* * @param currentMode - The current mode slug (e.g., 'code', 'architect') */ getSkillsForMode(currentMode: string): SkillMetadata[] { const resolvedSkills = new Map() + // First, add built-in skills (lowest priority) + for (const skill of getBuiltInSkills()) { + resolvedSkills.set(skill.name, skill) + } + + // Then, add discovered skills (will override built-in skills with same name) for (const skill of this.skills.values()) { // Skip mode-specific skills that don't match current mode if (skill.mode && skill.mode !== currentMode) continue @@ -194,12 +201,22 @@ export class SkillsManager { /** * Determine if newSkill should override existingSkill based on priority rules. - * Priority: project > global, mode-specific > generic + * Priority: project > global > built-in, mode-specific > generic */ private shouldOverrideSkill(existing: SkillMetadata, newSkill: SkillMetadata): boolean { - // Project always overrides global - if (newSkill.source === "project" && existing.source === "global") return true - if (newSkill.source === "global" && existing.source === "project") return false + // Define source priority: project > global > built-in + const sourcePriority: Record = { + project: 3, + global: 2, + "built-in": 1, + } + + const existingPriority = sourcePriority[existing.source] ?? 0 + const newPriority = sourcePriority[newSkill.source] ?? 0 + + // Higher priority source always wins + if (newPriority > existingPriority) return true + if (newPriority < existingPriority) return false // Same source: mode-specific overrides generic if (newSkill.mode && !existing.mode) return true @@ -224,12 +241,21 @@ export class SkillsManager { const modeSkills = this.getSkillsForMode(currentMode) skill = modeSkills.find((s) => s.name === name) } else { - // Fall back to any skill with this name + // Fall back to any skill with this name (check discovered skills first, then built-in) skill = Array.from(this.skills.values()).find((s) => s.name === name) + if (!skill) { + skill = getBuiltInSkills().find((s) => s.name === name) + } } if (!skill) return null + // For built-in skills, use the built-in content + if (skill.source === "built-in") { + return getBuiltInSkillContent(name) + } + + // For file-based skills, read from disk const fileContent = await fs.readFile(skill.path, "utf-8") const { content: body } = matter(fileContent) diff --git a/src/services/skills/__tests__/SkillsManager.spec.ts b/src/services/skills/__tests__/SkillsManager.spec.ts index 4b6549108bb..aaf2792626b 100644 --- a/src/services/skills/__tests__/SkillsManager.spec.ts +++ b/src/services/skills/__tests__/SkillsManager.spec.ts @@ -63,6 +63,14 @@ vi.mock("../../roo-config", () => ({ fileExists: mockFileExists, })) +// Mock built-in skills to isolate tests from actual built-in skills +vi.mock("../built-in-skills", () => ({ + getBuiltInSkills: () => [], + getBuiltInSkillContent: () => null, + isBuiltInSkill: () => false, + getBuiltInSkillNames: () => [], +})) + import { SkillsManager } from "../SkillsManager" import { ClineProvider } from "../../../core/webview/ClineProvider" diff --git a/src/core/prompts/instructions/create-mcp-server.ts b/src/services/skills/built-in-skills.ts similarity index 62% rename from src/core/prompts/instructions/create-mcp-server.ts rename to src/services/skills/built-in-skills.ts index a63fad1de56..86ad4d22a21 100644 --- a/src/core/prompts/instructions/create-mcp-server.ts +++ b/src/services/skills/built-in-skills.ts @@ -1,17 +1,24 @@ -import { McpHub } from "../../../services/mcp/McpHub" -import { DiffStrategy } 
from "../../../shared/tools" +import { SkillMetadata, SkillContent } from "../../shared/skills" -export async function createMCPServerInstructions( - mcpHub: McpHub | undefined, - diffStrategy: DiffStrategy | undefined, -): Promise { - if (!diffStrategy || !mcpHub) throw new Error("Missing MCP Hub or Diff Strategy") +interface BuiltInSkillDefinition { + name: string + description: string + instructions: string +} - return `You have the ability to create an MCP server and add it to a configuration file that will then expose the tools and resources for you to use with \`use_mcp_tool\` and \`access_mcp_resource\`. +const BUILT_IN_SKILLS: Record = { + "create-mcp-server": { + name: "create-mcp-server", + description: + "Instructions for creating MCP (Model Context Protocol) servers. Use when the user asks to add a tool, create an MCP server, or extend capabilities with external APIs.", + instructions: `You have the ability to create an MCP server and add it to a configuration file that will then expose the tools and resources for you to use with \`use_mcp_tool\` and \`access_mcp_resource\`. When creating MCP servers, it's important to understand that they operate in a non-interactive environment. The server cannot initiate OAuth flows, open browser windows, or prompt for user input during runtime. All credentials and authentication tokens must be provided upfront through environment variables in the MCP settings configuration. For example, Spotify's API uses OAuth to get a refresh token for the user, but the MCP server cannot initiate this flow. While you can walk the user through obtaining an application client ID and secret, you may have to create a separate one-time setup script (like get-refresh-token.js) that captures and logs the final piece of the puzzle: the user's refresh token (i.e. you might run the script using execute_command which would open a browser for authentication, and then log the refresh token so that you can see it in the command output for you to use in the MCP settings configuration). -Unless the user specifies otherwise, new local MCP servers should be created in: ${await mcpHub.getMcpServersPath()} +Unless the user specifies otherwise, new local MCP servers should be created in the default MCP servers directory: +- macOS: ~/Documents/Cline/MCP/ +- Windows: %USERPROFILE%\\Documents\\Cline\\MCP\\ +- Linux: ~/Documents/Cline/MCP/ ### MCP Server Types and Configuration @@ -61,7 +68,7 @@ The following example demonstrates how to build a local MCP server that provides 1. Use the \`create-typescript-server\` tool to bootstrap a new project in the default MCP servers directory: \`\`\`bash -cd ${await mcpHub.getMcpServersPath()} +cd ~/Documents/Cline/MCP/ npx @modelcontextprotocol/create-server weather-server cd weather-server # Install dependencies @@ -77,7 +84,7 @@ weather-server/ ... "type": "module", // added by default, uses ES module syntax (import/export) rather than CommonJS (require/module.exports) (Important to know if you create additional scripts in this server repository like a get-refresh-token.js script) "scripts": { - "build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"", + "build": "tsc && node -e \\"require('fs').chmodSync('build/index.js', '755')\\"", ... } ... @@ -275,7 +282,7 @@ npm run build 4. Whenever you need an environment variable such as an API key to configure the MCP server, walk the user through the process of getting the key. 
For example, they may need to create an account and go to a developer dashboard to generate the key. Provide step-by-step instructions and URLs to make it easy for the user to retrieve the necessary information. Then use the ask_followup_question tool to ask the user for the key, in this case the OpenWeather API key. -5. Install the MCP Server by adding the MCP server configuration to the settings file located at '${await mcpHub.getMcpSettingsFilePath()}'. The settings file may have other MCP servers already configured, so you would read it first and then add your new server to the existing \`mcpServers\` object. +5. Install the MCP Server by adding the MCP server configuration to the MCP settings file. You can access the MCP settings through VS Code settings or by editing the mcp.json file in the Roo Code settings directory. The settings file may have other MCP servers already configured, so you would read it first and then add your new server to the existing \`mcpServers\` object. IMPORTANT: Regardless of what else you see in the MCP settings file, you must default any new MCP servers you create to disabled=false, alwaysAllow=[] and disabledTools=[]. @@ -294,7 +301,7 @@ IMPORTANT: Regardless of what else you see in the MCP settings file, you must de } \`\`\` -(Note: the user may also ask you to install the MCP server to the Claude desktop app, in which case you would read then modify \`~/Library/Application\ Support/Claude/claude_desktop_config.json\` on macOS for example. It follows the same format of a top level \`mcpServers\` object.) +(Note: the user may also ask you to install the MCP server to the Claude desktop app, in which case you would read then modify \`~/Library/Application\\ Support/Claude/claude_desktop_config.json\` on macOS for example. It follows the same format of a top level \`mcpServers\` object.) 6. After you have edited the MCP settings configuration file, the system will automatically run all the servers and expose the available tools and resources in the 'Connected MCP Servers' section. @@ -302,14 +309,7 @@ IMPORTANT: Regardless of what else you see in the MCP settings file, you must de ## Editing MCP Servers -The user may ask to add tools or resources that may make sense to add to an existing MCP server (listed under 'Connected MCP Servers' above: ${(() => { - if (!mcpHub) return "(None running currently)" - const servers = mcpHub - .getServers() - .map((server) => server.name) - .join(", ") - return servers || "(None running currently)" - })()}, e.g. if it would use the same API. This would be possible if you can locate the MCP server repository on the user's system by looking at the server arguments for a filepath. You might then use list_files and read_file to explore the files in the repository, and use write_to_file${diffStrategy ? " or apply_diff" : ""} to make changes to the files. +The user may ask to add tools or resources that may make sense to add to an existing MCP server (check the 'Connected MCP Servers' section in the system prompt), e.g. if it would use the same API. This would be possible if you can locate the MCP server repository on the user's system by looking at the server arguments for a filepath. You might then use list_files and read_file to explore the files in the repository, and use write_to_file or apply_diff to make changes to the files. However some MCP servers may be running from installed packages rather than a local repository, in which case it may make more sense to create a new MCP server. 
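The override behaviour added in the SkillsManager hunk above (project > global > built-in; within the same source, mode-specific beats generic) can be pictured with a small standalone sketch. The priority values and the comparison mirror the diff; the helper name `overrides` and the example skills are illustrative only.

```typescript
// Standalone sketch of the skill override rules; mirrors shouldOverrideSkill in the diff above.
type SkillSource = "project" | "global" | "built-in"

interface SkillRef {
	name: string
	source: SkillSource
	mode?: string
}

const sourcePriority: Record<SkillSource, number> = { project: 3, global: 2, "built-in": 1 }

function overrides(existing: SkillRef, candidate: SkillRef): boolean {
	const existingPriority = sourcePriority[existing.source]
	const candidatePriority = sourcePriority[candidate.source]
	// A higher-priority source always wins.
	if (candidatePriority !== existingPriority) return candidatePriority > existingPriority
	// Same source: a mode-specific skill overrides a generic one.
	return Boolean(candidate.mode) && !existing.mode
}

// A project-level "create-mcp-server" skill replaces the built-in skill of the same name...
console.log(overrides({ name: "create-mcp-server", source: "built-in" }, { name: "create-mcp-server", source: "project" })) // true
// ...while a built-in skill never displaces a user-defined one.
console.log(overrides({ name: "create-mode", source: "global" }, { name: "create-mode", source: "built-in" })) // false
```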
@@ -317,5 +317,101 @@ However some MCP servers may be running from installed packages rather than a lo The user may not always request the use or creation of MCP servers. Instead, they might provide tasks that can be completed with existing tools. While using the MCP SDK to extend your capabilities can be useful, it's important to understand that this is just one specialized type of task you can accomplish. You should only implement MCP servers when the user explicitly requests it (e.g., "add a tool that..."). -Remember: The MCP documentation and example provided above are to help you understand and work with existing MCP servers or create new ones when requested by the user. You already have access to tools and capabilities that can be used to accomplish a wide range of tasks.` +Remember: The MCP documentation and example provided above are to help you understand and work with existing MCP servers or create new ones when requested by the user. You already have access to tools and capabilities that can be used to accomplish a wide range of tasks.`, + }, + "create-mode": { + name: "create-mode", + description: + "Instructions for creating custom modes in Roo Code. Use when the user asks to create a new mode, edit an existing mode, or configure mode settings.", + instructions: `Custom modes can be configured in two ways: + 1. Globally via the custom modes file in your Roo Code settings directory (typically ~/.roo-code/settings/custom_modes.yaml on macOS/Linux or %APPDATA%\\roo-code\\settings\\custom_modes.yaml on Windows) - created automatically on startup + 2. Per-workspace via '.roomodes' in the workspace root directory + +When modes with the same slug exist in both files, the workspace-specific .roomodes version takes precedence. This allows projects to override global modes or define project-specific modes. + +If asked to create a project mode, create it in .roomodes in the workspace root. If asked to create a global mode, use the global custom modes file. + +- The following fields are required and must not be empty: + * slug: A valid slug (lowercase letters, numbers, and hyphens). Must be unique, and shorter is better. + * name: The display name for the mode + * roleDefinition: A detailed description of the mode's role and capabilities + * groups: Array of allowed tool groups (can be empty). Each group can be specified either as a string (e.g., "edit" to allow editing any file) or with file restrictions (e.g., ["edit", { fileRegex: "\\.md$", description: "Markdown files only" }] to only allow editing markdown files) + +- The following fields are optional but highly recommended: + * description: A short, human-readable description of what this mode does (5 words) + * whenToUse: A clear description of when this mode should be selected and what types of tasks it's best suited for. This helps the Orchestrator mode make better decisions. + * customInstructions: Additional instructions for how the mode should operate + +- For multi-line text, include newline characters in the string like "This is the first line.\\nThis is the next line.\\n\\nThis is a double line break." + +Both files should follow this structure (in YAML format): + +customModes: + - slug: designer # Required: unique slug with lowercase letters, numbers, and hyphens + name: Designer # Required: mode display name + description: UI/UX design systems expert # Optional but recommended: short description (5 words) + roleDefinition: >- + You are Roo, a UI/UX expert specializing in design systems and frontend development. 
Your expertise includes: + - Creating and maintaining design systems + - Implementing responsive and accessible web interfaces + - Working with CSS, HTML, and modern frontend frameworks + - Ensuring consistent user experiences across platforms # Required: non-empty + whenToUse: >- + Use this mode when creating or modifying UI components, implementing design systems, + or ensuring responsive web interfaces. This mode is especially effective with CSS, + HTML, and modern frontend frameworks. # Optional but recommended + groups: # Required: array of tool groups (can be empty) + - read # Read files group (read_file, search_files, list_files, codebase_search) + - edit # Edit files group (apply_diff, write_to_file) - allows editing any file + # Or with file restrictions: + # - - edit + # - fileRegex: \\.md$ + # description: Markdown files only # Edit group that only allows editing markdown files + - browser # Browser group (browser_action) + - command # Command group (execute_command) + - mcp # MCP group (use_mcp_tool, access_mcp_resource) + customInstructions: Additional instructions for the Designer mode # Optional`, + }, +} + +/** + * Get all built-in skills as SkillMetadata objects + */ +export function getBuiltInSkills(): SkillMetadata[] { + return Object.values(BUILT_IN_SKILLS).map((skill) => ({ + name: skill.name, + description: skill.description, + path: "built-in", + source: "built-in" as const, + })) +} + +/** + * Get a specific built-in skill's full content by name + */ +export function getBuiltInSkillContent(name: string): SkillContent | null { + const skill = BUILT_IN_SKILLS[name] + if (!skill) return null + + return { + name: skill.name, + description: skill.description, + path: "built-in", + source: "built-in" as const, + instructions: skill.instructions, + } +} + +/** + * Check if a skill name is a built-in skill + */ +export function isBuiltInSkill(name: string): boolean { + return name in BUILT_IN_SKILLS +} + +/** + * Get names of all built-in skills + */ +export function getBuiltInSkillNames(): string[] { + return Object.keys(BUILT_IN_SKILLS) } diff --git a/src/shared/skills.ts b/src/shared/skills.ts index 7ed85816aa8..ae35b8c3878 100644 --- a/src/shared/skills.ts +++ b/src/shared/skills.ts @@ -5,8 +5,8 @@ export interface SkillMetadata { name: string // Required: skill identifier description: string // Required: when to use this skill - path: string // Absolute path to SKILL.md - source: "global" | "project" // Where the skill was discovered + path: string // Absolute path to SKILL.md (or "" for built-in skills) + source: "global" | "project" | "built-in" // Where the skill was discovered mode?: string // If set, skill is only available in this mode } diff --git a/src/shared/tools.ts b/src/shared/tools.ts index dc1615c0654..5d7435573c8 100644 --- a/src/shared/tools.ts +++ b/src/shared/tools.ts @@ -60,6 +60,7 @@ export const toolParamNames = [ "size", "query", "args", + "skill", // skill tool parameter "start_line", "end_line", "todos", @@ -103,9 +104,9 @@ export type NativeToolArgs = { } browser_action: BrowserActionParams codebase_search: { query: string; path?: string } - fetch_instructions: { task: string } generate_image: GenerateImageParams run_slash_command: { command: string; args?: string } + skill: { skill: string; args?: string } search_files: { path: string; regex: string; file_pattern?: string | null } switch_mode: { mode_slug: string; reason: string } update_todo_list: { todos: string } @@ -167,11 +168,6 @@ export interface ReadFileToolUse extends 
ToolUse<"read_file"> { params: Partial, "args" | "path" | "start_line" | "end_line" | "files">> } -export interface FetchInstructionsToolUse extends ToolUse<"fetch_instructions"> { - name: "fetch_instructions" - params: Partial, "task">> -} - export interface WriteToFileToolUse extends ToolUse<"write_to_file"> { name: "write_to_file" params: Partial, "path" | "content">> @@ -232,6 +228,11 @@ export interface RunSlashCommandToolUse extends ToolUse<"run_slash_command"> { params: Partial, "command" | "args">> } +export interface SkillToolUse extends ToolUse<"skill"> { + name: "skill" + params: Partial, "skill" | "args">> +} + export interface GenerateImageToolUse extends ToolUse<"generate_image"> { name: "generate_image" params: Partial, "prompt" | "path" | "image">> @@ -248,7 +249,6 @@ export const TOOL_DISPLAY_NAMES: Record = { execute_command: "run commands", read_file: "read files", read_command_output: "read command output", - fetch_instructions: "fetch instructions", write_to_file: "write files", apply_diff: "apply changes", search_and_replace: "apply changes using search and replace", @@ -267,6 +267,7 @@ export const TOOL_DISPLAY_NAMES: Record = { codebase_search: "codebase search", update_todo_list: "update todo list", run_slash_command: "run slash command", + skill: "load skill", generate_image: "generate images", custom_tool: "use custom tools", } as const @@ -274,7 +275,7 @@ export const TOOL_DISPLAY_NAMES: Record = { // Define available tool groups. export const TOOL_GROUPS: Record = { read: { - tools: ["read_file", "fetch_instructions", "search_files", "list_files", "codebase_search"], + tools: ["read_file", "search_files", "list_files", "codebase_search"], }, edit: { tools: ["apply_diff", "write_to_file", "generate_image"], @@ -303,6 +304,7 @@ export const ALWAYS_AVAILABLE_TOOLS: ToolName[] = [ "new_task", "update_todo_list", "run_slash_command", + "skill", ] as const /** diff --git a/webview-ui/src/components/mcp/McpView.tsx b/webview-ui/src/components/mcp/McpView.tsx index 2167ee18b3e..75a9a1a3800 100644 --- a/webview-ui/src/components/mcp/McpView.tsx +++ b/webview-ui/src/components/mcp/McpView.tsx @@ -1,12 +1,6 @@ import React, { useState } from "react" import { Trans } from "react-i18next" -import { - VSCodeCheckbox, - VSCodeLink, - VSCodePanels, - VSCodePanelTab, - VSCodePanelView, -} from "@vscode/webview-ui-toolkit/react" +import { VSCodeLink, VSCodePanels, VSCodePanelTab, VSCodePanelView } from "@vscode/webview-ui-toolkit/react" import type { McpServer } from "@roo-code/types" @@ -35,13 +29,7 @@ import McpEnabledToggle from "./McpEnabledToggle" import { McpErrorRow } from "./McpErrorRow" const McpView = () => { - const { - mcpServers: servers, - alwaysAllowMcp, - mcpEnabled, - enableMcpServerCreation, - setEnableMcpServerCreation, - } = useExtensionState() + const { mcpServers: servers, alwaysAllowMcp, mcpEnabled } = useExtensionState() const { t } = useAppTranslation() const { isOverThreshold, title, message } = useTooManyTools() @@ -71,36 +59,6 @@ const McpView = () => { {mcpEnabled && ( <> -
- { - setEnableMcpServerCreation(e.target.checked) - vscode.postMessage({ type: "enableMcpServerCreation", bool: e.target.checked }) - }}> - {t("mcp:enableServerCreation.title")} - -
- - - Learn about server creation - - new - -

{t("mcp:enableServerCreation.hint")}

-
-
- {/* Too Many Tools Warning */} {isOverThreshold && (
diff --git a/webview-ui/src/context/ExtensionStateContext.tsx b/webview-ui/src/context/ExtensionStateContext.tsx index d37f09bbc51..9594f83b86a 100644 --- a/webview-ui/src/context/ExtensionStateContext.tsx +++ b/webview-ui/src/context/ExtensionStateContext.tsx @@ -100,8 +100,6 @@ export interface ExtensionStateContextType extends ExtensionState { setTerminalOutputPreviewSize: (value: "small" | "medium" | "large") => void mcpEnabled: boolean setMcpEnabled: (value: boolean) => void - enableMcpServerCreation: boolean - setEnableMcpServerCreation: (value: boolean) => void remoteControlEnabled: boolean setRemoteControlEnabled: (value: boolean) => void taskSyncEnabled: boolean @@ -211,7 +209,6 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode screenshotQuality: 75, terminalShellIntegrationTimeout: 4000, mcpEnabled: true, - enableMcpServerCreation: false, remoteControlEnabled: false, taskSyncEnabled: false, featureRoomoteControlEnabled: false, @@ -545,8 +542,6 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode setState((prevState) => ({ ...prevState, terminalShellIntegrationDisabled: value })), setTerminalZdotdir: (value) => setState((prevState) => ({ ...prevState, terminalZdotdir: value })), setMcpEnabled: (value) => setState((prevState) => ({ ...prevState, mcpEnabled: value })), - setEnableMcpServerCreation: (value) => - setState((prevState) => ({ ...prevState, enableMcpServerCreation: value })), setRemoteControlEnabled: (value) => setState((prevState) => ({ ...prevState, remoteControlEnabled: value })), setTaskSyncEnabled: (value) => setState((prevState) => ({ ...prevState, taskSyncEnabled: value }) as any), setFeatureRoomoteControlEnabled: (value) => diff --git a/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx b/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx index 0ee69a4ad68..4c6f395943b 100644 --- a/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx +++ b/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx @@ -187,7 +187,6 @@ describe("mergeExtensionState", () => { const baseState: ExtensionState = { version: "", mcpEnabled: false, - enableMcpServerCreation: false, clineMessages: [], taskHistory: [], shouldShowAnnouncement: false, From fc5073beeda11231270a34fb1fbe08ac9e8aaee3 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Fri, 23 Jan 2026 17:28:17 -0700 Subject: [PATCH 02/14] fix: address skill tool UI/typing and built-in skill location issues - Remove from built-in skills in prompt (only show for file-based skills) - Add 'skill' case to ChatRow.tsx for tool approval UI rendering - Add 'skill' to ClineSayTool union type and interface - Update i18n keys from 'instructions' to 'skill' --- packages/types/src/vscode-extension-host.ts | 3 + src/core/prompts/sections/skills.ts | 9 +-- webview-ui/src/components/chat/ChatRow.tsx | 73 +++++++++++++++++---- webview-ui/src/i18n/locales/en/chat.json | 5 +- 4 files changed, 73 insertions(+), 17 deletions(-) diff --git a/packages/types/src/vscode-extension-host.ts b/packages/types/src/vscode-extension-host.ts index 4765d05397d..ad460dd97a4 100644 --- a/packages/types/src/vscode-extension-host.ts +++ b/packages/types/src/vscode-extension-host.ts @@ -788,6 +788,7 @@ export interface ClineSayTool { | "imageGenerated" | "runSlashCommand" | "updateTodoList" + | "skill" path?: string // For readCommandOutput readStart?: number @@ -834,6 +835,8 @@ export interface ClineSayTool { args?: string source?: string description?: string + // 
Properties for skill tool + skill?: string } // Must keep in sync with system prompt. diff --git a/src/core/prompts/sections/skills.ts b/src/core/prompts/sections/skills.ts index 4530c13f87b..39cfca405b5 100644 --- a/src/core/prompts/sections/skills.ts +++ b/src/core/prompts/sections/skills.ts @@ -33,10 +33,11 @@ export async function getSkillsSection( .map((skill) => { const name = escapeXml(skill.name) const description = escapeXml(skill.description) - // Per the Agent Skills integration guidance for filesystem-based agents, - // location should be an absolute path to the SKILL.md file. - const location = escapeXml(skill.path) - return ` \n ${name}\n ${description}\n ${location}\n ` + // Only include location for file-based skills (not built-in) + // Built-in skills are loaded via the skill tool by name, not by path + const isFileBasedSkill = skill.source !== "built-in" && skill.path !== "built-in" + const locationLine = isFileBasedSkill ? `\n ${escapeXml(skill.path)}` : "" + return ` \n ${name}\n ${description}${locationLine}\n ` }) .join("\n") diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index 6a6d5c3f6df..25bcd61ee3f 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -666,24 +666,75 @@ export const ChatRowContent = ({
) - case "fetchInstructions": + case "skill": { + const skillInfo = tool return ( <>
- {toolIcon("file-code")} - {t("chat:instructions.wantsToFetch")} + {toolIcon("book")} + + {message.type === "ask" ? t("chat:skill.wantsToLoad") : t("chat:skill.didLoad")} +
-
- +
+ +
+ + {skillInfo.skill} + + {skillInfo.source && ( + + {skillInfo.source} + + )} +
+ +
+ {isExpanded && (skillInfo.args || skillInfo.description) && ( +
+ {skillInfo.description && ( +
+ {skillInfo.description} +
+ )} + {skillInfo.args && ( +
+ Arguments: + + {skillInfo.args} + +
+ )} +
+ )}
) + } case "listFilesTopLevel": return ( <> diff --git a/webview-ui/src/i18n/locales/en/chat.json b/webview-ui/src/i18n/locales/en/chat.json index d167a19ff3e..7c2d811021a 100644 --- a/webview-ui/src/i18n/locales/en/chat.json +++ b/webview-ui/src/i18n/locales/en/chat.json @@ -203,8 +203,9 @@ "description": "Older messages were removed from the conversation to stay within the context window limit. This is a fast but less context-preserving approach compared to condensation." } }, - "instructions": { - "wantsToFetch": "Roo wants to fetch detailed instructions to assist with the current task" + "skill": { + "wantsToLoad": "Roo wants to load a skill", + "didLoad": "Roo loaded a skill" }, "fileOperations": { "wantsToRead": "Roo wants to read this file", From d30a3c71c488e3c0b0c11716b3ca03bd37e06bd9 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Fri, 23 Jan 2026 17:54:41 -0700 Subject: [PATCH 03/14] fix: add missing diffEnabled parameter to SYSTEM_PROMPT calls after rebase --- src/core/prompts/__tests__/add-custom-instructions.spec.ts | 4 ++++ src/core/task/Task.ts | 1 + src/core/webview/__tests__/ClineProvider.spec.ts | 1 - src/core/webview/generateSystemPrompt.ts | 1 + 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/core/prompts/__tests__/add-custom-instructions.spec.ts b/src/core/prompts/__tests__/add-custom-instructions.spec.ts index b7813d0f5b8..3f7b6e06e43 100644 --- a/src/core/prompts/__tests__/add-custom-instructions.spec.ts +++ b/src/core/prompts/__tests__/add-custom-instructions.spec.ts @@ -210,6 +210,7 @@ describe("addCustomInstructions", () => { undefined, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions + undefined, // diffEnabled undefined, // experiments undefined, // language undefined, // rooIgnoreInstructions @@ -231,6 +232,7 @@ describe("addCustomInstructions", () => { undefined, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions + undefined, // diffEnabled undefined, // experiments undefined, // language undefined, // rooIgnoreInstructions @@ -254,6 +256,7 @@ describe("addCustomInstructions", () => { undefined, // customModePrompts undefined, // customModes, undefined, // globalCustomInstructions + undefined, // diffEnabled undefined, // experiments undefined, // language undefined, // rooIgnoreInstructions @@ -276,6 +279,7 @@ describe("addCustomInstructions", () => { undefined, // customModePrompts undefined, // customModes, undefined, // globalCustomInstructions + undefined, // diffEnabled undefined, // experiments undefined, // language undefined, // rooIgnoreInstructions diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 0c54e560b89..1e8ecd43a29 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -3795,6 +3795,7 @@ export class Task extends EventEmitter implements TaskLike { customModePrompts, customModes, customInstructions, + undefined, // diffEnabled experiments, language, rooIgnoreInstructions, diff --git a/src/core/webview/__tests__/ClineProvider.spec.ts b/src/core/webview/__tests__/ClineProvider.spec.ts index dcc4e20d6c0..158dee9db90 100644 --- a/src/core/webview/__tests__/ClineProvider.spec.ts +++ b/src/core/webview/__tests__/ClineProvider.spec.ts @@ -558,7 +558,6 @@ describe("ClineProvider", () => { enableCheckpoints: false, writeDelayMs: 1000, browserViewportSize: "900x600", - fuzzyMatchThreshold: 1.0, mcpEnabled: true, mode: defaultModeSlug, customModes: [], diff --git a/src/core/webview/generateSystemPrompt.ts 
b/src/core/webview/generateSystemPrompt.ts index b6f77d3842c..100d2976590 100644 --- a/src/core/webview/generateSystemPrompt.ts +++ b/src/core/webview/generateSystemPrompt.ts @@ -67,6 +67,7 @@ export const generateSystemPrompt = async (provider: ClineProvider, message: Web customModePrompts, customModes, customInstructions, + undefined, // diffEnabled experiments, language, rooIgnoreInstructions, From 2aaebff32c408ed8de759ab7c13b4dd4d5113828 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Fri, 23 Jan 2026 18:15:49 -0700 Subject: [PATCH 04/14] chore: add missing skill translations for 17 locales --- webview-ui/src/i18n/locales/ca/chat.json | 4 ++++ webview-ui/src/i18n/locales/de/chat.json | 4 ++++ webview-ui/src/i18n/locales/es/chat.json | 4 ++++ webview-ui/src/i18n/locales/fr/chat.json | 4 ++++ webview-ui/src/i18n/locales/hi/chat.json | 4 ++++ webview-ui/src/i18n/locales/id/chat.json | 4 ++++ webview-ui/src/i18n/locales/it/chat.json | 4 ++++ webview-ui/src/i18n/locales/ja/chat.json | 4 ++++ webview-ui/src/i18n/locales/ko/chat.json | 4 ++++ webview-ui/src/i18n/locales/nl/chat.json | 4 ++++ webview-ui/src/i18n/locales/pl/chat.json | 4 ++++ webview-ui/src/i18n/locales/pt-BR/chat.json | 4 ++++ webview-ui/src/i18n/locales/ru/chat.json | 4 ++++ webview-ui/src/i18n/locales/tr/chat.json | 4 ++++ webview-ui/src/i18n/locales/vi/chat.json | 4 ++++ webview-ui/src/i18n/locales/zh-CN/chat.json | 4 ++++ webview-ui/src/i18n/locales/zh-TW/chat.json | 4 ++++ 17 files changed, 68 insertions(+) diff --git a/webview-ui/src/i18n/locales/ca/chat.json b/webview-ui/src/i18n/locales/ca/chat.json index c44a8724335..d3653c057d4 100644 --- a/webview-ui/src/i18n/locales/ca/chat.json +++ b/webview-ui/src/i18n/locales/ca/chat.json @@ -324,6 +324,10 @@ "description": "S'han eliminat missatges més antics de la conversa per mantenir-se dins del límit de la finestra de context. Aquest és un enfocament ràpid però menys conservador del context en comparació amb la condensació." } }, + "skill": { + "wantsToLoad": "En Roo vol carregar una habilitat", + "didLoad": "En Roo ha carregat una habilitat" + }, "followUpSuggest": { "copyToInput": "Copiar a l'entrada (o Shift + clic)", "timerPrefix": "Aprovació automàtica habilitada. Seleccionant en {{seconds}}s…" diff --git a/webview-ui/src/i18n/locales/de/chat.json b/webview-ui/src/i18n/locales/de/chat.json index 1f3f11bc81c..e3aefa36298 100644 --- a/webview-ui/src/i18n/locales/de/chat.json +++ b/webview-ui/src/i18n/locales/de/chat.json @@ -324,6 +324,10 @@ "description": "Ältere Nachrichten wurden aus der Konversation entfernt, um innerhalb des Kontextfenster-Limits zu bleiben. Dies ist ein schnellerer, aber weniger kontexterhaltender Ansatz im Vergleich zur Komprimierung." } }, + "skill": { + "wantsToLoad": "Roo möchte eine Fähigkeit laden", + "didLoad": "Roo hat eine Fähigkeit geladen" + }, "followUpSuggest": { "copyToInput": "In Eingabefeld kopieren (oder Shift + Klick)", "timerPrefix": "Automatische Genehmigung aktiviert. Wähle in {{seconds}}s…" diff --git a/webview-ui/src/i18n/locales/es/chat.json b/webview-ui/src/i18n/locales/es/chat.json index 2c9418cfa7e..d06d55cdfa5 100644 --- a/webview-ui/src/i18n/locales/es/chat.json +++ b/webview-ui/src/i18n/locales/es/chat.json @@ -324,6 +324,10 @@ "description": "Se eliminaron mensajes más antiguos de la conversación para mantenerse dentro del límite de la ventana de contexto. Este es un enfoque rápido pero menos conservador del contexto en comparación con la condensación." 
} }, + "skill": { + "wantsToLoad": "Roo quiere cargar una habilidad", + "didLoad": "Roo cargó una habilidad" + }, "followUpSuggest": { "copyToInput": "Copiar a la entrada (o Shift + clic)", "timerPrefix": "Aprobación automática habilitada. Seleccionando en {{seconds}}s…" diff --git a/webview-ui/src/i18n/locales/fr/chat.json b/webview-ui/src/i18n/locales/fr/chat.json index 8aa09075dc3..96781dfb712 100644 --- a/webview-ui/src/i18n/locales/fr/chat.json +++ b/webview-ui/src/i18n/locales/fr/chat.json @@ -324,6 +324,10 @@ "description": "Les messages plus anciens ont été supprimés de la conversation pour rester dans la limite de la fenêtre de contexte. C'est une approche rapide mais moins conservatrice du contexte par rapport à la condensation." } }, + "skill": { + "wantsToLoad": "Roo veut charger une compétence", + "didLoad": "Roo a chargé une compétence" + }, "followUpSuggest": { "copyToInput": "Copier vers l'entrée (ou Shift + clic)", "timerPrefix": "Approbation automatique activée. Sélection dans {{seconds}}s…" diff --git a/webview-ui/src/i18n/locales/hi/chat.json b/webview-ui/src/i18n/locales/hi/chat.json index 9c155e62ec4..225539cd7c1 100644 --- a/webview-ui/src/i18n/locales/hi/chat.json +++ b/webview-ui/src/i18n/locales/hi/chat.json @@ -324,6 +324,10 @@ "description": "संदर्भ विंडो सीमा के भीतर रहने के लिए बातचीत से पुराने संदेश हटा दिए गए। संघनन की तुलना में यह एक तेज़ लेकिन कम संदर्भ-संरक्षित दृष्टिकोण है।" } }, + "skill": { + "wantsToLoad": "Roo एक कौशल लोड करना चाहता है", + "didLoad": "Roo ने एक कौशल लोड किया" + }, "followUpSuggest": { "copyToInput": "इनपुट में कॉपी करें (या Shift + क्लिक)", "timerPrefix": "ऑटो-अनुमोदन सक्षम है। {{seconds}}s में चयन किया जा रहा है…" diff --git a/webview-ui/src/i18n/locales/id/chat.json b/webview-ui/src/i18n/locales/id/chat.json index c8569f36460..437d588c308 100644 --- a/webview-ui/src/i18n/locales/id/chat.json +++ b/webview-ui/src/i18n/locales/id/chat.json @@ -206,6 +206,10 @@ "description": "Pesan lama telah dihapus dari percakapan untuk tetap dalam batas jendela konteks. Ini adalah pendekatan yang cepat tetapi kurang mempertahankan konteks dibandingkan dengan kondensasi." } }, + "skill": { + "wantsToLoad": "Roo ingin memuat keterampilan", + "didLoad": "Roo telah memuat keterampilan" + }, "instructions": { "wantsToFetch": "Roo ingin mengambil instruksi detail untuk membantu tugas saat ini" }, diff --git a/webview-ui/src/i18n/locales/it/chat.json b/webview-ui/src/i18n/locales/it/chat.json index ac00a6dea09..7396f31a457 100644 --- a/webview-ui/src/i18n/locales/it/chat.json +++ b/webview-ui/src/i18n/locales/it/chat.json @@ -324,6 +324,10 @@ "description": "I messaggi più vecchi sono stati rimossi dalla conversazione per rimanere entro il limite della finestra di contesto. Questo è un approccio veloce ma meno conservativo del contesto rispetto alla condensazione." } }, + "skill": { + "wantsToLoad": "Roo vuole caricare una competenza", + "didLoad": "Roo ha caricato una competenza" + }, "followUpSuggest": { "copyToInput": "Copia nell'input (o Shift + clic)", "timerPrefix": "Approvazione automatica abilitata. 
Selezione tra {{seconds}}s…" diff --git a/webview-ui/src/i18n/locales/ja/chat.json b/webview-ui/src/i18n/locales/ja/chat.json index 34a494ba23a..913ae45238d 100644 --- a/webview-ui/src/i18n/locales/ja/chat.json +++ b/webview-ui/src/i18n/locales/ja/chat.json @@ -324,6 +324,10 @@ "description": "コンテキストウィンドウの制限内に収めるため、古いメッセージが会話から削除されました。これは圧縮と比較して高速ですが、コンテキストの保持性が低いアプローチです。" } }, + "skill": { + "wantsToLoad": "Rooはスキルを読み込もうとしています", + "didLoad": "Rooはスキルを読み込みました" + }, "followUpSuggest": { "copyToInput": "入力欄にコピー(またはShift + クリック)", "timerPrefix": "自動承認が有効です。{{seconds}}秒後に選択中…" diff --git a/webview-ui/src/i18n/locales/ko/chat.json b/webview-ui/src/i18n/locales/ko/chat.json index 18d0089e340..dca0fb149fd 100644 --- a/webview-ui/src/i18n/locales/ko/chat.json +++ b/webview-ui/src/i18n/locales/ko/chat.json @@ -324,6 +324,10 @@ "description": "컨텍스트 윈도우 제한 내에 유지하기 위해 대화에서 오래된 메시지가 제거되었습니다. 이것은 압축에 비해 빠르지만 컨텍스트 보존 능력이 낮은 접근 방식입니다." } }, + "skill": { + "wantsToLoad": "Roo가 스킬을 로드하려고 합니다", + "didLoad": "Roo가 스킬을 로드했습니다" + }, "followUpSuggest": { "copyToInput": "입력창에 복사 (또는 Shift + 클릭)", "timerPrefix": "자동 승인 활성화됨. {{seconds}}초 후 선택 중…" diff --git a/webview-ui/src/i18n/locales/nl/chat.json b/webview-ui/src/i18n/locales/nl/chat.json index 5f0f6936194..1995a5f9a14 100644 --- a/webview-ui/src/i18n/locales/nl/chat.json +++ b/webview-ui/src/i18n/locales/nl/chat.json @@ -346,6 +346,10 @@ "description": "Oudere berichten zijn uit het gesprek verwijderd om binnen de limiet van het contextvenster te blijven. Dit is een snelle maar minder contextbehoudende aanpak in vergelijking met samenvoeging." } }, + "skill": { + "wantsToLoad": "Roo wil een vaardigheid laden", + "didLoad": "Roo heeft een vaardigheid geladen" + }, "followUpSuggest": { "copyToInput": "Kopiëren naar invoer (zelfde als shift + klik)", "timerPrefix": "Automatisch goedkeuren ingeschakeld. Selecteren in {{seconds}}s…" diff --git a/webview-ui/src/i18n/locales/pl/chat.json b/webview-ui/src/i18n/locales/pl/chat.json index fd90a260034..b1e86920bd2 100644 --- a/webview-ui/src/i18n/locales/pl/chat.json +++ b/webview-ui/src/i18n/locales/pl/chat.json @@ -324,6 +324,10 @@ "description": "Starsze wiadomości zostały usunięte z konwersacji, aby pozostać w granicach okna kontekstu. To szybsze, ale mniej zachowujące kontekst podejście w porównaniu z kondensacją." } }, + "skill": { + "wantsToLoad": "Roo chce załadować umiejętność", + "didLoad": "Roo załadował umiejętność" + }, "followUpSuggest": { "copyToInput": "Kopiuj do pola wprowadzania (lub Shift + kliknięcie)", "timerPrefix": "Automatyczne zatwierdzanie włączone. Zaznaczanie za {{seconds}}s…" diff --git a/webview-ui/src/i18n/locales/pt-BR/chat.json b/webview-ui/src/i18n/locales/pt-BR/chat.json index c6fdc35e824..dc3ae3b3816 100644 --- a/webview-ui/src/i18n/locales/pt-BR/chat.json +++ b/webview-ui/src/i18n/locales/pt-BR/chat.json @@ -324,6 +324,10 @@ "description": "Mensagens mais antigas foram removidas da conversa para permanecer dentro do limite da janela de contexto. Esta é uma abordagem rápida, mas menos preservadora de contexto em comparação com a condensação." } }, + "skill": { + "wantsToLoad": "Roo quer carregar uma habilidade", + "didLoad": "Roo carregou uma habilidade" + }, "followUpSuggest": { "copyToInput": "Copiar para entrada (ou Shift + clique)", "timerPrefix": "Aprovação automática ativada. 
Selecionando em {{seconds}}s…" diff --git a/webview-ui/src/i18n/locales/ru/chat.json b/webview-ui/src/i18n/locales/ru/chat.json index dffbc64e8d4..ae182d126f6 100644 --- a/webview-ui/src/i18n/locales/ru/chat.json +++ b/webview-ui/src/i18n/locales/ru/chat.json @@ -347,6 +347,10 @@ "description": "Более старые сообщения были удалены из разговора, чтобы остаться в пределах контекстного окна. Это быстрый, но менее сохраняющий контекст подход по сравнению со сжатием." } }, + "skill": { + "wantsToLoad": "Roo хочет загрузить навык", + "didLoad": "Roo загрузил навык" + }, "followUpSuggest": { "copyToInput": "Скопировать во ввод (то же, что shift + клик)", "timerPrefix": "Автоматическое одобрение включено. Выбор через {{seconds}}s…" diff --git a/webview-ui/src/i18n/locales/tr/chat.json b/webview-ui/src/i18n/locales/tr/chat.json index 5d5d93893e3..267eae1dba6 100644 --- a/webview-ui/src/i18n/locales/tr/chat.json +++ b/webview-ui/src/i18n/locales/tr/chat.json @@ -325,6 +325,10 @@ "description": "Bağlam penceresi sınırında kalmak için eski mesajlar konuşmadan kaldırıldı. Bu, yoğunlaştırmaya kıyasla hızlı ancak daha az bağlam koruyucu bir yaklaşımdır." } }, + "skill": { + "wantsToLoad": "Roo bir beceri yüklemek istiyor", + "didLoad": "Roo bir beceri yükledi" + }, "followUpSuggest": { "copyToInput": "Giriş alanına kopyala (veya Shift + tıklama)", "timerPrefix": "Otomatik onay etkinleştirildi. {{seconds}}s içinde seçim yapılıyor…" diff --git a/webview-ui/src/i18n/locales/vi/chat.json b/webview-ui/src/i18n/locales/vi/chat.json index 76191a03cf1..5d37f66203a 100644 --- a/webview-ui/src/i18n/locales/vi/chat.json +++ b/webview-ui/src/i18n/locales/vi/chat.json @@ -325,6 +325,10 @@ "description": "Các tin nhắn cũ hơn đã bị xóa khỏi cuộc trò chuyện để giữ trong giới hạn cửa sổ ngữ cảnh. Đây là cách tiếp cận nhanh nhưng ít bảo toàn ngữ cảnh hơn so với cô đọng." } }, + "skill": { + "wantsToLoad": "Roo muốn tải một kỹ năng", + "didLoad": "Roo đã tải một kỹ năng" + }, "followUpSuggest": { "copyToInput": "Sao chép vào ô nhập liệu (hoặc Shift + nhấp chuột)", "timerPrefix": "Phê duyệt tự động được bật. 
Chọn trong {{seconds}}s…" diff --git a/webview-ui/src/i18n/locales/zh-CN/chat.json b/webview-ui/src/i18n/locales/zh-CN/chat.json index e63cc5dd08e..8a46b1e8a48 100644 --- a/webview-ui/src/i18n/locales/zh-CN/chat.json +++ b/webview-ui/src/i18n/locales/zh-CN/chat.json @@ -325,6 +325,10 @@ "description": "为保持在上下文窗口限制内,已从对话中移除较旧的消息。与压缩相比,这是一种快速但上下文保留较少的方法。" } }, + "skill": { + "wantsToLoad": "Roo 想要加载技能", + "didLoad": "Roo 加载了技能" + }, "followUpSuggest": { "copyToInput": "复制到输入框(或按住Shift点击)", "timerPrefix": "自动批准已启用。{{seconds}}秒后选择中…" diff --git a/webview-ui/src/i18n/locales/zh-TW/chat.json b/webview-ui/src/i18n/locales/zh-TW/chat.json index 95a96503baf..a8bce99ffe7 100644 --- a/webview-ui/src/i18n/locales/zh-TW/chat.json +++ b/webview-ui/src/i18n/locales/zh-TW/chat.json @@ -203,6 +203,10 @@ "description": "為保持在上下文視窗限制內,已從對話中移除較舊的訊息。與壓縮相比,這是一種快速但上下文保留較少的方法。" } }, + "skill": { + "wantsToLoad": "Roo 想要載入技能", + "didLoad": "Roo 載入了技能" + }, "instructions": { "wantsToFetch": "Roo 想要取得詳細指示以協助目前工作" }, From 93400904701c0a603b64ebb4d36f4c1344723bc8 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 28 Jan 2026 08:57:31 -0700 Subject: [PATCH 05/14] feat(skills): add build-time generation for built-in skills from SKILL.md files - Add generate-built-in-skills.ts script to parse SKILL.md files and generate TypeScript - Add prebundle npm script to auto-generate before builds - Create built-in/create-mode/SKILL.md for mode creation instructions - Create built-in/mcp-builder/ with comprehensive MCP server development guide - Add project-level .roo/skills/mcp-builder/ skill - Add tests for the generation script - Update built-in-skills.ts to be auto-generated (DO NOT EDIT DIRECTLY) The source of truth is now the SKILL.md files. Run 'pnpm generate:skills' to regenerate. 
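As a rough illustration of the generation step this commit message describes, the sketch below reads each built-in SKILL.md, parses its frontmatter with gray-matter (already used by SkillsManager), and collects the name, description, and instruction body. The directory layout and the output shape are assumptions based on this commit, not the actual generate-built-in-skills.ts.

```typescript
// Rough sketch: read built-in/<name>/SKILL.md files and collect the data that
// backs built-in-skills.ts. Paths and the output shape are assumptions.
import fs from "fs/promises"
import path from "path"
import matter from "gray-matter"

interface BuiltInSkillDefinition {
	name: string
	description: string
	instructions: string
}

async function collectBuiltInSkills(rootDir: string): Promise<BuiltInSkillDefinition[]> {
	const skills: BuiltInSkillDefinition[] = []
	for (const entry of await fs.readdir(rootDir, { withFileTypes: true })) {
		if (!entry.isDirectory()) continue
		const raw = await fs.readFile(path.join(rootDir, entry.name, "SKILL.md"), "utf-8")
		const { data, content } = matter(raw)
		skills.push({
			name: data.name ?? entry.name,
			description: data.description ?? "",
			instructions: content.trim(),
		})
	}
	return skills
}

// The generated module would then embed these definitions as a constant map,
// which getBuiltInSkills() and getBuiltInSkillContent() read at runtime.
```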
--- .roo/skills/mcp-builder/LICENSE.txt | 202 ++++ .roo/skills/mcp-builder/SKILL.md | 256 +++++ .../mcp-builder/reference/evaluation.md | 642 ++++++++++++ .../reference/mcp_best_practices.md | 269 +++++ .../mcp-builder/reference/node_mcp_server.md | 975 ++++++++++++++++++ .../reference/python_mcp_server.md | 738 +++++++++++++ .../skills/mcp-builder/scripts/connections.py | 151 +++ .roo/skills/mcp-builder/scripts/evaluation.py | 373 +++++++ .../scripts/example_evaluation.xml | 22 + .../mcp-builder/scripts/requirements.txt | 2 + src/package.json | 2 + .../generate-built-in-skills.spec.ts | 175 ++++ src/services/skills/built-in-skills.ts | 557 +++++----- .../skills/built-in/create-mode/SKILL.md | 57 + .../skills/built-in/mcp-builder/LICENSE.txt | 202 ++++ .../skills/built-in/mcp-builder/SKILL.md | 256 +++++ .../mcp-builder/reference/evaluation.md | 642 ++++++++++++ .../reference/mcp_best_practices.md | 269 +++++ .../mcp-builder/reference/node_mcp_server.md | 975 ++++++++++++++++++ .../reference/python_mcp_server.md | 738 +++++++++++++ .../mcp-builder/scripts/connections.py | 151 +++ .../mcp-builder/scripts/evaluation.py | 373 +++++++ .../scripts/example_evaluation.xml | 22 + .../mcp-builder/scripts/requirements.txt | 2 + .../skills/generate-built-in-skills.ts | 289 ++++++ 25 files changed, 8028 insertions(+), 312 deletions(-) create mode 100644 .roo/skills/mcp-builder/LICENSE.txt create mode 100644 .roo/skills/mcp-builder/SKILL.md create mode 100644 .roo/skills/mcp-builder/reference/evaluation.md create mode 100644 .roo/skills/mcp-builder/reference/mcp_best_practices.md create mode 100644 .roo/skills/mcp-builder/reference/node_mcp_server.md create mode 100644 .roo/skills/mcp-builder/reference/python_mcp_server.md create mode 100644 .roo/skills/mcp-builder/scripts/connections.py create mode 100644 .roo/skills/mcp-builder/scripts/evaluation.py create mode 100644 .roo/skills/mcp-builder/scripts/example_evaluation.xml create mode 100644 .roo/skills/mcp-builder/scripts/requirements.txt create mode 100644 src/services/skills/__tests__/generate-built-in-skills.spec.ts create mode 100644 src/services/skills/built-in/create-mode/SKILL.md create mode 100644 src/services/skills/built-in/mcp-builder/LICENSE.txt create mode 100644 src/services/skills/built-in/mcp-builder/SKILL.md create mode 100644 src/services/skills/built-in/mcp-builder/reference/evaluation.md create mode 100644 src/services/skills/built-in/mcp-builder/reference/mcp_best_practices.md create mode 100644 src/services/skills/built-in/mcp-builder/reference/node_mcp_server.md create mode 100644 src/services/skills/built-in/mcp-builder/reference/python_mcp_server.md create mode 100644 src/services/skills/built-in/mcp-builder/scripts/connections.py create mode 100644 src/services/skills/built-in/mcp-builder/scripts/evaluation.py create mode 100644 src/services/skills/built-in/mcp-builder/scripts/example_evaluation.xml create mode 100644 src/services/skills/built-in/mcp-builder/scripts/requirements.txt create mode 100644 src/services/skills/generate-built-in-skills.ts diff --git a/.roo/skills/mcp-builder/LICENSE.txt b/.roo/skills/mcp-builder/LICENSE.txt new file mode 100644 index 00000000000..7a4a3ea2424 --- /dev/null +++ b/.roo/skills/mcp-builder/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/.roo/skills/mcp-builder/SKILL.md b/.roo/skills/mcp-builder/SKILL.md new file mode 100644 index 00000000000..bd45c9c8791 --- /dev/null +++ b/.roo/skills/mcp-builder/SKILL.md @@ -0,0 +1,256 @@ +--- +name: mcp-builder +description: Guide for creating high-quality MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. Use when building MCP servers to integrate external APIs or services, whether in Python (FastMCP) or Node/TypeScript (MCP SDK). +license: Complete terms in LICENSE.txt +--- + +# MCP Server Development Guide + +## Overview + +Create MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. The quality of an MCP server is measured by how well it enables LLMs to accomplish real-world tasks. + +--- + +# Process + +## 🚀 High-Level Workflow + +Creating a high-quality MCP server involves four main phases: + +### Phase 1: Deep Research and Planning + +#### 1.1 Understand Modern MCP Design + +**API Coverage vs. Workflow Tools:** +Balance comprehensive API endpoint coverage with specialized workflow tools. Workflow tools can be more convenient for specific tasks, while comprehensive coverage gives agents flexibility to compose operations. Performance varies by client—some clients benefit from code execution that combines basic tools, while others work better with higher-level workflows. When uncertain, prioritize comprehensive API coverage. + +**Tool Naming and Discoverability:** +Clear, descriptive tool names help agents find the right tools quickly. Use consistent prefixes (e.g., `github_create_issue`, `github_list_repos`) and action-oriented naming. + +**Context Management:** +Agents benefit from concise tool descriptions and the ability to filter/paginate results. Design tools that return focused, relevant data. Some clients support code execution which can help agents filter and process data efficiently. + +**Actionable Error Messages:** +Error messages should guide agents toward solutions with specific suggestions and next steps. + +#### 1.2 Study MCP Protocol Documentation + +**Navigate the MCP specification:** + +Start with the sitemap to find relevant pages: `https://modelcontextprotocol.io/sitemap.xml` + +Then fetch specific pages with `.md` suffix for markdown format (e.g., `https://modelcontextprotocol.io/specification/draft.md`). + +Key pages to review: + +- Specification overview and architecture +- Transport mechanisms (streamable HTTP, stdio) +- Tool, resource, and prompt definitions + +#### 1.3 Study Framework Documentation + +**Recommended stack:** + +- **Language**: TypeScript (high-quality SDK support and good compatibility in many execution environments e.g. MCPB. Plus AI models are good at generating TypeScript code, benefiting from its broad usage, static typing and good linting tools) +- **Transport**: Streamable HTTP for remote servers, using stateless JSON (simpler to scale and maintain, as opposed to stateful sessions and streaming responses). stdio for local servers. 
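+
+As a quick preview of this stack (the full pattern appears in the TypeScript guide referenced below), a minimal illustrative sketch of both transports might look like the following. The stateless streamable HTTP options (`sessionIdGenerator: undefined`, `enableJsonResponse: true`) are what "stateless JSON" refers to; tool registration itself is covered in Phase 2.
+
+```typescript
+import express from "express"
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"
+import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js"
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
+
+// Illustrative placeholder server; real tools are registered in Phase 2.
+const server = new McpServer({ name: "example-mcp", version: "1.0.0" })
+
+// Remote deployment: stateless streamable HTTP with a fresh transport per request.
+async function runHttp() {
+	const app = express()
+	app.use(express.json())
+	app.post("/mcp", async (req, res) => {
+		const transport = new StreamableHTTPServerTransport({
+			sessionIdGenerator: undefined, // stateless: no session tracking
+			enableJsonResponse: true, // plain JSON responses instead of streaming
+		})
+		res.on("close", () => transport.close())
+		await server.connect(transport)
+		await transport.handleRequest(req, res, req.body)
+	})
+	app.listen(3000)
+}
+
+// Local deployment: stdio, where the client launches the server as a subprocess.
+async function runStdio() {
+	await server.connect(new StdioServerTransport())
+}
+
+// Choose one transport per deployment (see the Complete Example in the TypeScript guide).
+const mode = process.env.TRANSPORT || "stdio"
+const run = mode === "http" ? runHttp : runStdio
+run().catch((error) => {
+	console.error("Server error:", error)
+	process.exit(1)
+})
+```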
+ +**Load framework documentation:** + +- **MCP Best Practices**: [📋 View Best Practices](./reference/mcp_best_practices.md) - Core guidelines + +**For TypeScript (recommended):** + +- **TypeScript SDK**: Use WebFetch to load `https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md` +- [⚡ TypeScript Guide](./reference/node_mcp_server.md) - TypeScript patterns and examples + +**For Python:** + +- **Python SDK**: Use WebFetch to load `https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md` +- [🐍 Python Guide](./reference/python_mcp_server.md) - Python patterns and examples + +#### 1.4 Plan Your Implementation + +**Understand the API:** +Review the service's API documentation to identify key endpoints, authentication requirements, and data models. Use web search and WebFetch as needed. + +**Tool Selection:** +Prioritize comprehensive API coverage. List endpoints to implement, starting with the most common operations. + +--- + +### Phase 2: Implementation + +#### 2.1 Set Up Project Structure + +See language-specific guides for project setup: + +- [⚡ TypeScript Guide](./reference/node_mcp_server.md) - Project structure, package.json, tsconfig.json +- [🐍 Python Guide](./reference/python_mcp_server.md) - Module organization, dependencies + +#### 2.2 Implement Core Infrastructure + +Create shared utilities: + +- API client with authentication +- Error handling helpers +- Response formatting (JSON/Markdown) +- Pagination support + +#### 2.3 Implement Tools + +For each tool: + +**Input Schema:** + +- Use Zod (TypeScript) or Pydantic (Python) +- Include constraints and clear descriptions +- Add examples in field descriptions + +**Output Schema:** + +- Define `outputSchema` where possible for structured data +- Use `structuredContent` in tool responses (TypeScript SDK feature) +- Helps clients understand and process tool outputs + +**Tool Description:** + +- Concise summary of functionality +- Parameter descriptions +- Return type schema + +**Implementation:** + +- Async/await for I/O operations +- Proper error handling with actionable messages +- Support pagination where applicable +- Return both text content and structured data when using modern SDKs + +**Annotations:** + +- `readOnlyHint`: true/false +- `destructiveHint`: true/false +- `idempotentHint`: true/false +- `openWorldHint`: true/false + +--- + +### Phase 3: Review and Test + +#### 3.1 Code Quality + +Review for: + +- No duplicated code (DRY principle) +- Consistent error handling +- Full type coverage +- Clear tool descriptions + +#### 3.2 Build and Test + +**TypeScript:** + +- Run `npm run build` to verify compilation +- Test with MCP Inspector: `npx @modelcontextprotocol/inspector` + +**Python:** + +- Verify syntax: `python -m py_compile your_server.py` +- Test with MCP Inspector + +See language-specific guides for detailed testing approaches and quality checklists. + +--- + +### Phase 4: Create Evaluations + +After implementing your MCP server, create comprehensive evaluations to test its effectiveness. + +**Load [✅ Evaluation Guide](./reference/evaluation.md) for complete evaluation guidelines.** + +#### 4.1 Understand Evaluation Purpose + +Use evaluations to test whether LLMs can effectively use your MCP server to answer realistic, complex questions. + +#### 4.2 Create 10 Evaluation Questions + +To create effective evaluations, follow the process outlined in the evaluation guide: + +1. **Tool Inspection**: List available tools and understand their capabilities +2. 
**Content Exploration**: Use READ-ONLY operations to explore available data +3. **Question Generation**: Create 10 complex, realistic questions +4. **Answer Verification**: Solve each question yourself to verify answers + +#### 4.3 Evaluation Requirements + +Ensure each question is: + +- **Independent**: Not dependent on other questions +- **Read-only**: Only non-destructive operations required +- **Complex**: Requiring multiple tool calls and deep exploration +- **Realistic**: Based on real use cases humans would care about +- **Verifiable**: Single, clear answer that can be verified by string comparison +- **Stable**: Answer won't change over time + +#### 4.4 Output Format + +Create an XML file with this structure: + +```xml + + + Find discussions about AI model launches with animal codenames. One model needed a specific safety designation that uses the format ASL-X. What number X was being determined for the model named after a spotted wild cat? + 3 + + + +``` + +--- + +# Reference Files + +## 📚 Documentation Library + +Load these resources as needed during development: + +### Core MCP Documentation (Load First) + +- **MCP Protocol**: Start with sitemap at `https://modelcontextprotocol.io/sitemap.xml`, then fetch specific pages with `.md` suffix +- [📋 MCP Best Practices](./reference/mcp_best_practices.md) - Universal MCP guidelines including: + - Server and tool naming conventions + - Response format guidelines (JSON vs Markdown) + - Pagination best practices + - Transport selection (streamable HTTP vs stdio) + - Security and error handling standards + +### SDK Documentation (Load During Phase 1/2) + +- **Python SDK**: Fetch from `https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md` +- **TypeScript SDK**: Fetch from `https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md` + +### Language-Specific Implementation Guides (Load During Phase 2) + +- [🐍 Python Implementation Guide](./reference/python_mcp_server.md) - Complete Python/FastMCP guide with: + + - Server initialization patterns + - Pydantic model examples + - Tool registration with `@mcp.tool` + - Complete working examples + - Quality checklist + +- [⚡ TypeScript Implementation Guide](./reference/node_mcp_server.md) - Complete TypeScript guide with: + - Project structure + - Zod schema patterns + - Tool registration with `server.registerTool` + - Complete working examples + - Quality checklist + +### Evaluation Guide (Load During Phase 4) + +- [✅ Evaluation Guide](./reference/evaluation.md) - Complete evaluation creation guide with: + - Question creation guidelines + - Answer verification strategies + - XML format specifications + - Example questions and answers + - Running an evaluation with the provided scripts diff --git a/.roo/skills/mcp-builder/reference/evaluation.md b/.roo/skills/mcp-builder/reference/evaluation.md new file mode 100644 index 00000000000..c9375b535a3 --- /dev/null +++ b/.roo/skills/mcp-builder/reference/evaluation.md @@ -0,0 +1,642 @@ +# MCP Server Evaluation Guide + +## Overview + +This document provides guidance on creating comprehensive evaluations for MCP servers. Evaluations test whether LLMs can effectively use your MCP server to answer realistic, complex questions using only the tools provided. 
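+
+Because grading ultimately comes down to comparing the agent's final answer against the expected answer as a string (see the answer guidelines below and the provided `scripts/evaluation.py` harness), a checker can stay very simple. The sketch below is purely illustrative, not the bundled harness, and assumes the grader normalizes whitespace and case before the direct string comparison:
+
+```typescript
+// Illustrative only: the real grading is done by scripts/evaluation.py.
+// Shown here to make concrete why each answer must be a single, stable value.
+interface QaPair {
+	question: string
+	answer: string
+}
+
+// Assumption: whitespace and case are normalized before the direct string comparison.
+function normalize(value: string): string {
+	return value.trim().replace(/\s+/g, " ").toLowerCase()
+}
+
+function isCorrect(expected: QaPair, actualAnswer: string): boolean {
+	return normalize(actualAnswer) === normalize(expected.answer)
+}
+
+// A human-readable, stable answer passes regardless of incidental formatting:
+isCorrect({ question: "Who led the 2023 onboarding initiative?", answer: "Product Manager" }, "  product manager ") // true
+// A list-shaped answer would not: ordering and separators make string comparison unreliable.
+```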
+ +--- + +## Quick Reference + +### Evaluation Requirements + +- Create 10 human-readable questions +- Questions must be READ-ONLY, INDEPENDENT, NON-DESTRUCTIVE +- Each question requires multiple tool calls (potentially dozens) +- Answers must be single, verifiable values +- Answers must be STABLE (won't change over time) + +### Output Format + +```xml + + + Your question here + Single verifiable answer + + +``` + +--- + +## Purpose of Evaluations + +The measure of quality of an MCP server is NOT how well or comprehensively the server implements tools, but how well these implementations (input/output schemas, docstrings/descriptions, functionality) enable LLMs with no other context and access ONLY to the MCP servers to answer realistic and difficult questions. + +## Evaluation Overview + +Create 10 human-readable questions requiring ONLY READ-ONLY, INDEPENDENT, NON-DESTRUCTIVE, and IDEMPOTENT operations to answer. Each question should be: + +- Realistic +- Clear and concise +- Unambiguous +- Complex, requiring potentially dozens of tool calls or steps +- Answerable with a single, verifiable value that you identify in advance + +## Question Guidelines + +### Core Requirements + +1. **Questions MUST be independent** + + - Each question should NOT depend on the answer to any other question + - Should not assume prior write operations from processing another question + +2. **Questions MUST require ONLY NON-DESTRUCTIVE AND IDEMPOTENT tool use** + + - Should not instruct or require modifying state to arrive at the correct answer + +3. **Questions must be REALISTIC, CLEAR, CONCISE, and COMPLEX** + - Must require another LLM to use multiple (potentially dozens of) tools or steps to answer + +### Complexity and Depth + +4. **Questions must require deep exploration** + + - Consider multi-hop questions requiring multiple sub-questions and sequential tool calls + - Each step should benefit from information found in previous questions + +5. **Questions may require extensive paging** + + - May need paging through multiple pages of results + - May require querying old data (1-2 years out-of-date) to find niche information + - The questions must be DIFFICULT + +6. **Questions must require deep understanding** + + - Rather than surface-level knowledge + - May pose complex ideas as True/False questions requiring evidence + - May use multiple-choice format where LLM must search different hypotheses + +7. **Questions must not be solvable with straightforward keyword search** + - Do not include specific keywords from the target content + - Use synonyms, related concepts, or paraphrases + - Require multiple searches, analyzing multiple related items, extracting context, then deriving the answer + +### Tool Testing + +8. **Questions should stress-test tool return values** + + - May elicit tools returning large JSON objects or lists, overwhelming the LLM + - Should require understanding multiple modalities of data: + - IDs and names + - Timestamps and datetimes (months, days, years, seconds) + - File IDs, names, extensions, and mimetypes + - URLs, GIDs, etc. + - Should probe the tool's ability to return all useful forms of data + +9. **Questions should MOSTLY reflect real human use cases** + + - The kinds of information retrieval tasks that HUMANS assisted by an LLM would care about + +10. **Questions may require dozens of tool calls** + + - This challenges LLMs with limited context + - Encourages MCP server tools to reduce information returned + +11. 
**Include ambiguous questions** + - May be ambiguous OR require difficult decisions on which tools to call + - Force the LLM to potentially make mistakes or misinterpret + - Ensure that despite AMBIGUITY, there is STILL A SINGLE VERIFIABLE ANSWER + +### Stability + +12. **Questions must be designed so the answer DOES NOT CHANGE** + + - Do not ask questions that rely on "current state" which is dynamic + - For example, do not count: + - Number of reactions to a post + - Number of replies to a thread + - Number of members in a channel + +13. **DO NOT let the MCP server RESTRICT the kinds of questions you create** + - Create challenging and complex questions + - Some may not be solvable with the available MCP server tools + - Questions may require specific output formats (datetime vs. epoch time, JSON vs. MARKDOWN) + - Questions may require dozens of tool calls to complete + +## Answer Guidelines + +### Verification + +1. **Answers must be VERIFIABLE via direct string comparison** + - If the answer can be re-written in many formats, clearly specify the output format in the QUESTION + - Examples: "Use YYYY/MM/DD.", "Respond True or False.", "Answer A, B, C, or D and nothing else." + - Answer should be a single VERIFIABLE value such as: + - User ID, user name, display name, first name, last name + - Channel ID, channel name + - Message ID, string + - URL, title + - Numerical quantity + - Timestamp, datetime + - Boolean (for True/False questions) + - Email address, phone number + - File ID, file name, file extension + - Multiple choice answer + - Answers must not require special formatting or complex, structured output + - Answer will be verified using DIRECT STRING COMPARISON + +### Readability + +2. **Answers should generally prefer HUMAN-READABLE formats** + - Examples: names, first name, last name, datetime, file name, message string, URL, yes/no, true/false, a/b/c/d + - Rather than opaque IDs (though IDs are acceptable) + - The VAST MAJORITY of answers should be human-readable + +### Stability + +3. **Answers must be STABLE/STATIONARY** + + - Look at old content (e.g., conversations that have ended, projects that have launched, questions answered) + - Create QUESTIONS based on "closed" concepts that will always return the same answer + - Questions may ask to consider a fixed time window to insulate from non-stationary answers + - Rely on context UNLIKELY to change + - Example: if finding a paper name, be SPECIFIC enough so answer is not confused with papers published later + +4. **Answers must be CLEAR and UNAMBIGUOUS** + - Questions must be designed so there is a single, clear answer + - Answer can be derived from using the MCP server tools + +### Diversity + +5. **Answers must be DIVERSE** + + - Answer should be a single VERIFIABLE value in diverse modalities and formats + - User concept: user ID, user name, display name, first name, last name, email address, phone number + - Channel concept: channel ID, channel name, channel topic + - Message concept: message ID, message string, timestamp, month, day, year + +6. 
**Answers must NOT be complex structures** + - Not a list of values + - Not a complex object + - Not a list of IDs or strings + - Not natural language text + - UNLESS the answer can be straightforwardly verified using DIRECT STRING COMPARISON + - And can be realistically reproduced + - It should be unlikely that an LLM would return the same list in any other order or format + +## Evaluation Process + +### Step 1: Documentation Inspection + +Read the documentation of the target API to understand: + +- Available endpoints and functionality +- If ambiguity exists, fetch additional information from the web +- Parallelize this step AS MUCH AS POSSIBLE +- Ensure each subagent is ONLY examining documentation from the file system or on the web + +### Step 2: Tool Inspection + +List the tools available in the MCP server: + +- Inspect the MCP server directly +- Understand input/output schemas, docstrings, and descriptions +- WITHOUT calling the tools themselves at this stage + +### Step 3: Developing Understanding + +Repeat steps 1 & 2 until you have a good understanding: + +- Iterate multiple times +- Think about the kinds of tasks you want to create +- Refine your understanding +- At NO stage should you READ the code of the MCP server implementation itself +- Use your intuition and understanding to create reasonable, realistic, but VERY challenging tasks + +### Step 4: Read-Only Content Inspection + +After understanding the API and tools, USE the MCP server tools: + +- Inspect content using READ-ONLY and NON-DESTRUCTIVE operations ONLY +- Goal: identify specific content (e.g., users, channels, messages, projects, tasks) for creating realistic questions +- Should NOT call any tools that modify state +- Will NOT read the code of the MCP server implementation itself +- Parallelize this step with individual sub-agents pursuing independent explorations +- Ensure each subagent is only performing READ-ONLY, NON-DESTRUCTIVE, and IDEMPOTENT operations +- BE CAREFUL: SOME TOOLS may return LOTS OF DATA which would cause you to run out of CONTEXT +- Make INCREMENTAL, SMALL, AND TARGETED tool calls for exploration +- In all tool call requests, use the `limit` parameter to limit results (<10) +- Use pagination + +### Step 5: Task Generation + +After inspecting the content, create 10 human-readable questions: + +- An LLM should be able to answer these with the MCP server +- Follow all question and answer guidelines above + +## Output Format + +Each QA pair consists of a question and an answer. The output should be an XML file with this structure: + +```xml + + + Find the project created in Q2 2024 with the highest number of completed tasks. What is the project name? + Website Redesign + + + Search for issues labeled as "bug" that were closed in March 2024. Which user closed the most issues? Provide their username. + sarah_dev + + + Look for pull requests that modified files in the /api directory and were merged between January 1 and January 31, 2024. How many different contributors worked on these PRs? + 7 + + + Find the repository with the most stars that was created before 2023. What is the repository name? + data-pipeline + + +``` + +## Evaluation Examples + +### Good Questions + +**Example 1: Multi-hop question requiring deep exploration (GitHub MCP)** + +```xml + + Find the repository that was archived in Q3 2023 and had previously been the most forked project in the organization. What was the primary programming language used in that repository? 
+ Python + +``` + +This question is good because: + +- Requires multiple searches to find archived repositories +- Needs to identify which had the most forks before archival +- Requires examining repository details for the language +- Answer is a simple, verifiable value +- Based on historical (closed) data that won't change + +**Example 2: Requires understanding context without keyword matching (Project Management MCP)** + +```xml + + Locate the initiative focused on improving customer onboarding that was completed in late 2023. The project lead created a retrospective document after completion. What was the lead's role title at that time? + Product Manager + +``` + +This question is good because: + +- Doesn't use specific project name ("initiative focused on improving customer onboarding") +- Requires finding completed projects from specific timeframe +- Needs to identify the project lead and their role +- Requires understanding context from retrospective documents +- Answer is human-readable and stable +- Based on completed work (won't change) + +**Example 3: Complex aggregation requiring multiple steps (Issue Tracker MCP)** + +```xml + + Among all bugs reported in January 2024 that were marked as critical priority, which assignee resolved the highest percentage of their assigned bugs within 48 hours? Provide the assignee's username. + alex_eng + +``` + +This question is good because: + +- Requires filtering bugs by date, priority, and status +- Needs to group by assignee and calculate resolution rates +- Requires understanding timestamps to determine 48-hour windows +- Tests pagination (potentially many bugs to process) +- Answer is a single username +- Based on historical data from specific time period + +**Example 4: Requires synthesis across multiple data types (CRM MCP)** + +```xml + + Find the account that upgraded from the Starter to Enterprise plan in Q4 2023 and had the highest annual contract value. What industry does this account operate in? + Healthcare + +``` + +This question is good because: + +- Requires understanding subscription tier changes +- Needs to identify upgrade events in specific timeframe +- Requires comparing contract values +- Must access account industry information +- Answer is simple and verifiable +- Based on completed historical transactions + +### Poor Questions + +**Example 1: Answer changes over time** + +```xml + + How many open issues are currently assigned to the engineering team? + 47 + +``` + +This question is poor because: + +- The answer will change as issues are created, closed, or reassigned +- Not based on stable/stationary data +- Relies on "current state" which is dynamic + +**Example 2: Too easy with keyword search** + +```xml + + Find the pull request with title "Add authentication feature" and tell me who created it. + developer123 + +``` + +This question is poor because: + +- Can be solved with a straightforward keyword search for exact title +- Doesn't require deep exploration or understanding +- No synthesis or analysis needed + +**Example 3: Ambiguous answer format** + +```xml + + List all the repositories that have Python as their primary language. 
+ repo1, repo2, repo3, data-pipeline, ml-tools + +``` + +This question is poor because: + +- Answer is a list that could be returned in any order +- Difficult to verify with direct string comparison +- LLM might format differently (JSON array, comma-separated, newline-separated) +- Better to ask for a specific aggregate (count) or superlative (most stars) + +## Verification Process + +After creating evaluations: + +1. **Examine the XML file** to understand the schema +2. **Load each task instruction** and in parallel using the MCP server and tools, identify the correct answer by attempting to solve the task YOURSELF +3. **Flag any operations** that require WRITE or DESTRUCTIVE operations +4. **Accumulate all CORRECT answers** and replace any incorrect answers in the document +5. **Remove any ``** that require WRITE or DESTRUCTIVE operations + +Remember to parallelize solving tasks to avoid running out of context, then accumulate all answers and make changes to the file at the end. + +## Tips for Creating Quality Evaluations + +1. **Think Hard and Plan Ahead** before generating tasks +2. **Parallelize Where Opportunity Arises** to speed up the process and manage context +3. **Focus on Realistic Use Cases** that humans would actually want to accomplish +4. **Create Challenging Questions** that test the limits of the MCP server's capabilities +5. **Ensure Stability** by using historical data and closed concepts +6. **Verify Answers** by solving the questions yourself using the MCP server tools +7. **Iterate and Refine** based on what you learn during the process + +--- + +# Running Evaluations + +After creating your evaluation file, you can use the provided evaluation harness to test your MCP server. + +## Setup + +1. **Install Dependencies** + + ```bash + pip install -r scripts/requirements.txt + ``` + + Or install manually: + + ```bash + pip install anthropic mcp + ``` + +2. **Set API Key** + + ```bash + export ANTHROPIC_API_KEY=your_api_key_here + ``` + +## Evaluation File Format + +Evaluation files use XML format with `` elements: + +```xml + + + Find the project created in Q2 2024 with the highest number of completed tasks. What is the project name? + Website Redesign + + + Search for issues labeled as "bug" that were closed in March 2024. Which user closed the most issues? Provide their username. + sarah_dev + + +``` + +## Running Evaluations + +The evaluation script (`scripts/evaluation.py`) supports three transport types: + +**Important:** + +- **stdio transport**: The evaluation script automatically launches and manages the MCP server process for you. Do not run the server manually. +- **sse/http transports**: You must start the MCP server separately before running the evaluation. The script connects to the already-running server at the specified URL. + +### 1. Local STDIO Server + +For locally-run MCP servers (script launches the server automatically): + +```bash +python scripts/evaluation.py \ + -t stdio \ + -c python \ + -a my_mcp_server.py \ + evaluation.xml +``` + +With environment variables: + +```bash +python scripts/evaluation.py \ + -t stdio \ + -c python \ + -a my_mcp_server.py \ + -e API_KEY=abc123 \ + -e DEBUG=true \ + evaluation.xml +``` + +### 2. Server-Sent Events (SSE) + +For SSE-based MCP servers (you must start the server first): + +```bash +python scripts/evaluation.py \ + -t sse \ + -u https://example.com/mcp \ + -H "Authorization: Bearer token123" \ + -H "X-Custom-Header: value" \ + evaluation.xml +``` + +### 3. 
HTTP (Streamable HTTP) + +For HTTP-based MCP servers (you must start the server first): + +```bash +python scripts/evaluation.py \ + -t http \ + -u https://example.com/mcp \ + -H "Authorization: Bearer token123" \ + evaluation.xml +``` + +## Command-Line Options + +``` +usage: evaluation.py [-h] [-t {stdio,sse,http}] [-m MODEL] [-c COMMAND] + [-a ARGS [ARGS ...]] [-e ENV [ENV ...]] [-u URL] + [-H HEADERS [HEADERS ...]] [-o OUTPUT] + eval_file + +positional arguments: + eval_file Path to evaluation XML file + +optional arguments: + -h, --help Show help message + -t, --transport Transport type: stdio, sse, or http (default: stdio) + -m, --model Claude model to use (default: claude-3-7-sonnet-20250219) + -o, --output Output file for report (default: print to stdout) + +stdio options: + -c, --command Command to run MCP server (e.g., python, node) + -a, --args Arguments for the command (e.g., server.py) + -e, --env Environment variables in KEY=VALUE format + +sse/http options: + -u, --url MCP server URL + -H, --header HTTP headers in 'Key: Value' format +``` + +## Output + +The evaluation script generates a detailed report including: + +- **Summary Statistics**: + + - Accuracy (correct/total) + - Average task duration + - Average tool calls per task + - Total tool calls + +- **Per-Task Results**: + - Prompt and expected response + - Actual response from the agent + - Whether the answer was correct (✅/❌) + - Duration and tool call details + - Agent's summary of its approach + - Agent's feedback on the tools + +### Save Report to File + +```bash +python scripts/evaluation.py \ + -t stdio \ + -c python \ + -a my_server.py \ + -o evaluation_report.md \ + evaluation.xml +``` + +## Complete Example Workflow + +Here's a complete example of creating and running an evaluation: + +1. **Create your evaluation file** (`my_evaluation.xml`): + +```xml + + + Find the user who created the most issues in January 2024. What is their username? + alice_developer + + + Among all pull requests merged in Q1 2024, which repository had the highest number? Provide the repository name. + backend-api + + + Find the project that was completed in December 2023 and had the longest duration from start to finish. How many days did it take? + 127 + + +``` + +2. **Install dependencies**: + +```bash +pip install -r scripts/requirements.txt +export ANTHROPIC_API_KEY=your_api_key +``` + +3. **Run evaluation**: + +```bash +python scripts/evaluation.py \ + -t stdio \ + -c python \ + -a github_mcp_server.py \ + -e GITHUB_TOKEN=ghp_xxx \ + -o github_eval_report.md \ + my_evaluation.xml +``` + +4. 
**Review the report** in `github_eval_report.md` to: + - See which questions passed/failed + - Read the agent's feedback on your tools + - Identify areas for improvement + - Iterate on your MCP server design + +## Troubleshooting + +### Connection Errors + +If you get connection errors: + +- **STDIO**: Verify the command and arguments are correct +- **SSE/HTTP**: Check the URL is accessible and headers are correct +- Ensure any required API keys are set in environment variables or headers + +### Low Accuracy + +If many evaluations fail: + +- Review the agent's feedback for each task +- Check if tool descriptions are clear and comprehensive +- Verify input parameters are well-documented +- Consider whether tools return too much or too little data +- Ensure error messages are actionable + +### Timeout Issues + +If tasks are timing out: + +- Use a more capable model (e.g., `claude-3-7-sonnet-20250219`) +- Check if tools are returning too much data +- Verify pagination is working correctly +- Consider simplifying complex questions diff --git a/.roo/skills/mcp-builder/reference/mcp_best_practices.md b/.roo/skills/mcp-builder/reference/mcp_best_practices.md new file mode 100644 index 00000000000..428e1e80947 --- /dev/null +++ b/.roo/skills/mcp-builder/reference/mcp_best_practices.md @@ -0,0 +1,269 @@ +# MCP Server Best Practices + +## Quick Reference + +### Server Naming + +- **Python**: `{service}_mcp` (e.g., `slack_mcp`) +- **Node/TypeScript**: `{service}-mcp-server` (e.g., `slack-mcp-server`) + +### Tool Naming + +- Use snake_case with service prefix +- Format: `{service}_{action}_{resource}` +- Example: `slack_send_message`, `github_create_issue` + +### Response Formats + +- Support both JSON and Markdown formats +- JSON for programmatic processing +- Markdown for human readability + +### Pagination + +- Always respect `limit` parameter +- Return `has_more`, `next_offset`, `total_count` +- Default to 20-50 items + +### Transport + +- **Streamable HTTP**: For remote servers, multi-client scenarios +- **stdio**: For local integrations, command-line tools +- Avoid SSE (deprecated in favor of streamable HTTP) + +--- + +## Server Naming Conventions + +Follow these standardized naming patterns: + +**Python**: Use format `{service}_mcp` (lowercase with underscores) + +- Examples: `slack_mcp`, `github_mcp`, `jira_mcp` + +**Node/TypeScript**: Use format `{service}-mcp-server` (lowercase with hyphens) + +- Examples: `slack-mcp-server`, `github-mcp-server`, `jira-mcp-server` + +The name should be general, descriptive of the service being integrated, easy to infer from the task description, and without version numbers. + +--- + +## Tool Naming and Design + +### Tool Naming + +1. **Use snake_case**: `search_users`, `create_project`, `get_channel_info` +2. **Include service prefix**: Anticipate that your MCP server may be used alongside other MCP servers + - Use `slack_send_message` instead of just `send_message` + - Use `github_create_issue` instead of just `create_issue` +3. **Be action-oriented**: Start with verbs (get, list, search, create, etc.) +4. 
**Be specific**: Avoid generic names that could conflict with other servers + +### Tool Design + +- Tool descriptions must narrowly and unambiguously describe functionality +- Descriptions must precisely match actual functionality +- Provide tool annotations (readOnlyHint, destructiveHint, idempotentHint, openWorldHint) +- Keep tool operations focused and atomic + +--- + +## Response Formats + +All tools that return data should support multiple formats: + +### JSON Format (`response_format="json"`) + +- Machine-readable structured data +- Include all available fields and metadata +- Consistent field names and types +- Use for programmatic processing + +### Markdown Format (`response_format="markdown"`, typically default) + +- Human-readable formatted text +- Use headers, lists, and formatting for clarity +- Convert timestamps to human-readable format +- Show display names with IDs in parentheses +- Omit verbose metadata + +--- + +## Pagination + +For tools that list resources: + +- **Always respect the `limit` parameter** +- **Implement pagination**: Use `offset` or cursor-based pagination +- **Return pagination metadata**: Include `has_more`, `next_offset`/`next_cursor`, `total_count` +- **Never load all results into memory**: Especially important for large datasets +- **Default to reasonable limits**: 20-50 items is typical + +Example pagination response: + +```json +{ + "total": 150, + "count": 20, + "offset": 0, + "items": [...], + "has_more": true, + "next_offset": 20 +} +``` + +--- + +## Transport Options + +### Streamable HTTP + +**Best for**: Remote servers, web services, multi-client scenarios + +**Characteristics**: + +- Bidirectional communication over HTTP +- Supports multiple simultaneous clients +- Can be deployed as a web service +- Enables server-to-client notifications + +**Use when**: + +- Serving multiple clients simultaneously +- Deploying as a cloud service +- Integration with web applications + +### stdio + +**Best for**: Local integrations, command-line tools + +**Characteristics**: + +- Standard input/output stream communication +- Simple setup, no network configuration needed +- Runs as a subprocess of the client + +**Use when**: + +- Building tools for local development environments +- Integrating with desktop applications +- Single-user, single-session scenarios + +**Note**: stdio servers should NOT log to stdout (use stderr for logging) + +### Transport Selection + +| Criterion | stdio | Streamable HTTP | +| -------------- | ------ | --------------- | +| **Deployment** | Local | Remote | +| **Clients** | Single | Multiple | +| **Complexity** | Low | Medium | +| **Real-time** | No | Yes | + +--- + +## Security Best Practices + +### Authentication and Authorization + +**OAuth 2.1**: + +- Use secure OAuth 2.1 with certificates from recognized authorities +- Validate access tokens before processing requests +- Only accept tokens specifically intended for your server + +**API Keys**: + +- Store API keys in environment variables, never in code +- Validate keys on server startup +- Provide clear error messages when authentication fails + +### Input Validation + +- Sanitize file paths to prevent directory traversal +- Validate URLs and external identifiers +- Check parameter sizes and ranges +- Prevent command injection in system calls +- Use schema validation (Pydantic/Zod) for all inputs + +### Error Handling + +- Don't expose internal errors to clients +- Log security-relevant errors server-side +- Provide helpful but not revealing error messages +- Clean up 
resources after errors + +### DNS Rebinding Protection + +For streamable HTTP servers running locally: + +- Enable DNS rebinding protection +- Validate the `Origin` header on all incoming connections +- Bind to `127.0.0.1` rather than `0.0.0.0` + +--- + +## Tool Annotations + +Provide annotations to help clients understand tool behavior: + +| Annotation | Type | Default | Description | +| ----------------- | ------- | ------- | ------------------------------------------------------- | +| `readOnlyHint` | boolean | false | Tool does not modify its environment | +| `destructiveHint` | boolean | true | Tool may perform destructive updates | +| `idempotentHint` | boolean | false | Repeated calls with same args have no additional effect | +| `openWorldHint` | boolean | true | Tool interacts with external entities | + +**Important**: Annotations are hints, not security guarantees. Clients should not make security-critical decisions based solely on annotations. + +--- + +## Error Handling + +- Use standard JSON-RPC error codes +- Report tool errors within result objects (not protocol-level errors) +- Provide helpful, specific error messages with suggested next steps +- Don't expose internal implementation details +- Clean up resources properly on errors + +Example error handling: + +```typescript +try { + const result = performOperation() + return { content: [{ type: "text", text: result }] } +} catch (error) { + return { + isError: true, + content: [ + { + type: "text", + text: `Error: ${error.message}. Try using filter='active_only' to reduce results.`, + }, + ], + } +} +``` + +--- + +## Testing Requirements + +Comprehensive testing should cover: + +- **Functional testing**: Verify correct execution with valid/invalid inputs +- **Integration testing**: Test interaction with external systems +- **Security testing**: Validate auth, input sanitization, rate limiting +- **Performance testing**: Check behavior under load, timeouts +- **Error handling**: Ensure proper error reporting and cleanup + +--- + +## Documentation Requirements + +- Provide clear documentation of all tools and capabilities +- Include working examples (at least 3 per major feature) +- Document security considerations +- Specify required permissions and access levels +- Document rate limits and performance characteristics diff --git a/.roo/skills/mcp-builder/reference/node_mcp_server.md b/.roo/skills/mcp-builder/reference/node_mcp_server.md new file mode 100644 index 00000000000..e645b0291de --- /dev/null +++ b/.roo/skills/mcp-builder/reference/node_mcp_server.md @@ -0,0 +1,975 @@ +# Node/TypeScript MCP Server Implementation Guide + +## Overview + +This document provides Node/TypeScript-specific best practices and examples for implementing MCP servers using the MCP TypeScript SDK. It covers project structure, server setup, tool registration patterns, input validation with Zod, error handling, and complete working examples. 
+ +--- + +## Quick Reference + +### Key Imports + +```typescript +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" +import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js" +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" +import express from "express" +import { z } from "zod" +``` + +### Server Initialization + +```typescript +const server = new McpServer({ + name: "service-mcp-server", + version: "1.0.0", +}) +``` + +### Tool Registration Pattern + +```typescript +server.registerTool( + "tool_name", + { + title: "Tool Display Name", + description: "What the tool does", + inputSchema: { param: z.string() }, + outputSchema: { result: z.string() }, + }, + async ({ param }) => { + const output = { result: `Processed: ${param}` } + return { + content: [{ type: "text", text: JSON.stringify(output) }], + structuredContent: output, // Modern pattern for structured data + } + }, +) +``` + +--- + +## MCP TypeScript SDK + +The official MCP TypeScript SDK provides: + +- `McpServer` class for server initialization +- `registerTool` method for tool registration +- Zod schema integration for runtime input validation +- Type-safe tool handler implementations + +**IMPORTANT - Use Modern APIs Only:** + +- **DO use**: `server.registerTool()`, `server.registerResource()`, `server.registerPrompt()` +- **DO NOT use**: Old deprecated APIs such as `server.tool()`, `server.setRequestHandler(ListToolsRequestSchema, ...)`, or manual handler registration +- The `register*` methods provide better type safety, automatic schema handling, and are the recommended approach + +See the MCP SDK documentation in the references for complete details. + +## Server Naming Convention + +Node/TypeScript MCP servers must follow this naming pattern: + +- **Format**: `{service}-mcp-server` (lowercase with hyphens) +- **Examples**: `github-mcp-server`, `jira-mcp-server`, `stripe-mcp-server` + +The name should be: + +- General (not tied to specific features) +- Descriptive of the service/API being integrated +- Easy to infer from the task description +- Without version numbers or dates + +## Project Structure + +Create the following structure for Node/TypeScript MCP servers: + +``` +{service}-mcp-server/ +├── package.json +├── tsconfig.json +├── README.md +├── src/ +│ ├── index.ts # Main entry point with McpServer initialization +│ ├── types.ts # TypeScript type definitions and interfaces +│ ├── tools/ # Tool implementations (one file per domain) +│ ├── services/ # API clients and shared utilities +│ ├── schemas/ # Zod validation schemas +│ └── constants.ts # Shared constants (API_URL, CHARACTER_LIMIT, etc.) +└── dist/ # Built JavaScript files (entry point: dist/index.js) +``` + +## Tool Implementation + +### Tool Naming + +Use snake_case for tool names (e.g., "search_users", "create_project", "get_channel_info") with clear, action-oriented names. 
+ +**Avoid Naming Conflicts**: Include the service context to prevent overlaps: + +- Use "slack_send_message" instead of just "send_message" +- Use "github_create_issue" instead of just "create_issue" +- Use "asana_list_tasks" instead of just "list_tasks" + +### Tool Structure + +Tools are registered using the `registerTool` method with the following requirements: + +- Use Zod schemas for runtime input validation and type safety +- The `description` field must be explicitly provided - JSDoc comments are NOT automatically extracted +- Explicitly provide `title`, `description`, `inputSchema`, and `annotations` +- The `inputSchema` must be a Zod schema object (not a JSON schema) +- Type all parameters and return values explicitly + +```typescript +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" +import { z } from "zod" + +const server = new McpServer({ + name: "example-mcp", + version: "1.0.0", +}) + +// Zod schema for input validation +const UserSearchInputSchema = z + .object({ + query: z + .string() + .min(2, "Query must be at least 2 characters") + .max(200, "Query must not exceed 200 characters") + .describe("Search string to match against names/emails"), + limit: z.number().int().min(1).max(100).default(20).describe("Maximum results to return"), + offset: z.number().int().min(0).default(0).describe("Number of results to skip for pagination"), + response_format: z + .nativeEnum(ResponseFormat) + .default(ResponseFormat.MARKDOWN) + .describe("Output format: 'markdown' for human-readable or 'json' for machine-readable"), + }) + .strict() + +// Type definition from Zod schema +type UserSearchInput = z.infer + +server.registerTool( + "example_search_users", + { + title: "Search Example Users", + description: `Search for users in the Example system by name, email, or team. + +This tool searches across all user profiles in the Example platform, supporting partial matches and various search filters. It does NOT create or modify users, only searches existing ones. 
+ +Args: + - query (string): Search string to match against names/emails + - limit (number): Maximum results to return, between 1-100 (default: 20) + - offset (number): Number of results to skip for pagination (default: 0) + - response_format ('markdown' | 'json'): Output format (default: 'markdown') + +Returns: + For JSON format: Structured data with schema: + { + "total": number, // Total number of matches found + "count": number, // Number of results in this response + "offset": number, // Current pagination offset + "users": [ + { + "id": string, // User ID (e.g., "U123456789") + "name": string, // Full name (e.g., "John Doe") + "email": string, // Email address + "team": string, // Team name (optional) + "active": boolean // Whether user is active + } + ], + "has_more": boolean, // Whether more results are available + "next_offset": number // Offset for next page (if has_more is true) + } + +Examples: + - Use when: "Find all marketing team members" -> params with query="team:marketing" + - Use when: "Search for John's account" -> params with query="john" + - Don't use when: You need to create a user (use example_create_user instead) + +Error Handling: + - Returns "Error: Rate limit exceeded" if too many requests (429 status) + - Returns "No users found matching ''" if search returns empty`, + inputSchema: UserSearchInputSchema, + annotations: { + readOnlyHint: true, + destructiveHint: false, + idempotentHint: true, + openWorldHint: true, + }, + }, + async (params: UserSearchInput) => { + try { + // Input validation is handled by Zod schema + // Make API request using validated parameters + const data = await makeApiRequest("users/search", "GET", undefined, { + q: params.query, + limit: params.limit, + offset: params.offset, + }) + + const users = data.users || [] + const total = data.total || 0 + + if (!users.length) { + return { + content: [ + { + type: "text", + text: `No users found matching '${params.query}'`, + }, + ], + } + } + + // Prepare structured output + const output = { + total, + count: users.length, + offset: params.offset, + users: users.map((user: any) => ({ + id: user.id, + name: user.name, + email: user.email, + ...(user.team ? { team: user.team } : {}), + active: user.active ?? true, + })), + has_more: total > params.offset + users.length, + ...(total > params.offset + users.length + ? 
{ + next_offset: params.offset + users.length, + } + : {}), + } + + // Format text representation based on requested format + let textContent: string + if (params.response_format === ResponseFormat.MARKDOWN) { + const lines = [ + `# User Search Results: '${params.query}'`, + "", + `Found ${total} users (showing ${users.length})`, + "", + ] + for (const user of users) { + lines.push(`## ${user.name} (${user.id})`) + lines.push(`- **Email**: ${user.email}`) + if (user.team) lines.push(`- **Team**: ${user.team}`) + lines.push("") + } + textContent = lines.join("\n") + } else { + textContent = JSON.stringify(output, null, 2) + } + + return { + content: [{ type: "text", text: textContent }], + structuredContent: output, // Modern pattern for structured data + } + } catch (error) { + return { + content: [ + { + type: "text", + text: handleApiError(error), + }, + ], + } + } + }, +) +``` + +## Zod Schemas for Input Validation + +Zod provides runtime type validation: + +```typescript +import { z } from "zod" + +// Basic schema with validation +const CreateUserSchema = z + .object({ + name: z.string().min(1, "Name is required").max(100, "Name must not exceed 100 characters"), + email: z.string().email("Invalid email format"), + age: z + .number() + .int("Age must be a whole number") + .min(0, "Age cannot be negative") + .max(150, "Age cannot be greater than 150"), + }) + .strict() // Use .strict() to forbid extra fields + +// Enums +enum ResponseFormat { + MARKDOWN = "markdown", + JSON = "json", +} + +const SearchSchema = z.object({ + response_format: z.nativeEnum(ResponseFormat).default(ResponseFormat.MARKDOWN).describe("Output format"), +}) + +// Optional fields with defaults +const PaginationSchema = z.object({ + limit: z.number().int().min(1).max(100).default(20).describe("Maximum results to return"), + offset: z.number().int().min(0).default(0).describe("Number of results to skip"), +}) +``` + +## Response Format Options + +Support multiple output formats for flexibility: + +```typescript +enum ResponseFormat { + MARKDOWN = "markdown", + JSON = "json", +} + +const inputSchema = z.object({ + query: z.string(), + response_format: z + .nativeEnum(ResponseFormat) + .default(ResponseFormat.MARKDOWN) + .describe("Output format: 'markdown' for human-readable or 'json' for machine-readable"), +}) +``` + +**Markdown format**: + +- Use headers, lists, and formatting for clarity +- Convert timestamps to human-readable format +- Show display names with IDs in parentheses +- Omit verbose metadata +- Group related information logically + +**JSON format**: + +- Return complete, structured data suitable for programmatic processing +- Include all available fields and metadata +- Use consistent field names and types + +## Pagination Implementation + +For tools that list resources: + +```typescript +const ListSchema = z.object({ + limit: z.number().int().min(1).max(100).default(20), + offset: z.number().int().min(0).default(0), +}) + +async function listItems(params: z.infer) { + const data = await apiRequest(params.limit, params.offset) + + const response = { + total: data.total, + count: data.items.length, + offset: params.offset, + items: data.items, + has_more: data.total > params.offset + data.items.length, + next_offset: data.total > params.offset + data.items.length ? 
params.offset + data.items.length : undefined, + } + + return JSON.stringify(response, null, 2) +} +``` + +## Character Limits and Truncation + +Add a CHARACTER_LIMIT constant to prevent overwhelming responses: + +```typescript +// At module level in constants.ts +export const CHARACTER_LIMIT = 25000 // Maximum response size in characters + +async function searchTool(params: SearchInput) { + let result = generateResponse(data) + + // Check character limit and truncate if needed + if (result.length > CHARACTER_LIMIT) { + const truncatedData = data.slice(0, Math.max(1, data.length / 2)) + response.data = truncatedData + response.truncated = true + response.truncation_message = + `Response truncated from ${data.length} to ${truncatedData.length} items. ` + + `Use 'offset' parameter or add filters to see more results.` + result = JSON.stringify(response, null, 2) + } + + return result +} +``` + +## Error Handling + +Provide clear, actionable error messages: + +```typescript +import axios, { AxiosError } from "axios" + +function handleApiError(error: unknown): string { + if (error instanceof AxiosError) { + if (error.response) { + switch (error.response.status) { + case 404: + return "Error: Resource not found. Please check the ID is correct." + case 403: + return "Error: Permission denied. You don't have access to this resource." + case 429: + return "Error: Rate limit exceeded. Please wait before making more requests." + default: + return `Error: API request failed with status ${error.response.status}` + } + } else if (error.code === "ECONNABORTED") { + return "Error: Request timed out. Please try again." + } + } + return `Error: Unexpected error occurred: ${error instanceof Error ? error.message : String(error)}` +} +``` + +## Shared Utilities + +Extract common functionality into reusable functions: + +```typescript +// Shared API request function +async function makeApiRequest( + endpoint: string, + method: "GET" | "POST" | "PUT" | "DELETE" = "GET", + data?: any, + params?: any, +): Promise { + try { + const response = await axios({ + method, + url: `${API_BASE_URL}/${endpoint}`, + data, + params, + timeout: 30000, + headers: { + "Content-Type": "application/json", + Accept: "application/json", + }, + }) + return response.data + } catch (error) { + throw error + } +} +``` + +## Async/Await Best Practices + +Always use async/await for network requests and I/O operations: + +```typescript +// Good: Async network request +async function fetchData(resourceId: string): Promise { + const response = await axios.get(`${API_URL}/resource/${resourceId}`) + return response.data +} + +// Bad: Promise chains +function fetchData(resourceId: string): Promise { + return axios.get(`${API_URL}/resource/${resourceId}`).then((response) => response.data) // Harder to read and maintain +} +``` + +## TypeScript Best Practices + +1. **Use Strict TypeScript**: Enable strict mode in tsconfig.json +2. **Define Interfaces**: Create clear interface definitions for all data structures +3. **Avoid `any`**: Use proper types or `unknown` instead of `any` +4. **Zod for Runtime Validation**: Use Zod schemas to validate external data +5. **Type Guards**: Create type guard functions for complex type checking +6. **Error Handling**: Always use try-catch with proper error type checking +7. 
**Null Safety**: Use optional chaining (`?.`) and nullish coalescing (`??`) + +```typescript +// Good: Type-safe with Zod and interfaces +interface UserResponse { + id: string + name: string + email: string + team?: string + active: boolean +} + +const UserSchema = z.object({ + id: z.string(), + name: z.string(), + email: z.string().email(), + team: z.string().optional(), + active: z.boolean(), +}) + +type User = z.infer + +async function getUser(id: string): Promise { + const data = await apiCall(`/users/${id}`) + return UserSchema.parse(data) // Runtime validation +} + +// Bad: Using any +async function getUser(id: string): Promise { + return await apiCall(`/users/${id}`) // No type safety +} +``` + +## Package Configuration + +### package.json + +```json +{ + "name": "{service}-mcp-server", + "version": "1.0.0", + "description": "MCP server for {Service} API integration", + "type": "module", + "main": "dist/index.js", + "scripts": { + "start": "node dist/index.js", + "dev": "tsx watch src/index.ts", + "build": "tsc", + "clean": "rm -rf dist" + }, + "engines": { + "node": ">=18" + }, + "dependencies": { + "@modelcontextprotocol/sdk": "^1.6.1", + "axios": "^1.7.9", + "zod": "^3.23.8" + }, + "devDependencies": { + "@types/node": "^22.10.0", + "tsx": "^4.19.2", + "typescript": "^5.7.2" + } +} +``` + +### tsconfig.json + +```json +{ + "compilerOptions": { + "target": "ES2022", + "module": "Node16", + "moduleResolution": "Node16", + "lib": ["ES2022"], + "outDir": "./dist", + "rootDir": "./src", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "allowSyntheticDefaultImports": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} +``` + +## Complete Example + +```typescript +#!/usr/bin/env node +/** + * MCP Server for Example Service. + * + * This server provides tools to interact with Example API, including user search, + * project management, and data export capabilities. 
+ */ + +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" +import { z } from "zod" +import axios, { AxiosError } from "axios" + +// Constants +const API_BASE_URL = "https://api.example.com/v1" +const CHARACTER_LIMIT = 25000 + +// Enums +enum ResponseFormat { + MARKDOWN = "markdown", + JSON = "json", +} + +// Zod schemas +const UserSearchInputSchema = z + .object({ + query: z + .string() + .min(2, "Query must be at least 2 characters") + .max(200, "Query must not exceed 200 characters") + .describe("Search string to match against names/emails"), + limit: z.number().int().min(1).max(100).default(20).describe("Maximum results to return"), + offset: z.number().int().min(0).default(0).describe("Number of results to skip for pagination"), + response_format: z + .nativeEnum(ResponseFormat) + .default(ResponseFormat.MARKDOWN) + .describe("Output format: 'markdown' for human-readable or 'json' for machine-readable"), + }) + .strict() + +type UserSearchInput = z.infer + +// Shared utility functions +async function makeApiRequest( + endpoint: string, + method: "GET" | "POST" | "PUT" | "DELETE" = "GET", + data?: any, + params?: any, +): Promise { + try { + const response = await axios({ + method, + url: `${API_BASE_URL}/${endpoint}`, + data, + params, + timeout: 30000, + headers: { + "Content-Type": "application/json", + Accept: "application/json", + }, + }) + return response.data + } catch (error) { + throw error + } +} + +function handleApiError(error: unknown): string { + if (error instanceof AxiosError) { + if (error.response) { + switch (error.response.status) { + case 404: + return "Error: Resource not found. Please check the ID is correct." + case 403: + return "Error: Permission denied. You don't have access to this resource." + case 429: + return "Error: Rate limit exceeded. Please wait before making more requests." + default: + return `Error: API request failed with status ${error.response.status}` + } + } else if (error.code === "ECONNABORTED") { + return "Error: Request timed out. Please try again." + } + } + return `Error: Unexpected error occurred: ${error instanceof Error ? 
error.message : String(error)}` +} + +// Create MCP server instance +const server = new McpServer({ + name: "example-mcp", + version: "1.0.0", +}) + +// Register tools +server.registerTool( + "example_search_users", + { + title: "Search Example Users", + description: `[Full description as shown above]`, + inputSchema: UserSearchInputSchema, + annotations: { + readOnlyHint: true, + destructiveHint: false, + idempotentHint: true, + openWorldHint: true, + }, + }, + async (params: UserSearchInput) => { + // Implementation as shown above + }, +) + +// Main function +// For stdio (local): +async function runStdio() { + if (!process.env.EXAMPLE_API_KEY) { + console.error("ERROR: EXAMPLE_API_KEY environment variable is required") + process.exit(1) + } + + const transport = new StdioServerTransport() + await server.connect(transport) + console.error("MCP server running via stdio") +} + +// For streamable HTTP (remote): +async function runHTTP() { + if (!process.env.EXAMPLE_API_KEY) { + console.error("ERROR: EXAMPLE_API_KEY environment variable is required") + process.exit(1) + } + + const app = express() + app.use(express.json()) + + app.post("/mcp", async (req, res) => { + const transport = new StreamableHTTPServerTransport({ + sessionIdGenerator: undefined, + enableJsonResponse: true, + }) + res.on("close", () => transport.close()) + await server.connect(transport) + await transport.handleRequest(req, res, req.body) + }) + + const port = parseInt(process.env.PORT || "3000") + app.listen(port, () => { + console.error(`MCP server running on http://localhost:${port}/mcp`) + }) +} + +// Choose transport based on environment +const transport = process.env.TRANSPORT || "stdio" +if (transport === "http") { + runHTTP().catch((error) => { + console.error("Server error:", error) + process.exit(1) + }) +} else { + runStdio().catch((error) => { + console.error("Server error:", error) + process.exit(1) + }) +} +``` + +--- + +## Advanced MCP Features + +### Resource Registration + +Expose data as resources for efficient, URI-based access: + +```typescript +import { ResourceTemplate } from "@modelcontextprotocol/sdk/types.js" + +// Register a resource with URI template +server.registerResource( + { + uri: "file://documents/{name}", + name: "Document Resource", + description: "Access documents by name", + mimeType: "text/plain", + }, + async (uri: string) => { + // Extract parameter from URI + const match = uri.match(/^file:\/\/documents\/(.+)$/) + if (!match) { + throw new Error("Invalid URI format") + } + + const documentName = match[1] + const content = await loadDocument(documentName) + + return { + contents: [ + { + uri, + mimeType: "text/plain", + text: content, + }, + ], + } + }, +) + +// List available resources dynamically +server.registerResourceList(async () => { + const documents = await getAvailableDocuments() + return { + resources: documents.map((doc) => ({ + uri: `file://documents/${doc.name}`, + name: doc.name, + mimeType: "text/plain", + description: doc.description, + })), + } +}) +``` + +**When to use Resources vs Tools:** + +- **Resources**: For data access with simple URI-based parameters +- **Tools**: For complex operations requiring validation and business logic +- **Resources**: When data is relatively static or template-based +- **Tools**: When operations have side effects or complex workflows + +### Transport Options + +The TypeScript SDK supports two main transport mechanisms: + +#### Streamable HTTP (Recommended for Remote Servers) + +```typescript +import { 
StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js" +import express from "express" + +const app = express() +app.use(express.json()) + +app.post("/mcp", async (req, res) => { + // Create new transport for each request (stateless, prevents request ID collisions) + const transport = new StreamableHTTPServerTransport({ + sessionIdGenerator: undefined, + enableJsonResponse: true, + }) + + res.on("close", () => transport.close()) + + await server.connect(transport) + await transport.handleRequest(req, res, req.body) +}) + +app.listen(3000) +``` + +#### stdio (For Local Integrations) + +```typescript +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" + +const transport = new StdioServerTransport() +await server.connect(transport) +``` + +**Transport selection:** + +- **Streamable HTTP**: Web services, remote access, multiple clients +- **stdio**: Command-line tools, local development, subprocess integration + +### Notification Support + +Notify clients when server state changes: + +```typescript +// Notify when tools list changes +server.notification({ + method: "notifications/tools/list_changed", +}) + +// Notify when resources change +server.notification({ + method: "notifications/resources/list_changed", +}) +``` + +Use notifications sparingly - only when server capabilities genuinely change. + +--- + +## Code Best Practices + +### Code Composability and Reusability + +Your implementation MUST prioritize composability and code reuse: + +1. **Extract Common Functionality**: + + - Create reusable helper functions for operations used across multiple tools + - Build shared API clients for HTTP requests instead of duplicating code + - Centralize error handling logic in utility functions + - Extract business logic into dedicated functions that can be composed + - Extract shared markdown or JSON field selection & formatting functionality + +2. **Avoid Duplication**: + - NEVER copy-paste similar code between tools + - If you find yourself writing similar logic twice, extract it into a function + - Common operations like pagination, filtering, field selection, and formatting should be shared + - Authentication/authorization logic should be centralized + +## Building and Running + +Always build your TypeScript code before running: + +```bash +# Build the project +npm run build + +# Run the server +npm start + +# Development with auto-reload +npm run dev +``` + +Always ensure `npm run build` completes successfully before considering the implementation complete. 
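+
+As a concrete illustration of the composability guidance in this section, the sketch below shows one way to share a formatting helper across two tool handlers instead of duplicating Markdown rendering in each. The names (`UserSummary`, `formatUserMarkdown`, `handleGetUser`, `handleSearchUsers`) are hypothetical placeholders, not part of the MCP SDK:
+
+```typescript
+// Hypothetical shared formatter reused by multiple tools
+interface UserSummary {
+	id: string
+	name: string
+	email: string
+	team?: string
+}
+
+function formatUserMarkdown(user: UserSummary): string {
+	const lines = [`## ${user.name} (${user.id})`, `- **Email**: ${user.email}`]
+	if (user.team) {
+		lines.push(`- **Team**: ${user.team}`)
+	}
+	return lines.join("\n")
+}
+
+// Both handlers delegate formatting to the shared helper
+async function handleGetUser(user: UserSummary): Promise<string> {
+	return formatUserMarkdown(user)
+}
+
+async function handleSearchUsers(users: UserSummary[]): Promise<string> {
+	return users.map(formatUserMarkdown).join("\n\n")
+}
+```
+
+Keeping the Markdown layout in one helper means a formatting change only has to be made once, and every tool that returns user data stays consistent.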
+ +## Quality Checklist + +Before finalizing your Node/TypeScript MCP server implementation, ensure: + +### Strategic Design + +- [ ] Tools enable complete workflows, not just API endpoint wrappers +- [ ] Tool names reflect natural task subdivisions +- [ ] Response formats optimize for agent context efficiency +- [ ] Human-readable identifiers used where appropriate +- [ ] Error messages guide agents toward correct usage + +### Implementation Quality + +- [ ] FOCUSED IMPLEMENTATION: Most important and valuable tools implemented +- [ ] All tools registered using `registerTool` with complete configuration +- [ ] All tools include `title`, `description`, `inputSchema`, and `annotations` +- [ ] Annotations correctly set (readOnlyHint, destructiveHint, idempotentHint, openWorldHint) +- [ ] All tools use Zod schemas for runtime input validation with `.strict()` enforcement +- [ ] All Zod schemas have proper constraints and descriptive error messages +- [ ] All tools have comprehensive descriptions with explicit input/output types +- [ ] Descriptions include return value examples and complete schema documentation +- [ ] Error messages are clear, actionable, and educational + +### TypeScript Quality + +- [ ] TypeScript interfaces are defined for all data structures +- [ ] Strict TypeScript is enabled in tsconfig.json +- [ ] No use of `any` type - use `unknown` or proper types instead +- [ ] All async functions have explicit Promise return types +- [ ] Error handling uses proper type guards (e.g., `axios.isAxiosError`, `z.ZodError`) + +### Advanced Features (where applicable) + +- [ ] Resources registered for appropriate data endpoints +- [ ] Appropriate transport configured (stdio or streamable HTTP) +- [ ] Notifications implemented for dynamic server capabilities +- [ ] Type-safe with SDK interfaces + +### Project Configuration + +- [ ] Package.json includes all necessary dependencies +- [ ] Build script produces working JavaScript in dist/ directory +- [ ] Main entry point is properly configured as dist/index.js +- [ ] Server name follows format: `{service}-mcp-server` +- [ ] tsconfig.json properly configured with strict mode + +### Code Quality + +- [ ] Pagination is properly implemented where applicable +- [ ] Large responses check CHARACTER_LIMIT constant and truncate with clear messages +- [ ] Filtering options are provided for potentially large result sets +- [ ] All network operations handle timeouts and connection errors gracefully +- [ ] Common functionality is extracted into reusable functions +- [ ] Return types are consistent across similar operations + +### Testing and Build + +- [ ] `npm run build` completes successfully without errors +- [ ] dist/index.js created and executable +- [ ] Server runs: `node dist/index.js --help` +- [ ] All imports resolve correctly +- [ ] Sample tool calls work as expected diff --git a/.roo/skills/mcp-builder/reference/python_mcp_server.md b/.roo/skills/mcp-builder/reference/python_mcp_server.md new file mode 100644 index 00000000000..bc6b789546e --- /dev/null +++ b/.roo/skills/mcp-builder/reference/python_mcp_server.md @@ -0,0 +1,738 @@ +# Python MCP Server Implementation Guide + +## Overview + +This document provides Python-specific best practices and examples for implementing MCP servers using the MCP Python SDK. It covers server setup, tool registration patterns, input validation with Pydantic, error handling, and complete working examples. 
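+
+For orientation, here is a minimal end-to-end sketch that combines the pieces covered in this guide (server initialization, a Pydantic input model, and a decorated tool). The server name and tool are placeholder examples, not part of the SDK:
+
+```python
+from pydantic import BaseModel, Field
+from mcp.server.fastmcp import FastMCP
+
+mcp = FastMCP("example_mcp")
+
+
+class EchoInput(BaseModel):
+    '''Input model for the echo tool.'''
+
+    text: str = Field(..., description="Text to echo back", min_length=1, max_length=1000)
+
+
+@mcp.tool(
+    name="example_echo",
+    annotations={
+        "title": "Echo Text",
+        "readOnlyHint": True,
+        "destructiveHint": False,
+        "idempotentHint": True,
+        "openWorldHint": False,
+    },
+)
+async def example_echo(params: EchoInput) -> str:
+    '''Return the provided text unchanged.'''
+    return params.text
+
+
+if __name__ == "__main__":
+    mcp.run()  # stdio transport by default
+```
+
+The sections below break down each of these pieces in detail.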
+ +--- + +## Quick Reference + +### Key Imports + +```python +from mcp.server.fastmcp import FastMCP +from pydantic import BaseModel, Field, field_validator, ConfigDict +from typing import Optional, List, Dict, Any +from enum import Enum +import httpx +``` + +### Server Initialization + +```python +mcp = FastMCP("service_mcp") +``` + +### Tool Registration Pattern + +```python +@mcp.tool(name="tool_name", annotations={...}) +async def tool_function(params: InputModel) -> str: + # Implementation + pass +``` + +--- + +## MCP Python SDK and FastMCP + +The official MCP Python SDK provides FastMCP, a high-level framework for building MCP servers. It provides: + +- Automatic description and inputSchema generation from function signatures and docstrings +- Pydantic model integration for input validation +- Decorator-based tool registration with `@mcp.tool` + +**For complete SDK documentation, use WebFetch to load:** +`https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md` + +## Server Naming Convention + +Python MCP servers must follow this naming pattern: + +- **Format**: `{service}_mcp` (lowercase with underscores) +- **Examples**: `github_mcp`, `jira_mcp`, `stripe_mcp` + +The name should be: + +- General (not tied to specific features) +- Descriptive of the service/API being integrated +- Easy to infer from the task description +- Without version numbers or dates + +## Tool Implementation + +### Tool Naming + +Use snake_case for tool names (e.g., "search_users", "create_project", "get_channel_info") with clear, action-oriented names. + +**Avoid Naming Conflicts**: Include the service context to prevent overlaps: + +- Use "slack_send_message" instead of just "send_message" +- Use "github_create_issue" instead of just "create_issue" +- Use "asana_list_tasks" instead of just "list_tasks" + +### Tool Structure with FastMCP + +Tools are defined using the `@mcp.tool` decorator with Pydantic models for input validation: + +```python +from pydantic import BaseModel, Field, ConfigDict +from mcp.server.fastmcp import FastMCP + +# Initialize the MCP server +mcp = FastMCP("example_mcp") + +# Define Pydantic model for input validation +class ServiceToolInput(BaseModel): + '''Input model for service tool operation.''' + model_config = ConfigDict( + str_strip_whitespace=True, # Auto-strip whitespace from strings + validate_assignment=True, # Validate on assignment + extra='forbid' # Forbid extra fields + ) + + param1: str = Field(..., description="First parameter description (e.g., 'user123', 'project-abc')", min_length=1, max_length=100) + param2: Optional[int] = Field(default=None, description="Optional integer parameter with constraints", ge=0, le=1000) + tags: Optional[List[str]] = Field(default_factory=list, description="List of tags to apply", max_items=10) + +@mcp.tool( + name="service_tool_name", + annotations={ + "title": "Human-Readable Tool Title", + "readOnlyHint": True, # Tool does not modify environment + "destructiveHint": False, # Tool does not perform destructive operations + "idempotentHint": True, # Repeated calls have no additional effect + "openWorldHint": False # Tool does not interact with external entities + } +) +async def service_tool_name(params: ServiceToolInput) -> str: + '''Tool description automatically becomes the 'description' field. + + This tool performs a specific operation on the service. It validates all inputs + using the ServiceToolInput Pydantic model before processing. 
+ + Args: + params (ServiceToolInput): Validated input parameters containing: + - param1 (str): First parameter description + - param2 (Optional[int]): Optional parameter with default + - tags (Optional[List[str]]): List of tags + + Returns: + str: JSON-formatted response containing operation results + ''' + # Implementation here + pass +``` + +## Pydantic v2 Key Features + +- Use `model_config` instead of nested `Config` class +- Use `field_validator` instead of deprecated `validator` +- Use `model_dump()` instead of deprecated `dict()` +- Validators require `@classmethod` decorator +- Type hints are required for validator methods + +```python +from pydantic import BaseModel, Field, field_validator, ConfigDict + +class CreateUserInput(BaseModel): + model_config = ConfigDict( + str_strip_whitespace=True, + validate_assignment=True + ) + + name: str = Field(..., description="User's full name", min_length=1, max_length=100) + email: str = Field(..., description="User's email address", pattern=r'^[\w\.-]+@[\w\.-]+\.\w+$') + age: int = Field(..., description="User's age", ge=0, le=150) + + @field_validator('email') + @classmethod + def validate_email(cls, v: str) -> str: + if not v.strip(): + raise ValueError("Email cannot be empty") + return v.lower() +``` + +## Response Format Options + +Support multiple output formats for flexibility: + +```python +from enum import Enum + +class ResponseFormat(str, Enum): + '''Output format for tool responses.''' + MARKDOWN = "markdown" + JSON = "json" + +class UserSearchInput(BaseModel): + query: str = Field(..., description="Search query") + response_format: ResponseFormat = Field( + default=ResponseFormat.MARKDOWN, + description="Output format: 'markdown' for human-readable or 'json' for machine-readable" + ) +``` + +**Markdown format**: + +- Use headers, lists, and formatting for clarity +- Convert timestamps to human-readable format (e.g., "2024-01-15 10:30:00 UTC" instead of epoch) +- Show display names with IDs in parentheses (e.g., "@john.doe (U123456)") +- Omit verbose metadata (e.g., show only one profile image URL, not all sizes) +- Group related information logically + +**JSON format**: + +- Return complete, structured data suitable for programmatic processing +- Include all available fields and metadata +- Use consistent field names and types + +## Pagination Implementation + +For tools that list resources: + +```python +class ListInput(BaseModel): + limit: Optional[int] = Field(default=20, description="Maximum results to return", ge=1, le=100) + offset: Optional[int] = Field(default=0, description="Number of results to skip for pagination", ge=0) + +async def list_items(params: ListInput) -> str: + # Make API request with pagination + data = await api_request(limit=params.limit, offset=params.offset) + + # Return pagination info + response = { + "total": data["total"], + "count": len(data["items"]), + "offset": params.offset, + "items": data["items"], + "has_more": data["total"] > params.offset + len(data["items"]), + "next_offset": params.offset + len(data["items"]) if data["total"] > params.offset + len(data["items"]) else None + } + return json.dumps(response, indent=2) +``` + +## Error Handling + +Provide clear, actionable error messages: + +```python +def _handle_api_error(e: Exception) -> str: + '''Consistent error formatting across all tools.''' + if isinstance(e, httpx.HTTPStatusError): + if e.response.status_code == 404: + return "Error: Resource not found. Please check the ID is correct." 
+ elif e.response.status_code == 403: + return "Error: Permission denied. You don't have access to this resource." + elif e.response.status_code == 429: + return "Error: Rate limit exceeded. Please wait before making more requests." + return f"Error: API request failed with status {e.response.status_code}" + elif isinstance(e, httpx.TimeoutException): + return "Error: Request timed out. Please try again." + return f"Error: Unexpected error occurred: {type(e).__name__}" +``` + +## Shared Utilities + +Extract common functionality into reusable functions: + +```python +# Shared API request function +async def _make_api_request(endpoint: str, method: str = "GET", **kwargs) -> dict: + '''Reusable function for all API calls.''' + async with httpx.AsyncClient() as client: + response = await client.request( + method, + f"{API_BASE_URL}/{endpoint}", + timeout=30.0, + **kwargs + ) + response.raise_for_status() + return response.json() +``` + +## Async/Await Best Practices + +Always use async/await for network requests and I/O operations: + +```python +# Good: Async network request +async def fetch_data(resource_id: str) -> dict: + async with httpx.AsyncClient() as client: + response = await client.get(f"{API_URL}/resource/{resource_id}") + response.raise_for_status() + return response.json() + +# Bad: Synchronous request +def fetch_data(resource_id: str) -> dict: + response = requests.get(f"{API_URL}/resource/{resource_id}") # Blocks + return response.json() +``` + +## Type Hints + +Use type hints throughout: + +```python +from typing import Optional, List, Dict, Any + +async def get_user(user_id: str) -> Dict[str, Any]: + data = await fetch_user(user_id) + return {"id": data["id"], "name": data["name"]} +``` + +## Tool Docstrings + +Every tool must have comprehensive docstrings with explicit type information: + +```python +async def search_users(params: UserSearchInput) -> str: + ''' + Search for users in the Example system by name, email, or team. + + This tool searches across all user profiles in the Example platform, + supporting partial matches and various search filters. It does NOT + create or modify users, only searches existing ones. 
+ + Args: + params (UserSearchInput): Validated input parameters containing: + - query (str): Search string to match against names/emails (e.g., "john", "@example.com", "team:marketing") + - limit (Optional[int]): Maximum results to return, between 1-100 (default: 20) + - offset (Optional[int]): Number of results to skip for pagination (default: 0) + + Returns: + str: JSON-formatted string containing search results with the following schema: + + Success response: + { + "total": int, # Total number of matches found + "count": int, # Number of results in this response + "offset": int, # Current pagination offset + "users": [ + { + "id": str, # User ID (e.g., "U123456789") + "name": str, # Full name (e.g., "John Doe") + "email": str, # Email address (e.g., "john@example.com") + "team": str # Team name (e.g., "Marketing") - optional + } + ] + } + + Error response: + "Error: " or "No users found matching ''" + + Examples: + - Use when: "Find all marketing team members" -> params with query="team:marketing" + - Use when: "Search for John's account" -> params with query="john" + - Don't use when: You need to create a user (use example_create_user instead) + - Don't use when: You have a user ID and need full details (use example_get_user instead) + + Error Handling: + - Input validation errors are handled by Pydantic model + - Returns "Error: Rate limit exceeded" if too many requests (429 status) + - Returns "Error: Invalid API authentication" if API key is invalid (401 status) + - Returns formatted list of results or "No users found matching 'query'" + ''' +``` + +## Complete Example + +See below for a complete Python MCP server example: + +```python +#!/usr/bin/env python3 +''' +MCP Server for Example Service. + +This server provides tools to interact with Example API, including user search, +project management, and data export capabilities. 
+''' + +from typing import Optional, List, Dict, Any +from enum import Enum +import httpx +from pydantic import BaseModel, Field, field_validator, ConfigDict +from mcp.server.fastmcp import FastMCP + +# Initialize the MCP server +mcp = FastMCP("example_mcp") + +# Constants +API_BASE_URL = "https://api.example.com/v1" + +# Enums +class ResponseFormat(str, Enum): + '''Output format for tool responses.''' + MARKDOWN = "markdown" + JSON = "json" + +# Pydantic Models for Input Validation +class UserSearchInput(BaseModel): + '''Input model for user search operations.''' + model_config = ConfigDict( + str_strip_whitespace=True, + validate_assignment=True + ) + + query: str = Field(..., description="Search string to match against names/emails", min_length=2, max_length=200) + limit: Optional[int] = Field(default=20, description="Maximum results to return", ge=1, le=100) + offset: Optional[int] = Field(default=0, description="Number of results to skip for pagination", ge=0) + response_format: ResponseFormat = Field(default=ResponseFormat.MARKDOWN, description="Output format") + + @field_validator('query') + @classmethod + def validate_query(cls, v: str) -> str: + if not v.strip(): + raise ValueError("Query cannot be empty or whitespace only") + return v.strip() + +# Shared utility functions +async def _make_api_request(endpoint: str, method: str = "GET", **kwargs) -> dict: + '''Reusable function for all API calls.''' + async with httpx.AsyncClient() as client: + response = await client.request( + method, + f"{API_BASE_URL}/{endpoint}", + timeout=30.0, + **kwargs + ) + response.raise_for_status() + return response.json() + +def _handle_api_error(e: Exception) -> str: + '''Consistent error formatting across all tools.''' + if isinstance(e, httpx.HTTPStatusError): + if e.response.status_code == 404: + return "Error: Resource not found. Please check the ID is correct." + elif e.response.status_code == 403: + return "Error: Permission denied. You don't have access to this resource." + elif e.response.status_code == 429: + return "Error: Rate limit exceeded. Please wait before making more requests." + return f"Error: API request failed with status {e.response.status_code}" + elif isinstance(e, httpx.TimeoutException): + return "Error: Request timed out. Please try again." + return f"Error: Unexpected error occurred: {type(e).__name__}" + +# Tool definitions +@mcp.tool( + name="example_search_users", + annotations={ + "title": "Search Example Users", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": True + } +) +async def example_search_users(params: UserSearchInput) -> str: + '''Search for users in the Example system by name, email, or team. 
+ + [Full docstring as shown above] + ''' + try: + # Make API request using validated parameters + data = await _make_api_request( + "users/search", + params={ + "q": params.query, + "limit": params.limit, + "offset": params.offset + } + ) + + users = data.get("users", []) + total = data.get("total", 0) + + if not users: + return f"No users found matching '{params.query}'" + + # Format response based on requested format + if params.response_format == ResponseFormat.MARKDOWN: + lines = [f"# User Search Results: '{params.query}'", ""] + lines.append(f"Found {total} users (showing {len(users)})") + lines.append("") + + for user in users: + lines.append(f"## {user['name']} ({user['id']})") + lines.append(f"- **Email**: {user['email']}") + if user.get('team'): + lines.append(f"- **Team**: {user['team']}") + lines.append("") + + return "\n".join(lines) + + else: + # Machine-readable JSON format + import json + response = { + "total": total, + "count": len(users), + "offset": params.offset, + "users": users + } + return json.dumps(response, indent=2) + + except Exception as e: + return _handle_api_error(e) + +if __name__ == "__main__": + mcp.run() +``` + +--- + +## Advanced FastMCP Features + +### Context Parameter Injection + +FastMCP can automatically inject a `Context` parameter into tools for advanced capabilities like logging, progress reporting, resource reading, and user interaction: + +```python +from mcp.server.fastmcp import FastMCP, Context + +mcp = FastMCP("example_mcp") + +@mcp.tool() +async def advanced_search(query: str, ctx: Context) -> str: + '''Advanced tool with context access for logging and progress.''' + + # Report progress for long operations + await ctx.report_progress(0.25, "Starting search...") + + # Log information for debugging + await ctx.log_info("Processing query", {"query": query, "timestamp": datetime.now()}) + + # Perform search + results = await search_api(query) + await ctx.report_progress(0.75, "Formatting results...") + + # Access server configuration + server_name = ctx.fastmcp.name + + return format_results(results) + +@mcp.tool() +async def interactive_tool(resource_id: str, ctx: Context) -> str: + '''Tool that can request additional input from users.''' + + # Request sensitive information when needed + api_key = await ctx.elicit( + prompt="Please provide your API key:", + input_type="password" + ) + + # Use the provided key + return await api_call(resource_id, api_key) +``` + +**Context capabilities:** + +- `ctx.report_progress(progress, message)` - Report progress for long operations +- `ctx.log_info(message, data)` / `ctx.log_error()` / `ctx.log_debug()` - Logging +- `ctx.elicit(prompt, input_type)` - Request input from users +- `ctx.fastmcp.name` - Access server configuration +- `ctx.read_resource(uri)` - Read MCP resources + +### Resource Registration + +Expose data as resources for efficient, template-based access: + +```python +@mcp.resource("file://documents/{name}") +async def get_document(name: str) -> str: + '''Expose documents as MCP resources. + + Resources are useful for static or semi-static data that doesn't + require complex parameters. They use URI templates for flexible access. 
+ ''' + document_path = f"./docs/{name}" + with open(document_path, "r") as f: + return f.read() + +@mcp.resource("config://settings/{key}") +async def get_setting(key: str, ctx: Context) -> str: + '''Expose configuration as resources with context.''' + settings = await load_settings() + return json.dumps(settings.get(key, {})) +``` + +**When to use Resources vs Tools:** + +- **Resources**: For data access with simple parameters (URI templates) +- **Tools**: For complex operations with validation and business logic + +### Structured Output Types + +FastMCP supports multiple return types beyond strings: + +```python +from typing import TypedDict +from dataclasses import dataclass +from pydantic import BaseModel + +# TypedDict for structured returns +class UserData(TypedDict): + id: str + name: str + email: str + +@mcp.tool() +async def get_user_typed(user_id: str) -> UserData: + '''Returns structured data - FastMCP handles serialization.''' + return {"id": user_id, "name": "John Doe", "email": "john@example.com"} + +# Pydantic models for complex validation +class DetailedUser(BaseModel): + id: str + name: str + email: str + created_at: datetime + metadata: Dict[str, Any] + +@mcp.tool() +async def get_user_detailed(user_id: str) -> DetailedUser: + '''Returns Pydantic model - automatically generates schema.''' + user = await fetch_user(user_id) + return DetailedUser(**user) +``` + +### Lifespan Management + +Initialize resources that persist across requests: + +```python +from contextlib import asynccontextmanager + +@asynccontextmanager +async def app_lifespan(): + '''Manage resources that live for the server's lifetime.''' + # Initialize connections, load config, etc. + db = await connect_to_database() + config = load_configuration() + + # Make available to all tools + yield {"db": db, "config": config} + + # Cleanup on shutdown + await db.close() + +mcp = FastMCP("example_mcp", lifespan=app_lifespan) + +@mcp.tool() +async def query_data(query: str, ctx: Context) -> str: + '''Access lifespan resources through context.''' + db = ctx.request_context.lifespan_state["db"] + results = await db.query(query) + return format_results(results) +``` + +### Transport Options + +FastMCP supports two main transport mechanisms: + +```python +# stdio transport (for local tools) - default +if __name__ == "__main__": + mcp.run() + +# Streamable HTTP transport (for remote servers) +if __name__ == "__main__": + mcp.run(transport="streamable_http", port=8000) +``` + +**Transport selection:** + +- **stdio**: Command-line tools, local integrations, subprocess execution +- **Streamable HTTP**: Web services, remote access, multiple clients + +--- + +## Code Best Practices + +### Code Composability and Reusability + +Your implementation MUST prioritize composability and code reuse: + +1. **Extract Common Functionality**: + + - Create reusable helper functions for operations used across multiple tools + - Build shared API clients for HTTP requests instead of duplicating code + - Centralize error handling logic in utility functions + - Extract business logic into dedicated functions that can be composed + - Extract shared markdown or JSON field selection & formatting functionality + +2. 
**Avoid Duplication**: + - NEVER copy-paste similar code between tools + - If you find yourself writing similar logic twice, extract it into a function + - Common operations like pagination, filtering, field selection, and formatting should be shared + - Authentication/authorization logic should be centralized + +### Python-Specific Best Practices + +1. **Use Type Hints**: Always include type annotations for function parameters and return values +2. **Pydantic Models**: Define clear Pydantic models for all input validation +3. **Avoid Manual Validation**: Let Pydantic handle input validation with constraints +4. **Proper Imports**: Group imports (standard library, third-party, local) +5. **Error Handling**: Use specific exception types (httpx.HTTPStatusError, not generic Exception) +6. **Async Context Managers**: Use `async with` for resources that need cleanup +7. **Constants**: Define module-level constants in UPPER_CASE + +## Quality Checklist + +Before finalizing your Python MCP server implementation, ensure: + +### Strategic Design + +- [ ] Tools enable complete workflows, not just API endpoint wrappers +- [ ] Tool names reflect natural task subdivisions +- [ ] Response formats optimize for agent context efficiency +- [ ] Human-readable identifiers used where appropriate +- [ ] Error messages guide agents toward correct usage + +### Implementation Quality + +- [ ] FOCUSED IMPLEMENTATION: Most important and valuable tools implemented +- [ ] All tools have descriptive names and documentation +- [ ] Return types are consistent across similar operations +- [ ] Error handling is implemented for all external calls +- [ ] Server name follows format: `{service}_mcp` +- [ ] All network operations use async/await +- [ ] Common functionality is extracted into reusable functions +- [ ] Error messages are clear, actionable, and educational +- [ ] Outputs are properly validated and formatted + +### Tool Configuration + +- [ ] All tools implement 'name' and 'annotations' in the decorator +- [ ] Annotations correctly set (readOnlyHint, destructiveHint, idempotentHint, openWorldHint) +- [ ] All tools use Pydantic BaseModel for input validation with Field() definitions +- [ ] All Pydantic Fields have explicit types and descriptions with constraints +- [ ] All tools have comprehensive docstrings with explicit input/output types +- [ ] Docstrings include complete schema structure for dict/JSON returns +- [ ] Pydantic models handle input validation (no manual validation needed) + +### Advanced Features (where applicable) + +- [ ] Context injection used for logging, progress, or elicitation +- [ ] Resources registered for appropriate data endpoints +- [ ] Lifespan management implemented for persistent connections +- [ ] Structured output types used (TypedDict, Pydantic models) +- [ ] Appropriate transport configured (stdio or streamable HTTP) + +### Code Quality + +- [ ] File includes proper imports including Pydantic imports +- [ ] Pagination is properly implemented where applicable +- [ ] Filtering options are provided for potentially large result sets +- [ ] All async functions are properly defined with `async def` +- [ ] HTTP client usage follows async patterns with proper context managers +- [ ] Type hints are used throughout the code +- [ ] Constants are defined at module level in UPPER_CASE + +### Testing + +- [ ] Server runs successfully: `python your_server.py --help` +- [ ] All imports resolve correctly +- [ ] Sample tool calls work as expected +- [ ] Error scenarios handled gracefully diff --git 
a/.roo/skills/mcp-builder/scripts/connections.py b/.roo/skills/mcp-builder/scripts/connections.py new file mode 100644 index 00000000000..ffcd0da3fbe --- /dev/null +++ b/.roo/skills/mcp-builder/scripts/connections.py @@ -0,0 +1,151 @@ +"""Lightweight connection handling for MCP servers.""" + +from abc import ABC, abstractmethod +from contextlib import AsyncExitStack +from typing import Any + +from mcp import ClientSession, StdioServerParameters +from mcp.client.sse import sse_client +from mcp.client.stdio import stdio_client +from mcp.client.streamable_http import streamablehttp_client + + +class MCPConnection(ABC): + """Base class for MCP server connections.""" + + def __init__(self): + self.session = None + self._stack = None + + @abstractmethod + def _create_context(self): + """Create the connection context based on connection type.""" + + async def __aenter__(self): + """Initialize MCP server connection.""" + self._stack = AsyncExitStack() + await self._stack.__aenter__() + + try: + ctx = self._create_context() + result = await self._stack.enter_async_context(ctx) + + if len(result) == 2: + read, write = result + elif len(result) == 3: + read, write, _ = result + else: + raise ValueError(f"Unexpected context result: {result}") + + session_ctx = ClientSession(read, write) + self.session = await self._stack.enter_async_context(session_ctx) + await self.session.initialize() + return self + except BaseException: + await self._stack.__aexit__(None, None, None) + raise + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """Clean up MCP server connection resources.""" + if self._stack: + await self._stack.__aexit__(exc_type, exc_val, exc_tb) + self.session = None + self._stack = None + + async def list_tools(self) -> list[dict[str, Any]]: + """Retrieve available tools from the MCP server.""" + response = await self.session.list_tools() + return [ + { + "name": tool.name, + "description": tool.description, + "input_schema": tool.inputSchema, + } + for tool in response.tools + ] + + async def call_tool(self, tool_name: str, arguments: dict[str, Any]) -> Any: + """Call a tool on the MCP server with provided arguments.""" + result = await self.session.call_tool(tool_name, arguments=arguments) + return result.content + + +class MCPConnectionStdio(MCPConnection): + """MCP connection using standard input/output.""" + + def __init__(self, command: str, args: list[str] = None, env: dict[str, str] = None): + super().__init__() + self.command = command + self.args = args or [] + self.env = env + + def _create_context(self): + return stdio_client( + StdioServerParameters(command=self.command, args=self.args, env=self.env) + ) + + +class MCPConnectionSSE(MCPConnection): + """MCP connection using Server-Sent Events.""" + + def __init__(self, url: str, headers: dict[str, str] = None): + super().__init__() + self.url = url + self.headers = headers or {} + + def _create_context(self): + return sse_client(url=self.url, headers=self.headers) + + +class MCPConnectionHTTP(MCPConnection): + """MCP connection using Streamable HTTP.""" + + def __init__(self, url: str, headers: dict[str, str] = None): + super().__init__() + self.url = url + self.headers = headers or {} + + def _create_context(self): + return streamablehttp_client(url=self.url, headers=self.headers) + + +def create_connection( + transport: str, + command: str = None, + args: list[str] = None, + env: dict[str, str] = None, + url: str = None, + headers: dict[str, str] = None, +) -> MCPConnection: + """Factory function to create the appropriate 
MCP connection.
+
+    Args:
+        transport: Connection type ("stdio", "sse", or "http")
+        command: Command to run (stdio only)
+        args: Command arguments (stdio only)
+        env: Environment variables (stdio only)
+        url: Server URL (sse and http only)
+        headers: HTTP headers (sse and http only)
+
+    Returns:
+        MCPConnection instance
+    """
+    transport = transport.lower()
+
+    if transport == "stdio":
+        if not command:
+            raise ValueError("Command is required for stdio transport")
+        return MCPConnectionStdio(command=command, args=args, env=env)
+
+    elif transport == "sse":
+        if not url:
+            raise ValueError("URL is required for sse transport")
+        return MCPConnectionSSE(url=url, headers=headers)
+
+    elif transport in ["http", "streamable_http", "streamable-http"]:
+        if not url:
+            raise ValueError("URL is required for http transport")
+        return MCPConnectionHTTP(url=url, headers=headers)
+
+    else:
+        raise ValueError(f"Unsupported transport type: {transport}. Use 'stdio', 'sse', or 'http'")
diff --git a/.roo/skills/mcp-builder/scripts/evaluation.py b/.roo/skills/mcp-builder/scripts/evaluation.py
new file mode 100644
index 00000000000..41778569c45
--- /dev/null
+++ b/.roo/skills/mcp-builder/scripts/evaluation.py
@@ -0,0 +1,373 @@
+"""MCP Server Evaluation Harness
+
+This script evaluates MCP servers by running test questions against them using Claude.
+"""
+
+import argparse
+import asyncio
+import json
+import re
+import sys
+import time
+import traceback
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from typing import Any
+
+from anthropic import Anthropic
+
+from connections import create_connection
+
+EVALUATION_PROMPT = """You are an AI assistant with access to tools.
+
+When given a task, you MUST:
+1. Use the available tools to complete the task
+2. Provide a summary of each step in your approach, wrapped in <summary> tags
+3. Provide feedback on the tools provided, wrapped in <feedback> tags
+4. Provide your final response, wrapped in <response> tags
+
+Summary Requirements:
+- In your <summary> tags, you must explain:
+    - The steps you took to complete the task
+    - Which tools you used, in what order, and why
+    - The inputs you provided to each tool
+    - The outputs you received from each tool
+    - A summary for how you arrived at the response
+
+Feedback Requirements:
+- In your <feedback> tags, provide constructive feedback on the tools:
+    - Comment on tool names: Are they clear and descriptive?
+    - Comment on input parameters: Are they well-documented? Are required vs optional parameters clear?
+    - Comment on descriptions: Do they accurately describe what the tool does?
+    - Comment on any errors encountered during tool usage: Did the tool fail to execute? Did the tool return too many tokens?
+    - Identify specific areas for improvement and explain WHY they would help
+    - Be specific and actionable in your suggestions
+
+Response Requirements:
+- Your response should be concise and directly address what was asked
+- Always wrap your final response in <response> tags
+- If you cannot solve the task return NOT_FOUND
+- For numeric responses, provide just the number
+- For IDs, provide just the ID
+- For names or text, provide the exact text requested
+- Your response should go last"""
+
+
+def parse_evaluation_file(file_path: Path) -> list[dict[str, Any]]:
+    """Parse XML evaluation file with qa_pair elements."""
+    try:
+        tree = ET.parse(file_path)
+        root = tree.getroot()
+        evaluations = []
+
+        for qa_pair in root.findall(".//qa_pair"):
+            question_elem = qa_pair.find("question")
+            answer_elem = qa_pair.find("answer")
+
+            if question_elem is not None and answer_elem is not None:
+                evaluations.append({
+                    "question": (question_elem.text or "").strip(),
+                    "answer": (answer_elem.text or "").strip(),
+                })
+
+        return evaluations
+    except Exception as e:
+        print(f"Error parsing evaluation file {file_path}: {e}")
+        return []
+
+
+def extract_xml_content(text: str, tag: str) -> str | None:
+    """Extract content from XML tags."""
+    pattern = rf"<{tag}>(.*?)</{tag}>"
+    matches = re.findall(pattern, text, re.DOTALL)
+    return matches[-1].strip() if matches else None
+
+
+async def agent_loop(
+    client: Anthropic,
+    model: str,
+    question: str,
+    tools: list[dict[str, Any]],
+    connection: Any,
+) -> tuple[str, dict[str, Any]]:
+    """Run the agent loop with MCP tools."""
+    messages = [{"role": "user", "content": question}]
+
+    response = await asyncio.to_thread(
+        client.messages.create,
+        model=model,
+        max_tokens=4096,
+        system=EVALUATION_PROMPT,
+        messages=messages,
+        tools=tools,
+    )
+
+    messages.append({"role": "assistant", "content": response.content})
+
+    tool_metrics = {}
+
+    while response.stop_reason == "tool_use":
+        tool_use = next(block for block in response.content if block.type == "tool_use")
+        tool_name = tool_use.name
+        tool_input = tool_use.input
+
+        tool_start_ts = time.time()
+        try:
+            tool_result = await connection.call_tool(tool_name, tool_input)
+            tool_response = json.dumps(tool_result) if isinstance(tool_result, (dict, list)) else str(tool_result)
+        except Exception as e:
+            tool_response = f"Error executing tool {tool_name}: {str(e)}\n"
+            tool_response += traceback.format_exc()
+        tool_duration = time.time() - tool_start_ts
+
+        if tool_name not in tool_metrics:
+            tool_metrics[tool_name] = {"count": 0, "durations": []}
+        tool_metrics[tool_name]["count"] += 1
+        tool_metrics[tool_name]["durations"].append(tool_duration)
+
+        messages.append({
+            "role": "user",
+            "content": [{
+                "type": "tool_result",
+                "tool_use_id": tool_use.id,
+                "content": tool_response,
+            }]
+        })
+
+        response = await asyncio.to_thread(
+            client.messages.create,
+            model=model,
+            max_tokens=4096,
+            system=EVALUATION_PROMPT,
+            messages=messages,
+            tools=tools,
+        )
+        messages.append({"role": "assistant", "content": response.content})
+
+    response_text = next(
+        (block.text for block in response.content if hasattr(block, "text")),
+        None,
+    )
+    return response_text, tool_metrics
+
+
+async def evaluate_single_task(
+    client: Anthropic,
+    model: str,
+    qa_pair: dict[str, Any],
+    tools: list[dict[str, Any]],
+    connection: Any,
+    task_index: int,
+) -> dict[str, Any]:
+    """Evaluate a single QA pair with the given tools."""
+    start_time = time.time()
+
+    print(f"Task {task_index + 1}: Running task with question: {qa_pair['question']}")
+
response, tool_metrics = await agent_loop(client, model, qa_pair["question"], tools, connection) + + response_value = extract_xml_content(response, "response") + summary = extract_xml_content(response, "summary") + feedback = extract_xml_content(response, "feedback") + + duration_seconds = time.time() - start_time + + return { + "question": qa_pair["question"], + "expected": qa_pair["answer"], + "actual": response_value, + "score": int(response_value == qa_pair["answer"]) if response_value else 0, + "total_duration": duration_seconds, + "tool_calls": tool_metrics, + "num_tool_calls": sum(len(metrics["durations"]) for metrics in tool_metrics.values()), + "summary": summary, + "feedback": feedback, + } + + +REPORT_HEADER = """ +# Evaluation Report + +## Summary + +- **Accuracy**: {correct}/{total} ({accuracy:.1f}%) +- **Average Task Duration**: {average_duration_s:.2f}s +- **Average Tool Calls per Task**: {average_tool_calls:.2f} +- **Total Tool Calls**: {total_tool_calls} + +--- +""" + +TASK_TEMPLATE = """ +### Task {task_num} + +**Question**: {question} +**Ground Truth Answer**: `{expected_answer}` +**Actual Answer**: `{actual_answer}` +**Correct**: {correct_indicator} +**Duration**: {total_duration:.2f}s +**Tool Calls**: {tool_calls} + +**Summary** +{summary} + +**Feedback** +{feedback} + +--- +""" + + +async def run_evaluation( + eval_path: Path, + connection: Any, + model: str = "claude-3-7-sonnet-20250219", +) -> str: + """Run evaluation with MCP server tools.""" + print("🚀 Starting Evaluation") + + client = Anthropic() + + tools = await connection.list_tools() + print(f"📋 Loaded {len(tools)} tools from MCP server") + + qa_pairs = parse_evaluation_file(eval_path) + print(f"📋 Loaded {len(qa_pairs)} evaluation tasks") + + results = [] + for i, qa_pair in enumerate(qa_pairs): + print(f"Processing task {i + 1}/{len(qa_pairs)}") + result = await evaluate_single_task(client, model, qa_pair, tools, connection, i) + results.append(result) + + correct = sum(r["score"] for r in results) + accuracy = (correct / len(results)) * 100 if results else 0 + average_duration_s = sum(r["total_duration"] for r in results) / len(results) if results else 0 + average_tool_calls = sum(r["num_tool_calls"] for r in results) / len(results) if results else 0 + total_tool_calls = sum(r["num_tool_calls"] for r in results) + + report = REPORT_HEADER.format( + correct=correct, + total=len(results), + accuracy=accuracy, + average_duration_s=average_duration_s, + average_tool_calls=average_tool_calls, + total_tool_calls=total_tool_calls, + ) + + report += "".join([ + TASK_TEMPLATE.format( + task_num=i + 1, + question=qa_pair["question"], + expected_answer=qa_pair["answer"], + actual_answer=result["actual"] or "N/A", + correct_indicator="✅" if result["score"] else "❌", + total_duration=result["total_duration"], + tool_calls=json.dumps(result["tool_calls"], indent=2), + summary=result["summary"] or "N/A", + feedback=result["feedback"] or "N/A", + ) + for i, (qa_pair, result) in enumerate(zip(qa_pairs, results)) + ]) + + return report + + +def parse_headers(header_list: list[str]) -> dict[str, str]: + """Parse header strings in format 'Key: Value' into a dictionary.""" + headers = {} + if not header_list: + return headers + + for header in header_list: + if ":" in header: + key, value = header.split(":", 1) + headers[key.strip()] = value.strip() + else: + print(f"Warning: Ignoring malformed header: {header}") + return headers + + +def parse_env_vars(env_list: list[str]) -> dict[str, str]: + """Parse environment variable 
strings in format 'KEY=VALUE' into a dictionary.""" + env = {} + if not env_list: + return env + + for env_var in env_list: + if "=" in env_var: + key, value = env_var.split("=", 1) + env[key.strip()] = value.strip() + else: + print(f"Warning: Ignoring malformed environment variable: {env_var}") + return env + + +async def main(): + parser = argparse.ArgumentParser( + description="Evaluate MCP servers using test questions", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Evaluate a local stdio MCP server + python evaluation.py -t stdio -c python -a my_server.py eval.xml + + # Evaluate an SSE MCP server + python evaluation.py -t sse -u https://example.com/mcp -H "Authorization: Bearer token" eval.xml + + # Evaluate an HTTP MCP server with custom model + python evaluation.py -t http -u https://example.com/mcp -m claude-3-5-sonnet-20241022 eval.xml + """, + ) + + parser.add_argument("eval_file", type=Path, help="Path to evaluation XML file") + parser.add_argument("-t", "--transport", choices=["stdio", "sse", "http"], default="stdio", help="Transport type (default: stdio)") + parser.add_argument("-m", "--model", default="claude-3-7-sonnet-20250219", help="Claude model to use (default: claude-3-7-sonnet-20250219)") + + stdio_group = parser.add_argument_group("stdio options") + stdio_group.add_argument("-c", "--command", help="Command to run MCP server (stdio only)") + stdio_group.add_argument("-a", "--args", nargs="+", help="Arguments for the command (stdio only)") + stdio_group.add_argument("-e", "--env", nargs="+", help="Environment variables in KEY=VALUE format (stdio only)") + + remote_group = parser.add_argument_group("sse/http options") + remote_group.add_argument("-u", "--url", help="MCP server URL (sse/http only)") + remote_group.add_argument("-H", "--header", nargs="+", dest="headers", help="HTTP headers in 'Key: Value' format (sse/http only)") + + parser.add_argument("-o", "--output", type=Path, help="Output file for evaluation report (default: stdout)") + + args = parser.parse_args() + + if not args.eval_file.exists(): + print(f"Error: Evaluation file not found: {args.eval_file}") + sys.exit(1) + + headers = parse_headers(args.headers) if args.headers else None + env_vars = parse_env_vars(args.env) if args.env else None + + try: + connection = create_connection( + transport=args.transport, + command=args.command, + args=args.args, + env=env_vars, + url=args.url, + headers=headers, + ) + except ValueError as e: + print(f"Error: {e}") + sys.exit(1) + + print(f"🔗 Connecting to MCP server via {args.transport}...") + + async with connection: + print("✅ Connected successfully") + report = await run_evaluation(args.eval_file, connection, args.model) + + if args.output: + args.output.write_text(report) + print(f"\n✅ Report saved to {args.output}") + else: + print("\n" + report) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/.roo/skills/mcp-builder/scripts/example_evaluation.xml b/.roo/skills/mcp-builder/scripts/example_evaluation.xml new file mode 100644 index 00000000000..41e4459b5af --- /dev/null +++ b/.roo/skills/mcp-builder/scripts/example_evaluation.xml @@ -0,0 +1,22 @@ + + + Calculate the compound interest on $10,000 invested at 5% annual interest rate, compounded monthly for 3 years. What is the final amount in dollars (rounded to 2 decimal places)? + 11614.72 + + + A projectile is launched at a 45-degree angle with an initial velocity of 50 m/s. 
Calculate the total distance (in meters) it has traveled from the launch point after 2 seconds, assuming g=9.8 m/s². Round to 2 decimal places. + 87.25 + + + A sphere has a volume of 500 cubic meters. Calculate its surface area in square meters. Round to 2 decimal places. + 304.65 + + + Calculate the population standard deviation of this dataset: [12, 15, 18, 22, 25, 30, 35]. Round to 2 decimal places. + 7.61 + + + Calculate the pH of a solution with a hydrogen ion concentration of 3.5 × 10^-5 M. Round to 2 decimal places. + 4.46 + + diff --git a/.roo/skills/mcp-builder/scripts/requirements.txt b/.roo/skills/mcp-builder/scripts/requirements.txt new file mode 100644 index 00000000000..e73e5d1e356 --- /dev/null +++ b/.roo/skills/mcp-builder/scripts/requirements.txt @@ -0,0 +1,2 @@ +anthropic>=0.39.0 +mcp>=1.1.0 diff --git a/src/package.json b/src/package.json index bf4a009a946..53c7554d5eb 100644 --- a/src/package.json +++ b/src/package.json @@ -439,6 +439,8 @@ "pretest": "turbo run bundle --cwd ..", "test": "vitest run", "format": "prettier --write .", + "generate:skills": "tsx services/skills/generate-built-in-skills.ts", + "prebundle": "pnpm generate:skills", "bundle": "node esbuild.mjs", "vscode:prepublish": "pnpm bundle --production", "vsix": "mkdirp ../bin && vsce package --no-dependencies --out ../bin", diff --git a/src/services/skills/__tests__/generate-built-in-skills.spec.ts b/src/services/skills/__tests__/generate-built-in-skills.spec.ts new file mode 100644 index 00000000000..0c954953a5f --- /dev/null +++ b/src/services/skills/__tests__/generate-built-in-skills.spec.ts @@ -0,0 +1,175 @@ +/** + * Tests for the built-in skills generation script validation logic. + * + * Note: These tests focus on the validation functions since the main script + * is designed to be run as a CLI tool. The actual generation is tested + * via the integration with the build process. 
+ */ + +describe("generate-built-in-skills validation", () => { + describe("validateSkillName", () => { + // Validation function extracted from the generation script + function validateSkillName(name: string): string[] { + const errors: string[] = [] + + if (name.length < 1 || name.length > 64) { + errors.push(`Name must be 1-64 characters (got ${name.length})`) + } + + const nameFormat = /^[a-z0-9]+(?:-[a-z0-9]+)*$/ + if (!nameFormat.test(name)) { + errors.push( + "Name must be lowercase letters/numbers/hyphens only (no leading/trailing hyphen, no consecutive hyphens)", + ) + } + + return errors + } + + it("should accept valid skill names", () => { + expect(validateSkillName("mcp-builder")).toHaveLength(0) + expect(validateSkillName("create-mode")).toHaveLength(0) + expect(validateSkillName("pdf-processing")).toHaveLength(0) + expect(validateSkillName("a")).toHaveLength(0) + expect(validateSkillName("skill123")).toHaveLength(0) + expect(validateSkillName("my-skill-v2")).toHaveLength(0) + }) + + it("should reject names with uppercase letters", () => { + const errors = validateSkillName("Create-MCP-Server") + expect(errors).toHaveLength(1) + expect(errors[0]).toContain("lowercase") + }) + + it("should reject names with leading hyphen", () => { + const errors = validateSkillName("-my-skill") + expect(errors).toHaveLength(1) + expect(errors[0]).toContain("leading/trailing hyphen") + }) + + it("should reject names with trailing hyphen", () => { + const errors = validateSkillName("my-skill-") + expect(errors).toHaveLength(1) + expect(errors[0]).toContain("leading/trailing hyphen") + }) + + it("should reject names with consecutive hyphens", () => { + const errors = validateSkillName("my--skill") + expect(errors).toHaveLength(1) + expect(errors[0]).toContain("consecutive hyphens") + }) + + it("should reject empty names", () => { + const errors = validateSkillName("") + expect(errors.length).toBeGreaterThan(0) + }) + + it("should reject names longer than 64 characters", () => { + const longName = "a".repeat(65) + const errors = validateSkillName(longName) + expect(errors).toHaveLength(1) + expect(errors[0]).toContain("1-64 characters") + }) + + it("should reject names with special characters", () => { + expect(validateSkillName("my_skill").length).toBeGreaterThan(0) + expect(validateSkillName("my.skill").length).toBeGreaterThan(0) + expect(validateSkillName("my skill").length).toBeGreaterThan(0) + }) + }) + + describe("validateDescription", () => { + // Validation function extracted from the generation script + function validateDescription(description: string): string[] { + const errors: string[] = [] + const trimmed = description.trim() + + if (trimmed.length < 1 || trimmed.length > 1024) { + errors.push(`Description must be 1-1024 characters (got ${trimmed.length})`) + } + + return errors + } + + it("should accept valid descriptions", () => { + expect(validateDescription("A short description")).toHaveLength(0) + expect(validateDescription("x")).toHaveLength(0) + expect(validateDescription("x".repeat(1024))).toHaveLength(0) + }) + + it("should reject empty descriptions", () => { + const errors = validateDescription("") + expect(errors).toHaveLength(1) + expect(errors[0]).toContain("1-1024 characters") + }) + + it("should reject whitespace-only descriptions", () => { + const errors = validateDescription(" ") + expect(errors).toHaveLength(1) + expect(errors[0]).toContain("got 0") + }) + + it("should reject descriptions longer than 1024 characters", () => { + const longDesc = "x".repeat(1025) + 
const errors = validateDescription(longDesc) + expect(errors).toHaveLength(1) + expect(errors[0]).toContain("got 1025") + }) + }) + + describe("escapeForTemplateLiteral", () => { + // Escape function extracted from the generation script + function escapeForTemplateLiteral(str: string): string { + return str.replace(/\\/g, "\\\\").replace(/`/g, "\\`").replace(/\$\{/g, "\\${") + } + + it("should escape backticks", () => { + expect(escapeForTemplateLiteral("code `example`")).toBe("code \\`example\\`") + }) + + it("should escape template literal interpolation", () => { + expect(escapeForTemplateLiteral("value: ${foo}")).toBe("value: \\${foo}") + }) + + it("should escape backslashes", () => { + expect(escapeForTemplateLiteral("path\\to\\file")).toBe("path\\\\to\\\\file") + }) + + it("should handle combined escapes", () => { + const input = "const x = `${value}`" + const expected = "const x = \\`\\${value}\\`" + expect(escapeForTemplateLiteral(input)).toBe(expected) + }) + }) +}) + +describe("built-in skills integration", () => { + it("should have valid skill names matching directory names", async () => { + // Import the generated built-in skills + const { getBuiltInSkills, getBuiltInSkillContent } = await import("../built-in-skills") + + const skills = getBuiltInSkills() + + // Verify we have the expected skills + const skillNames = skills.map((s) => s.name) + expect(skillNames).toContain("mcp-builder") + expect(skillNames).toContain("create-mode") + + // Verify each skill has valid content + for (const skill of skills) { + expect(skill.source).toBe("built-in") + expect(skill.path).toBe("built-in") + + const content = getBuiltInSkillContent(skill.name) + expect(content).not.toBeNull() + expect(content!.instructions.length).toBeGreaterThan(0) + } + }) + + it("should return null for non-existent skills", async () => { + const { getBuiltInSkillContent } = await import("../built-in-skills") + + const content = getBuiltInSkillContent("non-existent-skill") + expect(content).toBeNull() + }) +}) diff --git a/src/services/skills/built-in-skills.ts b/src/services/skills/built-in-skills.ts index 86ad4d22a21..64024439e7d 100644 --- a/src/services/skills/built-in-skills.ts +++ b/src/services/skills/built-in-skills.ts @@ -1,3 +1,13 @@ +/** + * AUTO-GENERATED FILE - DO NOT EDIT DIRECTLY + * + * This file is generated by generate-built-in-skills.ts from the SKILL.md files + * in the built-in/ directory. To modify built-in skills, edit the corresponding + * SKILL.md file and run: pnpm generate:skills + * + * Generated at: 2026-01-24T03:15:43.562Z + */ + import { SkillMetadata, SkillContent } from "../../shared/skills" interface BuiltInSkillDefinition { @@ -7,318 +17,6 @@ interface BuiltInSkillDefinition { } const BUILT_IN_SKILLS: Record = { - "create-mcp-server": { - name: "create-mcp-server", - description: - "Instructions for creating MCP (Model Context Protocol) servers. Use when the user asks to add a tool, create an MCP server, or extend capabilities with external APIs.", - instructions: `You have the ability to create an MCP server and add it to a configuration file that will then expose the tools and resources for you to use with \`use_mcp_tool\` and \`access_mcp_resource\`. - -When creating MCP servers, it's important to understand that they operate in a non-interactive environment. The server cannot initiate OAuth flows, open browser windows, or prompt for user input during runtime. 
All credentials and authentication tokens must be provided upfront through environment variables in the MCP settings configuration. For example, Spotify's API uses OAuth to get a refresh token for the user, but the MCP server cannot initiate this flow. While you can walk the user through obtaining an application client ID and secret, you may have to create a separate one-time setup script (like get-refresh-token.js) that captures and logs the final piece of the puzzle: the user's refresh token (i.e. you might run the script using execute_command which would open a browser for authentication, and then log the refresh token so that you can see it in the command output for you to use in the MCP settings configuration). - -Unless the user specifies otherwise, new local MCP servers should be created in the default MCP servers directory: -- macOS: ~/Documents/Cline/MCP/ -- Windows: %USERPROFILE%\\Documents\\Cline\\MCP\\ -- Linux: ~/Documents/Cline/MCP/ - -### MCP Server Types and Configuration - -MCP servers can be configured in two ways in the MCP settings file: - -1. Local (Stdio) Server Configuration: -\`\`\`json -{ - "mcpServers": { - "local-weather": { - "command": "node", - "args": ["/path/to/weather-server/build/index.js"], - "env": { - "OPENWEATHER_API_KEY": "your-api-key" - } - } - } -} -\`\`\` - -2. Remote (SSE) Server Configuration: -\`\`\`json -{ - "mcpServers": { - "remote-weather": { - "url": "https://api.example.com/mcp", - "headers": { - "Authorization": "Bearer your-api-key" - } - } - } -} -\`\`\` - -Common configuration options for both types: -- \`disabled\`: (optional) Set to true to temporarily disable the server -- \`timeout\`: (optional) Maximum time in seconds to wait for server responses (default: 60) -- \`alwaysAllow\`: (optional) Array of tool names that don't require user confirmation -- \`disabledTools\`: (optional) Array of tool names that are not included in the system prompt and won't be used - -### Example Local MCP Server - -For example, if the user wanted to give you the ability to retrieve weather information, you could create an MCP server that uses the OpenWeather API to get weather information, add it to the MCP settings configuration file, and then notice that you now have access to new tools and resources in the system prompt that you might use to show the user your new capabilities. - -The following example demonstrates how to build a local MCP server that provides weather data functionality using the Stdio transport. While this example shows how to implement resources, resource templates, and tools, in practice you should prefer using tools since they are more flexible and can handle dynamic parameters. The resource and resource template implementations are included here mainly for demonstration purposes of the different MCP capabilities, but a real weather server would likely just expose tools for fetching weather data. (The following steps are for macOS) - -1. Use the \`create-typescript-server\` tool to bootstrap a new project in the default MCP servers directory: - -\`\`\`bash -cd ~/Documents/Cline/MCP/ -npx @modelcontextprotocol/create-server weather-server -cd weather-server -# Install dependencies -npm install axios zod @modelcontextprotocol/sdk -\`\`\` - -This will create a new project with the following structure: - -\`\`\` -weather-server/ - ├── package.json - { - ... 
- "type": "module", // added by default, uses ES module syntax (import/export) rather than CommonJS (require/module.exports) (Important to know if you create additional scripts in this server repository like a get-refresh-token.js script) - "scripts": { - "build": "tsc && node -e \\"require('fs').chmodSync('build/index.js', '755')\\"", - ... - } - ... - } - ├── tsconfig.json - └── src/ - └── index.ts # Main server implementation -\`\`\` - -2. Replace \`src/index.ts\` with the following: - -\`\`\`typescript -#!/usr/bin/env node -import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js"; -import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; -import { z } from "zod"; -import axios from 'axios'; - -const API_KEY = process.env.OPENWEATHER_API_KEY; // provided by MCP config -if (!API_KEY) { - throw new Error('OPENWEATHER_API_KEY environment variable is required'); -} - -// Define types for OpenWeather API responses -interface WeatherData { - main: { - temp: number; - humidity: number; - }; - weather: Array<{ - description: string; - }>; - wind: { - speed: number; - }; -} - -interface ForecastData { - list: Array; -} - -// Create an MCP server -const server = new McpServer({ - name: "weather-server", - version: "0.1.0" -}); - -// Create axios instance for OpenWeather API -const weatherApi = axios.create({ - baseURL: 'http://api.openweathermap.org/data/2.5', - params: { - appid: API_KEY, - units: 'metric', - }, -}); - -// Add a tool for getting weather forecasts -server.tool( - "get_forecast", - { - city: z.string().describe("City name"), - days: z.number().min(1).max(5).optional().describe("Number of days (1-5)"), - }, - async ({ city, days = 3 }) => { - try { - const response = await weatherApi.get('forecast', { - params: { - q: city, - cnt: Math.min(days, 5) * 8, - }, - }); - - return { - content: [ - { - type: "text", - text: JSON.stringify(response.data.list, null, 2), - }, - ], - }; - } catch (error) { - if (axios.isAxiosError(error)) { - return { - content: [ - { - type: "text", - text: \`Weather API error: \${ - error.response?.data.message ?? error.message - }\`, - }, - ], - isError: true, - }; - } - throw error; - } - } -); - -// Add a resource for current weather in San Francisco -server.resource( - "sf_weather", - { uri: "weather://San Francisco/current", list: true }, - async (uri) => { - try { - const response = weatherApi.get('weather', { - params: { q: "San Francisco" }, - }); - - return { - contents: [ - { - uri: uri.href, - mimeType: "application/json", - text: JSON.stringify( - { - temperature: response.data.main.temp, - conditions: response.data.weather[0].description, - humidity: response.data.main.humidity, - wind_speed: response.data.wind.speed, - timestamp: new Date().toISOString(), - }, - null, - 2 - ), - }, - ], - }; - } catch (error) { - if (axios.isAxiosError(error)) { - throw new Error(\`Weather API error: \${ - error.response?.data.message ?? 
error.message - }\`); - } - throw error; - } - } -); - -// Add a dynamic resource template for current weather by city -server.resource( - "current_weather", - new ResourceTemplate("weather://{city}/current", { list: true }), - async (uri, { city }) => { - try { - const response = await weatherApi.get('weather', { - params: { q: city }, - }); - - return { - contents: [ - { - uri: uri.href, - mimeType: "application/json", - text: JSON.stringify( - { - temperature: response.data.main.temp, - conditions: response.data.weather[0].description, - humidity: response.data.main.humidity, - wind_speed: response.data.wind.speed, - timestamp: new Date().toISOString(), - }, - null, - 2 - ), - }, - ], - }; - } catch (error) { - if (axios.isAxiosError(error)) { - throw new Error(\`Weather API error: \${ - error.response?.data.message ?? error.message - }\`); - } - throw error; - } - } -); - -// Start receiving messages on stdin and sending messages on stdout -const transport = new StdioServerTransport(); -await server.connect(transport); -console.error('Weather MCP server running on stdio'); -\`\`\` - -(Remember: This is just an example–you may use different dependencies, break the implementation up into multiple files, etc.) - -3. Build and compile the executable JavaScript file - -\`\`\`bash -npm run build -\`\`\` - -4. Whenever you need an environment variable such as an API key to configure the MCP server, walk the user through the process of getting the key. For example, they may need to create an account and go to a developer dashboard to generate the key. Provide step-by-step instructions and URLs to make it easy for the user to retrieve the necessary information. Then use the ask_followup_question tool to ask the user for the key, in this case the OpenWeather API key. - -5. Install the MCP Server by adding the MCP server configuration to the MCP settings file. You can access the MCP settings through VS Code settings or by editing the mcp.json file in the Roo Code settings directory. The settings file may have other MCP servers already configured, so you would read it first and then add your new server to the existing \`mcpServers\` object. - -IMPORTANT: Regardless of what else you see in the MCP settings file, you must default any new MCP servers you create to disabled=false, alwaysAllow=[] and disabledTools=[]. - -\`\`\`json -{ - "mcpServers": { - ..., - "weather": { - "command": "node", - "args": ["/path/to/weather-server/build/index.js"], - "env": { - "OPENWEATHER_API_KEY": "user-provided-api-key" - } - }, - } -} -\`\`\` - -(Note: the user may also ask you to install the MCP server to the Claude desktop app, in which case you would read then modify \`~/Library/Application\\ Support/Claude/claude_desktop_config.json\` on macOS for example. It follows the same format of a top level \`mcpServers\` object.) - -6. After you have edited the MCP settings configuration file, the system will automatically run all the servers and expose the available tools and resources in the 'Connected MCP Servers' section. - -7. Now that you have access to these new tools and resources, you may suggest ways the user can command you to invoke them - for example, with this new weather tool now available, you can invite the user to ask "what's the weather in San Francisco?" - -## Editing MCP Servers - -The user may ask to add tools or resources that may make sense to add to an existing MCP server (check the 'Connected MCP Servers' section in the system prompt), e.g. if it would use the same API. 
This would be possible if you can locate the MCP server repository on the user's system by looking at the server arguments for a filepath. You might then use list_files and read_file to explore the files in the repository, and use write_to_file or apply_diff to make changes to the files. - -However some MCP servers may be running from installed packages rather than a local repository, in which case it may make more sense to create a new MCP server. - -# MCP Servers Are Not Always Necessary - -The user may not always request the use or creation of MCP servers. Instead, they might provide tasks that can be completed with existing tools. While using the MCP SDK to extend your capabilities can be useful, it's important to understand that this is just one specialized type of task you can accomplish. You should only implement MCP servers when the user explicitly requests it (e.g., "add a tool that..."). - -Remember: The MCP documentation and example provided above are to help you understand and work with existing MCP servers or create new ones when requested by the user. You already have access to tools and capabilities that can be used to accomplish a wide range of tasks.`, - }, "create-mode": { name: "create-mode", description: @@ -372,6 +70,241 @@ customModes: - mcp # MCP group (use_mcp_tool, access_mcp_resource) customInstructions: Additional instructions for the Designer mode # Optional`, }, + "mcp-builder": { + name: "mcp-builder", + description: + "Guide for creating high-quality MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. Use when building MCP servers to integrate external APIs or services, whether in Python (FastMCP) or Node/TypeScript (MCP SDK).", + instructions: `# MCP Server Development Guide + +## Overview + +Create MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. The quality of an MCP server is measured by how well it enables LLMs to accomplish real-world tasks. + +--- + +# Process + +## 🚀 High-Level Workflow + +Creating a high-quality MCP server involves four main phases: + +### Phase 1: Deep Research and Planning + +#### 1.1 Understand Modern MCP Design + +**API Coverage vs. Workflow Tools:** +Balance comprehensive API endpoint coverage with specialized workflow tools. Workflow tools can be more convenient for specific tasks, while comprehensive coverage gives agents flexibility to compose operations. Performance varies by client—some clients benefit from code execution that combines basic tools, while others work better with higher-level workflows. When uncertain, prioritize comprehensive API coverage. + +**Tool Naming and Discoverability:** +Clear, descriptive tool names help agents find the right tools quickly. Use consistent prefixes (e.g., \`github_create_issue\`, \`github_list_repos\`) and action-oriented naming. + +**Context Management:** +Agents benefit from concise tool descriptions and the ability to filter/paginate results. Design tools that return focused, relevant data. Some clients support code execution which can help agents filter and process data efficiently. + +**Actionable Error Messages:** +Error messages should guide agents toward solutions with specific suggestions and next steps. 
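+
+For example, a minimal sketch of an actionable error result (the \`slack_list_channels\` tool and \`channel_id\` parameter named in the message are hypothetical):
+
+\`\`\`typescript
+// Sketch: instead of a bare "not found", tell the agent what to try next.
+function channelNotFound(requested: string) {
+  return {
+    isError: true,
+    content: [
+      {
+        type: "text" as const,
+        text:
+          "Channel '" + requested + "' was not found. " +
+          "Call slack_list_channels first and pass one of the returned channel IDs as the 'channel_id' argument.",
+      },
+    ],
+  };
+}
+\`\`\`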
+ +#### 1.2 Study MCP Protocol Documentation + +**Navigate the MCP specification:** + +Start with the sitemap to find relevant pages: \`https://modelcontextprotocol.io/sitemap.xml\` + +Then fetch specific pages with \`.md\` suffix for markdown format (e.g., \`https://modelcontextprotocol.io/specification/draft.md\`). + +Key pages to review: +- Specification overview and architecture +- Transport mechanisms (streamable HTTP, stdio) +- Tool, resource, and prompt definitions + +#### 1.3 Study Framework Documentation + +**Recommended stack:** +- **Language**: TypeScript (high-quality SDK support and good compatibility in many execution environments e.g. MCPB. Plus AI models are good at generating TypeScript code, benefiting from its broad usage, static typing and good linting tools) +- **Transport**: Streamable HTTP for remote servers, using stateless JSON (simpler to scale and maintain, as opposed to stateful sessions and streaming responses). stdio for local servers. + +**Load framework documentation:** + +- **MCP Best Practices**: [📋 View Best Practices](./reference/mcp_best_practices.md) - Core guidelines + +**For TypeScript (recommended):** +- **TypeScript SDK**: Use WebFetch to load \`https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md\` +- [⚡ TypeScript Guide](./reference/node_mcp_server.md) - TypeScript patterns and examples + +**For Python:** +- **Python SDK**: Use WebFetch to load \`https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md\` +- [🐍 Python Guide](./reference/python_mcp_server.md) - Python patterns and examples + +#### 1.4 Plan Your Implementation + +**Understand the API:** +Review the service's API documentation to identify key endpoints, authentication requirements, and data models. Use web search and WebFetch as needed. + +**Tool Selection:** +Prioritize comprehensive API coverage. List endpoints to implement, starting with the most common operations. 
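+
+As a bridge into Phase 2, here is a minimal sketch of a single endpoint wrapped as a tool. It assumes the TypeScript SDK's \`McpServer.registerTool\` API; the \`tracker_list_issues\` tool and \`fetchIssues\` helper are illustrative, not a real service:
+
+\`\`\`typescript
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { z } from "zod";
+
+const server = new McpServer({ name: "tracker-server", version: "0.1.0" });
+
+// Hypothetical helper standing in for a real API client.
+async function fetchIssues(project: string, limit: number) {
+  return [{ id: project + "-1", title: "Example issue" }].slice(0, limit);
+}
+
+server.registerTool(
+  "tracker_list_issues",
+  {
+    description: "List issues in a project, newest first. Supports paging via 'limit'.",
+    inputSchema: {
+      project: z.string().describe("Project key, e.g. WEB"),
+      limit: z.number().min(1).max(50).optional().describe("Max results per page (default 20)"),
+    },
+    outputSchema: { issues: z.array(z.object({ id: z.string(), title: z.string() })) },
+    annotations: { readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
+  },
+  async ({ project, limit = 20 }) => {
+    const issues = await fetchIssues(project, limit);
+    return {
+      content: [{ type: "text", text: JSON.stringify(issues, null, 2) }],
+      structuredContent: { issues },
+    };
+  },
+);
+\`\`\`
+
+Phase 2 below covers the project structure and shared infrastructure (auth, error handling, pagination) that this sketch glosses over.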
+ +--- + +### Phase 2: Implementation + +#### 2.1 Set Up Project Structure + +See language-specific guides for project setup: +- [⚡ TypeScript Guide](./reference/node_mcp_server.md) - Project structure, package.json, tsconfig.json +- [🐍 Python Guide](./reference/python_mcp_server.md) - Module organization, dependencies + +#### 2.2 Implement Core Infrastructure + +Create shared utilities: +- API client with authentication +- Error handling helpers +- Response formatting (JSON/Markdown) +- Pagination support + +#### 2.3 Implement Tools + +For each tool: + +**Input Schema:** +- Use Zod (TypeScript) or Pydantic (Python) +- Include constraints and clear descriptions +- Add examples in field descriptions + +**Output Schema:** +- Define \`outputSchema\` where possible for structured data +- Use \`structuredContent\` in tool responses (TypeScript SDK feature) +- Helps clients understand and process tool outputs + +**Tool Description:** +- Concise summary of functionality +- Parameter descriptions +- Return type schema + +**Implementation:** +- Async/await for I/O operations +- Proper error handling with actionable messages +- Support pagination where applicable +- Return both text content and structured data when using modern SDKs + +**Annotations:** +- \`readOnlyHint\`: true/false +- \`destructiveHint\`: true/false +- \`idempotentHint\`: true/false +- \`openWorldHint\`: true/false + +--- + +### Phase 3: Review and Test + +#### 3.1 Code Quality + +Review for: +- No duplicated code (DRY principle) +- Consistent error handling +- Full type coverage +- Clear tool descriptions + +#### 3.2 Build and Test + +**TypeScript:** +- Run \`npm run build\` to verify compilation +- Test with MCP Inspector: \`npx @modelcontextprotocol/inspector\` + +**Python:** +- Verify syntax: \`python -m py_compile your_server.py\` +- Test with MCP Inspector + +See language-specific guides for detailed testing approaches and quality checklists. + +--- + +### Phase 4: Create Evaluations + +After implementing your MCP server, create comprehensive evaluations to test its effectiveness. + +**Load [✅ Evaluation Guide](./reference/evaluation.md) for complete evaluation guidelines.** + +#### 4.1 Understand Evaluation Purpose + +Use evaluations to test whether LLMs can effectively use your MCP server to answer realistic, complex questions. + +#### 4.2 Create 10 Evaluation Questions + +To create effective evaluations, follow the process outlined in the evaluation guide: + +1. **Tool Inspection**: List available tools and understand their capabilities +2. **Content Exploration**: Use READ-ONLY operations to explore available data +3. **Question Generation**: Create 10 complex, realistic questions +4. **Answer Verification**: Solve each question yourself to verify answers + +#### 4.3 Evaluation Requirements + +Ensure each question is: +- **Independent**: Not dependent on other questions +- **Read-only**: Only non-destructive operations required +- **Complex**: Requiring multiple tool calls and deep exploration +- **Realistic**: Based on real use cases humans would care about +- **Verifiable**: Single, clear answer that can be verified by string comparison +- **Stable**: Answer won't change over time + +#### 4.4 Output Format + +Create an XML file with this structure: + +\`\`\`xml + + + Find discussions about AI model launches with animal codenames. One model needed a specific safety designation that uses the format ASL-X. What number X was being determined for the model named after a spotted wild cat? 
+ 3 + + + +\`\`\` + +--- + +# Reference Files + +## 📚 Documentation Library + +Load these resources as needed during development: + +### Core MCP Documentation (Load First) +- **MCP Protocol**: Start with sitemap at \`https://modelcontextprotocol.io/sitemap.xml\`, then fetch specific pages with \`.md\` suffix +- [📋 MCP Best Practices](./reference/mcp_best_practices.md) - Universal MCP guidelines including: + - Server and tool naming conventions + - Response format guidelines (JSON vs Markdown) + - Pagination best practices + - Transport selection (streamable HTTP vs stdio) + - Security and error handling standards + +### SDK Documentation (Load During Phase 1/2) +- **Python SDK**: Fetch from \`https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md\` +- **TypeScript SDK**: Fetch from \`https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md\` + +### Language-Specific Implementation Guides (Load During Phase 2) +- [🐍 Python Implementation Guide](./reference/python_mcp_server.md) - Complete Python/FastMCP guide with: + - Server initialization patterns + - Pydantic model examples + - Tool registration with \`@mcp.tool\` + - Complete working examples + - Quality checklist + +- [⚡ TypeScript Implementation Guide](./reference/node_mcp_server.md) - Complete TypeScript guide with: + - Project structure + - Zod schema patterns + - Tool registration with \`server.registerTool\` + - Complete working examples + - Quality checklist + +### Evaluation Guide (Load During Phase 4) +- [✅ Evaluation Guide](./reference/evaluation.md) - Complete evaluation creation guide with: + - Question creation guidelines + - Answer verification strategies + - XML format specifications + - Example questions and answers + - Running an evaluation with the provided scripts`, + }, } /** diff --git a/src/services/skills/built-in/create-mode/SKILL.md b/src/services/skills/built-in/create-mode/SKILL.md new file mode 100644 index 00000000000..ec43ac9bea1 --- /dev/null +++ b/src/services/skills/built-in/create-mode/SKILL.md @@ -0,0 +1,57 @@ +--- +name: create-mode +description: Instructions for creating custom modes in Roo Code. Use when the user asks to create a new mode, edit an existing mode, or configure mode settings. +--- + +Custom modes can be configured in two ways: + +1. Globally via the custom modes file in your Roo Code settings directory (typically ~/.roo-code/settings/custom_modes.yaml on macOS/Linux or %APPDATA%\roo-code\settings\custom_modes.yaml on Windows) - created automatically on startup +2. Per-workspace via '.roomodes' in the workspace root directory + +When modes with the same slug exist in both files, the workspace-specific .roomodes version takes precedence. This allows projects to override global modes or define project-specific modes. + +If asked to create a project mode, create it in .roomodes in the workspace root. If asked to create a global mode, use the global custom modes file. + +- The following fields are required and must not be empty: + + - slug: A valid slug (lowercase letters, numbers, and hyphens). Must be unique, and shorter is better. + - name: The display name for the mode + - roleDefinition: A detailed description of the mode's role and capabilities + - groups: Array of allowed tool groups (can be empty). 
Each group can be specified either as a string (e.g., "edit" to allow editing any file) or with file restrictions (e.g., ["edit", { fileRegex: "\.md$", description: "Markdown files only" }] to only allow editing markdown files) + +- The following fields are optional but highly recommended: + + - description: A short, human-readable description of what this mode does (5 words) + - whenToUse: A clear description of when this mode should be selected and what types of tasks it's best suited for. This helps the Orchestrator mode make better decisions. + - customInstructions: Additional instructions for how the mode should operate + +- For multi-line text, include newline characters in the string like "This is the first line.\nThis is the next line.\n\nThis is a double line break." + +Both files should follow this structure (in YAML format): + +customModes: + +- slug: designer # Required: unique slug with lowercase letters, numbers, and hyphens + name: Designer # Required: mode display name + description: UI/UX design systems expert # Optional but recommended: short description (5 words) + roleDefinition: >- + You are Roo, a UI/UX expert specializing in design systems and frontend development. Your expertise includes: + - Creating and maintaining design systems + - Implementing responsive and accessible web interfaces + - Working with CSS, HTML, and modern frontend frameworks + - Ensuring consistent user experiences across platforms # Required: non-empty + whenToUse: >- + Use this mode when creating or modifying UI components, implementing design systems, + or ensuring responsive web interfaces. This mode is especially effective with CSS, + HTML, and modern frontend frameworks. # Optional but recommended + groups: # Required: array of tool groups (can be empty) + - read # Read files group (read_file, search_files, list_files, codebase_search) + - edit # Edit files group (apply_diff, write_to_file) - allows editing any file + # Or with file restrictions: + # - - edit + # - fileRegex: \.md$ + # description: Markdown files only # Edit group that only allows editing markdown files + - browser # Browser group (browser_action) + - command # Command group (execute_command) + - mcp # MCP group (use_mcp_tool, access_mcp_resource) + customInstructions: Additional instructions for the Designer mode # Optional diff --git a/src/services/skills/built-in/mcp-builder/LICENSE.txt b/src/services/skills/built-in/mcp-builder/LICENSE.txt new file mode 100644 index 00000000000..7a4a3ea2424 --- /dev/null +++ b/src/services/skills/built-in/mcp-builder/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/src/services/skills/built-in/mcp-builder/SKILL.md b/src/services/skills/built-in/mcp-builder/SKILL.md new file mode 100644 index 00000000000..bd45c9c8791 --- /dev/null +++ b/src/services/skills/built-in/mcp-builder/SKILL.md @@ -0,0 +1,256 @@ +--- +name: mcp-builder +description: Guide for creating high-quality MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. Use when building MCP servers to integrate external APIs or services, whether in Python (FastMCP) or Node/TypeScript (MCP SDK). +license: Complete terms in LICENSE.txt +--- + +# MCP Server Development Guide + +## Overview + +Create MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. The quality of an MCP server is measured by how well it enables LLMs to accomplish real-world tasks. + +--- + +# Process + +## 🚀 High-Level Workflow + +Creating a high-quality MCP server involves four main phases: + +### Phase 1: Deep Research and Planning + +#### 1.1 Understand Modern MCP Design + +**API Coverage vs. Workflow Tools:** +Balance comprehensive API endpoint coverage with specialized workflow tools. Workflow tools can be more convenient for specific tasks, while comprehensive coverage gives agents flexibility to compose operations. Performance varies by client—some clients benefit from code execution that combines basic tools, while others work better with higher-level workflows. When uncertain, prioritize comprehensive API coverage. + +**Tool Naming and Discoverability:** +Clear, descriptive tool names help agents find the right tools quickly. Use consistent prefixes (e.g., `github_create_issue`, `github_list_repos`) and action-oriented naming. + +**Context Management:** +Agents benefit from concise tool descriptions and the ability to filter/paginate results. Design tools that return focused, relevant data. Some clients support code execution which can help agents filter and process data efficiently. + +**Actionable Error Messages:** +Error messages should guide agents toward solutions with specific suggestions and next steps. + +#### 1.2 Study MCP Protocol Documentation + +**Navigate the MCP specification:** + +Start with the sitemap to find relevant pages: `https://modelcontextprotocol.io/sitemap.xml` + +Then fetch specific pages with `.md` suffix for markdown format (e.g., `https://modelcontextprotocol.io/specification/draft.md`). + +Key pages to review: + +- Specification overview and architecture +- Transport mechanisms (streamable HTTP, stdio) +- Tool, resource, and prompt definitions + +#### 1.3 Study Framework Documentation + +**Recommended stack:** + +- **Language**: TypeScript (high-quality SDK support and good compatibility in many execution environments e.g. MCPB. Plus AI models are good at generating TypeScript code, benefiting from its broad usage, static typing and good linting tools) +- **Transport**: Streamable HTTP for remote servers, using stateless JSON (simpler to scale and maintain, as opposed to stateful sessions and streaming responses). stdio for local servers. 
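+
+For a local server, the stdio hookup is only a few lines. A minimal sketch, assuming the official TypeScript SDK's import paths (see the TypeScript guide below for full project setup):
+
+```typescript
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
+
+const server = new McpServer({ name: "example-server", version: "0.1.0" });
+
+// ...register tools here (see Phase 2)...
+
+// Start receiving requests on stdin and sending responses on stdout.
+await server.connect(new StdioServerTransport());
+```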
+ +**Load framework documentation:** + +- **MCP Best Practices**: [📋 View Best Practices](./reference/mcp_best_practices.md) - Core guidelines + +**For TypeScript (recommended):** + +- **TypeScript SDK**: Use WebFetch to load `https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md` +- [⚡ TypeScript Guide](./reference/node_mcp_server.md) - TypeScript patterns and examples + +**For Python:** + +- **Python SDK**: Use WebFetch to load `https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md` +- [🐍 Python Guide](./reference/python_mcp_server.md) - Python patterns and examples + +#### 1.4 Plan Your Implementation + +**Understand the API:** +Review the service's API documentation to identify key endpoints, authentication requirements, and data models. Use web search and WebFetch as needed. + +**Tool Selection:** +Prioritize comprehensive API coverage. List endpoints to implement, starting with the most common operations. + +--- + +### Phase 2: Implementation + +#### 2.1 Set Up Project Structure + +See language-specific guides for project setup: + +- [⚡ TypeScript Guide](./reference/node_mcp_server.md) - Project structure, package.json, tsconfig.json +- [🐍 Python Guide](./reference/python_mcp_server.md) - Module organization, dependencies + +#### 2.2 Implement Core Infrastructure + +Create shared utilities: + +- API client with authentication +- Error handling helpers +- Response formatting (JSON/Markdown) +- Pagination support + +#### 2.3 Implement Tools + +For each tool: + +**Input Schema:** + +- Use Zod (TypeScript) or Pydantic (Python) +- Include constraints and clear descriptions +- Add examples in field descriptions + +**Output Schema:** + +- Define `outputSchema` where possible for structured data +- Use `structuredContent` in tool responses (TypeScript SDK feature) +- Helps clients understand and process tool outputs + +**Tool Description:** + +- Concise summary of functionality +- Parameter descriptions +- Return type schema + +**Implementation:** + +- Async/await for I/O operations +- Proper error handling with actionable messages +- Support pagination where applicable +- Return both text content and structured data when using modern SDKs + +**Annotations:** + +- `readOnlyHint`: true/false +- `destructiveHint`: true/false +- `idempotentHint`: true/false +- `openWorldHint`: true/false + +--- + +### Phase 3: Review and Test + +#### 3.1 Code Quality + +Review for: + +- No duplicated code (DRY principle) +- Consistent error handling +- Full type coverage +- Clear tool descriptions + +#### 3.2 Build and Test + +**TypeScript:** + +- Run `npm run build` to verify compilation +- Test with MCP Inspector: `npx @modelcontextprotocol/inspector` + +**Python:** + +- Verify syntax: `python -m py_compile your_server.py` +- Test with MCP Inspector + +See language-specific guides for detailed testing approaches and quality checklists. + +--- + +### Phase 4: Create Evaluations + +After implementing your MCP server, create comprehensive evaluations to test its effectiveness. + +**Load [✅ Evaluation Guide](./reference/evaluation.md) for complete evaluation guidelines.** + +#### 4.1 Understand Evaluation Purpose + +Use evaluations to test whether LLMs can effectively use your MCP server to answer realistic, complex questions. + +#### 4.2 Create 10 Evaluation Questions + +To create effective evaluations, follow the process outlined in the evaluation guide: + +1. **Tool Inspection**: List available tools and understand their capabilities +2. 
**Content Exploration**: Use READ-ONLY operations to explore available data +3. **Question Generation**: Create 10 complex, realistic questions +4. **Answer Verification**: Solve each question yourself to verify answers + +#### 4.3 Evaluation Requirements + +Ensure each question is: + +- **Independent**: Not dependent on other questions +- **Read-only**: Only non-destructive operations required +- **Complex**: Requiring multiple tool calls and deep exploration +- **Realistic**: Based on real use cases humans would care about +- **Verifiable**: Single, clear answer that can be verified by string comparison +- **Stable**: Answer won't change over time + +#### 4.4 Output Format + +Create an XML file with this structure: + +```xml + + + Find discussions about AI model launches with animal codenames. One model needed a specific safety designation that uses the format ASL-X. What number X was being determined for the model named after a spotted wild cat? + 3 + + + +``` + +--- + +# Reference Files + +## 📚 Documentation Library + +Load these resources as needed during development: + +### Core MCP Documentation (Load First) + +- **MCP Protocol**: Start with sitemap at `https://modelcontextprotocol.io/sitemap.xml`, then fetch specific pages with `.md` suffix +- [📋 MCP Best Practices](./reference/mcp_best_practices.md) - Universal MCP guidelines including: + - Server and tool naming conventions + - Response format guidelines (JSON vs Markdown) + - Pagination best practices + - Transport selection (streamable HTTP vs stdio) + - Security and error handling standards + +### SDK Documentation (Load During Phase 1/2) + +- **Python SDK**: Fetch from `https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md` +- **TypeScript SDK**: Fetch from `https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md` + +### Language-Specific Implementation Guides (Load During Phase 2) + +- [🐍 Python Implementation Guide](./reference/python_mcp_server.md) - Complete Python/FastMCP guide with: + + - Server initialization patterns + - Pydantic model examples + - Tool registration with `@mcp.tool` + - Complete working examples + - Quality checklist + +- [⚡ TypeScript Implementation Guide](./reference/node_mcp_server.md) - Complete TypeScript guide with: + - Project structure + - Zod schema patterns + - Tool registration with `server.registerTool` + - Complete working examples + - Quality checklist + +### Evaluation Guide (Load During Phase 4) + +- [✅ Evaluation Guide](./reference/evaluation.md) - Complete evaluation creation guide with: + - Question creation guidelines + - Answer verification strategies + - XML format specifications + - Example questions and answers + - Running an evaluation with the provided scripts diff --git a/src/services/skills/built-in/mcp-builder/reference/evaluation.md b/src/services/skills/built-in/mcp-builder/reference/evaluation.md new file mode 100644 index 00000000000..c9375b535a3 --- /dev/null +++ b/src/services/skills/built-in/mcp-builder/reference/evaluation.md @@ -0,0 +1,642 @@ +# MCP Server Evaluation Guide + +## Overview + +This document provides guidance on creating comprehensive evaluations for MCP servers. Evaluations test whether LLMs can effectively use your MCP server to answer realistic, complex questions using only the tools provided. 
+ +--- + +## Quick Reference + +### Evaluation Requirements + +- Create 10 human-readable questions +- Questions must be READ-ONLY, INDEPENDENT, NON-DESTRUCTIVE +- Each question requires multiple tool calls (potentially dozens) +- Answers must be single, verifiable values +- Answers must be STABLE (won't change over time) + +### Output Format + +```xml + + + Your question here + Single verifiable answer + + +``` + +--- + +## Purpose of Evaluations + +The measure of quality of an MCP server is NOT how well or comprehensively the server implements tools, but how well these implementations (input/output schemas, docstrings/descriptions, functionality) enable LLMs with no other context and access ONLY to the MCP servers to answer realistic and difficult questions. + +## Evaluation Overview + +Create 10 human-readable questions requiring ONLY READ-ONLY, INDEPENDENT, NON-DESTRUCTIVE, and IDEMPOTENT operations to answer. Each question should be: + +- Realistic +- Clear and concise +- Unambiguous +- Complex, requiring potentially dozens of tool calls or steps +- Answerable with a single, verifiable value that you identify in advance + +## Question Guidelines + +### Core Requirements + +1. **Questions MUST be independent** + + - Each question should NOT depend on the answer to any other question + - Should not assume prior write operations from processing another question + +2. **Questions MUST require ONLY NON-DESTRUCTIVE AND IDEMPOTENT tool use** + + - Should not instruct or require modifying state to arrive at the correct answer + +3. **Questions must be REALISTIC, CLEAR, CONCISE, and COMPLEX** + - Must require another LLM to use multiple (potentially dozens of) tools or steps to answer + +### Complexity and Depth + +4. **Questions must require deep exploration** + + - Consider multi-hop questions requiring multiple sub-questions and sequential tool calls + - Each step should benefit from information found in previous questions + +5. **Questions may require extensive paging** + + - May need paging through multiple pages of results + - May require querying old data (1-2 years out-of-date) to find niche information + - The questions must be DIFFICULT + +6. **Questions must require deep understanding** + + - Rather than surface-level knowledge + - May pose complex ideas as True/False questions requiring evidence + - May use multiple-choice format where LLM must search different hypotheses + +7. **Questions must not be solvable with straightforward keyword search** + - Do not include specific keywords from the target content + - Use synonyms, related concepts, or paraphrases + - Require multiple searches, analyzing multiple related items, extracting context, then deriving the answer + +### Tool Testing + +8. **Questions should stress-test tool return values** + + - May elicit tools returning large JSON objects or lists, overwhelming the LLM + - Should require understanding multiple modalities of data: + - IDs and names + - Timestamps and datetimes (months, days, years, seconds) + - File IDs, names, extensions, and mimetypes + - URLs, GIDs, etc. + - Should probe the tool's ability to return all useful forms of data + +9. **Questions should MOSTLY reflect real human use cases** + + - The kinds of information retrieval tasks that HUMANS assisted by an LLM would care about + +10. **Questions may require dozens of tool calls** + + - This challenges LLMs with limited context + - Encourages MCP server tools to reduce information returned + +11. 
**Include ambiguous questions** + - May be ambiguous OR require difficult decisions on which tools to call + - Force the LLM to potentially make mistakes or misinterpret + - Ensure that despite AMBIGUITY, there is STILL A SINGLE VERIFIABLE ANSWER + +### Stability + +12. **Questions must be designed so the answer DOES NOT CHANGE** + + - Do not ask questions that rely on "current state" which is dynamic + - For example, do not count: + - Number of reactions to a post + - Number of replies to a thread + - Number of members in a channel + +13. **DO NOT let the MCP server RESTRICT the kinds of questions you create** + - Create challenging and complex questions + - Some may not be solvable with the available MCP server tools + - Questions may require specific output formats (datetime vs. epoch time, JSON vs. MARKDOWN) + - Questions may require dozens of tool calls to complete + +## Answer Guidelines + +### Verification + +1. **Answers must be VERIFIABLE via direct string comparison** + - If the answer can be re-written in many formats, clearly specify the output format in the QUESTION + - Examples: "Use YYYY/MM/DD.", "Respond True or False.", "Answer A, B, C, or D and nothing else." + - Answer should be a single VERIFIABLE value such as: + - User ID, user name, display name, first name, last name + - Channel ID, channel name + - Message ID, string + - URL, title + - Numerical quantity + - Timestamp, datetime + - Boolean (for True/False questions) + - Email address, phone number + - File ID, file name, file extension + - Multiple choice answer + - Answers must not require special formatting or complex, structured output + - Answer will be verified using DIRECT STRING COMPARISON + +### Readability + +2. **Answers should generally prefer HUMAN-READABLE formats** + - Examples: names, first name, last name, datetime, file name, message string, URL, yes/no, true/false, a/b/c/d + - Rather than opaque IDs (though IDs are acceptable) + - The VAST MAJORITY of answers should be human-readable + +### Stability + +3. **Answers must be STABLE/STATIONARY** + + - Look at old content (e.g., conversations that have ended, projects that have launched, questions answered) + - Create QUESTIONS based on "closed" concepts that will always return the same answer + - Questions may ask to consider a fixed time window to insulate from non-stationary answers + - Rely on context UNLIKELY to change + - Example: if finding a paper name, be SPECIFIC enough so answer is not confused with papers published later + +4. **Answers must be CLEAR and UNAMBIGUOUS** + - Questions must be designed so there is a single, clear answer + - Answer can be derived from using the MCP server tools + +### Diversity + +5. **Answers must be DIVERSE** + + - Answer should be a single VERIFIABLE value in diverse modalities and formats + - User concept: user ID, user name, display name, first name, last name, email address, phone number + - Channel concept: channel ID, channel name, channel topic + - Message concept: message ID, message string, timestamp, month, day, year + +6. 
**Answers must NOT be complex structures** + - Not a list of values + - Not a complex object + - Not a list of IDs or strings + - Not natural language text + - UNLESS the answer can be straightforwardly verified using DIRECT STRING COMPARISON + - And can be realistically reproduced + - It should be unlikely that an LLM would return the same list in any other order or format + +## Evaluation Process + +### Step 1: Documentation Inspection + +Read the documentation of the target API to understand: + +- Available endpoints and functionality +- If ambiguity exists, fetch additional information from the web +- Parallelize this step AS MUCH AS POSSIBLE +- Ensure each subagent is ONLY examining documentation from the file system or on the web + +### Step 2: Tool Inspection + +List the tools available in the MCP server: + +- Inspect the MCP server directly +- Understand input/output schemas, docstrings, and descriptions +- WITHOUT calling the tools themselves at this stage + +### Step 3: Developing Understanding + +Repeat steps 1 & 2 until you have a good understanding: + +- Iterate multiple times +- Think about the kinds of tasks you want to create +- Refine your understanding +- At NO stage should you READ the code of the MCP server implementation itself +- Use your intuition and understanding to create reasonable, realistic, but VERY challenging tasks + +### Step 4: Read-Only Content Inspection + +After understanding the API and tools, USE the MCP server tools: + +- Inspect content using READ-ONLY and NON-DESTRUCTIVE operations ONLY +- Goal: identify specific content (e.g., users, channels, messages, projects, tasks) for creating realistic questions +- Should NOT call any tools that modify state +- Will NOT read the code of the MCP server implementation itself +- Parallelize this step with individual sub-agents pursuing independent explorations +- Ensure each subagent is only performing READ-ONLY, NON-DESTRUCTIVE, and IDEMPOTENT operations +- BE CAREFUL: SOME TOOLS may return LOTS OF DATA which would cause you to run out of CONTEXT +- Make INCREMENTAL, SMALL, AND TARGETED tool calls for exploration +- In all tool call requests, use the `limit` parameter to limit results (<10) +- Use pagination + +### Step 5: Task Generation + +After inspecting the content, create 10 human-readable questions: + +- An LLM should be able to answer these with the MCP server +- Follow all question and answer guidelines above + +## Output Format + +Each QA pair consists of a question and an answer. The output should be an XML file with this structure: + +```xml + + + Find the project created in Q2 2024 with the highest number of completed tasks. What is the project name? + Website Redesign + + + Search for issues labeled as "bug" that were closed in March 2024. Which user closed the most issues? Provide their username. + sarah_dev + + + Look for pull requests that modified files in the /api directory and were merged between January 1 and January 31, 2024. How many different contributors worked on these PRs? + 7 + + + Find the repository with the most stars that was created before 2023. What is the repository name? + data-pipeline + + +``` + +## Evaluation Examples + +### Good Questions + +**Example 1: Multi-hop question requiring deep exploration (GitHub MCP)** + +```xml + + Find the repository that was archived in Q3 2023 and had previously been the most forked project in the organization. What was the primary programming language used in that repository? 
+ Python + +``` + +This question is good because: + +- Requires multiple searches to find archived repositories +- Needs to identify which had the most forks before archival +- Requires examining repository details for the language +- Answer is a simple, verifiable value +- Based on historical (closed) data that won't change + +**Example 2: Requires understanding context without keyword matching (Project Management MCP)** + +```xml + + Locate the initiative focused on improving customer onboarding that was completed in late 2023. The project lead created a retrospective document after completion. What was the lead's role title at that time? + Product Manager + +``` + +This question is good because: + +- Doesn't use specific project name ("initiative focused on improving customer onboarding") +- Requires finding completed projects from specific timeframe +- Needs to identify the project lead and their role +- Requires understanding context from retrospective documents +- Answer is human-readable and stable +- Based on completed work (won't change) + +**Example 3: Complex aggregation requiring multiple steps (Issue Tracker MCP)** + +```xml + + Among all bugs reported in January 2024 that were marked as critical priority, which assignee resolved the highest percentage of their assigned bugs within 48 hours? Provide the assignee's username. + alex_eng + +``` + +This question is good because: + +- Requires filtering bugs by date, priority, and status +- Needs to group by assignee and calculate resolution rates +- Requires understanding timestamps to determine 48-hour windows +- Tests pagination (potentially many bugs to process) +- Answer is a single username +- Based on historical data from specific time period + +**Example 4: Requires synthesis across multiple data types (CRM MCP)** + +```xml + + Find the account that upgraded from the Starter to Enterprise plan in Q4 2023 and had the highest annual contract value. What industry does this account operate in? + Healthcare + +``` + +This question is good because: + +- Requires understanding subscription tier changes +- Needs to identify upgrade events in specific timeframe +- Requires comparing contract values +- Must access account industry information +- Answer is simple and verifiable +- Based on completed historical transactions + +### Poor Questions + +**Example 1: Answer changes over time** + +```xml + + How many open issues are currently assigned to the engineering team? + 47 + +``` + +This question is poor because: + +- The answer will change as issues are created, closed, or reassigned +- Not based on stable/stationary data +- Relies on "current state" which is dynamic + +**Example 2: Too easy with keyword search** + +```xml + + Find the pull request with title "Add authentication feature" and tell me who created it. + developer123 + +``` + +This question is poor because: + +- Can be solved with a straightforward keyword search for exact title +- Doesn't require deep exploration or understanding +- No synthesis or analysis needed + +**Example 3: Ambiguous answer format** + +```xml + + List all the repositories that have Python as their primary language. 
+    repo1, repo2, repo3, data-pipeline, ml-tools
+
+```
+
+This question is poor because:
+
+- Answer is a list that could be returned in any order
+- Difficult to verify with direct string comparison
+- LLM might format differently (JSON array, comma-separated, newline-separated)
+- Better to ask for a specific aggregate (count) or superlative (most stars)
+
+## Verification Process
+
+After creating evaluations:
+
+1. **Examine the XML file** to understand the schema
+2. **Load each task instruction** and, working in parallel, identify the correct answer by attempting to solve the task YOURSELF with the MCP server and its tools
+3. **Flag any operations** that require WRITE or DESTRUCTIVE operations
+4. **Accumulate all CORRECT answers** and replace any incorrect answers in the document
+5. **Remove any `<qa_pair>` entries** that require WRITE or DESTRUCTIVE operations
+
+Remember to parallelize solving tasks to avoid running out of context, then accumulate all answers and make changes to the file at the end.
+
+## Tips for Creating Quality Evaluations
+
+1. **Think Hard and Plan Ahead** before generating tasks
+2. **Parallelize Where Opportunity Arises** to speed up the process and manage context
+3. **Focus on Realistic Use Cases** that humans would actually want to accomplish
+4. **Create Challenging Questions** that test the limits of the MCP server's capabilities
+5. **Ensure Stability** by using historical data and closed concepts
+6. **Verify Answers** by solving the questions yourself using the MCP server tools
+7. **Iterate and Refine** based on what you learn during the process
+
+---
+
+# Running Evaluations
+
+After creating your evaluation file, you can use the provided evaluation harness to test your MCP server.
+
+## Setup
+
+1. **Install Dependencies**
+
+    ```bash
+    pip install -r scripts/requirements.txt
+    ```
+
+    Or install manually:
+
+    ```bash
+    pip install anthropic mcp
+    ```
+
+2. **Set API Key**
+
+    ```bash
+    export ANTHROPIC_API_KEY=your_api_key_here
+    ```
+
+## Evaluation File Format
+
+Evaluation files use XML format with `<qa_pair>` elements:
+
+```xml
+<evaluation>
+    <qa_pair>
+        <question>Find the project created in Q2 2024 with the highest number of completed tasks. What is the project name?</question>
+        <answer>Website Redesign</answer>
+    </qa_pair>
+    <qa_pair>
+        <question>Search for issues labeled as "bug" that were closed in March 2024. Which user closed the most issues? Provide their username.</question>
+        <answer>sarah_dev</answer>
+    </qa_pair>
+</evaluation>
+```
+
+## Running Evaluations
+
+The evaluation script (`scripts/evaluation.py`) supports three transport types:
+
+**Important:**
+
+- **stdio transport**: The evaluation script automatically launches and manages the MCP server process for you. Do not run the server manually.
+- **sse/http transports**: You must start the MCP server separately before running the evaluation. The script connects to the already-running server at the specified URL.
+
+### 1. Local STDIO Server
+
+For locally-run MCP servers (script launches the server automatically):
+
+```bash
+python scripts/evaluation.py \
+    -t stdio \
+    -c python \
+    -a my_mcp_server.py \
+    evaluation.xml
+```
+
+With environment variables:
+
+```bash
+python scripts/evaluation.py \
+    -t stdio \
+    -c python \
+    -a my_mcp_server.py \
+    -e API_KEY=abc123 \
+    -e DEBUG=true \
+    evaluation.xml
+```
+
+### 2. Server-Sent Events (SSE)
+
+For SSE-based MCP servers (you must start the server first):
+
+```bash
+python scripts/evaluation.py \
+    -t sse \
+    -u https://example.com/mcp \
+    -H "Authorization: Bearer token123" \
+    -H "X-Custom-Header: value" \
+    evaluation.xml
+```
+
+### 3. 
HTTP (Streamable HTTP) + +For HTTP-based MCP servers (you must start the server first): + +```bash +python scripts/evaluation.py \ + -t http \ + -u https://example.com/mcp \ + -H "Authorization: Bearer token123" \ + evaluation.xml +``` + +## Command-Line Options + +``` +usage: evaluation.py [-h] [-t {stdio,sse,http}] [-m MODEL] [-c COMMAND] + [-a ARGS [ARGS ...]] [-e ENV [ENV ...]] [-u URL] + [-H HEADERS [HEADERS ...]] [-o OUTPUT] + eval_file + +positional arguments: + eval_file Path to evaluation XML file + +optional arguments: + -h, --help Show help message + -t, --transport Transport type: stdio, sse, or http (default: stdio) + -m, --model Claude model to use (default: claude-3-7-sonnet-20250219) + -o, --output Output file for report (default: print to stdout) + +stdio options: + -c, --command Command to run MCP server (e.g., python, node) + -a, --args Arguments for the command (e.g., server.py) + -e, --env Environment variables in KEY=VALUE format + +sse/http options: + -u, --url MCP server URL + -H, --header HTTP headers in 'Key: Value' format +``` + +## Output + +The evaluation script generates a detailed report including: + +- **Summary Statistics**: + + - Accuracy (correct/total) + - Average task duration + - Average tool calls per task + - Total tool calls + +- **Per-Task Results**: + - Prompt and expected response + - Actual response from the agent + - Whether the answer was correct (✅/❌) + - Duration and tool call details + - Agent's summary of its approach + - Agent's feedback on the tools + +### Save Report to File + +```bash +python scripts/evaluation.py \ + -t stdio \ + -c python \ + -a my_server.py \ + -o evaluation_report.md \ + evaluation.xml +``` + +## Complete Example Workflow + +Here's a complete example of creating and running an evaluation: + +1. **Create your evaluation file** (`my_evaluation.xml`): + +```xml + + + Find the user who created the most issues in January 2024. What is their username? + alice_developer + + + Among all pull requests merged in Q1 2024, which repository had the highest number? Provide the repository name. + backend-api + + + Find the project that was completed in December 2023 and had the longest duration from start to finish. How many days did it take? + 127 + + +``` + +2. **Install dependencies**: + +```bash +pip install -r scripts/requirements.txt +export ANTHROPIC_API_KEY=your_api_key +``` + +3. **Run evaluation**: + +```bash +python scripts/evaluation.py \ + -t stdio \ + -c python \ + -a github_mcp_server.py \ + -e GITHUB_TOKEN=ghp_xxx \ + -o github_eval_report.md \ + my_evaluation.xml +``` + +4. 
**Review the report** in `github_eval_report.md` to: + - See which questions passed/failed + - Read the agent's feedback on your tools + - Identify areas for improvement + - Iterate on your MCP server design + +## Troubleshooting + +### Connection Errors + +If you get connection errors: + +- **STDIO**: Verify the command and arguments are correct +- **SSE/HTTP**: Check the URL is accessible and headers are correct +- Ensure any required API keys are set in environment variables or headers + +### Low Accuracy + +If many evaluations fail: + +- Review the agent's feedback for each task +- Check if tool descriptions are clear and comprehensive +- Verify input parameters are well-documented +- Consider whether tools return too much or too little data +- Ensure error messages are actionable + +### Timeout Issues + +If tasks are timing out: + +- Use a more capable model (e.g., `claude-3-7-sonnet-20250219`) +- Check if tools are returning too much data +- Verify pagination is working correctly +- Consider simplifying complex questions diff --git a/src/services/skills/built-in/mcp-builder/reference/mcp_best_practices.md b/src/services/skills/built-in/mcp-builder/reference/mcp_best_practices.md new file mode 100644 index 00000000000..428e1e80947 --- /dev/null +++ b/src/services/skills/built-in/mcp-builder/reference/mcp_best_practices.md @@ -0,0 +1,269 @@ +# MCP Server Best Practices + +## Quick Reference + +### Server Naming + +- **Python**: `{service}_mcp` (e.g., `slack_mcp`) +- **Node/TypeScript**: `{service}-mcp-server` (e.g., `slack-mcp-server`) + +### Tool Naming + +- Use snake_case with service prefix +- Format: `{service}_{action}_{resource}` +- Example: `slack_send_message`, `github_create_issue` + +### Response Formats + +- Support both JSON and Markdown formats +- JSON for programmatic processing +- Markdown for human readability + +### Pagination + +- Always respect `limit` parameter +- Return `has_more`, `next_offset`, `total_count` +- Default to 20-50 items + +### Transport + +- **Streamable HTTP**: For remote servers, multi-client scenarios +- **stdio**: For local integrations, command-line tools +- Avoid SSE (deprecated in favor of streamable HTTP) + +--- + +## Server Naming Conventions + +Follow these standardized naming patterns: + +**Python**: Use format `{service}_mcp` (lowercase with underscores) + +- Examples: `slack_mcp`, `github_mcp`, `jira_mcp` + +**Node/TypeScript**: Use format `{service}-mcp-server` (lowercase with hyphens) + +- Examples: `slack-mcp-server`, `github-mcp-server`, `jira-mcp-server` + +The name should be general, descriptive of the service being integrated, easy to infer from the task description, and without version numbers. + +--- + +## Tool Naming and Design + +### Tool Naming + +1. **Use snake_case**: `search_users`, `create_project`, `get_channel_info` +2. **Include service prefix**: Anticipate that your MCP server may be used alongside other MCP servers + - Use `slack_send_message` instead of just `send_message` + - Use `github_create_issue` instead of just `create_issue` +3. **Be action-oriented**: Start with verbs (get, list, search, create, etc.) +4. 
**Be specific**: Avoid generic names that could conflict with other servers + +### Tool Design + +- Tool descriptions must narrowly and unambiguously describe functionality +- Descriptions must precisely match actual functionality +- Provide tool annotations (readOnlyHint, destructiveHint, idempotentHint, openWorldHint) +- Keep tool operations focused and atomic + +--- + +## Response Formats + +All tools that return data should support multiple formats: + +### JSON Format (`response_format="json"`) + +- Machine-readable structured data +- Include all available fields and metadata +- Consistent field names and types +- Use for programmatic processing + +### Markdown Format (`response_format="markdown"`, typically default) + +- Human-readable formatted text +- Use headers, lists, and formatting for clarity +- Convert timestamps to human-readable format +- Show display names with IDs in parentheses +- Omit verbose metadata + +--- + +## Pagination + +For tools that list resources: + +- **Always respect the `limit` parameter** +- **Implement pagination**: Use `offset` or cursor-based pagination +- **Return pagination metadata**: Include `has_more`, `next_offset`/`next_cursor`, `total_count` +- **Never load all results into memory**: Especially important for large datasets +- **Default to reasonable limits**: 20-50 items is typical + +Example pagination response: + +```json +{ + "total": 150, + "count": 20, + "offset": 0, + "items": [...], + "has_more": true, + "next_offset": 20 +} +``` + +--- + +## Transport Options + +### Streamable HTTP + +**Best for**: Remote servers, web services, multi-client scenarios + +**Characteristics**: + +- Bidirectional communication over HTTP +- Supports multiple simultaneous clients +- Can be deployed as a web service +- Enables server-to-client notifications + +**Use when**: + +- Serving multiple clients simultaneously +- Deploying as a cloud service +- Integration with web applications + +### stdio + +**Best for**: Local integrations, command-line tools + +**Characteristics**: + +- Standard input/output stream communication +- Simple setup, no network configuration needed +- Runs as a subprocess of the client + +**Use when**: + +- Building tools for local development environments +- Integrating with desktop applications +- Single-user, single-session scenarios + +**Note**: stdio servers should NOT log to stdout (use stderr for logging) + +### Transport Selection + +| Criterion | stdio | Streamable HTTP | +| -------------- | ------ | --------------- | +| **Deployment** | Local | Remote | +| **Clients** | Single | Multiple | +| **Complexity** | Low | Medium | +| **Real-time** | No | Yes | + +--- + +## Security Best Practices + +### Authentication and Authorization + +**OAuth 2.1**: + +- Use secure OAuth 2.1 with certificates from recognized authorities +- Validate access tokens before processing requests +- Only accept tokens specifically intended for your server + +**API Keys**: + +- Store API keys in environment variables, never in code +- Validate keys on server startup +- Provide clear error messages when authentication fails + +### Input Validation + +- Sanitize file paths to prevent directory traversal +- Validate URLs and external identifiers +- Check parameter sizes and ranges +- Prevent command injection in system calls +- Use schema validation (Pydantic/Zod) for all inputs + +### Error Handling + +- Don't expose internal errors to clients +- Log security-relevant errors server-side +- Provide helpful but not revealing error messages +- Clean up 
resources after errors + +### DNS Rebinding Protection + +For streamable HTTP servers running locally: + +- Enable DNS rebinding protection +- Validate the `Origin` header on all incoming connections +- Bind to `127.0.0.1` rather than `0.0.0.0` + +--- + +## Tool Annotations + +Provide annotations to help clients understand tool behavior: + +| Annotation | Type | Default | Description | +| ----------------- | ------- | ------- | ------------------------------------------------------- | +| `readOnlyHint` | boolean | false | Tool does not modify its environment | +| `destructiveHint` | boolean | true | Tool may perform destructive updates | +| `idempotentHint` | boolean | false | Repeated calls with same args have no additional effect | +| `openWorldHint` | boolean | true | Tool interacts with external entities | + +**Important**: Annotations are hints, not security guarantees. Clients should not make security-critical decisions based solely on annotations. + +--- + +## Error Handling + +- Use standard JSON-RPC error codes +- Report tool errors within result objects (not protocol-level errors) +- Provide helpful, specific error messages with suggested next steps +- Don't expose internal implementation details +- Clean up resources properly on errors + +Example error handling: + +```typescript +try { + const result = performOperation() + return { content: [{ type: "text", text: result }] } +} catch (error) { + return { + isError: true, + content: [ + { + type: "text", + text: `Error: ${error.message}. Try using filter='active_only' to reduce results.`, + }, + ], + } +} +``` + +--- + +## Testing Requirements + +Comprehensive testing should cover: + +- **Functional testing**: Verify correct execution with valid/invalid inputs +- **Integration testing**: Test interaction with external systems +- **Security testing**: Validate auth, input sanitization, rate limiting +- **Performance testing**: Check behavior under load, timeouts +- **Error handling**: Ensure proper error reporting and cleanup + +--- + +## Documentation Requirements + +- Provide clear documentation of all tools and capabilities +- Include working examples (at least 3 per major feature) +- Document security considerations +- Specify required permissions and access levels +- Document rate limits and performance characteristics diff --git a/src/services/skills/built-in/mcp-builder/reference/node_mcp_server.md b/src/services/skills/built-in/mcp-builder/reference/node_mcp_server.md new file mode 100644 index 00000000000..e645b0291de --- /dev/null +++ b/src/services/skills/built-in/mcp-builder/reference/node_mcp_server.md @@ -0,0 +1,975 @@ +# Node/TypeScript MCP Server Implementation Guide + +## Overview + +This document provides Node/TypeScript-specific best practices and examples for implementing MCP servers using the MCP TypeScript SDK. It covers project structure, server setup, tool registration patterns, input validation with Zod, error handling, and complete working examples. 
+ +--- + +## Quick Reference + +### Key Imports + +```typescript +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" +import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js" +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" +import express from "express" +import { z } from "zod" +``` + +### Server Initialization + +```typescript +const server = new McpServer({ + name: "service-mcp-server", + version: "1.0.0", +}) +``` + +### Tool Registration Pattern + +```typescript +server.registerTool( + "tool_name", + { + title: "Tool Display Name", + description: "What the tool does", + inputSchema: { param: z.string() }, + outputSchema: { result: z.string() }, + }, + async ({ param }) => { + const output = { result: `Processed: ${param}` } + return { + content: [{ type: "text", text: JSON.stringify(output) }], + structuredContent: output, // Modern pattern for structured data + } + }, +) +``` + +--- + +## MCP TypeScript SDK + +The official MCP TypeScript SDK provides: + +- `McpServer` class for server initialization +- `registerTool` method for tool registration +- Zod schema integration for runtime input validation +- Type-safe tool handler implementations + +**IMPORTANT - Use Modern APIs Only:** + +- **DO use**: `server.registerTool()`, `server.registerResource()`, `server.registerPrompt()` +- **DO NOT use**: Old deprecated APIs such as `server.tool()`, `server.setRequestHandler(ListToolsRequestSchema, ...)`, or manual handler registration +- The `register*` methods provide better type safety, automatic schema handling, and are the recommended approach + +See the MCP SDK documentation in the references for complete details. + +## Server Naming Convention + +Node/TypeScript MCP servers must follow this naming pattern: + +- **Format**: `{service}-mcp-server` (lowercase with hyphens) +- **Examples**: `github-mcp-server`, `jira-mcp-server`, `stripe-mcp-server` + +The name should be: + +- General (not tied to specific features) +- Descriptive of the service/API being integrated +- Easy to infer from the task description +- Without version numbers or dates + +## Project Structure + +Create the following structure for Node/TypeScript MCP servers: + +``` +{service}-mcp-server/ +├── package.json +├── tsconfig.json +├── README.md +├── src/ +│ ├── index.ts # Main entry point with McpServer initialization +│ ├── types.ts # TypeScript type definitions and interfaces +│ ├── tools/ # Tool implementations (one file per domain) +│ ├── services/ # API clients and shared utilities +│ ├── schemas/ # Zod validation schemas +│ └── constants.ts # Shared constants (API_URL, CHARACTER_LIMIT, etc.) +└── dist/ # Built JavaScript files (entry point: dist/index.js) +``` + +## Tool Implementation + +### Tool Naming + +Use snake_case for tool names (e.g., "search_users", "create_project", "get_channel_info") with clear, action-oriented names. 
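+
+One lightweight way to keep these conventions consistent as a server grows (a sketch, not an SDK requirement; the names below are illustrative) is to declare the public tool names once as typed constants and reference them at registration time:
+
+```typescript
+// Illustrative only: central registry of tool names following {service}_{action}_{resource}.
+export const TOOL_NAMES = {
+    searchUsers: "example_search_users",
+    createProject: "example_create_project",
+    getChannelInfo: "example_get_channel_info",
+} as const
+
+export type ToolName = (typeof TOOL_NAMES)[keyof typeof TOOL_NAMES]
+
+// At registration time:
+// server.registerTool(TOOL_NAMES.searchUsers, { /* config */ }, handler)
+```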
+ +**Avoid Naming Conflicts**: Include the service context to prevent overlaps: + +- Use "slack_send_message" instead of just "send_message" +- Use "github_create_issue" instead of just "create_issue" +- Use "asana_list_tasks" instead of just "list_tasks" + +### Tool Structure + +Tools are registered using the `registerTool` method with the following requirements: + +- Use Zod schemas for runtime input validation and type safety +- The `description` field must be explicitly provided - JSDoc comments are NOT automatically extracted +- Explicitly provide `title`, `description`, `inputSchema`, and `annotations` +- The `inputSchema` must be a Zod schema object (not a JSON schema) +- Type all parameters and return values explicitly + +```typescript +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" +import { z } from "zod" + +const server = new McpServer({ + name: "example-mcp", + version: "1.0.0", +}) + +// Zod schema for input validation +const UserSearchInputSchema = z + .object({ + query: z + .string() + .min(2, "Query must be at least 2 characters") + .max(200, "Query must not exceed 200 characters") + .describe("Search string to match against names/emails"), + limit: z.number().int().min(1).max(100).default(20).describe("Maximum results to return"), + offset: z.number().int().min(0).default(0).describe("Number of results to skip for pagination"), + response_format: z + .nativeEnum(ResponseFormat) + .default(ResponseFormat.MARKDOWN) + .describe("Output format: 'markdown' for human-readable or 'json' for machine-readable"), + }) + .strict() + +// Type definition from Zod schema +type UserSearchInput = z.infer + +server.registerTool( + "example_search_users", + { + title: "Search Example Users", + description: `Search for users in the Example system by name, email, or team. + +This tool searches across all user profiles in the Example platform, supporting partial matches and various search filters. It does NOT create or modify users, only searches existing ones. 
+ +Args: + - query (string): Search string to match against names/emails + - limit (number): Maximum results to return, between 1-100 (default: 20) + - offset (number): Number of results to skip for pagination (default: 0) + - response_format ('markdown' | 'json'): Output format (default: 'markdown') + +Returns: + For JSON format: Structured data with schema: + { + "total": number, // Total number of matches found + "count": number, // Number of results in this response + "offset": number, // Current pagination offset + "users": [ + { + "id": string, // User ID (e.g., "U123456789") + "name": string, // Full name (e.g., "John Doe") + "email": string, // Email address + "team": string, // Team name (optional) + "active": boolean // Whether user is active + } + ], + "has_more": boolean, // Whether more results are available + "next_offset": number // Offset for next page (if has_more is true) + } + +Examples: + - Use when: "Find all marketing team members" -> params with query="team:marketing" + - Use when: "Search for John's account" -> params with query="john" + - Don't use when: You need to create a user (use example_create_user instead) + +Error Handling: + - Returns "Error: Rate limit exceeded" if too many requests (429 status) + - Returns "No users found matching ''" if search returns empty`, + inputSchema: UserSearchInputSchema, + annotations: { + readOnlyHint: true, + destructiveHint: false, + idempotentHint: true, + openWorldHint: true, + }, + }, + async (params: UserSearchInput) => { + try { + // Input validation is handled by Zod schema + // Make API request using validated parameters + const data = await makeApiRequest("users/search", "GET", undefined, { + q: params.query, + limit: params.limit, + offset: params.offset, + }) + + const users = data.users || [] + const total = data.total || 0 + + if (!users.length) { + return { + content: [ + { + type: "text", + text: `No users found matching '${params.query}'`, + }, + ], + } + } + + // Prepare structured output + const output = { + total, + count: users.length, + offset: params.offset, + users: users.map((user: any) => ({ + id: user.id, + name: user.name, + email: user.email, + ...(user.team ? { team: user.team } : {}), + active: user.active ?? true, + })), + has_more: total > params.offset + users.length, + ...(total > params.offset + users.length + ? 
{ + next_offset: params.offset + users.length, + } + : {}), + } + + // Format text representation based on requested format + let textContent: string + if (params.response_format === ResponseFormat.MARKDOWN) { + const lines = [ + `# User Search Results: '${params.query}'`, + "", + `Found ${total} users (showing ${users.length})`, + "", + ] + for (const user of users) { + lines.push(`## ${user.name} (${user.id})`) + lines.push(`- **Email**: ${user.email}`) + if (user.team) lines.push(`- **Team**: ${user.team}`) + lines.push("") + } + textContent = lines.join("\n") + } else { + textContent = JSON.stringify(output, null, 2) + } + + return { + content: [{ type: "text", text: textContent }], + structuredContent: output, // Modern pattern for structured data + } + } catch (error) { + return { + content: [ + { + type: "text", + text: handleApiError(error), + }, + ], + } + } + }, +) +``` + +## Zod Schemas for Input Validation + +Zod provides runtime type validation: + +```typescript +import { z } from "zod" + +// Basic schema with validation +const CreateUserSchema = z + .object({ + name: z.string().min(1, "Name is required").max(100, "Name must not exceed 100 characters"), + email: z.string().email("Invalid email format"), + age: z + .number() + .int("Age must be a whole number") + .min(0, "Age cannot be negative") + .max(150, "Age cannot be greater than 150"), + }) + .strict() // Use .strict() to forbid extra fields + +// Enums +enum ResponseFormat { + MARKDOWN = "markdown", + JSON = "json", +} + +const SearchSchema = z.object({ + response_format: z.nativeEnum(ResponseFormat).default(ResponseFormat.MARKDOWN).describe("Output format"), +}) + +// Optional fields with defaults +const PaginationSchema = z.object({ + limit: z.number().int().min(1).max(100).default(20).describe("Maximum results to return"), + offset: z.number().int().min(0).default(0).describe("Number of results to skip"), +}) +``` + +## Response Format Options + +Support multiple output formats for flexibility: + +```typescript +enum ResponseFormat { + MARKDOWN = "markdown", + JSON = "json", +} + +const inputSchema = z.object({ + query: z.string(), + response_format: z + .nativeEnum(ResponseFormat) + .default(ResponseFormat.MARKDOWN) + .describe("Output format: 'markdown' for human-readable or 'json' for machine-readable"), +}) +``` + +**Markdown format**: + +- Use headers, lists, and formatting for clarity +- Convert timestamps to human-readable format +- Show display names with IDs in parentheses +- Omit verbose metadata +- Group related information logically + +**JSON format**: + +- Return complete, structured data suitable for programmatic processing +- Include all available fields and metadata +- Use consistent field names and types + +## Pagination Implementation + +For tools that list resources: + +```typescript +const ListSchema = z.object({ + limit: z.number().int().min(1).max(100).default(20), + offset: z.number().int().min(0).default(0), +}) + +async function listItems(params: z.infer) { + const data = await apiRequest(params.limit, params.offset) + + const response = { + total: data.total, + count: data.items.length, + offset: params.offset, + items: data.items, + has_more: data.total > params.offset + data.items.length, + next_offset: data.total > params.offset + data.items.length ? 
params.offset + data.items.length : undefined, + } + + return JSON.stringify(response, null, 2) +} +``` + +## Character Limits and Truncation + +Add a CHARACTER_LIMIT constant to prevent overwhelming responses: + +```typescript +// At module level in constants.ts +export const CHARACTER_LIMIT = 25000 // Maximum response size in characters + +async function searchTool(params: SearchInput) { + let result = generateResponse(data) + + // Check character limit and truncate if needed + if (result.length > CHARACTER_LIMIT) { + const truncatedData = data.slice(0, Math.max(1, data.length / 2)) + response.data = truncatedData + response.truncated = true + response.truncation_message = + `Response truncated from ${data.length} to ${truncatedData.length} items. ` + + `Use 'offset' parameter or add filters to see more results.` + result = JSON.stringify(response, null, 2) + } + + return result +} +``` + +## Error Handling + +Provide clear, actionable error messages: + +```typescript +import axios, { AxiosError } from "axios" + +function handleApiError(error: unknown): string { + if (error instanceof AxiosError) { + if (error.response) { + switch (error.response.status) { + case 404: + return "Error: Resource not found. Please check the ID is correct." + case 403: + return "Error: Permission denied. You don't have access to this resource." + case 429: + return "Error: Rate limit exceeded. Please wait before making more requests." + default: + return `Error: API request failed with status ${error.response.status}` + } + } else if (error.code === "ECONNABORTED") { + return "Error: Request timed out. Please try again." + } + } + return `Error: Unexpected error occurred: ${error instanceof Error ? error.message : String(error)}` +} +``` + +## Shared Utilities + +Extract common functionality into reusable functions: + +```typescript +// Shared API request function +async function makeApiRequest( + endpoint: string, + method: "GET" | "POST" | "PUT" | "DELETE" = "GET", + data?: any, + params?: any, +): Promise { + try { + const response = await axios({ + method, + url: `${API_BASE_URL}/${endpoint}`, + data, + params, + timeout: 30000, + headers: { + "Content-Type": "application/json", + Accept: "application/json", + }, + }) + return response.data + } catch (error) { + throw error + } +} +``` + +## Async/Await Best Practices + +Always use async/await for network requests and I/O operations: + +```typescript +// Good: Async network request +async function fetchData(resourceId: string): Promise { + const response = await axios.get(`${API_URL}/resource/${resourceId}`) + return response.data +} + +// Bad: Promise chains +function fetchData(resourceId: string): Promise { + return axios.get(`${API_URL}/resource/${resourceId}`).then((response) => response.data) // Harder to read and maintain +} +``` + +## TypeScript Best Practices + +1. **Use Strict TypeScript**: Enable strict mode in tsconfig.json +2. **Define Interfaces**: Create clear interface definitions for all data structures +3. **Avoid `any`**: Use proper types or `unknown` instead of `any` +4. **Zod for Runtime Validation**: Use Zod schemas to validate external data +5. **Type Guards**: Create type guard functions for complex type checking +6. **Error Handling**: Always use try-catch with proper error type checking +7. 
**Null Safety**: Use optional chaining (`?.`) and nullish coalescing (`??`) + +```typescript +// Good: Type-safe with Zod and interfaces +interface UserResponse { + id: string + name: string + email: string + team?: string + active: boolean +} + +const UserSchema = z.object({ + id: z.string(), + name: z.string(), + email: z.string().email(), + team: z.string().optional(), + active: z.boolean(), +}) + +type User = z.infer + +async function getUser(id: string): Promise { + const data = await apiCall(`/users/${id}`) + return UserSchema.parse(data) // Runtime validation +} + +// Bad: Using any +async function getUser(id: string): Promise { + return await apiCall(`/users/${id}`) // No type safety +} +``` + +## Package Configuration + +### package.json + +```json +{ + "name": "{service}-mcp-server", + "version": "1.0.0", + "description": "MCP server for {Service} API integration", + "type": "module", + "main": "dist/index.js", + "scripts": { + "start": "node dist/index.js", + "dev": "tsx watch src/index.ts", + "build": "tsc", + "clean": "rm -rf dist" + }, + "engines": { + "node": ">=18" + }, + "dependencies": { + "@modelcontextprotocol/sdk": "^1.6.1", + "axios": "^1.7.9", + "zod": "^3.23.8" + }, + "devDependencies": { + "@types/node": "^22.10.0", + "tsx": "^4.19.2", + "typescript": "^5.7.2" + } +} +``` + +### tsconfig.json + +```json +{ + "compilerOptions": { + "target": "ES2022", + "module": "Node16", + "moduleResolution": "Node16", + "lib": ["ES2022"], + "outDir": "./dist", + "rootDir": "./src", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "allowSyntheticDefaultImports": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} +``` + +## Complete Example + +```typescript +#!/usr/bin/env node +/** + * MCP Server for Example Service. + * + * This server provides tools to interact with Example API, including user search, + * project management, and data export capabilities. 
+ */ + +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" +import { z } from "zod" +import axios, { AxiosError } from "axios" + +// Constants +const API_BASE_URL = "https://api.example.com/v1" +const CHARACTER_LIMIT = 25000 + +// Enums +enum ResponseFormat { + MARKDOWN = "markdown", + JSON = "json", +} + +// Zod schemas +const UserSearchInputSchema = z + .object({ + query: z + .string() + .min(2, "Query must be at least 2 characters") + .max(200, "Query must not exceed 200 characters") + .describe("Search string to match against names/emails"), + limit: z.number().int().min(1).max(100).default(20).describe("Maximum results to return"), + offset: z.number().int().min(0).default(0).describe("Number of results to skip for pagination"), + response_format: z + .nativeEnum(ResponseFormat) + .default(ResponseFormat.MARKDOWN) + .describe("Output format: 'markdown' for human-readable or 'json' for machine-readable"), + }) + .strict() + +type UserSearchInput = z.infer + +// Shared utility functions +async function makeApiRequest( + endpoint: string, + method: "GET" | "POST" | "PUT" | "DELETE" = "GET", + data?: any, + params?: any, +): Promise { + try { + const response = await axios({ + method, + url: `${API_BASE_URL}/${endpoint}`, + data, + params, + timeout: 30000, + headers: { + "Content-Type": "application/json", + Accept: "application/json", + }, + }) + return response.data + } catch (error) { + throw error + } +} + +function handleApiError(error: unknown): string { + if (error instanceof AxiosError) { + if (error.response) { + switch (error.response.status) { + case 404: + return "Error: Resource not found. Please check the ID is correct." + case 403: + return "Error: Permission denied. You don't have access to this resource." + case 429: + return "Error: Rate limit exceeded. Please wait before making more requests." + default: + return `Error: API request failed with status ${error.response.status}` + } + } else if (error.code === "ECONNABORTED") { + return "Error: Request timed out. Please try again." + } + } + return `Error: Unexpected error occurred: ${error instanceof Error ? 
error.message : String(error)}` +} + +// Create MCP server instance +const server = new McpServer({ + name: "example-mcp", + version: "1.0.0", +}) + +// Register tools +server.registerTool( + "example_search_users", + { + title: "Search Example Users", + description: `[Full description as shown above]`, + inputSchema: UserSearchInputSchema, + annotations: { + readOnlyHint: true, + destructiveHint: false, + idempotentHint: true, + openWorldHint: true, + }, + }, + async (params: UserSearchInput) => { + // Implementation as shown above + }, +) + +// Main function +// For stdio (local): +async function runStdio() { + if (!process.env.EXAMPLE_API_KEY) { + console.error("ERROR: EXAMPLE_API_KEY environment variable is required") + process.exit(1) + } + + const transport = new StdioServerTransport() + await server.connect(transport) + console.error("MCP server running via stdio") +} + +// For streamable HTTP (remote): +async function runHTTP() { + if (!process.env.EXAMPLE_API_KEY) { + console.error("ERROR: EXAMPLE_API_KEY environment variable is required") + process.exit(1) + } + + const app = express() + app.use(express.json()) + + app.post("/mcp", async (req, res) => { + const transport = new StreamableHTTPServerTransport({ + sessionIdGenerator: undefined, + enableJsonResponse: true, + }) + res.on("close", () => transport.close()) + await server.connect(transport) + await transport.handleRequest(req, res, req.body) + }) + + const port = parseInt(process.env.PORT || "3000") + app.listen(port, () => { + console.error(`MCP server running on http://localhost:${port}/mcp`) + }) +} + +// Choose transport based on environment +const transport = process.env.TRANSPORT || "stdio" +if (transport === "http") { + runHTTP().catch((error) => { + console.error("Server error:", error) + process.exit(1) + }) +} else { + runStdio().catch((error) => { + console.error("Server error:", error) + process.exit(1) + }) +} +``` + +--- + +## Advanced MCP Features + +### Resource Registration + +Expose data as resources for efficient, URI-based access: + +```typescript +import { ResourceTemplate } from "@modelcontextprotocol/sdk/types.js" + +// Register a resource with URI template +server.registerResource( + { + uri: "file://documents/{name}", + name: "Document Resource", + description: "Access documents by name", + mimeType: "text/plain", + }, + async (uri: string) => { + // Extract parameter from URI + const match = uri.match(/^file:\/\/documents\/(.+)$/) + if (!match) { + throw new Error("Invalid URI format") + } + + const documentName = match[1] + const content = await loadDocument(documentName) + + return { + contents: [ + { + uri, + mimeType: "text/plain", + text: content, + }, + ], + } + }, +) + +// List available resources dynamically +server.registerResourceList(async () => { + const documents = await getAvailableDocuments() + return { + resources: documents.map((doc) => ({ + uri: `file://documents/${doc.name}`, + name: doc.name, + mimeType: "text/plain", + description: doc.description, + })), + } +}) +``` + +**When to use Resources vs Tools:** + +- **Resources**: For data access with simple URI-based parameters +- **Tools**: For complex operations requiring validation and business logic +- **Resources**: When data is relatively static or template-based +- **Tools**: When operations have side effects or complex workflows + +### Transport Options + +The TypeScript SDK supports two main transport mechanisms: + +#### Streamable HTTP (Recommended for Remote Servers) + +```typescript +import { 
StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js" +import express from "express" + +const app = express() +app.use(express.json()) + +app.post("/mcp", async (req, res) => { + // Create new transport for each request (stateless, prevents request ID collisions) + const transport = new StreamableHTTPServerTransport({ + sessionIdGenerator: undefined, + enableJsonResponse: true, + }) + + res.on("close", () => transport.close()) + + await server.connect(transport) + await transport.handleRequest(req, res, req.body) +}) + +app.listen(3000) +``` + +#### stdio (For Local Integrations) + +```typescript +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" + +const transport = new StdioServerTransport() +await server.connect(transport) +``` + +**Transport selection:** + +- **Streamable HTTP**: Web services, remote access, multiple clients +- **stdio**: Command-line tools, local development, subprocess integration + +### Notification Support + +Notify clients when server state changes: + +```typescript +// Notify when tools list changes +server.notification({ + method: "notifications/tools/list_changed", +}) + +// Notify when resources change +server.notification({ + method: "notifications/resources/list_changed", +}) +``` + +Use notifications sparingly - only when server capabilities genuinely change. + +--- + +## Code Best Practices + +### Code Composability and Reusability + +Your implementation MUST prioritize composability and code reuse: + +1. **Extract Common Functionality**: + + - Create reusable helper functions for operations used across multiple tools + - Build shared API clients for HTTP requests instead of duplicating code + - Centralize error handling logic in utility functions + - Extract business logic into dedicated functions that can be composed + - Extract shared markdown or JSON field selection & formatting functionality + +2. **Avoid Duplication**: + - NEVER copy-paste similar code between tools + - If you find yourself writing similar logic twice, extract it into a function + - Common operations like pagination, filtering, field selection, and formatting should be shared + - Authentication/authorization logic should be centralized + +## Building and Running + +Always build your TypeScript code before running: + +```bash +# Build the project +npm run build + +# Run the server +npm start + +# Development with auto-reload +npm run dev +``` + +Always ensure `npm run build` completes successfully before considering the implementation complete. 
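+
+For a quick end-to-end check beyond `--help`, one option is to pipe a raw JSON-RPC `initialize` request into the built server and confirm that it responds. This is only a sketch: it assumes the stdio transport, the default `dist/index.js` entry point, and that any required environment variables (such as API keys) are already exported.
+
+```bash
+# Hypothetical smoke test for a stdio MCP server; adjust the entry point to your project.
+echo '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"smoke-test","version":"0.0.0"}}}' \
+    | node dist/index.js
+```
+
+If the server prints a `result` containing its `serverInfo`, the build and transport wiring are working.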
+ +## Quality Checklist + +Before finalizing your Node/TypeScript MCP server implementation, ensure: + +### Strategic Design + +- [ ] Tools enable complete workflows, not just API endpoint wrappers +- [ ] Tool names reflect natural task subdivisions +- [ ] Response formats optimize for agent context efficiency +- [ ] Human-readable identifiers used where appropriate +- [ ] Error messages guide agents toward correct usage + +### Implementation Quality + +- [ ] FOCUSED IMPLEMENTATION: Most important and valuable tools implemented +- [ ] All tools registered using `registerTool` with complete configuration +- [ ] All tools include `title`, `description`, `inputSchema`, and `annotations` +- [ ] Annotations correctly set (readOnlyHint, destructiveHint, idempotentHint, openWorldHint) +- [ ] All tools use Zod schemas for runtime input validation with `.strict()` enforcement +- [ ] All Zod schemas have proper constraints and descriptive error messages +- [ ] All tools have comprehensive descriptions with explicit input/output types +- [ ] Descriptions include return value examples and complete schema documentation +- [ ] Error messages are clear, actionable, and educational + +### TypeScript Quality + +- [ ] TypeScript interfaces are defined for all data structures +- [ ] Strict TypeScript is enabled in tsconfig.json +- [ ] No use of `any` type - use `unknown` or proper types instead +- [ ] All async functions have explicit Promise return types +- [ ] Error handling uses proper type guards (e.g., `axios.isAxiosError`, `z.ZodError`) + +### Advanced Features (where applicable) + +- [ ] Resources registered for appropriate data endpoints +- [ ] Appropriate transport configured (stdio or streamable HTTP) +- [ ] Notifications implemented for dynamic server capabilities +- [ ] Type-safe with SDK interfaces + +### Project Configuration + +- [ ] Package.json includes all necessary dependencies +- [ ] Build script produces working JavaScript in dist/ directory +- [ ] Main entry point is properly configured as dist/index.js +- [ ] Server name follows format: `{service}-mcp-server` +- [ ] tsconfig.json properly configured with strict mode + +### Code Quality + +- [ ] Pagination is properly implemented where applicable +- [ ] Large responses check CHARACTER_LIMIT constant and truncate with clear messages +- [ ] Filtering options are provided for potentially large result sets +- [ ] All network operations handle timeouts and connection errors gracefully +- [ ] Common functionality is extracted into reusable functions +- [ ] Return types are consistent across similar operations + +### Testing and Build + +- [ ] `npm run build` completes successfully without errors +- [ ] dist/index.js created and executable +- [ ] Server runs: `node dist/index.js --help` +- [ ] All imports resolve correctly +- [ ] Sample tool calls work as expected diff --git a/src/services/skills/built-in/mcp-builder/reference/python_mcp_server.md b/src/services/skills/built-in/mcp-builder/reference/python_mcp_server.md new file mode 100644 index 00000000000..bc6b789546e --- /dev/null +++ b/src/services/skills/built-in/mcp-builder/reference/python_mcp_server.md @@ -0,0 +1,738 @@ +# Python MCP Server Implementation Guide + +## Overview + +This document provides Python-specific best practices and examples for implementing MCP servers using the MCP Python SDK. It covers server setup, tool registration patterns, input validation with Pydantic, error handling, and complete working examples. 
+ +--- + +## Quick Reference + +### Key Imports + +```python +from mcp.server.fastmcp import FastMCP +from pydantic import BaseModel, Field, field_validator, ConfigDict +from typing import Optional, List, Dict, Any +from enum import Enum +import httpx +``` + +### Server Initialization + +```python +mcp = FastMCP("service_mcp") +``` + +### Tool Registration Pattern + +```python +@mcp.tool(name="tool_name", annotations={...}) +async def tool_function(params: InputModel) -> str: + # Implementation + pass +``` + +--- + +## MCP Python SDK and FastMCP + +The official MCP Python SDK provides FastMCP, a high-level framework for building MCP servers. It provides: + +- Automatic description and inputSchema generation from function signatures and docstrings +- Pydantic model integration for input validation +- Decorator-based tool registration with `@mcp.tool` + +**For complete SDK documentation, use WebFetch to load:** +`https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md` + +## Server Naming Convention + +Python MCP servers must follow this naming pattern: + +- **Format**: `{service}_mcp` (lowercase with underscores) +- **Examples**: `github_mcp`, `jira_mcp`, `stripe_mcp` + +The name should be: + +- General (not tied to specific features) +- Descriptive of the service/API being integrated +- Easy to infer from the task description +- Without version numbers or dates + +## Tool Implementation + +### Tool Naming + +Use snake_case for tool names (e.g., "search_users", "create_project", "get_channel_info") with clear, action-oriented names. + +**Avoid Naming Conflicts**: Include the service context to prevent overlaps: + +- Use "slack_send_message" instead of just "send_message" +- Use "github_create_issue" instead of just "create_issue" +- Use "asana_list_tasks" instead of just "list_tasks" + +### Tool Structure with FastMCP + +Tools are defined using the `@mcp.tool` decorator with Pydantic models for input validation: + +```python +from pydantic import BaseModel, Field, ConfigDict +from mcp.server.fastmcp import FastMCP + +# Initialize the MCP server +mcp = FastMCP("example_mcp") + +# Define Pydantic model for input validation +class ServiceToolInput(BaseModel): + '''Input model for service tool operation.''' + model_config = ConfigDict( + str_strip_whitespace=True, # Auto-strip whitespace from strings + validate_assignment=True, # Validate on assignment + extra='forbid' # Forbid extra fields + ) + + param1: str = Field(..., description="First parameter description (e.g., 'user123', 'project-abc')", min_length=1, max_length=100) + param2: Optional[int] = Field(default=None, description="Optional integer parameter with constraints", ge=0, le=1000) + tags: Optional[List[str]] = Field(default_factory=list, description="List of tags to apply", max_items=10) + +@mcp.tool( + name="service_tool_name", + annotations={ + "title": "Human-Readable Tool Title", + "readOnlyHint": True, # Tool does not modify environment + "destructiveHint": False, # Tool does not perform destructive operations + "idempotentHint": True, # Repeated calls have no additional effect + "openWorldHint": False # Tool does not interact with external entities + } +) +async def service_tool_name(params: ServiceToolInput) -> str: + '''Tool description automatically becomes the 'description' field. + + This tool performs a specific operation on the service. It validates all inputs + using the ServiceToolInput Pydantic model before processing. 
+ + Args: + params (ServiceToolInput): Validated input parameters containing: + - param1 (str): First parameter description + - param2 (Optional[int]): Optional parameter with default + - tags (Optional[List[str]]): List of tags + + Returns: + str: JSON-formatted response containing operation results + ''' + # Implementation here + pass +``` + +## Pydantic v2 Key Features + +- Use `model_config` instead of nested `Config` class +- Use `field_validator` instead of deprecated `validator` +- Use `model_dump()` instead of deprecated `dict()` +- Validators require `@classmethod` decorator +- Type hints are required for validator methods + +```python +from pydantic import BaseModel, Field, field_validator, ConfigDict + +class CreateUserInput(BaseModel): + model_config = ConfigDict( + str_strip_whitespace=True, + validate_assignment=True + ) + + name: str = Field(..., description="User's full name", min_length=1, max_length=100) + email: str = Field(..., description="User's email address", pattern=r'^[\w\.-]+@[\w\.-]+\.\w+$') + age: int = Field(..., description="User's age", ge=0, le=150) + + @field_validator('email') + @classmethod + def validate_email(cls, v: str) -> str: + if not v.strip(): + raise ValueError("Email cannot be empty") + return v.lower() +``` + +## Response Format Options + +Support multiple output formats for flexibility: + +```python +from enum import Enum + +class ResponseFormat(str, Enum): + '''Output format for tool responses.''' + MARKDOWN = "markdown" + JSON = "json" + +class UserSearchInput(BaseModel): + query: str = Field(..., description="Search query") + response_format: ResponseFormat = Field( + default=ResponseFormat.MARKDOWN, + description="Output format: 'markdown' for human-readable or 'json' for machine-readable" + ) +``` + +**Markdown format**: + +- Use headers, lists, and formatting for clarity +- Convert timestamps to human-readable format (e.g., "2024-01-15 10:30:00 UTC" instead of epoch) +- Show display names with IDs in parentheses (e.g., "@john.doe (U123456)") +- Omit verbose metadata (e.g., show only one profile image URL, not all sizes) +- Group related information logically + +**JSON format**: + +- Return complete, structured data suitable for programmatic processing +- Include all available fields and metadata +- Use consistent field names and types + +## Pagination Implementation + +For tools that list resources: + +```python +class ListInput(BaseModel): + limit: Optional[int] = Field(default=20, description="Maximum results to return", ge=1, le=100) + offset: Optional[int] = Field(default=0, description="Number of results to skip for pagination", ge=0) + +async def list_items(params: ListInput) -> str: + # Make API request with pagination + data = await api_request(limit=params.limit, offset=params.offset) + + # Return pagination info + response = { + "total": data["total"], + "count": len(data["items"]), + "offset": params.offset, + "items": data["items"], + "has_more": data["total"] > params.offset + len(data["items"]), + "next_offset": params.offset + len(data["items"]) if data["total"] > params.offset + len(data["items"]) else None + } + return json.dumps(response, indent=2) +``` + +## Error Handling + +Provide clear, actionable error messages: + +```python +def _handle_api_error(e: Exception) -> str: + '''Consistent error formatting across all tools.''' + if isinstance(e, httpx.HTTPStatusError): + if e.response.status_code == 404: + return "Error: Resource not found. Please check the ID is correct." 
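+        # Optional addition (not required): map 401 explicitly as well, since the
+        # tool docstrings below reference this message for invalid API credentials.
+        elif e.response.status_code == 401:
+            return "Error: Invalid API authentication. Please check your API key."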
+ elif e.response.status_code == 403: + return "Error: Permission denied. You don't have access to this resource." + elif e.response.status_code == 429: + return "Error: Rate limit exceeded. Please wait before making more requests." + return f"Error: API request failed with status {e.response.status_code}" + elif isinstance(e, httpx.TimeoutException): + return "Error: Request timed out. Please try again." + return f"Error: Unexpected error occurred: {type(e).__name__}" +``` + +## Shared Utilities + +Extract common functionality into reusable functions: + +```python +# Shared API request function +async def _make_api_request(endpoint: str, method: str = "GET", **kwargs) -> dict: + '''Reusable function for all API calls.''' + async with httpx.AsyncClient() as client: + response = await client.request( + method, + f"{API_BASE_URL}/{endpoint}", + timeout=30.0, + **kwargs + ) + response.raise_for_status() + return response.json() +``` + +## Async/Await Best Practices + +Always use async/await for network requests and I/O operations: + +```python +# Good: Async network request +async def fetch_data(resource_id: str) -> dict: + async with httpx.AsyncClient() as client: + response = await client.get(f"{API_URL}/resource/{resource_id}") + response.raise_for_status() + return response.json() + +# Bad: Synchronous request +def fetch_data(resource_id: str) -> dict: + response = requests.get(f"{API_URL}/resource/{resource_id}") # Blocks + return response.json() +``` + +## Type Hints + +Use type hints throughout: + +```python +from typing import Optional, List, Dict, Any + +async def get_user(user_id: str) -> Dict[str, Any]: + data = await fetch_user(user_id) + return {"id": data["id"], "name": data["name"]} +``` + +## Tool Docstrings + +Every tool must have comprehensive docstrings with explicit type information: + +```python +async def search_users(params: UserSearchInput) -> str: + ''' + Search for users in the Example system by name, email, or team. + + This tool searches across all user profiles in the Example platform, + supporting partial matches and various search filters. It does NOT + create or modify users, only searches existing ones. 
+ + Args: + params (UserSearchInput): Validated input parameters containing: + - query (str): Search string to match against names/emails (e.g., "john", "@example.com", "team:marketing") + - limit (Optional[int]): Maximum results to return, between 1-100 (default: 20) + - offset (Optional[int]): Number of results to skip for pagination (default: 0) + + Returns: + str: JSON-formatted string containing search results with the following schema: + + Success response: + { + "total": int, # Total number of matches found + "count": int, # Number of results in this response + "offset": int, # Current pagination offset + "users": [ + { + "id": str, # User ID (e.g., "U123456789") + "name": str, # Full name (e.g., "John Doe") + "email": str, # Email address (e.g., "john@example.com") + "team": str # Team name (e.g., "Marketing") - optional + } + ] + } + + Error response: + "Error: " or "No users found matching ''" + + Examples: + - Use when: "Find all marketing team members" -> params with query="team:marketing" + - Use when: "Search for John's account" -> params with query="john" + - Don't use when: You need to create a user (use example_create_user instead) + - Don't use when: You have a user ID and need full details (use example_get_user instead) + + Error Handling: + - Input validation errors are handled by Pydantic model + - Returns "Error: Rate limit exceeded" if too many requests (429 status) + - Returns "Error: Invalid API authentication" if API key is invalid (401 status) + - Returns formatted list of results or "No users found matching 'query'" + ''' +``` + +## Complete Example + +See below for a complete Python MCP server example: + +```python +#!/usr/bin/env python3 +''' +MCP Server for Example Service. + +This server provides tools to interact with Example API, including user search, +project management, and data export capabilities. 
+''' + +from typing import Optional, List, Dict, Any +from enum import Enum +import httpx +from pydantic import BaseModel, Field, field_validator, ConfigDict +from mcp.server.fastmcp import FastMCP + +# Initialize the MCP server +mcp = FastMCP("example_mcp") + +# Constants +API_BASE_URL = "https://api.example.com/v1" + +# Enums +class ResponseFormat(str, Enum): + '''Output format for tool responses.''' + MARKDOWN = "markdown" + JSON = "json" + +# Pydantic Models for Input Validation +class UserSearchInput(BaseModel): + '''Input model for user search operations.''' + model_config = ConfigDict( + str_strip_whitespace=True, + validate_assignment=True + ) + + query: str = Field(..., description="Search string to match against names/emails", min_length=2, max_length=200) + limit: Optional[int] = Field(default=20, description="Maximum results to return", ge=1, le=100) + offset: Optional[int] = Field(default=0, description="Number of results to skip for pagination", ge=0) + response_format: ResponseFormat = Field(default=ResponseFormat.MARKDOWN, description="Output format") + + @field_validator('query') + @classmethod + def validate_query(cls, v: str) -> str: + if not v.strip(): + raise ValueError("Query cannot be empty or whitespace only") + return v.strip() + +# Shared utility functions +async def _make_api_request(endpoint: str, method: str = "GET", **kwargs) -> dict: + '''Reusable function for all API calls.''' + async with httpx.AsyncClient() as client: + response = await client.request( + method, + f"{API_BASE_URL}/{endpoint}", + timeout=30.0, + **kwargs + ) + response.raise_for_status() + return response.json() + +def _handle_api_error(e: Exception) -> str: + '''Consistent error formatting across all tools.''' + if isinstance(e, httpx.HTTPStatusError): + if e.response.status_code == 404: + return "Error: Resource not found. Please check the ID is correct." + elif e.response.status_code == 403: + return "Error: Permission denied. You don't have access to this resource." + elif e.response.status_code == 429: + return "Error: Rate limit exceeded. Please wait before making more requests." + return f"Error: API request failed with status {e.response.status_code}" + elif isinstance(e, httpx.TimeoutException): + return "Error: Request timed out. Please try again." + return f"Error: Unexpected error occurred: {type(e).__name__}" + +# Tool definitions +@mcp.tool( + name="example_search_users", + annotations={ + "title": "Search Example Users", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": True + } +) +async def example_search_users(params: UserSearchInput) -> str: + '''Search for users in the Example system by name, email, or team. 
+ + [Full docstring as shown above] + ''' + try: + # Make API request using validated parameters + data = await _make_api_request( + "users/search", + params={ + "q": params.query, + "limit": params.limit, + "offset": params.offset + } + ) + + users = data.get("users", []) + total = data.get("total", 0) + + if not users: + return f"No users found matching '{params.query}'" + + # Format response based on requested format + if params.response_format == ResponseFormat.MARKDOWN: + lines = [f"# User Search Results: '{params.query}'", ""] + lines.append(f"Found {total} users (showing {len(users)})") + lines.append("") + + for user in users: + lines.append(f"## {user['name']} ({user['id']})") + lines.append(f"- **Email**: {user['email']}") + if user.get('team'): + lines.append(f"- **Team**: {user['team']}") + lines.append("") + + return "\n".join(lines) + + else: + # Machine-readable JSON format + import json + response = { + "total": total, + "count": len(users), + "offset": params.offset, + "users": users + } + return json.dumps(response, indent=2) + + except Exception as e: + return _handle_api_error(e) + +if __name__ == "__main__": + mcp.run() +``` + +--- + +## Advanced FastMCP Features + +### Context Parameter Injection + +FastMCP can automatically inject a `Context` parameter into tools for advanced capabilities like logging, progress reporting, resource reading, and user interaction: + +```python +from mcp.server.fastmcp import FastMCP, Context + +mcp = FastMCP("example_mcp") + +@mcp.tool() +async def advanced_search(query: str, ctx: Context) -> str: + '''Advanced tool with context access for logging and progress.''' + + # Report progress for long operations + await ctx.report_progress(0.25, "Starting search...") + + # Log information for debugging + await ctx.log_info("Processing query", {"query": query, "timestamp": datetime.now()}) + + # Perform search + results = await search_api(query) + await ctx.report_progress(0.75, "Formatting results...") + + # Access server configuration + server_name = ctx.fastmcp.name + + return format_results(results) + +@mcp.tool() +async def interactive_tool(resource_id: str, ctx: Context) -> str: + '''Tool that can request additional input from users.''' + + # Request sensitive information when needed + api_key = await ctx.elicit( + prompt="Please provide your API key:", + input_type="password" + ) + + # Use the provided key + return await api_call(resource_id, api_key) +``` + +**Context capabilities:** + +- `ctx.report_progress(progress, message)` - Report progress for long operations +- `ctx.log_info(message, data)` / `ctx.log_error()` / `ctx.log_debug()` - Logging +- `ctx.elicit(prompt, input_type)` - Request input from users +- `ctx.fastmcp.name` - Access server configuration +- `ctx.read_resource(uri)` - Read MCP resources + +### Resource Registration + +Expose data as resources for efficient, template-based access: + +```python +@mcp.resource("file://documents/{name}") +async def get_document(name: str) -> str: + '''Expose documents as MCP resources. + + Resources are useful for static or semi-static data that doesn't + require complex parameters. They use URI templates for flexible access. 
+ ''' + document_path = f"./docs/{name}" + with open(document_path, "r") as f: + return f.read() + +@mcp.resource("config://settings/{key}") +async def get_setting(key: str, ctx: Context) -> str: + '''Expose configuration as resources with context.''' + settings = await load_settings() + return json.dumps(settings.get(key, {})) +``` + +**When to use Resources vs Tools:** + +- **Resources**: For data access with simple parameters (URI templates) +- **Tools**: For complex operations with validation and business logic + +### Structured Output Types + +FastMCP supports multiple return types beyond strings: + +```python +from typing import TypedDict +from dataclasses import dataclass +from pydantic import BaseModel + +# TypedDict for structured returns +class UserData(TypedDict): + id: str + name: str + email: str + +@mcp.tool() +async def get_user_typed(user_id: str) -> UserData: + '''Returns structured data - FastMCP handles serialization.''' + return {"id": user_id, "name": "John Doe", "email": "john@example.com"} + +# Pydantic models for complex validation +class DetailedUser(BaseModel): + id: str + name: str + email: str + created_at: datetime + metadata: Dict[str, Any] + +@mcp.tool() +async def get_user_detailed(user_id: str) -> DetailedUser: + '''Returns Pydantic model - automatically generates schema.''' + user = await fetch_user(user_id) + return DetailedUser(**user) +``` + +### Lifespan Management + +Initialize resources that persist across requests: + +```python +from contextlib import asynccontextmanager + +@asynccontextmanager +async def app_lifespan(): + '''Manage resources that live for the server's lifetime.''' + # Initialize connections, load config, etc. + db = await connect_to_database() + config = load_configuration() + + # Make available to all tools + yield {"db": db, "config": config} + + # Cleanup on shutdown + await db.close() + +mcp = FastMCP("example_mcp", lifespan=app_lifespan) + +@mcp.tool() +async def query_data(query: str, ctx: Context) -> str: + '''Access lifespan resources through context.''' + db = ctx.request_context.lifespan_state["db"] + results = await db.query(query) + return format_results(results) +``` + +### Transport Options + +FastMCP supports two main transport mechanisms: + +```python +# stdio transport (for local tools) - default +if __name__ == "__main__": + mcp.run() + +# Streamable HTTP transport (for remote servers) +if __name__ == "__main__": + mcp.run(transport="streamable_http", port=8000) +``` + +**Transport selection:** + +- **stdio**: Command-line tools, local integrations, subprocess execution +- **Streamable HTTP**: Web services, remote access, multiple clients + +--- + +## Code Best Practices + +### Code Composability and Reusability + +Your implementation MUST prioritize composability and code reuse: + +1. **Extract Common Functionality**: + + - Create reusable helper functions for operations used across multiple tools + - Build shared API clients for HTTP requests instead of duplicating code + - Centralize error handling logic in utility functions + - Extract business logic into dedicated functions that can be composed + - Extract shared markdown or JSON field selection & formatting functionality + +2. 
**Avoid Duplication**: + - NEVER copy-paste similar code between tools + - If you find yourself writing similar logic twice, extract it into a function + - Common operations like pagination, filtering, field selection, and formatting should be shared + - Authentication/authorization logic should be centralized + +### Python-Specific Best Practices + +1. **Use Type Hints**: Always include type annotations for function parameters and return values +2. **Pydantic Models**: Define clear Pydantic models for all input validation +3. **Avoid Manual Validation**: Let Pydantic handle input validation with constraints +4. **Proper Imports**: Group imports (standard library, third-party, local) +5. **Error Handling**: Use specific exception types (httpx.HTTPStatusError, not generic Exception) +6. **Async Context Managers**: Use `async with` for resources that need cleanup +7. **Constants**: Define module-level constants in UPPER_CASE + +## Quality Checklist + +Before finalizing your Python MCP server implementation, ensure: + +### Strategic Design + +- [ ] Tools enable complete workflows, not just API endpoint wrappers +- [ ] Tool names reflect natural task subdivisions +- [ ] Response formats optimize for agent context efficiency +- [ ] Human-readable identifiers used where appropriate +- [ ] Error messages guide agents toward correct usage + +### Implementation Quality + +- [ ] FOCUSED IMPLEMENTATION: Most important and valuable tools implemented +- [ ] All tools have descriptive names and documentation +- [ ] Return types are consistent across similar operations +- [ ] Error handling is implemented for all external calls +- [ ] Server name follows format: `{service}_mcp` +- [ ] All network operations use async/await +- [ ] Common functionality is extracted into reusable functions +- [ ] Error messages are clear, actionable, and educational +- [ ] Outputs are properly validated and formatted + +### Tool Configuration + +- [ ] All tools implement 'name' and 'annotations' in the decorator +- [ ] Annotations correctly set (readOnlyHint, destructiveHint, idempotentHint, openWorldHint) +- [ ] All tools use Pydantic BaseModel for input validation with Field() definitions +- [ ] All Pydantic Fields have explicit types and descriptions with constraints +- [ ] All tools have comprehensive docstrings with explicit input/output types +- [ ] Docstrings include complete schema structure for dict/JSON returns +- [ ] Pydantic models handle input validation (no manual validation needed) + +### Advanced Features (where applicable) + +- [ ] Context injection used for logging, progress, or elicitation +- [ ] Resources registered for appropriate data endpoints +- [ ] Lifespan management implemented for persistent connections +- [ ] Structured output types used (TypedDict, Pydantic models) +- [ ] Appropriate transport configured (stdio or streamable HTTP) + +### Code Quality + +- [ ] File includes proper imports including Pydantic imports +- [ ] Pagination is properly implemented where applicable +- [ ] Filtering options are provided for potentially large result sets +- [ ] All async functions are properly defined with `async def` +- [ ] HTTP client usage follows async patterns with proper context managers +- [ ] Type hints are used throughout the code +- [ ] Constants are defined at module level in UPPER_CASE + +### Testing + +- [ ] Server runs successfully: `python your_server.py --help` +- [ ] All imports resolve correctly +- [ ] Sample tool calls work as expected +- [ ] Error scenarios handled gracefully diff --git 
a/src/services/skills/built-in/mcp-builder/scripts/connections.py b/src/services/skills/built-in/mcp-builder/scripts/connections.py new file mode 100644 index 00000000000..ffcd0da3fbe --- /dev/null +++ b/src/services/skills/built-in/mcp-builder/scripts/connections.py @@ -0,0 +1,151 @@ +"""Lightweight connection handling for MCP servers.""" + +from abc import ABC, abstractmethod +from contextlib import AsyncExitStack +from typing import Any + +from mcp import ClientSession, StdioServerParameters +from mcp.client.sse import sse_client +from mcp.client.stdio import stdio_client +from mcp.client.streamable_http import streamablehttp_client + + +class MCPConnection(ABC): + """Base class for MCP server connections.""" + + def __init__(self): + self.session = None + self._stack = None + + @abstractmethod + def _create_context(self): + """Create the connection context based on connection type.""" + + async def __aenter__(self): + """Initialize MCP server connection.""" + self._stack = AsyncExitStack() + await self._stack.__aenter__() + + try: + ctx = self._create_context() + result = await self._stack.enter_async_context(ctx) + + if len(result) == 2: + read, write = result + elif len(result) == 3: + read, write, _ = result + else: + raise ValueError(f"Unexpected context result: {result}") + + session_ctx = ClientSession(read, write) + self.session = await self._stack.enter_async_context(session_ctx) + await self.session.initialize() + return self + except BaseException: + await self._stack.__aexit__(None, None, None) + raise + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """Clean up MCP server connection resources.""" + if self._stack: + await self._stack.__aexit__(exc_type, exc_val, exc_tb) + self.session = None + self._stack = None + + async def list_tools(self) -> list[dict[str, Any]]: + """Retrieve available tools from the MCP server.""" + response = await self.session.list_tools() + return [ + { + "name": tool.name, + "description": tool.description, + "input_schema": tool.inputSchema, + } + for tool in response.tools + ] + + async def call_tool(self, tool_name: str, arguments: dict[str, Any]) -> Any: + """Call a tool on the MCP server with provided arguments.""" + result = await self.session.call_tool(tool_name, arguments=arguments) + return result.content + + +class MCPConnectionStdio(MCPConnection): + """MCP connection using standard input/output.""" + + def __init__(self, command: str, args: list[str] = None, env: dict[str, str] = None): + super().__init__() + self.command = command + self.args = args or [] + self.env = env + + def _create_context(self): + return stdio_client( + StdioServerParameters(command=self.command, args=self.args, env=self.env) + ) + + +class MCPConnectionSSE(MCPConnection): + """MCP connection using Server-Sent Events.""" + + def __init__(self, url: str, headers: dict[str, str] = None): + super().__init__() + self.url = url + self.headers = headers or {} + + def _create_context(self): + return sse_client(url=self.url, headers=self.headers) + + +class MCPConnectionHTTP(MCPConnection): + """MCP connection using Streamable HTTP.""" + + def __init__(self, url: str, headers: dict[str, str] = None): + super().__init__() + self.url = url + self.headers = headers or {} + + def _create_context(self): + return streamablehttp_client(url=self.url, headers=self.headers) + + +def create_connection( + transport: str, + command: str = None, + args: list[str] = None, + env: dict[str, str] = None, + url: str = None, + headers: dict[str, str] = None, +) -> 
MCPConnection:
+    """Factory function to create the appropriate MCP connection.
+
+    Args:
+        transport: Connection type ("stdio", "sse", or "http")
+        command: Command to run (stdio only)
+        args: Command arguments (stdio only)
+        env: Environment variables (stdio only)
+        url: Server URL (sse and http only)
+        headers: HTTP headers (sse and http only)
+
+    Returns:
+        MCPConnection instance
+    """
+    transport = transport.lower()
+
+    if transport == "stdio":
+        if not command:
+            raise ValueError("Command is required for stdio transport")
+        return MCPConnectionStdio(command=command, args=args, env=env)
+
+    elif transport == "sse":
+        if not url:
+            raise ValueError("URL is required for sse transport")
+        return MCPConnectionSSE(url=url, headers=headers)
+
+    elif transport in ["http", "streamable_http", "streamable-http"]:
+        if not url:
+            raise ValueError("URL is required for http transport")
+        return MCPConnectionHTTP(url=url, headers=headers)
+
+    else:
+        raise ValueError(f"Unsupported transport type: {transport}. Use 'stdio', 'sse', or 'http'")
diff --git a/src/services/skills/built-in/mcp-builder/scripts/evaluation.py b/src/services/skills/built-in/mcp-builder/scripts/evaluation.py
new file mode 100644
index 00000000000..41778569c45
--- /dev/null
+++ b/src/services/skills/built-in/mcp-builder/scripts/evaluation.py
@@ -0,0 +1,373 @@
+"""MCP Server Evaluation Harness
+
+This script evaluates MCP servers by running test questions against them using Claude.
+"""
+
+import argparse
+import asyncio
+import json
+import re
+import sys
+import time
+import traceback
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from typing import Any
+
+from anthropic import Anthropic
+
+from connections import create_connection
+
+EVALUATION_PROMPT = """You are an AI assistant with access to tools.
+
+When given a task, you MUST:
+1. Use the available tools to complete the task
+2. Provide summary of each step in your approach, wrapped in <summary> tags
+3. Provide feedback on the tools provided, wrapped in <feedback> tags
+4. Provide your final response, wrapped in <response> tags
+
+Summary Requirements:
+- In your <summary> tags, you must explain:
+  - The steps you took to complete the task
+  - Which tools you used, in what order, and why
+  - The inputs you provided to each tool
+  - The outputs you received from each tool
+  - A summary for how you arrived at the response
+
+Feedback Requirements:
+- In your <feedback> tags, provide constructive feedback on the tools:
+  - Comment on tool names: Are they clear and descriptive?
+  - Comment on input parameters: Are they well-documented? Are required vs optional parameters clear?
+  - Comment on descriptions: Do they accurately describe what the tool does?
+  - Comment on any errors encountered during tool usage: Did the tool fail to execute? Did the tool return too many tokens?
+  - Identify specific areas for improvement and explain WHY they would help
+  - Be specific and actionable in your suggestions
+
+Response Requirements:
+- Your response should be concise and directly address what was asked
+- Always wrap your final response in <response> tags
+- If you cannot solve the task return NOT_FOUND
+- For numeric responses, provide just the number
+- For IDs, provide just the ID
+- For names or text, provide the exact text requested
+- Your response should go last"""
+
+
+def parse_evaluation_file(file_path: Path) -> list[dict[str, Any]]:
+    """Parse XML evaluation file with qa_pair elements."""
+    try:
+        tree = ET.parse(file_path)
+        root = tree.getroot()
+        evaluations = []
+
+        for qa_pair in root.findall(".//qa_pair"):
+            question_elem = qa_pair.find("question")
+            answer_elem = qa_pair.find("answer")
+
+            if question_elem is not None and answer_elem is not None:
+                evaluations.append({
+                    "question": (question_elem.text or "").strip(),
+                    "answer": (answer_elem.text or "").strip(),
+                })
+
+        return evaluations
+    except Exception as e:
+        print(f"Error parsing evaluation file {file_path}: {e}")
+        return []
+
+
+def extract_xml_content(text: str, tag: str) -> str | None:
+    """Extract content from XML tags."""
+    pattern = rf"<{tag}>(.*?)</{tag}>"
+    matches = re.findall(pattern, text, re.DOTALL)
+    return matches[-1].strip() if matches else None
+
+
+async def agent_loop(
+    client: Anthropic,
+    model: str,
+    question: str,
+    tools: list[dict[str, Any]],
+    connection: Any,
+) -> tuple[str, dict[str, Any]]:
+    """Run the agent loop with MCP tools."""
+    messages = [{"role": "user", "content": question}]
+
+    response = await asyncio.to_thread(
+        client.messages.create,
+        model=model,
+        max_tokens=4096,
+        system=EVALUATION_PROMPT,
+        messages=messages,
+        tools=tools,
+    )
+
+    messages.append({"role": "assistant", "content": response.content})
+
+    tool_metrics = {}
+
+    while response.stop_reason == "tool_use":
+        tool_use = next(block for block in response.content if block.type == "tool_use")
+        tool_name = tool_use.name
+        tool_input = tool_use.input
+
+        tool_start_ts = time.time()
+        try:
+            tool_result = await connection.call_tool(tool_name, tool_input)
+            tool_response = json.dumps(tool_result) if isinstance(tool_result, (dict, list)) else str(tool_result)
+        except Exception as e:
+            tool_response = f"Error executing tool {tool_name}: {str(e)}\n"
+            tool_response += traceback.format_exc()
+        tool_duration = time.time() - tool_start_ts
+
+        if tool_name not in tool_metrics:
+            tool_metrics[tool_name] = {"count": 0, "durations": []}
+        tool_metrics[tool_name]["count"] += 1
+        tool_metrics[tool_name]["durations"].append(tool_duration)
+
+        messages.append({
+            "role": "user",
+            "content": [{
+                "type": "tool_result",
+                "tool_use_id": tool_use.id,
+                "content": tool_response,
+            }]
+        })
+
+        response = await asyncio.to_thread(
+            client.messages.create,
+            model=model,
+            max_tokens=4096,
+            system=EVALUATION_PROMPT,
+            messages=messages,
+            tools=tools,
+        )
+        messages.append({"role": "assistant", "content": response.content})
+
+    response_text = next(
+        (block.text for block in response.content if hasattr(block, "text")),
+        None,
+    )
+    return response_text, tool_metrics
+
+
+async def evaluate_single_task(
+    client: Anthropic,
+    model: str,
+    qa_pair: dict[str, Any],
+    tools: list[dict[str, Any]],
+    connection: Any,
+    task_index: int,
+) -> dict[str, Any]:
+    """Evaluate a single QA pair with the given tools."""
+    start_time = time.time()
+
+    print(f"Task {task_index + 1}: Running task with question: {qa_pair['question']}")
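+    # agent_loop (defined above) alternates Claude calls with MCP tool calls until the model stops
+    # requesting tools, returning the final response text plus per-tool timing metrics for the report.
+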
response, tool_metrics = await agent_loop(client, model, qa_pair["question"], tools, connection) + + response_value = extract_xml_content(response, "response") + summary = extract_xml_content(response, "summary") + feedback = extract_xml_content(response, "feedback") + + duration_seconds = time.time() - start_time + + return { + "question": qa_pair["question"], + "expected": qa_pair["answer"], + "actual": response_value, + "score": int(response_value == qa_pair["answer"]) if response_value else 0, + "total_duration": duration_seconds, + "tool_calls": tool_metrics, + "num_tool_calls": sum(len(metrics["durations"]) for metrics in tool_metrics.values()), + "summary": summary, + "feedback": feedback, + } + + +REPORT_HEADER = """ +# Evaluation Report + +## Summary + +- **Accuracy**: {correct}/{total} ({accuracy:.1f}%) +- **Average Task Duration**: {average_duration_s:.2f}s +- **Average Tool Calls per Task**: {average_tool_calls:.2f} +- **Total Tool Calls**: {total_tool_calls} + +--- +""" + +TASK_TEMPLATE = """ +### Task {task_num} + +**Question**: {question} +**Ground Truth Answer**: `{expected_answer}` +**Actual Answer**: `{actual_answer}` +**Correct**: {correct_indicator} +**Duration**: {total_duration:.2f}s +**Tool Calls**: {tool_calls} + +**Summary** +{summary} + +**Feedback** +{feedback} + +--- +""" + + +async def run_evaluation( + eval_path: Path, + connection: Any, + model: str = "claude-3-7-sonnet-20250219", +) -> str: + """Run evaluation with MCP server tools.""" + print("🚀 Starting Evaluation") + + client = Anthropic() + + tools = await connection.list_tools() + print(f"📋 Loaded {len(tools)} tools from MCP server") + + qa_pairs = parse_evaluation_file(eval_path) + print(f"📋 Loaded {len(qa_pairs)} evaluation tasks") + + results = [] + for i, qa_pair in enumerate(qa_pairs): + print(f"Processing task {i + 1}/{len(qa_pairs)}") + result = await evaluate_single_task(client, model, qa_pair, tools, connection, i) + results.append(result) + + correct = sum(r["score"] for r in results) + accuracy = (correct / len(results)) * 100 if results else 0 + average_duration_s = sum(r["total_duration"] for r in results) / len(results) if results else 0 + average_tool_calls = sum(r["num_tool_calls"] for r in results) / len(results) if results else 0 + total_tool_calls = sum(r["num_tool_calls"] for r in results) + + report = REPORT_HEADER.format( + correct=correct, + total=len(results), + accuracy=accuracy, + average_duration_s=average_duration_s, + average_tool_calls=average_tool_calls, + total_tool_calls=total_tool_calls, + ) + + report += "".join([ + TASK_TEMPLATE.format( + task_num=i + 1, + question=qa_pair["question"], + expected_answer=qa_pair["answer"], + actual_answer=result["actual"] or "N/A", + correct_indicator="✅" if result["score"] else "❌", + total_duration=result["total_duration"], + tool_calls=json.dumps(result["tool_calls"], indent=2), + summary=result["summary"] or "N/A", + feedback=result["feedback"] or "N/A", + ) + for i, (qa_pair, result) in enumerate(zip(qa_pairs, results)) + ]) + + return report + + +def parse_headers(header_list: list[str]) -> dict[str, str]: + """Parse header strings in format 'Key: Value' into a dictionary.""" + headers = {} + if not header_list: + return headers + + for header in header_list: + if ":" in header: + key, value = header.split(":", 1) + headers[key.strip()] = value.strip() + else: + print(f"Warning: Ignoring malformed header: {header}") + return headers + + +def parse_env_vars(env_list: list[str]) -> dict[str, str]: + """Parse environment variable 
strings in format 'KEY=VALUE' into a dictionary."""
+    env = {}
+    if not env_list:
+        return env
+
+    for env_var in env_list:
+        if "=" in env_var:
+            key, value = env_var.split("=", 1)
+            env[key.strip()] = value.strip()
+        else:
+            print(f"Warning: Ignoring malformed environment variable: {env_var}")
+    return env
+
+
+async def main():
+    parser = argparse.ArgumentParser(
+        description="Evaluate MCP servers using test questions",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Evaluate a local stdio MCP server
+  python evaluation.py -t stdio -c python -a my_server.py eval.xml
+
+  # Evaluate an SSE MCP server
+  python evaluation.py -t sse -u https://example.com/mcp -H "Authorization: Bearer token" eval.xml
+
+  # Evaluate an HTTP MCP server with custom model
+  python evaluation.py -t http -u https://example.com/mcp -m claude-3-5-sonnet-20241022 eval.xml
+        """,
+    )
+
+    parser.add_argument("eval_file", type=Path, help="Path to evaluation XML file")
+    parser.add_argument("-t", "--transport", choices=["stdio", "sse", "http"], default="stdio", help="Transport type (default: stdio)")
+    parser.add_argument("-m", "--model", default="claude-3-7-sonnet-20250219", help="Claude model to use (default: claude-3-7-sonnet-20250219)")
+
+    stdio_group = parser.add_argument_group("stdio options")
+    stdio_group.add_argument("-c", "--command", help="Command to run MCP server (stdio only)")
+    stdio_group.add_argument("-a", "--args", nargs="+", help="Arguments for the command (stdio only)")
+    stdio_group.add_argument("-e", "--env", nargs="+", help="Environment variables in KEY=VALUE format (stdio only)")
+
+    remote_group = parser.add_argument_group("sse/http options")
+    remote_group.add_argument("-u", "--url", help="MCP server URL (sse/http only)")
+    remote_group.add_argument("-H", "--header", nargs="+", dest="headers", help="HTTP headers in 'Key: Value' format (sse/http only)")
+
+    parser.add_argument("-o", "--output", type=Path, help="Output file for evaluation report (default: stdout)")
+
+    args = parser.parse_args()
+
+    if not args.eval_file.exists():
+        print(f"Error: Evaluation file not found: {args.eval_file}")
+        sys.exit(1)
+
+    headers = parse_headers(args.headers) if args.headers else None
+    env_vars = parse_env_vars(args.env) if args.env else None
+
+    try:
+        connection = create_connection(
+            transport=args.transport,
+            command=args.command,
+            args=args.args,
+            env=env_vars,
+            url=args.url,
+            headers=headers,
+        )
+    except ValueError as e:
+        print(f"Error: {e}")
+        sys.exit(1)
+
+    print(f"🔗 Connecting to MCP server via {args.transport}...")
+
+    async with connection:
+        print("✅ Connected successfully")
+        report = await run_evaluation(args.eval_file, connection, args.model)
+
+    if args.output:
+        args.output.write_text(report)
+        print(f"\n✅ Report saved to {args.output}")
+    else:
+        print("\n" + report)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/src/services/skills/built-in/mcp-builder/scripts/example_evaluation.xml b/src/services/skills/built-in/mcp-builder/scripts/example_evaluation.xml
new file mode 100644
index 00000000000..41e4459b5af
--- /dev/null
+++ b/src/services/skills/built-in/mcp-builder/scripts/example_evaluation.xml
@@ -0,0 +1,22 @@
+<evaluation>
+	<qa_pair>
+		<question>Calculate the compound interest on $10,000 invested at 5% annual interest rate, compounded monthly for 3 years. What is the final amount in dollars (rounded to 2 decimal places)?</question>
+		<answer>11614.72</answer>
+	</qa_pair>
+	<qa_pair>
+		<question>A projectile is launched at a 45-degree angle with an initial velocity of 50 m/s. Calculate the total distance (in meters) it has traveled from the launch point after 2 seconds, assuming g=9.8 m/s². Round to 2 decimal places.</question>
+		<answer>87.25</answer>
+	</qa_pair>
+	<qa_pair>
+		<question>A sphere has a volume of 500 cubic meters. Calculate its surface area in square meters. Round to 2 decimal places.</question>
+		<answer>304.65</answer>
+	</qa_pair>
+	<qa_pair>
+		<question>Calculate the population standard deviation of this dataset: [12, 15, 18, 22, 25, 30, 35]. Round to 2 decimal places.</question>
+		<answer>7.61</answer>
+	</qa_pair>
+	<qa_pair>
+		<question>Calculate the pH of a solution with a hydrogen ion concentration of 3.5 × 10^-5 M. Round to 2 decimal places.</question>
+		<answer>4.46</answer>
+	</qa_pair>
+</evaluation>
diff --git a/src/services/skills/built-in/mcp-builder/scripts/requirements.txt b/src/services/skills/built-in/mcp-builder/scripts/requirements.txt
new file mode 100644
index 00000000000..e73e5d1e356
--- /dev/null
+++ b/src/services/skills/built-in/mcp-builder/scripts/requirements.txt
@@ -0,0 +1,2 @@
+anthropic>=0.39.0
+mcp>=1.1.0
diff --git a/src/services/skills/generate-built-in-skills.ts b/src/services/skills/generate-built-in-skills.ts
new file mode 100644
index 00000000000..0efa43b10e7
--- /dev/null
+++ b/src/services/skills/generate-built-in-skills.ts
@@ -0,0 +1,289 @@
+#!/usr/bin/env tsx
+/**
+ * Build script to generate built-in-skills.ts from SKILL.md files.
+ *
+ * This script scans the built-in/ directory for skill folders, parses each
+ * SKILL.md file using gray-matter, validates the frontmatter, and generates
+ * the built-in-skills.ts file.
+ *
+ * Run with: npx tsx src/services/skills/generate-built-in-skills.ts
+ */
+
+import * as fs from "fs/promises"
+import * as path from "path"
+import matter from "gray-matter"
+
+const BUILT_IN_DIR = path.join(__dirname, "built-in")
+const OUTPUT_FILE = path.join(__dirname, "built-in-skills.ts")
+
+interface SkillData {
+	name: string
+	description: string
+	instructions: string
+}
+
+interface ValidationError {
+	skillDir: string
+	errors: string[]
+}
+
+/**
+ * Validate a skill name according to Agent Skills spec:
+ * - 1-64 characters
+ * - lowercase letters, numbers, and hyphens only
+ * - must not start/end with hyphen
+ * - must not contain consecutive hyphens
+ */
+function validateSkillName(name: string): string[] {
+	const errors: string[] = []
+
+	if (name.length < 1 || name.length > 64) {
+		errors.push(`Name must be 1-64 characters (got ${name.length})`)
+	}
+
+	const nameFormat = /^[a-z0-9]+(?:-[a-z0-9]+)*$/
+	if (!nameFormat.test(name)) {
+		errors.push(
+			"Name must be lowercase letters/numbers/hyphens only (no leading/trailing hyphen, no consecutive hyphens)",
+		)
+	}
+
+	return errors
+}
+
+/**
+ * Validate a skill description:
+ * - 1-1024 characters (after trimming)
+ */
+function validateDescription(description: string): string[] {
+	const errors: string[] = []
+	const trimmed = description.trim()
+
+	if (trimmed.length < 1 || trimmed.length > 1024) {
+		errors.push(`Description must be 1-1024 characters (got ${trimmed.length})`)
+	}
+
+	return errors
+}
+
+/**
+ * Parse and validate a single SKILL.md file
+ */
+async function parseSkillFile(
+	skillDir: string,
+	dirName: string,
+): Promise<{ skill?: SkillData; errors?: ValidationError }> {
+	const skillMdPath = path.join(skillDir, "SKILL.md")
+
+	try {
+		const fileContent = await fs.readFile(skillMdPath, "utf-8")
+		const { data: frontmatter, content: body } = matter(fileContent)
+
+		const errors: string[] = []
+
+		// Validate required fields
+		if (!frontmatter.name || typeof frontmatter.name !== "string") {
+			errors.push("Missing required 'name' field in frontmatter")
+		}
+		if (!frontmatter.description || typeof frontmatter.description
!== "string") {
+			errors.push("Missing required 'description' field in frontmatter")
+		}
+
+		if (errors.length > 0) {
+			return { errors: { skillDir, errors } }
+		}
+
+		// Validate name matches directory name
+		if (frontmatter.name !== dirName) {
+			errors.push(`Frontmatter name "${frontmatter.name}" doesn't match directory name "${dirName}"`)
+		}
+
+		// Validate name format
+		errors.push(...validateSkillName(dirName))
+
+		// Validate description
+		errors.push(...validateDescription(frontmatter.description))
+
+		if (errors.length > 0) {
+			return { errors: { skillDir, errors } }
+		}
+
+		return {
+			skill: {
+				name: frontmatter.name,
+				description: frontmatter.description.trim(),
+				instructions: body.trim(),
+			},
+		}
+	} catch (error) {
+		return {
+			errors: {
+				skillDir,
+				errors: [`Failed to read or parse SKILL.md: ${error instanceof Error ? error.message : String(error)}`],
+			},
+		}
+	}
+}
+
+/**
+ * Escape a string for use in TypeScript template literal
+ */
+function escapeForTemplateLiteral(str: string): string {
+	return str.replace(/\\/g, "\\\\").replace(/`/g, "\\`").replace(/\$\{/g, "\\${")
+}
+
+/**
+ * Generate the TypeScript code for built-in-skills.ts
+ */
+function generateTypeScript(skills: Record<string, SkillData>): string {
+	const skillEntries = Object.entries(skills)
+		.map(([key, skill]) => {
+			const escapedInstructions = escapeForTemplateLiteral(skill.instructions)
+			return `\t"${key}": {
+		name: "${skill.name}",
+		description: "${skill.description.replace(/"/g, '\\"')}",
+		instructions: \`${escapedInstructions}\`,
+	}`
+		})
+		.join(",\n")
+
+	return `/**
+ * AUTO-GENERATED FILE - DO NOT EDIT DIRECTLY
+ *
+ * This file is generated by generate-built-in-skills.ts from the SKILL.md files
+ * in the built-in/ directory. To modify built-in skills, edit the corresponding
+ * SKILL.md file and run: pnpm generate:skills
+ *
+ * Generated at: ${new Date().toISOString()}
+ */
+
+import { SkillMetadata, SkillContent } from "../../shared/skills"
+
+interface BuiltInSkillDefinition {
+	name: string
+	description: string
+	instructions: string
+}
+
+const BUILT_IN_SKILLS: Record<string, BuiltInSkillDefinition> = {
+${skillEntries}
+}
+
+/**
+ * Get all built-in skills as SkillMetadata objects
+ */
+export function getBuiltInSkills(): SkillMetadata[] {
+	return Object.values(BUILT_IN_SKILLS).map((skill) => ({
+		name: skill.name,
+		description: skill.description,
+		path: "built-in",
+		source: "built-in" as const,
+	}))
+}
+
+/**
+ * Get a specific built-in skill's full content by name
+ */
+export function getBuiltInSkillContent(name: string): SkillContent | null {
+	const skill = BUILT_IN_SKILLS[name]
+	if (!skill) return null
+
+	return {
+		name: skill.name,
+		description: skill.description,
+		path: "built-in",
+		source: "built-in" as const,
+		instructions: skill.instructions,
+	}
+}
+
+/**
+ * Check if a skill name is a built-in skill
+ */
+export function isBuiltInSkill(name: string): boolean {
+	return name in BUILT_IN_SKILLS
+}
+
+/**
+ * Get names of all built-in skills
+ */
+export function getBuiltInSkillNames(): string[] {
+	return Object.keys(BUILT_IN_SKILLS)
+}
+`
+}
+
+async function main() {
+	console.log("Generating built-in skills from SKILL.md files...")
+
+	// Check if built-in directory exists
+	try {
+		await fs.access(BUILT_IN_DIR)
+	} catch {
+		console.error(`Error: Built-in skills directory not found: ${BUILT_IN_DIR}`)
+		process.exit(1)
+	}
+
+	// Scan for skill directories
+	const entries = await fs.readdir(BUILT_IN_DIR)
+	const skills: Record<string, SkillData> = {}
+	const validationErrors: ValidationError[] = []
+
+	for (const
entry of entries) { + const skillDir = path.join(BUILT_IN_DIR, entry) + const stats = await fs.stat(skillDir) + + if (!stats.isDirectory()) { + continue + } + + // Check if SKILL.md exists + const skillMdPath = path.join(skillDir, "SKILL.md") + try { + await fs.access(skillMdPath) + } catch { + console.warn(`Warning: No SKILL.md found in ${entry}, skipping`) + continue + } + + const result = await parseSkillFile(skillDir, entry) + + if (result.errors) { + validationErrors.push(result.errors) + } else if (result.skill) { + skills[entry] = result.skill + console.log(` ✓ Parsed ${entry}`) + } + } + + // Report validation errors + if (validationErrors.length > 0) { + console.error("\nValidation errors:") + for (const { skillDir, errors } of validationErrors) { + console.error(`\n ${path.basename(skillDir)}:`) + for (const error of errors) { + console.error(` - ${error}`) + } + } + process.exit(1) + } + + // Check if any skills were found + if (Object.keys(skills).length === 0) { + console.error("Error: No valid skills found in built-in directory") + process.exit(1) + } + + // Generate TypeScript + const output = generateTypeScript(skills) + + // Write output file + await fs.writeFile(OUTPUT_FILE, output, "utf-8") + console.log(`\n✓ Generated ${OUTPUT_FILE}`) + console.log(` Skills: ${Object.keys(skills).join(", ")}`) +} + +main().catch((error) => { + console.error("Fatal error:", error) + process.exit(1) +}) From 7c2f3c4360a98ee75eb0b7c043f5bfbcf8c877d2 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 28 Jan 2026 15:42:27 -0700 Subject: [PATCH 06/14] fix: run prettier during skill generation to ensure stable output The generator now runs prettier on the output file after generation. This ensures the generated file is already formatted and won't change when the pre-commit hook runs prettier via lint-staged. --- src/services/skills/built-in-skills.ts | 128 +++++++++++------- .../skills/generate-built-in-skills.ts | 31 +++-- 2 files changed, 98 insertions(+), 61 deletions(-) diff --git a/src/services/skills/built-in-skills.ts b/src/services/skills/built-in-skills.ts index 64024439e7d..afe479942c6 100644 --- a/src/services/skills/built-in-skills.ts +++ b/src/services/skills/built-in-skills.ts @@ -5,7 +5,7 @@ * in the built-in/ directory. To modify built-in skills, edit the corresponding * SKILL.md file and run: pnpm generate:skills * - * Generated at: 2026-01-24T03:15:43.562Z + * Generated at: 2026-01-28T22:42:03.971Z */ import { SkillMetadata, SkillContent } from "../../shared/skills" @@ -22,53 +22,57 @@ const BUILT_IN_SKILLS: Record = { description: "Instructions for creating custom modes in Roo Code. Use when the user asks to create a new mode, edit an existing mode, or configure mode settings.", instructions: `Custom modes can be configured in two ways: - 1. Globally via the custom modes file in your Roo Code settings directory (typically ~/.roo-code/settings/custom_modes.yaml on macOS/Linux or %APPDATA%\\roo-code\\settings\\custom_modes.yaml on Windows) - created automatically on startup - 2. Per-workspace via '.roomodes' in the workspace root directory + +1. Globally via the custom modes file in your Roo Code settings directory (typically ~/.roo-code/settings/custom_modes.yaml on macOS/Linux or %APPDATA%\\roo-code\\settings\\custom_modes.yaml on Windows) - created automatically on startup +2. Per-workspace via '.roomodes' in the workspace root directory When modes with the same slug exist in both files, the workspace-specific .roomodes version takes precedence. 
This allows projects to override global modes or define project-specific modes. If asked to create a project mode, create it in .roomodes in the workspace root. If asked to create a global mode, use the global custom modes file. - The following fields are required and must not be empty: - * slug: A valid slug (lowercase letters, numbers, and hyphens). Must be unique, and shorter is better. - * name: The display name for the mode - * roleDefinition: A detailed description of the mode's role and capabilities - * groups: Array of allowed tool groups (can be empty). Each group can be specified either as a string (e.g., "edit" to allow editing any file) or with file restrictions (e.g., ["edit", { fileRegex: "\\.md$", description: "Markdown files only" }] to only allow editing markdown files) + + - slug: A valid slug (lowercase letters, numbers, and hyphens). Must be unique, and shorter is better. + - name: The display name for the mode + - roleDefinition: A detailed description of the mode's role and capabilities + - groups: Array of allowed tool groups (can be empty). Each group can be specified either as a string (e.g., "edit" to allow editing any file) or with file restrictions (e.g., ["edit", { fileRegex: "\\.md$", description: "Markdown files only" }] to only allow editing markdown files) - The following fields are optional but highly recommended: - * description: A short, human-readable description of what this mode does (5 words) - * whenToUse: A clear description of when this mode should be selected and what types of tasks it's best suited for. This helps the Orchestrator mode make better decisions. - * customInstructions: Additional instructions for how the mode should operate + + - description: A short, human-readable description of what this mode does (5 words) + - whenToUse: A clear description of when this mode should be selected and what types of tasks it's best suited for. This helps the Orchestrator mode make better decisions. + - customInstructions: Additional instructions for how the mode should operate - For multi-line text, include newline characters in the string like "This is the first line.\\nThis is the next line.\\n\\nThis is a double line break." Both files should follow this structure (in YAML format): customModes: - - slug: designer # Required: unique slug with lowercase letters, numbers, and hyphens - name: Designer # Required: mode display name - description: UI/UX design systems expert # Optional but recommended: short description (5 words) - roleDefinition: >- - You are Roo, a UI/UX expert specializing in design systems and frontend development. Your expertise includes: - - Creating and maintaining design systems - - Implementing responsive and accessible web interfaces - - Working with CSS, HTML, and modern frontend frameworks - - Ensuring consistent user experiences across platforms # Required: non-empty - whenToUse: >- + +- slug: designer # Required: unique slug with lowercase letters, numbers, and hyphens + name: Designer # Required: mode display name + description: UI/UX design systems expert # Optional but recommended: short description (5 words) + roleDefinition: >- + You are Roo, a UI/UX expert specializing in design systems and frontend development. 
Your expertise includes: + - Creating and maintaining design systems + - Implementing responsive and accessible web interfaces + - Working with CSS, HTML, and modern frontend frameworks + - Ensuring consistent user experiences across platforms # Required: non-empty + whenToUse: >- Use this mode when creating or modifying UI components, implementing design systems, or ensuring responsive web interfaces. This mode is especially effective with CSS, - HTML, and modern frontend frameworks. # Optional but recommended - groups: # Required: array of tool groups (can be empty) - - read # Read files group (read_file, search_files, list_files, codebase_search) - - edit # Edit files group (apply_diff, write_to_file) - allows editing any file - # Or with file restrictions: - # - - edit - # - fileRegex: \\.md$ - # description: Markdown files only # Edit group that only allows editing markdown files - - browser # Browser group (browser_action) - - command # Command group (execute_command) - - mcp # MCP group (use_mcp_tool, access_mcp_resource) - customInstructions: Additional instructions for the Designer mode # Optional`, + HTML, and modern frontend frameworks. # Optional but recommended + groups: # Required: array of tool groups (can be empty) + - read # Read files group (read_file, search_files, list_files, codebase_search) + - edit # Edit files group (apply_diff, write_to_file) - allows editing any file + # Or with file restrictions: + # - - edit + # - fileRegex: \\.md$ + # description: Markdown files only # Edit group that only allows editing markdown files + - browser # Browser group (browser_action) + - command # Command group (execute_command) + - mcp # MCP group (use_mcp_tool, access_mcp_resource) + customInstructions: Additional instructions for the Designer mode # Optional`, }, "mcp-builder": { name: "mcp-builder", @@ -113,6 +117,7 @@ Start with the sitemap to find relevant pages: \`https://modelcontextprotocol.io Then fetch specific pages with \`.md\` suffix for markdown format (e.g., \`https://modelcontextprotocol.io/specification/draft.md\`). Key pages to review: + - Specification overview and architecture - Transport mechanisms (streamable HTTP, stdio) - Tool, resource, and prompt definitions @@ -120,6 +125,7 @@ Key pages to review: #### 1.3 Study Framework Documentation **Recommended stack:** + - **Language**: TypeScript (high-quality SDK support and good compatibility in many execution environments e.g. MCPB. Plus AI models are good at generating TypeScript code, benefiting from its broad usage, static typing and good linting tools) - **Transport**: Streamable HTTP for remote servers, using stateless JSON (simpler to scale and maintain, as opposed to stateful sessions and streaming responses). stdio for local servers. @@ -128,10 +134,12 @@ Key pages to review: - **MCP Best Practices**: [📋 View Best Practices](./reference/mcp_best_practices.md) - Core guidelines **For TypeScript (recommended):** + - **TypeScript SDK**: Use WebFetch to load \`https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md\` - [⚡ TypeScript Guide](./reference/node_mcp_server.md) - TypeScript patterns and examples **For Python:** + - **Python SDK**: Use WebFetch to load \`https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md\` - [🐍 Python Guide](./reference/python_mcp_server.md) - Python patterns and examples @@ -150,12 +158,14 @@ Prioritize comprehensive API coverage. 
List endpoints to implement, starting wit #### 2.1 Set Up Project Structure See language-specific guides for project setup: + - [⚡ TypeScript Guide](./reference/node_mcp_server.md) - Project structure, package.json, tsconfig.json - [🐍 Python Guide](./reference/python_mcp_server.md) - Module organization, dependencies #### 2.2 Implement Core Infrastructure Create shared utilities: + - API client with authentication - Error handling helpers - Response formatting (JSON/Markdown) @@ -166,27 +176,32 @@ Create shared utilities: For each tool: **Input Schema:** + - Use Zod (TypeScript) or Pydantic (Python) - Include constraints and clear descriptions - Add examples in field descriptions **Output Schema:** + - Define \`outputSchema\` where possible for structured data - Use \`structuredContent\` in tool responses (TypeScript SDK feature) - Helps clients understand and process tool outputs **Tool Description:** + - Concise summary of functionality - Parameter descriptions - Return type schema **Implementation:** + - Async/await for I/O operations - Proper error handling with actionable messages - Support pagination where applicable - Return both text content and structured data when using modern SDKs **Annotations:** + - \`readOnlyHint\`: true/false - \`destructiveHint\`: true/false - \`idempotentHint\`: true/false @@ -199,6 +214,7 @@ For each tool: #### 3.1 Code Quality Review for: + - No duplicated code (DRY principle) - Consistent error handling - Full type coverage @@ -207,10 +223,12 @@ Review for: #### 3.2 Build and Test **TypeScript:** + - Run \`npm run build\` to verify compilation - Test with MCP Inspector: \`npx @modelcontextprotocol/inspector\` **Python:** + - Verify syntax: \`python -m py_compile your_server.py\` - Test with MCP Inspector @@ -240,6 +258,7 @@ To create effective evaluations, follow the process outlined in the evaluation g #### 4.3 Evaluation Requirements Ensure each question is: + - **Independent**: Not dependent on other questions - **Read-only**: Only non-destructive operations required - **Complex**: Requiring multiple tool calls and deep exploration @@ -270,40 +289,45 @@ Create an XML file with this structure: Load these resources as needed during development: ### Core MCP Documentation (Load First) + - **MCP Protocol**: Start with sitemap at \`https://modelcontextprotocol.io/sitemap.xml\`, then fetch specific pages with \`.md\` suffix - [📋 MCP Best Practices](./reference/mcp_best_practices.md) - Universal MCP guidelines including: - - Server and tool naming conventions - - Response format guidelines (JSON vs Markdown) - - Pagination best practices - - Transport selection (streamable HTTP vs stdio) - - Security and error handling standards + - Server and tool naming conventions + - Response format guidelines (JSON vs Markdown) + - Pagination best practices + - Transport selection (streamable HTTP vs stdio) + - Security and error handling standards ### SDK Documentation (Load During Phase 1/2) + - **Python SDK**: Fetch from \`https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md\` - **TypeScript SDK**: Fetch from \`https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md\` ### Language-Specific Implementation Guides (Load During Phase 2) + - [🐍 Python Implementation Guide](./reference/python_mcp_server.md) - Complete Python/FastMCP guide with: - - Server initialization patterns - - Pydantic model examples - - Tool registration with \`@mcp.tool\` - - Complete working examples - - Quality checklist + + - Server 
initialization patterns + - Pydantic model examples + - Tool registration with \`@mcp.tool\` + - Complete working examples + - Quality checklist - [⚡ TypeScript Implementation Guide](./reference/node_mcp_server.md) - Complete TypeScript guide with: - - Project structure - - Zod schema patterns - - Tool registration with \`server.registerTool\` - - Complete working examples - - Quality checklist + - Project structure + - Zod schema patterns + - Tool registration with \`server.registerTool\` + - Complete working examples + - Quality checklist ### Evaluation Guide (Load During Phase 4) + - [✅ Evaluation Guide](./reference/evaluation.md) - Complete evaluation creation guide with: - - Question creation guidelines - - Answer verification strategies - - XML format specifications - - Example questions and answers - - Running an evaluation with the provided scripts`, + - Question creation guidelines + - Answer verification strategies + - XML format specifications + - Example questions and answers + - Running an evaluation with the provided scripts`, }, } diff --git a/src/services/skills/generate-built-in-skills.ts b/src/services/skills/generate-built-in-skills.ts index 0efa43b10e7..a1fb0fcb108 100644 --- a/src/services/skills/generate-built-in-skills.ts +++ b/src/services/skills/generate-built-in-skills.ts @@ -11,6 +11,7 @@ import * as fs from "fs/promises" import * as path from "path" +import { execSync } from "child_process" import matter from "gray-matter" const BUILT_IN_DIR = path.join(__dirname, "built-in") @@ -148,14 +149,14 @@ function generateTypeScript(skills: Record): string { .join(",\n") return `/** - * AUTO-GENERATED FILE - DO NOT EDIT DIRECTLY - * - * This file is generated by generate-built-in-skills.ts from the SKILL.md files - * in the built-in/ directory. To modify built-in skills, edit the corresponding - * SKILL.md file and run: pnpm generate:skills - * - * Generated at: ${new Date().toISOString()} - */ + * AUTO-GENERATED FILE - DO NOT EDIT DIRECTLY + * + * This file is generated by generate-built-in-skills.ts from the SKILL.md files + * in the built-in/ directory. 
To modify built-in skills, edit the corresponding + * SKILL.md file and run: pnpm generate:skills + * + * Generated at: ${new Date().toISOString()} + */ import { SkillMetadata, SkillContent } from "../../shared/skills" @@ -279,7 +280,19 @@ async function main() { // Write output file await fs.writeFile(OUTPUT_FILE, output, "utf-8") - console.log(`\n✓ Generated ${OUTPUT_FILE}`) + + // Format with prettier to ensure stable output + // Run from workspace root (3 levels up from src/services/skills/) to find .prettierrc.json + const workspaceRoot = path.resolve(__dirname, "..", "..", "..") + try { + execSync(`npx prettier --write "${OUTPUT_FILE}"`, { + cwd: workspaceRoot, + stdio: "pipe", + }) + console.log(`\n✓ Generated and formatted ${OUTPUT_FILE}`) + } catch { + console.log(`\n✓ Generated ${OUTPUT_FILE} (prettier not available)`) + } console.log(` Skills: ${Object.keys(skills).join(", ")}`) } From 8abe3d3477d34cc3f3aff3aa0f6f252515610d1e Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 28 Jan 2026 16:10:49 -0700 Subject: [PATCH 07/14] refactor: rename mcp-builder to create-mcp-server with original content - Rename built-in skill from 'mcp-builder' to 'create-mcp-server' - Replace enhanced skill content with original fetch_instructions content - Convert dynamic paths to static/generic instructions - Remove mcp-builder reference docs, scripts, and LICENSE - Update test to expect create-mcp-server instead of mcp-builder The enhanced mcp-builder skill will be added in a separate PR. --- .roo/skills/mcp-builder/LICENSE.txt | 202 ---- .roo/skills/mcp-builder/SKILL.md | 256 ----- .../mcp-builder/reference/evaluation.md | 642 ------------ .../reference/mcp_best_practices.md | 269 ----- .../mcp-builder/reference/node_mcp_server.md | 975 ------------------ .../reference/python_mcp_server.md | 738 ------------- .../skills/mcp-builder/scripts/connections.py | 151 --- .roo/skills/mcp-builder/scripts/evaluation.py | 373 ------- .../scripts/example_evaluation.xml | 22 - .../mcp-builder/scripts/requirements.txt | 2 - .../generate-built-in-skills.spec.ts | 2 +- src/services/skills/built-in-skills.ts | 566 +++++----- .../built-in/create-mcp-server/SKILL.md | 304 ++++++ .../skills/built-in/mcp-builder/LICENSE.txt | 202 ---- .../skills/built-in/mcp-builder/SKILL.md | 256 ----- .../mcp-builder/reference/evaluation.md | 642 ------------ .../reference/mcp_best_practices.md | 269 ----- .../mcp-builder/reference/node_mcp_server.md | 975 ------------------ .../reference/python_mcp_server.md | 738 ------------- .../mcp-builder/scripts/connections.py | 151 --- .../mcp-builder/scripts/evaluation.py | 373 ------- .../scripts/example_evaluation.xml | 22 - .../mcp-builder/scripts/requirements.txt | 2 - 23 files changed, 615 insertions(+), 7517 deletions(-) delete mode 100644 .roo/skills/mcp-builder/LICENSE.txt delete mode 100644 .roo/skills/mcp-builder/SKILL.md delete mode 100644 .roo/skills/mcp-builder/reference/evaluation.md delete mode 100644 .roo/skills/mcp-builder/reference/mcp_best_practices.md delete mode 100644 .roo/skills/mcp-builder/reference/node_mcp_server.md delete mode 100644 .roo/skills/mcp-builder/reference/python_mcp_server.md delete mode 100644 .roo/skills/mcp-builder/scripts/connections.py delete mode 100644 .roo/skills/mcp-builder/scripts/evaluation.py delete mode 100644 .roo/skills/mcp-builder/scripts/example_evaluation.xml delete mode 100644 .roo/skills/mcp-builder/scripts/requirements.txt create mode 100644 src/services/skills/built-in/create-mcp-server/SKILL.md delete mode 100644 
src/services/skills/built-in/mcp-builder/LICENSE.txt delete mode 100644 src/services/skills/built-in/mcp-builder/SKILL.md delete mode 100644 src/services/skills/built-in/mcp-builder/reference/evaluation.md delete mode 100644 src/services/skills/built-in/mcp-builder/reference/mcp_best_practices.md delete mode 100644 src/services/skills/built-in/mcp-builder/reference/node_mcp_server.md delete mode 100644 src/services/skills/built-in/mcp-builder/reference/python_mcp_server.md delete mode 100644 src/services/skills/built-in/mcp-builder/scripts/connections.py delete mode 100644 src/services/skills/built-in/mcp-builder/scripts/evaluation.py delete mode 100644 src/services/skills/built-in/mcp-builder/scripts/example_evaluation.xml delete mode 100644 src/services/skills/built-in/mcp-builder/scripts/requirements.txt diff --git a/.roo/skills/mcp-builder/LICENSE.txt b/.roo/skills/mcp-builder/LICENSE.txt deleted file mode 100644 index 7a4a3ea2424..00000000000 --- a/.roo/skills/mcp-builder/LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/.roo/skills/mcp-builder/SKILL.md b/.roo/skills/mcp-builder/SKILL.md deleted file mode 100644 index bd45c9c8791..00000000000 --- a/.roo/skills/mcp-builder/SKILL.md +++ /dev/null @@ -1,256 +0,0 @@ ---- -name: mcp-builder -description: Guide for creating high-quality MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. Use when building MCP servers to integrate external APIs or services, whether in Python (FastMCP) or Node/TypeScript (MCP SDK). -license: Complete terms in LICENSE.txt ---- - -# MCP Server Development Guide - -## Overview - -Create MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. The quality of an MCP server is measured by how well it enables LLMs to accomplish real-world tasks. - ---- - -# Process - -## 🚀 High-Level Workflow - -Creating a high-quality MCP server involves four main phases: - -### Phase 1: Deep Research and Planning - -#### 1.1 Understand Modern MCP Design - -**API Coverage vs. Workflow Tools:** -Balance comprehensive API endpoint coverage with specialized workflow tools. Workflow tools can be more convenient for specific tasks, while comprehensive coverage gives agents flexibility to compose operations. Performance varies by client—some clients benefit from code execution that combines basic tools, while others work better with higher-level workflows. When uncertain, prioritize comprehensive API coverage. - -**Tool Naming and Discoverability:** -Clear, descriptive tool names help agents find the right tools quickly. Use consistent prefixes (e.g., `github_create_issue`, `github_list_repos`) and action-oriented naming. - -**Context Management:** -Agents benefit from concise tool descriptions and the ability to filter/paginate results. Design tools that return focused, relevant data. Some clients support code execution which can help agents filter and process data efficiently. - -**Actionable Error Messages:** -Error messages should guide agents toward solutions with specific suggestions and next steps. 
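The points above (prefixed, action-oriented names, focused results, actionable errors) are easiest to see in a concrete failure path. A minimal sketch, not part of the original skill: the helper name, parameter names, and message text are invented, and the result shape is simplified to the text-content form used later in this guide.

```typescript
// Hypothetical error helper for an MCP tool result: the message states what failed
// and suggests a concrete next step (narrow the query, or page with limit/offset).
type ToolResult = { isError?: boolean; content: { type: "text"; text: string }[] }

function tooManyResultsError(returned: number, limit: number): ToolResult {
	return {
		isError: true,
		content: [
			{
				type: "text",
				text:
					`Error: query matched ${returned} items, more than the ${limit}-item response limit. ` +
					`Retry with a narrower 'query', or page through results using 'limit' and 'offset'.`,
			},
		],
	}
}

console.log(tooManyResultsError(500, 50).content[0].text)
```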
- -#### 1.2 Study MCP Protocol Documentation - -**Navigate the MCP specification:** - -Start with the sitemap to find relevant pages: `https://modelcontextprotocol.io/sitemap.xml` - -Then fetch specific pages with `.md` suffix for markdown format (e.g., `https://modelcontextprotocol.io/specification/draft.md`). - -Key pages to review: - -- Specification overview and architecture -- Transport mechanisms (streamable HTTP, stdio) -- Tool, resource, and prompt definitions - -#### 1.3 Study Framework Documentation - -**Recommended stack:** - -- **Language**: TypeScript (high-quality SDK support and good compatibility in many execution environments e.g. MCPB. Plus AI models are good at generating TypeScript code, benefiting from its broad usage, static typing and good linting tools) -- **Transport**: Streamable HTTP for remote servers, using stateless JSON (simpler to scale and maintain, as opposed to stateful sessions and streaming responses). stdio for local servers. - -**Load framework documentation:** - -- **MCP Best Practices**: [📋 View Best Practices](./reference/mcp_best_practices.md) - Core guidelines - -**For TypeScript (recommended):** - -- **TypeScript SDK**: Use WebFetch to load `https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md` -- [⚡ TypeScript Guide](./reference/node_mcp_server.md) - TypeScript patterns and examples - -**For Python:** - -- **Python SDK**: Use WebFetch to load `https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md` -- [🐍 Python Guide](./reference/python_mcp_server.md) - Python patterns and examples - -#### 1.4 Plan Your Implementation - -**Understand the API:** -Review the service's API documentation to identify key endpoints, authentication requirements, and data models. Use web search and WebFetch as needed. - -**Tool Selection:** -Prioritize comprehensive API coverage. List endpoints to implement, starting with the most common operations. 
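Before moving to implementation, it can help to capture the Phase 1 output as data. A minimal sketch under stated assumptions: the service, endpoints, and tool names below are purely illustrative and are not part of the original skill.

```typescript
// Hypothetical implementation plan produced at the end of Phase 1: one entry per
// planned tool, with a service-prefixed snake_case name and the endpoint it wraps.
interface PlannedTool {
	name: string // service-prefixed, snake_case
	endpoint: string // API endpoint the tool will call
	readOnly: boolean // informs the readOnlyHint annotation later
}

const plan: PlannedTool[] = [
	{ name: "example_list_projects", endpoint: "GET /projects", readOnly: true },
	{ name: "example_get_project", endpoint: "GET /projects/{id}", readOnly: true },
	{ name: "example_create_task", endpoint: "POST /tasks", readOnly: false },
]

console.log(`Planned tools: ${plan.map((t) => t.name).join(", ")}`)
```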
- ---- - -### Phase 2: Implementation - -#### 2.1 Set Up Project Structure - -See language-specific guides for project setup: - -- [⚡ TypeScript Guide](./reference/node_mcp_server.md) - Project structure, package.json, tsconfig.json -- [🐍 Python Guide](./reference/python_mcp_server.md) - Module organization, dependencies - -#### 2.2 Implement Core Infrastructure - -Create shared utilities: - -- API client with authentication -- Error handling helpers -- Response formatting (JSON/Markdown) -- Pagination support - -#### 2.3 Implement Tools - -For each tool: - -**Input Schema:** - -- Use Zod (TypeScript) or Pydantic (Python) -- Include constraints and clear descriptions -- Add examples in field descriptions - -**Output Schema:** - -- Define `outputSchema` where possible for structured data -- Use `structuredContent` in tool responses (TypeScript SDK feature) -- Helps clients understand and process tool outputs - -**Tool Description:** - -- Concise summary of functionality -- Parameter descriptions -- Return type schema - -**Implementation:** - -- Async/await for I/O operations -- Proper error handling with actionable messages -- Support pagination where applicable -- Return both text content and structured data when using modern SDKs - -**Annotations:** - -- `readOnlyHint`: true/false -- `destructiveHint`: true/false -- `idempotentHint`: true/false -- `openWorldHint`: true/false - ---- - -### Phase 3: Review and Test - -#### 3.1 Code Quality - -Review for: - -- No duplicated code (DRY principle) -- Consistent error handling -- Full type coverage -- Clear tool descriptions - -#### 3.2 Build and Test - -**TypeScript:** - -- Run `npm run build` to verify compilation -- Test with MCP Inspector: `npx @modelcontextprotocol/inspector` - -**Python:** - -- Verify syntax: `python -m py_compile your_server.py` -- Test with MCP Inspector - -See language-specific guides for detailed testing approaches and quality checklists. - ---- - -### Phase 4: Create Evaluations - -After implementing your MCP server, create comprehensive evaluations to test its effectiveness. - -**Load [✅ Evaluation Guide](./reference/evaluation.md) for complete evaluation guidelines.** - -#### 4.1 Understand Evaluation Purpose - -Use evaluations to test whether LLMs can effectively use your MCP server to answer realistic, complex questions. - -#### 4.2 Create 10 Evaluation Questions - -To create effective evaluations, follow the process outlined in the evaluation guide: - -1. **Tool Inspection**: List available tools and understand their capabilities -2. **Content Exploration**: Use READ-ONLY operations to explore available data -3. **Question Generation**: Create 10 complex, realistic questions -4. **Answer Verification**: Solve each question yourself to verify answers - -#### 4.3 Evaluation Requirements - -Ensure each question is: - -- **Independent**: Not dependent on other questions -- **Read-only**: Only non-destructive operations required -- **Complex**: Requiring multiple tool calls and deep exploration -- **Realistic**: Based on real use cases humans would care about -- **Verifiable**: Single, clear answer that can be verified by string comparison -- **Stable**: Answer won't change over time - -#### 4.4 Output Format - -Create an XML file with this structure: - -```xml - - - Find discussions about AI model launches with animal codenames. One model needed a specific safety designation that uses the format ASL-X. What number X was being determined for the model named after a spotted wild cat? 
- 3 - - - -``` - ---- - -# Reference Files - -## 📚 Documentation Library - -Load these resources as needed during development: - -### Core MCP Documentation (Load First) - -- **MCP Protocol**: Start with sitemap at `https://modelcontextprotocol.io/sitemap.xml`, then fetch specific pages with `.md` suffix -- [📋 MCP Best Practices](./reference/mcp_best_practices.md) - Universal MCP guidelines including: - - Server and tool naming conventions - - Response format guidelines (JSON vs Markdown) - - Pagination best practices - - Transport selection (streamable HTTP vs stdio) - - Security and error handling standards - -### SDK Documentation (Load During Phase 1/2) - -- **Python SDK**: Fetch from `https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md` -- **TypeScript SDK**: Fetch from `https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md` - -### Language-Specific Implementation Guides (Load During Phase 2) - -- [🐍 Python Implementation Guide](./reference/python_mcp_server.md) - Complete Python/FastMCP guide with: - - - Server initialization patterns - - Pydantic model examples - - Tool registration with `@mcp.tool` - - Complete working examples - - Quality checklist - -- [⚡ TypeScript Implementation Guide](./reference/node_mcp_server.md) - Complete TypeScript guide with: - - Project structure - - Zod schema patterns - - Tool registration with `server.registerTool` - - Complete working examples - - Quality checklist - -### Evaluation Guide (Load During Phase 4) - -- [✅ Evaluation Guide](./reference/evaluation.md) - Complete evaluation creation guide with: - - Question creation guidelines - - Answer verification strategies - - XML format specifications - - Example questions and answers - - Running an evaluation with the provided scripts diff --git a/.roo/skills/mcp-builder/reference/evaluation.md b/.roo/skills/mcp-builder/reference/evaluation.md deleted file mode 100644 index c9375b535a3..00000000000 --- a/.roo/skills/mcp-builder/reference/evaluation.md +++ /dev/null @@ -1,642 +0,0 @@ -# MCP Server Evaluation Guide - -## Overview - -This document provides guidance on creating comprehensive evaluations for MCP servers. Evaluations test whether LLMs can effectively use your MCP server to answer realistic, complex questions using only the tools provided. - ---- - -## Quick Reference - -### Evaluation Requirements - -- Create 10 human-readable questions -- Questions must be READ-ONLY, INDEPENDENT, NON-DESTRUCTIVE -- Each question requires multiple tool calls (potentially dozens) -- Answers must be single, verifiable values -- Answers must be STABLE (won't change over time) - -### Output Format - -```xml - - - Your question here - Single verifiable answer - - -``` - ---- - -## Purpose of Evaluations - -The measure of quality of an MCP server is NOT how well or comprehensively the server implements tools, but how well these implementations (input/output schemas, docstrings/descriptions, functionality) enable LLMs with no other context and access ONLY to the MCP servers to answer realistic and difficult questions. - -## Evaluation Overview - -Create 10 human-readable questions requiring ONLY READ-ONLY, INDEPENDENT, NON-DESTRUCTIVE, and IDEMPOTENT operations to answer. Each question should be: - -- Realistic -- Clear and concise -- Unambiguous -- Complex, requiring potentially dozens of tool calls or steps -- Answerable with a single, verifiable value that you identify in advance - -## Question Guidelines - -### Core Requirements - -1. 
**Questions MUST be independent** - - - Each question should NOT depend on the answer to any other question - - Should not assume prior write operations from processing another question - -2. **Questions MUST require ONLY NON-DESTRUCTIVE AND IDEMPOTENT tool use** - - - Should not instruct or require modifying state to arrive at the correct answer - -3. **Questions must be REALISTIC, CLEAR, CONCISE, and COMPLEX** - - Must require another LLM to use multiple (potentially dozens of) tools or steps to answer - -### Complexity and Depth - -4. **Questions must require deep exploration** - - - Consider multi-hop questions requiring multiple sub-questions and sequential tool calls - - Each step should benefit from information found in previous questions - -5. **Questions may require extensive paging** - - - May need paging through multiple pages of results - - May require querying old data (1-2 years out-of-date) to find niche information - - The questions must be DIFFICULT - -6. **Questions must require deep understanding** - - - Rather than surface-level knowledge - - May pose complex ideas as True/False questions requiring evidence - - May use multiple-choice format where LLM must search different hypotheses - -7. **Questions must not be solvable with straightforward keyword search** - - Do not include specific keywords from the target content - - Use synonyms, related concepts, or paraphrases - - Require multiple searches, analyzing multiple related items, extracting context, then deriving the answer - -### Tool Testing - -8. **Questions should stress-test tool return values** - - - May elicit tools returning large JSON objects or lists, overwhelming the LLM - - Should require understanding multiple modalities of data: - - IDs and names - - Timestamps and datetimes (months, days, years, seconds) - - File IDs, names, extensions, and mimetypes - - URLs, GIDs, etc. - - Should probe the tool's ability to return all useful forms of data - -9. **Questions should MOSTLY reflect real human use cases** - - - The kinds of information retrieval tasks that HUMANS assisted by an LLM would care about - -10. **Questions may require dozens of tool calls** - - - This challenges LLMs with limited context - - Encourages MCP server tools to reduce information returned - -11. **Include ambiguous questions** - - May be ambiguous OR require difficult decisions on which tools to call - - Force the LLM to potentially make mistakes or misinterpret - - Ensure that despite AMBIGUITY, there is STILL A SINGLE VERIFIABLE ANSWER - -### Stability - -12. **Questions must be designed so the answer DOES NOT CHANGE** - - - Do not ask questions that rely on "current state" which is dynamic - - For example, do not count: - - Number of reactions to a post - - Number of replies to a thread - - Number of members in a channel - -13. **DO NOT let the MCP server RESTRICT the kinds of questions you create** - - Create challenging and complex questions - - Some may not be solvable with the available MCP server tools - - Questions may require specific output formats (datetime vs. epoch time, JSON vs. MARKDOWN) - - Questions may require dozens of tool calls to complete - -## Answer Guidelines - -### Verification - -1. **Answers must be VERIFIABLE via direct string comparison** - - If the answer can be re-written in many formats, clearly specify the output format in the QUESTION - - Examples: "Use YYYY/MM/DD.", "Respond True or False.", "Answer A, B, C, or D and nothing else." 
- - Answer should be a single VERIFIABLE value such as: - - User ID, user name, display name, first name, last name - - Channel ID, channel name - - Message ID, string - - URL, title - - Numerical quantity - - Timestamp, datetime - - Boolean (for True/False questions) - - Email address, phone number - - File ID, file name, file extension - - Multiple choice answer - - Answers must not require special formatting or complex, structured output - - Answer will be verified using DIRECT STRING COMPARISON - -### Readability - -2. **Answers should generally prefer HUMAN-READABLE formats** - - Examples: names, first name, last name, datetime, file name, message string, URL, yes/no, true/false, a/b/c/d - - Rather than opaque IDs (though IDs are acceptable) - - The VAST MAJORITY of answers should be human-readable - -### Stability - -3. **Answers must be STABLE/STATIONARY** - - - Look at old content (e.g., conversations that have ended, projects that have launched, questions answered) - - Create QUESTIONS based on "closed" concepts that will always return the same answer - - Questions may ask to consider a fixed time window to insulate from non-stationary answers - - Rely on context UNLIKELY to change - - Example: if finding a paper name, be SPECIFIC enough so answer is not confused with papers published later - -4. **Answers must be CLEAR and UNAMBIGUOUS** - - Questions must be designed so there is a single, clear answer - - Answer can be derived from using the MCP server tools - -### Diversity - -5. **Answers must be DIVERSE** - - - Answer should be a single VERIFIABLE value in diverse modalities and formats - - User concept: user ID, user name, display name, first name, last name, email address, phone number - - Channel concept: channel ID, channel name, channel topic - - Message concept: message ID, message string, timestamp, month, day, year - -6. 
**Answers must NOT be complex structures** - - Not a list of values - - Not a complex object - - Not a list of IDs or strings - - Not natural language text - - UNLESS the answer can be straightforwardly verified using DIRECT STRING COMPARISON - - And can be realistically reproduced - - It should be unlikely that an LLM would return the same list in any other order or format - -## Evaluation Process - -### Step 1: Documentation Inspection - -Read the documentation of the target API to understand: - -- Available endpoints and functionality -- If ambiguity exists, fetch additional information from the web -- Parallelize this step AS MUCH AS POSSIBLE -- Ensure each subagent is ONLY examining documentation from the file system or on the web - -### Step 2: Tool Inspection - -List the tools available in the MCP server: - -- Inspect the MCP server directly -- Understand input/output schemas, docstrings, and descriptions -- WITHOUT calling the tools themselves at this stage - -### Step 3: Developing Understanding - -Repeat steps 1 & 2 until you have a good understanding: - -- Iterate multiple times -- Think about the kinds of tasks you want to create -- Refine your understanding -- At NO stage should you READ the code of the MCP server implementation itself -- Use your intuition and understanding to create reasonable, realistic, but VERY challenging tasks - -### Step 4: Read-Only Content Inspection - -After understanding the API and tools, USE the MCP server tools: - -- Inspect content using READ-ONLY and NON-DESTRUCTIVE operations ONLY -- Goal: identify specific content (e.g., users, channels, messages, projects, tasks) for creating realistic questions -- Should NOT call any tools that modify state -- Will NOT read the code of the MCP server implementation itself -- Parallelize this step with individual sub-agents pursuing independent explorations -- Ensure each subagent is only performing READ-ONLY, NON-DESTRUCTIVE, and IDEMPOTENT operations -- BE CAREFUL: SOME TOOLS may return LOTS OF DATA which would cause you to run out of CONTEXT -- Make INCREMENTAL, SMALL, AND TARGETED tool calls for exploration -- In all tool call requests, use the `limit` parameter to limit results (<10) -- Use pagination - -### Step 5: Task Generation - -After inspecting the content, create 10 human-readable questions: - -- An LLM should be able to answer these with the MCP server -- Follow all question and answer guidelines above - -## Output Format - -Each QA pair consists of a question and an answer. The output should be an XML file with this structure: - -```xml - - - Find the project created in Q2 2024 with the highest number of completed tasks. What is the project name? - Website Redesign - - - Search for issues labeled as "bug" that were closed in March 2024. Which user closed the most issues? Provide their username. - sarah_dev - - - Look for pull requests that modified files in the /api directory and were merged between January 1 and January 31, 2024. How many different contributors worked on these PRs? - 7 - - - Find the repository with the most stars that was created before 2023. What is the repository name? - data-pipeline - - -``` - -## Evaluation Examples - -### Good Questions - -**Example 1: Multi-hop question requiring deep exploration (GitHub MCP)** - -```xml - - Find the repository that was archived in Q3 2023 and had previously been the most forked project in the organization. What was the primary programming language used in that repository? 
- Python - -``` - -This question is good because: - -- Requires multiple searches to find archived repositories -- Needs to identify which had the most forks before archival -- Requires examining repository details for the language -- Answer is a simple, verifiable value -- Based on historical (closed) data that won't change - -**Example 2: Requires understanding context without keyword matching (Project Management MCP)** - -```xml - - Locate the initiative focused on improving customer onboarding that was completed in late 2023. The project lead created a retrospective document after completion. What was the lead's role title at that time? - Product Manager - -``` - -This question is good because: - -- Doesn't use specific project name ("initiative focused on improving customer onboarding") -- Requires finding completed projects from specific timeframe -- Needs to identify the project lead and their role -- Requires understanding context from retrospective documents -- Answer is human-readable and stable -- Based on completed work (won't change) - -**Example 3: Complex aggregation requiring multiple steps (Issue Tracker MCP)** - -```xml - - Among all bugs reported in January 2024 that were marked as critical priority, which assignee resolved the highest percentage of their assigned bugs within 48 hours? Provide the assignee's username. - alex_eng - -``` - -This question is good because: - -- Requires filtering bugs by date, priority, and status -- Needs to group by assignee and calculate resolution rates -- Requires understanding timestamps to determine 48-hour windows -- Tests pagination (potentially many bugs to process) -- Answer is a single username -- Based on historical data from specific time period - -**Example 4: Requires synthesis across multiple data types (CRM MCP)** - -```xml - - Find the account that upgraded from the Starter to Enterprise plan in Q4 2023 and had the highest annual contract value. What industry does this account operate in? - Healthcare - -``` - -This question is good because: - -- Requires understanding subscription tier changes -- Needs to identify upgrade events in specific timeframe -- Requires comparing contract values -- Must access account industry information -- Answer is simple and verifiable -- Based on completed historical transactions - -### Poor Questions - -**Example 1: Answer changes over time** - -```xml - - How many open issues are currently assigned to the engineering team? - 47 - -``` - -This question is poor because: - -- The answer will change as issues are created, closed, or reassigned -- Not based on stable/stationary data -- Relies on "current state" which is dynamic - -**Example 2: Too easy with keyword search** - -```xml - - Find the pull request with title "Add authentication feature" and tell me who created it. - developer123 - -``` - -This question is poor because: - -- Can be solved with a straightforward keyword search for exact title -- Doesn't require deep exploration or understanding -- No synthesis or analysis needed - -**Example 3: Ambiguous answer format** - -```xml - - List all the repositories that have Python as their primary language. 
- repo1, repo2, repo3, data-pipeline, ml-tools - -``` - -This question is poor because: - -- Answer is a list that could be returned in any order -- Difficult to verify with direct string comparison -- LLM might format differently (JSON array, comma-separated, newline-separated) -- Better to ask for a specific aggregate (count) or superlative (most stars) - -## Verification Process - -After creating evaluations: - -1. **Examine the XML file** to understand the schema -2. **Load each task instruction** and in parallel using the MCP server and tools, identify the correct answer by attempting to solve the task YOURSELF -3. **Flag any operations** that require WRITE or DESTRUCTIVE operations -4. **Accumulate all CORRECT answers** and replace any incorrect answers in the document -5. **Remove any ``** that require WRITE or DESTRUCTIVE operations - -Remember to parallelize solving tasks to avoid running out of context, then accumulate all answers and make changes to the file at the end. - -## Tips for Creating Quality Evaluations - -1. **Think Hard and Plan Ahead** before generating tasks -2. **Parallelize Where Opportunity Arises** to speed up the process and manage context -3. **Focus on Realistic Use Cases** that humans would actually want to accomplish -4. **Create Challenging Questions** that test the limits of the MCP server's capabilities -5. **Ensure Stability** by using historical data and closed concepts -6. **Verify Answers** by solving the questions yourself using the MCP server tools -7. **Iterate and Refine** based on what you learn during the process - ---- - -# Running Evaluations - -After creating your evaluation file, you can use the provided evaluation harness to test your MCP server. - -## Setup - -1. **Install Dependencies** - - ```bash - pip install -r scripts/requirements.txt - ``` - - Or install manually: - - ```bash - pip install anthropic mcp - ``` - -2. **Set API Key** - - ```bash - export ANTHROPIC_API_KEY=your_api_key_here - ``` - -## Evaluation File Format - -Evaluation files use XML format with `` elements: - -```xml - - - Find the project created in Q2 2024 with the highest number of completed tasks. What is the project name? - Website Redesign - - - Search for issues labeled as "bug" that were closed in March 2024. Which user closed the most issues? Provide their username. - sarah_dev - - -``` - -## Running Evaluations - -The evaluation script (`scripts/evaluation.py`) supports three transport types: - -**Important:** - -- **stdio transport**: The evaluation script automatically launches and manages the MCP server process for you. Do not run the server manually. -- **sse/http transports**: You must start the MCP server separately before running the evaluation. The script connects to the already-running server at the specified URL. - -### 1. Local STDIO Server - -For locally-run MCP servers (script launches the server automatically): - -```bash -python scripts/evaluation.py \ - -t stdio \ - -c python \ - -a my_mcp_server.py \ - evaluation.xml -``` - -With environment variables: - -```bash -python scripts/evaluation.py \ - -t stdio \ - -c python \ - -a my_mcp_server.py \ - -e API_KEY=abc123 \ - -e DEBUG=true \ - evaluation.xml -``` - -### 2. Server-Sent Events (SSE) - -For SSE-based MCP servers (you must start the server first): - -```bash -python scripts/evaluation.py \ - -t sse \ - -u https://example.com/mcp \ - -H "Authorization: Bearer token123" \ - -H "X-Custom-Header: value" \ - evaluation.xml -``` - -### 3. 
HTTP (Streamable HTTP) - -For HTTP-based MCP servers (you must start the server first): - -```bash -python scripts/evaluation.py \ - -t http \ - -u https://example.com/mcp \ - -H "Authorization: Bearer token123" \ - evaluation.xml -``` - -## Command-Line Options - -``` -usage: evaluation.py [-h] [-t {stdio,sse,http}] [-m MODEL] [-c COMMAND] - [-a ARGS [ARGS ...]] [-e ENV [ENV ...]] [-u URL] - [-H HEADERS [HEADERS ...]] [-o OUTPUT] - eval_file - -positional arguments: - eval_file Path to evaluation XML file - -optional arguments: - -h, --help Show help message - -t, --transport Transport type: stdio, sse, or http (default: stdio) - -m, --model Claude model to use (default: claude-3-7-sonnet-20250219) - -o, --output Output file for report (default: print to stdout) - -stdio options: - -c, --command Command to run MCP server (e.g., python, node) - -a, --args Arguments for the command (e.g., server.py) - -e, --env Environment variables in KEY=VALUE format - -sse/http options: - -u, --url MCP server URL - -H, --header HTTP headers in 'Key: Value' format -``` - -## Output - -The evaluation script generates a detailed report including: - -- **Summary Statistics**: - - - Accuracy (correct/total) - - Average task duration - - Average tool calls per task - - Total tool calls - -- **Per-Task Results**: - - Prompt and expected response - - Actual response from the agent - - Whether the answer was correct (✅/❌) - - Duration and tool call details - - Agent's summary of its approach - - Agent's feedback on the tools - -### Save Report to File - -```bash -python scripts/evaluation.py \ - -t stdio \ - -c python \ - -a my_server.py \ - -o evaluation_report.md \ - evaluation.xml -``` - -## Complete Example Workflow - -Here's a complete example of creating and running an evaluation: - -1. **Create your evaluation file** (`my_evaluation.xml`): - -```xml - - - Find the user who created the most issues in January 2024. What is their username? - alice_developer - - - Among all pull requests merged in Q1 2024, which repository had the highest number? Provide the repository name. - backend-api - - - Find the project that was completed in December 2023 and had the longest duration from start to finish. How many days did it take? - 127 - - -``` - -2. **Install dependencies**: - -```bash -pip install -r scripts/requirements.txt -export ANTHROPIC_API_KEY=your_api_key -``` - -3. **Run evaluation**: - -```bash -python scripts/evaluation.py \ - -t stdio \ - -c python \ - -a github_mcp_server.py \ - -e GITHUB_TOKEN=ghp_xxx \ - -o github_eval_report.md \ - my_evaluation.xml -``` - -4. 
**Review the report** in `github_eval_report.md` to: - - See which questions passed/failed - - Read the agent's feedback on your tools - - Identify areas for improvement - - Iterate on your MCP server design - -## Troubleshooting - -### Connection Errors - -If you get connection errors: - -- **STDIO**: Verify the command and arguments are correct -- **SSE/HTTP**: Check the URL is accessible and headers are correct -- Ensure any required API keys are set in environment variables or headers - -### Low Accuracy - -If many evaluations fail: - -- Review the agent's feedback for each task -- Check if tool descriptions are clear and comprehensive -- Verify input parameters are well-documented -- Consider whether tools return too much or too little data -- Ensure error messages are actionable - -### Timeout Issues - -If tasks are timing out: - -- Use a more capable model (e.g., `claude-3-7-sonnet-20250219`) -- Check if tools are returning too much data -- Verify pagination is working correctly -- Consider simplifying complex questions diff --git a/.roo/skills/mcp-builder/reference/mcp_best_practices.md b/.roo/skills/mcp-builder/reference/mcp_best_practices.md deleted file mode 100644 index 428e1e80947..00000000000 --- a/.roo/skills/mcp-builder/reference/mcp_best_practices.md +++ /dev/null @@ -1,269 +0,0 @@ -# MCP Server Best Practices - -## Quick Reference - -### Server Naming - -- **Python**: `{service}_mcp` (e.g., `slack_mcp`) -- **Node/TypeScript**: `{service}-mcp-server` (e.g., `slack-mcp-server`) - -### Tool Naming - -- Use snake_case with service prefix -- Format: `{service}_{action}_{resource}` -- Example: `slack_send_message`, `github_create_issue` - -### Response Formats - -- Support both JSON and Markdown formats -- JSON for programmatic processing -- Markdown for human readability - -### Pagination - -- Always respect `limit` parameter -- Return `has_more`, `next_offset`, `total_count` -- Default to 20-50 items - -### Transport - -- **Streamable HTTP**: For remote servers, multi-client scenarios -- **stdio**: For local integrations, command-line tools -- Avoid SSE (deprecated in favor of streamable HTTP) - ---- - -## Server Naming Conventions - -Follow these standardized naming patterns: - -**Python**: Use format `{service}_mcp` (lowercase with underscores) - -- Examples: `slack_mcp`, `github_mcp`, `jira_mcp` - -**Node/TypeScript**: Use format `{service}-mcp-server` (lowercase with hyphens) - -- Examples: `slack-mcp-server`, `github-mcp-server`, `jira-mcp-server` - -The name should be general, descriptive of the service being integrated, easy to infer from the task description, and without version numbers. - ---- - -## Tool Naming and Design - -### Tool Naming - -1. **Use snake_case**: `search_users`, `create_project`, `get_channel_info` -2. **Include service prefix**: Anticipate that your MCP server may be used alongside other MCP servers - - Use `slack_send_message` instead of just `send_message` - - Use `github_create_issue` instead of just `create_issue` -3. **Be action-oriented**: Start with verbs (get, list, search, create, etc.) -4. 
**Be specific**: Avoid generic names that could conflict with other servers - -### Tool Design - -- Tool descriptions must narrowly and unambiguously describe functionality -- Descriptions must precisely match actual functionality -- Provide tool annotations (readOnlyHint, destructiveHint, idempotentHint, openWorldHint) -- Keep tool operations focused and atomic - ---- - -## Response Formats - -All tools that return data should support multiple formats: - -### JSON Format (`response_format="json"`) - -- Machine-readable structured data -- Include all available fields and metadata -- Consistent field names and types -- Use for programmatic processing - -### Markdown Format (`response_format="markdown"`, typically default) - -- Human-readable formatted text -- Use headers, lists, and formatting for clarity -- Convert timestamps to human-readable format -- Show display names with IDs in parentheses -- Omit verbose metadata - ---- - -## Pagination - -For tools that list resources: - -- **Always respect the `limit` parameter** -- **Implement pagination**: Use `offset` or cursor-based pagination -- **Return pagination metadata**: Include `has_more`, `next_offset`/`next_cursor`, `total_count` -- **Never load all results into memory**: Especially important for large datasets -- **Default to reasonable limits**: 20-50 items is typical - -Example pagination response: - -```json -{ - "total": 150, - "count": 20, - "offset": 0, - "items": [...], - "has_more": true, - "next_offset": 20 -} -``` - ---- - -## Transport Options - -### Streamable HTTP - -**Best for**: Remote servers, web services, multi-client scenarios - -**Characteristics**: - -- Bidirectional communication over HTTP -- Supports multiple simultaneous clients -- Can be deployed as a web service -- Enables server-to-client notifications - -**Use when**: - -- Serving multiple clients simultaneously -- Deploying as a cloud service -- Integration with web applications - -### stdio - -**Best for**: Local integrations, command-line tools - -**Characteristics**: - -- Standard input/output stream communication -- Simple setup, no network configuration needed -- Runs as a subprocess of the client - -**Use when**: - -- Building tools for local development environments -- Integrating with desktop applications -- Single-user, single-session scenarios - -**Note**: stdio servers should NOT log to stdout (use stderr for logging) - -### Transport Selection - -| Criterion | stdio | Streamable HTTP | -| -------------- | ------ | --------------- | -| **Deployment** | Local | Remote | -| **Clients** | Single | Multiple | -| **Complexity** | Low | Medium | -| **Real-time** | No | Yes | - ---- - -## Security Best Practices - -### Authentication and Authorization - -**OAuth 2.1**: - -- Use secure OAuth 2.1 with certificates from recognized authorities -- Validate access tokens before processing requests -- Only accept tokens specifically intended for your server - -**API Keys**: - -- Store API keys in environment variables, never in code -- Validate keys on server startup -- Provide clear error messages when authentication fails - -### Input Validation - -- Sanitize file paths to prevent directory traversal -- Validate URLs and external identifiers -- Check parameter sizes and ranges -- Prevent command injection in system calls -- Use schema validation (Pydantic/Zod) for all inputs - -### Error Handling - -- Don't expose internal errors to clients -- Log security-relevant errors server-side -- Provide helpful but not revealing error messages -- Clean up 
resources after errors - -### DNS Rebinding Protection - -For streamable HTTP servers running locally: - -- Enable DNS rebinding protection -- Validate the `Origin` header on all incoming connections -- Bind to `127.0.0.1` rather than `0.0.0.0` - ---- - -## Tool Annotations - -Provide annotations to help clients understand tool behavior: - -| Annotation | Type | Default | Description | -| ----------------- | ------- | ------- | ------------------------------------------------------- | -| `readOnlyHint` | boolean | false | Tool does not modify its environment | -| `destructiveHint` | boolean | true | Tool may perform destructive updates | -| `idempotentHint` | boolean | false | Repeated calls with same args have no additional effect | -| `openWorldHint` | boolean | true | Tool interacts with external entities | - -**Important**: Annotations are hints, not security guarantees. Clients should not make security-critical decisions based solely on annotations. - ---- - -## Error Handling - -- Use standard JSON-RPC error codes -- Report tool errors within result objects (not protocol-level errors) -- Provide helpful, specific error messages with suggested next steps -- Don't expose internal implementation details -- Clean up resources properly on errors - -Example error handling: - -```typescript -try { - const result = performOperation() - return { content: [{ type: "text", text: result }] } -} catch (error) { - return { - isError: true, - content: [ - { - type: "text", - text: `Error: ${error.message}. Try using filter='active_only' to reduce results.`, - }, - ], - } -} -``` - ---- - -## Testing Requirements - -Comprehensive testing should cover: - -- **Functional testing**: Verify correct execution with valid/invalid inputs -- **Integration testing**: Test interaction with external systems -- **Security testing**: Validate auth, input sanitization, rate limiting -- **Performance testing**: Check behavior under load, timeouts -- **Error handling**: Ensure proper error reporting and cleanup - ---- - -## Documentation Requirements - -- Provide clear documentation of all tools and capabilities -- Include working examples (at least 3 per major feature) -- Document security considerations -- Specify required permissions and access levels -- Document rate limits and performance characteristics diff --git a/.roo/skills/mcp-builder/reference/node_mcp_server.md b/.roo/skills/mcp-builder/reference/node_mcp_server.md deleted file mode 100644 index e645b0291de..00000000000 --- a/.roo/skills/mcp-builder/reference/node_mcp_server.md +++ /dev/null @@ -1,975 +0,0 @@ -# Node/TypeScript MCP Server Implementation Guide - -## Overview - -This document provides Node/TypeScript-specific best practices and examples for implementing MCP servers using the MCP TypeScript SDK. It covers project structure, server setup, tool registration patterns, input validation with Zod, error handling, and complete working examples. 
- ---- - -## Quick Reference - -### Key Imports - -```typescript -import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" -import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js" -import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" -import express from "express" -import { z } from "zod" -``` - -### Server Initialization - -```typescript -const server = new McpServer({ - name: "service-mcp-server", - version: "1.0.0", -}) -``` - -### Tool Registration Pattern - -```typescript -server.registerTool( - "tool_name", - { - title: "Tool Display Name", - description: "What the tool does", - inputSchema: { param: z.string() }, - outputSchema: { result: z.string() }, - }, - async ({ param }) => { - const output = { result: `Processed: ${param}` } - return { - content: [{ type: "text", text: JSON.stringify(output) }], - structuredContent: output, // Modern pattern for structured data - } - }, -) -``` - ---- - -## MCP TypeScript SDK - -The official MCP TypeScript SDK provides: - -- `McpServer` class for server initialization -- `registerTool` method for tool registration -- Zod schema integration for runtime input validation -- Type-safe tool handler implementations - -**IMPORTANT - Use Modern APIs Only:** - -- **DO use**: `server.registerTool()`, `server.registerResource()`, `server.registerPrompt()` -- **DO NOT use**: Old deprecated APIs such as `server.tool()`, `server.setRequestHandler(ListToolsRequestSchema, ...)`, or manual handler registration -- The `register*` methods provide better type safety, automatic schema handling, and are the recommended approach - -See the MCP SDK documentation in the references for complete details. - -## Server Naming Convention - -Node/TypeScript MCP servers must follow this naming pattern: - -- **Format**: `{service}-mcp-server` (lowercase with hyphens) -- **Examples**: `github-mcp-server`, `jira-mcp-server`, `stripe-mcp-server` - -The name should be: - -- General (not tied to specific features) -- Descriptive of the service/API being integrated -- Easy to infer from the task description -- Without version numbers or dates - -## Project Structure - -Create the following structure for Node/TypeScript MCP servers: - -``` -{service}-mcp-server/ -├── package.json -├── tsconfig.json -├── README.md -├── src/ -│ ├── index.ts # Main entry point with McpServer initialization -│ ├── types.ts # TypeScript type definitions and interfaces -│ ├── tools/ # Tool implementations (one file per domain) -│ ├── services/ # API clients and shared utilities -│ ├── schemas/ # Zod validation schemas -│ └── constants.ts # Shared constants (API_URL, CHARACTER_LIMIT, etc.) -└── dist/ # Built JavaScript files (entry point: dist/index.js) -``` - -## Tool Implementation - -### Tool Naming - -Use snake_case for tool names (e.g., "search_users", "create_project", "get_channel_info") with clear, action-oriented names. 
- -**Avoid Naming Conflicts**: Include the service context to prevent overlaps: - -- Use "slack_send_message" instead of just "send_message" -- Use "github_create_issue" instead of just "create_issue" -- Use "asana_list_tasks" instead of just "list_tasks" - -### Tool Structure - -Tools are registered using the `registerTool` method with the following requirements: - -- Use Zod schemas for runtime input validation and type safety -- The `description` field must be explicitly provided - JSDoc comments are NOT automatically extracted -- Explicitly provide `title`, `description`, `inputSchema`, and `annotations` -- The `inputSchema` must be a Zod schema object (not a JSON schema) -- Type all parameters and return values explicitly - -```typescript -import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" -import { z } from "zod" - -const server = new McpServer({ - name: "example-mcp", - version: "1.0.0", -}) - -// Zod schema for input validation -const UserSearchInputSchema = z - .object({ - query: z - .string() - .min(2, "Query must be at least 2 characters") - .max(200, "Query must not exceed 200 characters") - .describe("Search string to match against names/emails"), - limit: z.number().int().min(1).max(100).default(20).describe("Maximum results to return"), - offset: z.number().int().min(0).default(0).describe("Number of results to skip for pagination"), - response_format: z - .nativeEnum(ResponseFormat) - .default(ResponseFormat.MARKDOWN) - .describe("Output format: 'markdown' for human-readable or 'json' for machine-readable"), - }) - .strict() - -// Type definition from Zod schema -type UserSearchInput = z.infer - -server.registerTool( - "example_search_users", - { - title: "Search Example Users", - description: `Search for users in the Example system by name, email, or team. - -This tool searches across all user profiles in the Example platform, supporting partial matches and various search filters. It does NOT create or modify users, only searches existing ones. 
- -Args: - - query (string): Search string to match against names/emails - - limit (number): Maximum results to return, between 1-100 (default: 20) - - offset (number): Number of results to skip for pagination (default: 0) - - response_format ('markdown' | 'json'): Output format (default: 'markdown') - -Returns: - For JSON format: Structured data with schema: - { - "total": number, // Total number of matches found - "count": number, // Number of results in this response - "offset": number, // Current pagination offset - "users": [ - { - "id": string, // User ID (e.g., "U123456789") - "name": string, // Full name (e.g., "John Doe") - "email": string, // Email address - "team": string, // Team name (optional) - "active": boolean // Whether user is active - } - ], - "has_more": boolean, // Whether more results are available - "next_offset": number // Offset for next page (if has_more is true) - } - -Examples: - - Use when: "Find all marketing team members" -> params with query="team:marketing" - - Use when: "Search for John's account" -> params with query="john" - - Don't use when: You need to create a user (use example_create_user instead) - -Error Handling: - - Returns "Error: Rate limit exceeded" if too many requests (429 status) - - Returns "No users found matching ''" if search returns empty`, - inputSchema: UserSearchInputSchema, - annotations: { - readOnlyHint: true, - destructiveHint: false, - idempotentHint: true, - openWorldHint: true, - }, - }, - async (params: UserSearchInput) => { - try { - // Input validation is handled by Zod schema - // Make API request using validated parameters - const data = await makeApiRequest("users/search", "GET", undefined, { - q: params.query, - limit: params.limit, - offset: params.offset, - }) - - const users = data.users || [] - const total = data.total || 0 - - if (!users.length) { - return { - content: [ - { - type: "text", - text: `No users found matching '${params.query}'`, - }, - ], - } - } - - // Prepare structured output - const output = { - total, - count: users.length, - offset: params.offset, - users: users.map((user: any) => ({ - id: user.id, - name: user.name, - email: user.email, - ...(user.team ? { team: user.team } : {}), - active: user.active ?? true, - })), - has_more: total > params.offset + users.length, - ...(total > params.offset + users.length - ? 
{ - next_offset: params.offset + users.length, - } - : {}), - } - - // Format text representation based on requested format - let textContent: string - if (params.response_format === ResponseFormat.MARKDOWN) { - const lines = [ - `# User Search Results: '${params.query}'`, - "", - `Found ${total} users (showing ${users.length})`, - "", - ] - for (const user of users) { - lines.push(`## ${user.name} (${user.id})`) - lines.push(`- **Email**: ${user.email}`) - if (user.team) lines.push(`- **Team**: ${user.team}`) - lines.push("") - } - textContent = lines.join("\n") - } else { - textContent = JSON.stringify(output, null, 2) - } - - return { - content: [{ type: "text", text: textContent }], - structuredContent: output, // Modern pattern for structured data - } - } catch (error) { - return { - content: [ - { - type: "text", - text: handleApiError(error), - }, - ], - } - } - }, -) -``` - -## Zod Schemas for Input Validation - -Zod provides runtime type validation: - -```typescript -import { z } from "zod" - -// Basic schema with validation -const CreateUserSchema = z - .object({ - name: z.string().min(1, "Name is required").max(100, "Name must not exceed 100 characters"), - email: z.string().email("Invalid email format"), - age: z - .number() - .int("Age must be a whole number") - .min(0, "Age cannot be negative") - .max(150, "Age cannot be greater than 150"), - }) - .strict() // Use .strict() to forbid extra fields - -// Enums -enum ResponseFormat { - MARKDOWN = "markdown", - JSON = "json", -} - -const SearchSchema = z.object({ - response_format: z.nativeEnum(ResponseFormat).default(ResponseFormat.MARKDOWN).describe("Output format"), -}) - -// Optional fields with defaults -const PaginationSchema = z.object({ - limit: z.number().int().min(1).max(100).default(20).describe("Maximum results to return"), - offset: z.number().int().min(0).default(0).describe("Number of results to skip"), -}) -``` - -## Response Format Options - -Support multiple output formats for flexibility: - -```typescript -enum ResponseFormat { - MARKDOWN = "markdown", - JSON = "json", -} - -const inputSchema = z.object({ - query: z.string(), - response_format: z - .nativeEnum(ResponseFormat) - .default(ResponseFormat.MARKDOWN) - .describe("Output format: 'markdown' for human-readable or 'json' for machine-readable"), -}) -``` - -**Markdown format**: - -- Use headers, lists, and formatting for clarity -- Convert timestamps to human-readable format -- Show display names with IDs in parentheses -- Omit verbose metadata -- Group related information logically - -**JSON format**: - -- Return complete, structured data suitable for programmatic processing -- Include all available fields and metadata -- Use consistent field names and types - -## Pagination Implementation - -For tools that list resources: - -```typescript -const ListSchema = z.object({ - limit: z.number().int().min(1).max(100).default(20), - offset: z.number().int().min(0).default(0), -}) - -async function listItems(params: z.infer) { - const data = await apiRequest(params.limit, params.offset) - - const response = { - total: data.total, - count: data.items.length, - offset: params.offset, - items: data.items, - has_more: data.total > params.offset + data.items.length, - next_offset: data.total > params.offset + data.items.length ? 
params.offset + data.items.length : undefined, - } - - return JSON.stringify(response, null, 2) -} -``` - -## Character Limits and Truncation - -Add a CHARACTER_LIMIT constant to prevent overwhelming responses: - -```typescript -// At module level in constants.ts -export const CHARACTER_LIMIT = 25000 // Maximum response size in characters - -async function searchTool(params: SearchInput) { - let result = generateResponse(data) - - // Check character limit and truncate if needed - if (result.length > CHARACTER_LIMIT) { - const truncatedData = data.slice(0, Math.max(1, data.length / 2)) - response.data = truncatedData - response.truncated = true - response.truncation_message = - `Response truncated from ${data.length} to ${truncatedData.length} items. ` + - `Use 'offset' parameter or add filters to see more results.` - result = JSON.stringify(response, null, 2) - } - - return result -} -``` - -## Error Handling - -Provide clear, actionable error messages: - -```typescript -import axios, { AxiosError } from "axios" - -function handleApiError(error: unknown): string { - if (error instanceof AxiosError) { - if (error.response) { - switch (error.response.status) { - case 404: - return "Error: Resource not found. Please check the ID is correct." - case 403: - return "Error: Permission denied. You don't have access to this resource." - case 429: - return "Error: Rate limit exceeded. Please wait before making more requests." - default: - return `Error: API request failed with status ${error.response.status}` - } - } else if (error.code === "ECONNABORTED") { - return "Error: Request timed out. Please try again." - } - } - return `Error: Unexpected error occurred: ${error instanceof Error ? error.message : String(error)}` -} -``` - -## Shared Utilities - -Extract common functionality into reusable functions: - -```typescript -// Shared API request function -async function makeApiRequest( - endpoint: string, - method: "GET" | "POST" | "PUT" | "DELETE" = "GET", - data?: any, - params?: any, -): Promise { - try { - const response = await axios({ - method, - url: `${API_BASE_URL}/${endpoint}`, - data, - params, - timeout: 30000, - headers: { - "Content-Type": "application/json", - Accept: "application/json", - }, - }) - return response.data - } catch (error) { - throw error - } -} -``` - -## Async/Await Best Practices - -Always use async/await for network requests and I/O operations: - -```typescript -// Good: Async network request -async function fetchData(resourceId: string): Promise { - const response = await axios.get(`${API_URL}/resource/${resourceId}`) - return response.data -} - -// Bad: Promise chains -function fetchData(resourceId: string): Promise { - return axios.get(`${API_URL}/resource/${resourceId}`).then((response) => response.data) // Harder to read and maintain -} -``` - -## TypeScript Best Practices - -1. **Use Strict TypeScript**: Enable strict mode in tsconfig.json -2. **Define Interfaces**: Create clear interface definitions for all data structures -3. **Avoid `any`**: Use proper types or `unknown` instead of `any` -4. **Zod for Runtime Validation**: Use Zod schemas to validate external data -5. **Type Guards**: Create type guard functions for complex type checking -6. **Error Handling**: Always use try-catch with proper error type checking -7. 
**Null Safety**: Use optional chaining (`?.`) and nullish coalescing (`??`) - -```typescript -// Good: Type-safe with Zod and interfaces -interface UserResponse { - id: string - name: string - email: string - team?: string - active: boolean -} - -const UserSchema = z.object({ - id: z.string(), - name: z.string(), - email: z.string().email(), - team: z.string().optional(), - active: z.boolean(), -}) - -type User = z.infer - -async function getUser(id: string): Promise { - const data = await apiCall(`/users/${id}`) - return UserSchema.parse(data) // Runtime validation -} - -// Bad: Using any -async function getUser(id: string): Promise { - return await apiCall(`/users/${id}`) // No type safety -} -``` - -## Package Configuration - -### package.json - -```json -{ - "name": "{service}-mcp-server", - "version": "1.0.0", - "description": "MCP server for {Service} API integration", - "type": "module", - "main": "dist/index.js", - "scripts": { - "start": "node dist/index.js", - "dev": "tsx watch src/index.ts", - "build": "tsc", - "clean": "rm -rf dist" - }, - "engines": { - "node": ">=18" - }, - "dependencies": { - "@modelcontextprotocol/sdk": "^1.6.1", - "axios": "^1.7.9", - "zod": "^3.23.8" - }, - "devDependencies": { - "@types/node": "^22.10.0", - "tsx": "^4.19.2", - "typescript": "^5.7.2" - } -} -``` - -### tsconfig.json - -```json -{ - "compilerOptions": { - "target": "ES2022", - "module": "Node16", - "moduleResolution": "Node16", - "lib": ["ES2022"], - "outDir": "./dist", - "rootDir": "./src", - "strict": true, - "esModuleInterop": true, - "skipLibCheck": true, - "forceConsistentCasingInFileNames": true, - "declaration": true, - "declarationMap": true, - "sourceMap": true, - "allowSyntheticDefaultImports": true - }, - "include": ["src/**/*"], - "exclude": ["node_modules", "dist"] -} -``` - -## Complete Example - -```typescript -#!/usr/bin/env node -/** - * MCP Server for Example Service. - * - * This server provides tools to interact with Example API, including user search, - * project management, and data export capabilities. 
- */ - -import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" -import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" -import { z } from "zod" -import axios, { AxiosError } from "axios" - -// Constants -const API_BASE_URL = "https://api.example.com/v1" -const CHARACTER_LIMIT = 25000 - -// Enums -enum ResponseFormat { - MARKDOWN = "markdown", - JSON = "json", -} - -// Zod schemas -const UserSearchInputSchema = z - .object({ - query: z - .string() - .min(2, "Query must be at least 2 characters") - .max(200, "Query must not exceed 200 characters") - .describe("Search string to match against names/emails"), - limit: z.number().int().min(1).max(100).default(20).describe("Maximum results to return"), - offset: z.number().int().min(0).default(0).describe("Number of results to skip for pagination"), - response_format: z - .nativeEnum(ResponseFormat) - .default(ResponseFormat.MARKDOWN) - .describe("Output format: 'markdown' for human-readable or 'json' for machine-readable"), - }) - .strict() - -type UserSearchInput = z.infer - -// Shared utility functions -async function makeApiRequest( - endpoint: string, - method: "GET" | "POST" | "PUT" | "DELETE" = "GET", - data?: any, - params?: any, -): Promise { - try { - const response = await axios({ - method, - url: `${API_BASE_URL}/${endpoint}`, - data, - params, - timeout: 30000, - headers: { - "Content-Type": "application/json", - Accept: "application/json", - }, - }) - return response.data - } catch (error) { - throw error - } -} - -function handleApiError(error: unknown): string { - if (error instanceof AxiosError) { - if (error.response) { - switch (error.response.status) { - case 404: - return "Error: Resource not found. Please check the ID is correct." - case 403: - return "Error: Permission denied. You don't have access to this resource." - case 429: - return "Error: Rate limit exceeded. Please wait before making more requests." - default: - return `Error: API request failed with status ${error.response.status}` - } - } else if (error.code === "ECONNABORTED") { - return "Error: Request timed out. Please try again." - } - } - return `Error: Unexpected error occurred: ${error instanceof Error ? 
error.message : String(error)}` -} - -// Create MCP server instance -const server = new McpServer({ - name: "example-mcp", - version: "1.0.0", -}) - -// Register tools -server.registerTool( - "example_search_users", - { - title: "Search Example Users", - description: `[Full description as shown above]`, - inputSchema: UserSearchInputSchema, - annotations: { - readOnlyHint: true, - destructiveHint: false, - idempotentHint: true, - openWorldHint: true, - }, - }, - async (params: UserSearchInput) => { - // Implementation as shown above - }, -) - -// Main function -// For stdio (local): -async function runStdio() { - if (!process.env.EXAMPLE_API_KEY) { - console.error("ERROR: EXAMPLE_API_KEY environment variable is required") - process.exit(1) - } - - const transport = new StdioServerTransport() - await server.connect(transport) - console.error("MCP server running via stdio") -} - -// For streamable HTTP (remote): -async function runHTTP() { - if (!process.env.EXAMPLE_API_KEY) { - console.error("ERROR: EXAMPLE_API_KEY environment variable is required") - process.exit(1) - } - - const app = express() - app.use(express.json()) - - app.post("/mcp", async (req, res) => { - const transport = new StreamableHTTPServerTransport({ - sessionIdGenerator: undefined, - enableJsonResponse: true, - }) - res.on("close", () => transport.close()) - await server.connect(transport) - await transport.handleRequest(req, res, req.body) - }) - - const port = parseInt(process.env.PORT || "3000") - app.listen(port, () => { - console.error(`MCP server running on http://localhost:${port}/mcp`) - }) -} - -// Choose transport based on environment -const transport = process.env.TRANSPORT || "stdio" -if (transport === "http") { - runHTTP().catch((error) => { - console.error("Server error:", error) - process.exit(1) - }) -} else { - runStdio().catch((error) => { - console.error("Server error:", error) - process.exit(1) - }) -} -``` - ---- - -## Advanced MCP Features - -### Resource Registration - -Expose data as resources for efficient, URI-based access: - -```typescript -import { ResourceTemplate } from "@modelcontextprotocol/sdk/types.js" - -// Register a resource with URI template -server.registerResource( - { - uri: "file://documents/{name}", - name: "Document Resource", - description: "Access documents by name", - mimeType: "text/plain", - }, - async (uri: string) => { - // Extract parameter from URI - const match = uri.match(/^file:\/\/documents\/(.+)$/) - if (!match) { - throw new Error("Invalid URI format") - } - - const documentName = match[1] - const content = await loadDocument(documentName) - - return { - contents: [ - { - uri, - mimeType: "text/plain", - text: content, - }, - ], - } - }, -) - -// List available resources dynamically -server.registerResourceList(async () => { - const documents = await getAvailableDocuments() - return { - resources: documents.map((doc) => ({ - uri: `file://documents/${doc.name}`, - name: doc.name, - mimeType: "text/plain", - description: doc.description, - })), - } -}) -``` - -**When to use Resources vs Tools:** - -- **Resources**: For data access with simple URI-based parameters -- **Tools**: For complex operations requiring validation and business logic -- **Resources**: When data is relatively static or template-based -- **Tools**: When operations have side effects or complex workflows - -### Transport Options - -The TypeScript SDK supports two main transport mechanisms: - -#### Streamable HTTP (Recommended for Remote Servers) - -```typescript -import { 
StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js" -import express from "express" - -const app = express() -app.use(express.json()) - -app.post("/mcp", async (req, res) => { - // Create new transport for each request (stateless, prevents request ID collisions) - const transport = new StreamableHTTPServerTransport({ - sessionIdGenerator: undefined, - enableJsonResponse: true, - }) - - res.on("close", () => transport.close()) - - await server.connect(transport) - await transport.handleRequest(req, res, req.body) -}) - -app.listen(3000) -``` - -#### stdio (For Local Integrations) - -```typescript -import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" - -const transport = new StdioServerTransport() -await server.connect(transport) -``` - -**Transport selection:** - -- **Streamable HTTP**: Web services, remote access, multiple clients -- **stdio**: Command-line tools, local development, subprocess integration - -### Notification Support - -Notify clients when server state changes: - -```typescript -// Notify when tools list changes -server.notification({ - method: "notifications/tools/list_changed", -}) - -// Notify when resources change -server.notification({ - method: "notifications/resources/list_changed", -}) -``` - -Use notifications sparingly - only when server capabilities genuinely change. - ---- - -## Code Best Practices - -### Code Composability and Reusability - -Your implementation MUST prioritize composability and code reuse: - -1. **Extract Common Functionality**: - - - Create reusable helper functions for operations used across multiple tools - - Build shared API clients for HTTP requests instead of duplicating code - - Centralize error handling logic in utility functions - - Extract business logic into dedicated functions that can be composed - - Extract shared markdown or JSON field selection & formatting functionality - -2. **Avoid Duplication**: - - NEVER copy-paste similar code between tools - - If you find yourself writing similar logic twice, extract it into a function - - Common operations like pagination, filtering, field selection, and formatting should be shared - - Authentication/authorization logic should be centralized - -## Building and Running - -Always build your TypeScript code before running: - -```bash -# Build the project -npm run build - -# Run the server -npm start - -# Development with auto-reload -npm run dev -``` - -Always ensure `npm run build` completes successfully before considering the implementation complete. 
- -## Quality Checklist - -Before finalizing your Node/TypeScript MCP server implementation, ensure: - -### Strategic Design - -- [ ] Tools enable complete workflows, not just API endpoint wrappers -- [ ] Tool names reflect natural task subdivisions -- [ ] Response formats optimize for agent context efficiency -- [ ] Human-readable identifiers used where appropriate -- [ ] Error messages guide agents toward correct usage - -### Implementation Quality - -- [ ] FOCUSED IMPLEMENTATION: Most important and valuable tools implemented -- [ ] All tools registered using `registerTool` with complete configuration -- [ ] All tools include `title`, `description`, `inputSchema`, and `annotations` -- [ ] Annotations correctly set (readOnlyHint, destructiveHint, idempotentHint, openWorldHint) -- [ ] All tools use Zod schemas for runtime input validation with `.strict()` enforcement -- [ ] All Zod schemas have proper constraints and descriptive error messages -- [ ] All tools have comprehensive descriptions with explicit input/output types -- [ ] Descriptions include return value examples and complete schema documentation -- [ ] Error messages are clear, actionable, and educational - -### TypeScript Quality - -- [ ] TypeScript interfaces are defined for all data structures -- [ ] Strict TypeScript is enabled in tsconfig.json -- [ ] No use of `any` type - use `unknown` or proper types instead -- [ ] All async functions have explicit Promise return types -- [ ] Error handling uses proper type guards (e.g., `axios.isAxiosError`, `z.ZodError`) - -### Advanced Features (where applicable) - -- [ ] Resources registered for appropriate data endpoints -- [ ] Appropriate transport configured (stdio or streamable HTTP) -- [ ] Notifications implemented for dynamic server capabilities -- [ ] Type-safe with SDK interfaces - -### Project Configuration - -- [ ] Package.json includes all necessary dependencies -- [ ] Build script produces working JavaScript in dist/ directory -- [ ] Main entry point is properly configured as dist/index.js -- [ ] Server name follows format: `{service}-mcp-server` -- [ ] tsconfig.json properly configured with strict mode - -### Code Quality - -- [ ] Pagination is properly implemented where applicable -- [ ] Large responses check CHARACTER_LIMIT constant and truncate with clear messages -- [ ] Filtering options are provided for potentially large result sets -- [ ] All network operations handle timeouts and connection errors gracefully -- [ ] Common functionality is extracted into reusable functions -- [ ] Return types are consistent across similar operations - -### Testing and Build - -- [ ] `npm run build` completes successfully without errors -- [ ] dist/index.js created and executable -- [ ] Server runs: `node dist/index.js --help` -- [ ] All imports resolve correctly -- [ ] Sample tool calls work as expected diff --git a/.roo/skills/mcp-builder/reference/python_mcp_server.md b/.roo/skills/mcp-builder/reference/python_mcp_server.md deleted file mode 100644 index bc6b789546e..00000000000 --- a/.roo/skills/mcp-builder/reference/python_mcp_server.md +++ /dev/null @@ -1,738 +0,0 @@ -# Python MCP Server Implementation Guide - -## Overview - -This document provides Python-specific best practices and examples for implementing MCP servers using the MCP Python SDK. It covers server setup, tool registration patterns, input validation with Pydantic, error handling, and complete working examples. 
- ---- - -## Quick Reference - -### Key Imports - -```python -from mcp.server.fastmcp import FastMCP -from pydantic import BaseModel, Field, field_validator, ConfigDict -from typing import Optional, List, Dict, Any -from enum import Enum -import httpx -``` - -### Server Initialization - -```python -mcp = FastMCP("service_mcp") -``` - -### Tool Registration Pattern - -```python -@mcp.tool(name="tool_name", annotations={...}) -async def tool_function(params: InputModel) -> str: - # Implementation - pass -``` - ---- - -## MCP Python SDK and FastMCP - -The official MCP Python SDK provides FastMCP, a high-level framework for building MCP servers. It provides: - -- Automatic description and inputSchema generation from function signatures and docstrings -- Pydantic model integration for input validation -- Decorator-based tool registration with `@mcp.tool` - -**For complete SDK documentation, use WebFetch to load:** -`https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md` - -## Server Naming Convention - -Python MCP servers must follow this naming pattern: - -- **Format**: `{service}_mcp` (lowercase with underscores) -- **Examples**: `github_mcp`, `jira_mcp`, `stripe_mcp` - -The name should be: - -- General (not tied to specific features) -- Descriptive of the service/API being integrated -- Easy to infer from the task description -- Without version numbers or dates - -## Tool Implementation - -### Tool Naming - -Use snake_case for tool names (e.g., "search_users", "create_project", "get_channel_info") with clear, action-oriented names. - -**Avoid Naming Conflicts**: Include the service context to prevent overlaps: - -- Use "slack_send_message" instead of just "send_message" -- Use "github_create_issue" instead of just "create_issue" -- Use "asana_list_tasks" instead of just "list_tasks" - -### Tool Structure with FastMCP - -Tools are defined using the `@mcp.tool` decorator with Pydantic models for input validation: - -```python -from pydantic import BaseModel, Field, ConfigDict -from mcp.server.fastmcp import FastMCP - -# Initialize the MCP server -mcp = FastMCP("example_mcp") - -# Define Pydantic model for input validation -class ServiceToolInput(BaseModel): - '''Input model for service tool operation.''' - model_config = ConfigDict( - str_strip_whitespace=True, # Auto-strip whitespace from strings - validate_assignment=True, # Validate on assignment - extra='forbid' # Forbid extra fields - ) - - param1: str = Field(..., description="First parameter description (e.g., 'user123', 'project-abc')", min_length=1, max_length=100) - param2: Optional[int] = Field(default=None, description="Optional integer parameter with constraints", ge=0, le=1000) - tags: Optional[List[str]] = Field(default_factory=list, description="List of tags to apply", max_items=10) - -@mcp.tool( - name="service_tool_name", - annotations={ - "title": "Human-Readable Tool Title", - "readOnlyHint": True, # Tool does not modify environment - "destructiveHint": False, # Tool does not perform destructive operations - "idempotentHint": True, # Repeated calls have no additional effect - "openWorldHint": False # Tool does not interact with external entities - } -) -async def service_tool_name(params: ServiceToolInput) -> str: - '''Tool description automatically becomes the 'description' field. - - This tool performs a specific operation on the service. It validates all inputs - using the ServiceToolInput Pydantic model before processing. 
- - Args: - params (ServiceToolInput): Validated input parameters containing: - - param1 (str): First parameter description - - param2 (Optional[int]): Optional parameter with default - - tags (Optional[List[str]]): List of tags - - Returns: - str: JSON-formatted response containing operation results - ''' - # Implementation here - pass -``` - -## Pydantic v2 Key Features - -- Use `model_config` instead of nested `Config` class -- Use `field_validator` instead of deprecated `validator` -- Use `model_dump()` instead of deprecated `dict()` -- Validators require `@classmethod` decorator -- Type hints are required for validator methods - -```python -from pydantic import BaseModel, Field, field_validator, ConfigDict - -class CreateUserInput(BaseModel): - model_config = ConfigDict( - str_strip_whitespace=True, - validate_assignment=True - ) - - name: str = Field(..., description="User's full name", min_length=1, max_length=100) - email: str = Field(..., description="User's email address", pattern=r'^[\w\.-]+@[\w\.-]+\.\w+$') - age: int = Field(..., description="User's age", ge=0, le=150) - - @field_validator('email') - @classmethod - def validate_email(cls, v: str) -> str: - if not v.strip(): - raise ValueError("Email cannot be empty") - return v.lower() -``` - -## Response Format Options - -Support multiple output formats for flexibility: - -```python -from enum import Enum - -class ResponseFormat(str, Enum): - '''Output format for tool responses.''' - MARKDOWN = "markdown" - JSON = "json" - -class UserSearchInput(BaseModel): - query: str = Field(..., description="Search query") - response_format: ResponseFormat = Field( - default=ResponseFormat.MARKDOWN, - description="Output format: 'markdown' for human-readable or 'json' for machine-readable" - ) -``` - -**Markdown format**: - -- Use headers, lists, and formatting for clarity -- Convert timestamps to human-readable format (e.g., "2024-01-15 10:30:00 UTC" instead of epoch) -- Show display names with IDs in parentheses (e.g., "@john.doe (U123456)") -- Omit verbose metadata (e.g., show only one profile image URL, not all sizes) -- Group related information logically - -**JSON format**: - -- Return complete, structured data suitable for programmatic processing -- Include all available fields and metadata -- Use consistent field names and types - -## Pagination Implementation - -For tools that list resources: - -```python -class ListInput(BaseModel): - limit: Optional[int] = Field(default=20, description="Maximum results to return", ge=1, le=100) - offset: Optional[int] = Field(default=0, description="Number of results to skip for pagination", ge=0) - -async def list_items(params: ListInput) -> str: - # Make API request with pagination - data = await api_request(limit=params.limit, offset=params.offset) - - # Return pagination info - response = { - "total": data["total"], - "count": len(data["items"]), - "offset": params.offset, - "items": data["items"], - "has_more": data["total"] > params.offset + len(data["items"]), - "next_offset": params.offset + len(data["items"]) if data["total"] > params.offset + len(data["items"]) else None - } - return json.dumps(response, indent=2) -``` - -## Error Handling - -Provide clear, actionable error messages: - -```python -def _handle_api_error(e: Exception) -> str: - '''Consistent error formatting across all tools.''' - if isinstance(e, httpx.HTTPStatusError): - if e.response.status_code == 404: - return "Error: Resource not found. Please check the ID is correct." 
- elif e.response.status_code == 403: - return "Error: Permission denied. You don't have access to this resource." - elif e.response.status_code == 429: - return "Error: Rate limit exceeded. Please wait before making more requests." - return f"Error: API request failed with status {e.response.status_code}" - elif isinstance(e, httpx.TimeoutException): - return "Error: Request timed out. Please try again." - return f"Error: Unexpected error occurred: {type(e).__name__}" -``` - -## Shared Utilities - -Extract common functionality into reusable functions: - -```python -# Shared API request function -async def _make_api_request(endpoint: str, method: str = "GET", **kwargs) -> dict: - '''Reusable function for all API calls.''' - async with httpx.AsyncClient() as client: - response = await client.request( - method, - f"{API_BASE_URL}/{endpoint}", - timeout=30.0, - **kwargs - ) - response.raise_for_status() - return response.json() -``` - -## Async/Await Best Practices - -Always use async/await for network requests and I/O operations: - -```python -# Good: Async network request -async def fetch_data(resource_id: str) -> dict: - async with httpx.AsyncClient() as client: - response = await client.get(f"{API_URL}/resource/{resource_id}") - response.raise_for_status() - return response.json() - -# Bad: Synchronous request -def fetch_data(resource_id: str) -> dict: - response = requests.get(f"{API_URL}/resource/{resource_id}") # Blocks - return response.json() -``` - -## Type Hints - -Use type hints throughout: - -```python -from typing import Optional, List, Dict, Any - -async def get_user(user_id: str) -> Dict[str, Any]: - data = await fetch_user(user_id) - return {"id": data["id"], "name": data["name"]} -``` - -## Tool Docstrings - -Every tool must have comprehensive docstrings with explicit type information: - -```python -async def search_users(params: UserSearchInput) -> str: - ''' - Search for users in the Example system by name, email, or team. - - This tool searches across all user profiles in the Example platform, - supporting partial matches and various search filters. It does NOT - create or modify users, only searches existing ones. 
- - Args: - params (UserSearchInput): Validated input parameters containing: - - query (str): Search string to match against names/emails (e.g., "john", "@example.com", "team:marketing") - - limit (Optional[int]): Maximum results to return, between 1-100 (default: 20) - - offset (Optional[int]): Number of results to skip for pagination (default: 0) - - Returns: - str: JSON-formatted string containing search results with the following schema: - - Success response: - { - "total": int, # Total number of matches found - "count": int, # Number of results in this response - "offset": int, # Current pagination offset - "users": [ - { - "id": str, # User ID (e.g., "U123456789") - "name": str, # Full name (e.g., "John Doe") - "email": str, # Email address (e.g., "john@example.com") - "team": str # Team name (e.g., "Marketing") - optional - } - ] - } - - Error response: - "Error: " or "No users found matching ''" - - Examples: - - Use when: "Find all marketing team members" -> params with query="team:marketing" - - Use when: "Search for John's account" -> params with query="john" - - Don't use when: You need to create a user (use example_create_user instead) - - Don't use when: You have a user ID and need full details (use example_get_user instead) - - Error Handling: - - Input validation errors are handled by Pydantic model - - Returns "Error: Rate limit exceeded" if too many requests (429 status) - - Returns "Error: Invalid API authentication" if API key is invalid (401 status) - - Returns formatted list of results or "No users found matching 'query'" - ''' -``` - -## Complete Example - -See below for a complete Python MCP server example: - -```python -#!/usr/bin/env python3 -''' -MCP Server for Example Service. - -This server provides tools to interact with Example API, including user search, -project management, and data export capabilities. 
-''' - -from typing import Optional, List, Dict, Any -from enum import Enum -import httpx -from pydantic import BaseModel, Field, field_validator, ConfigDict -from mcp.server.fastmcp import FastMCP - -# Initialize the MCP server -mcp = FastMCP("example_mcp") - -# Constants -API_BASE_URL = "https://api.example.com/v1" - -# Enums -class ResponseFormat(str, Enum): - '''Output format for tool responses.''' - MARKDOWN = "markdown" - JSON = "json" - -# Pydantic Models for Input Validation -class UserSearchInput(BaseModel): - '''Input model for user search operations.''' - model_config = ConfigDict( - str_strip_whitespace=True, - validate_assignment=True - ) - - query: str = Field(..., description="Search string to match against names/emails", min_length=2, max_length=200) - limit: Optional[int] = Field(default=20, description="Maximum results to return", ge=1, le=100) - offset: Optional[int] = Field(default=0, description="Number of results to skip for pagination", ge=0) - response_format: ResponseFormat = Field(default=ResponseFormat.MARKDOWN, description="Output format") - - @field_validator('query') - @classmethod - def validate_query(cls, v: str) -> str: - if not v.strip(): - raise ValueError("Query cannot be empty or whitespace only") - return v.strip() - -# Shared utility functions -async def _make_api_request(endpoint: str, method: str = "GET", **kwargs) -> dict: - '''Reusable function for all API calls.''' - async with httpx.AsyncClient() as client: - response = await client.request( - method, - f"{API_BASE_URL}/{endpoint}", - timeout=30.0, - **kwargs - ) - response.raise_for_status() - return response.json() - -def _handle_api_error(e: Exception) -> str: - '''Consistent error formatting across all tools.''' - if isinstance(e, httpx.HTTPStatusError): - if e.response.status_code == 404: - return "Error: Resource not found. Please check the ID is correct." - elif e.response.status_code == 403: - return "Error: Permission denied. You don't have access to this resource." - elif e.response.status_code == 429: - return "Error: Rate limit exceeded. Please wait before making more requests." - return f"Error: API request failed with status {e.response.status_code}" - elif isinstance(e, httpx.TimeoutException): - return "Error: Request timed out. Please try again." - return f"Error: Unexpected error occurred: {type(e).__name__}" - -# Tool definitions -@mcp.tool( - name="example_search_users", - annotations={ - "title": "Search Example Users", - "readOnlyHint": True, - "destructiveHint": False, - "idempotentHint": True, - "openWorldHint": True - } -) -async def example_search_users(params: UserSearchInput) -> str: - '''Search for users in the Example system by name, email, or team. 
- - [Full docstring as shown above] - ''' - try: - # Make API request using validated parameters - data = await _make_api_request( - "users/search", - params={ - "q": params.query, - "limit": params.limit, - "offset": params.offset - } - ) - - users = data.get("users", []) - total = data.get("total", 0) - - if not users: - return f"No users found matching '{params.query}'" - - # Format response based on requested format - if params.response_format == ResponseFormat.MARKDOWN: - lines = [f"# User Search Results: '{params.query}'", ""] - lines.append(f"Found {total} users (showing {len(users)})") - lines.append("") - - for user in users: - lines.append(f"## {user['name']} ({user['id']})") - lines.append(f"- **Email**: {user['email']}") - if user.get('team'): - lines.append(f"- **Team**: {user['team']}") - lines.append("") - - return "\n".join(lines) - - else: - # Machine-readable JSON format - import json - response = { - "total": total, - "count": len(users), - "offset": params.offset, - "users": users - } - return json.dumps(response, indent=2) - - except Exception as e: - return _handle_api_error(e) - -if __name__ == "__main__": - mcp.run() -``` - ---- - -## Advanced FastMCP Features - -### Context Parameter Injection - -FastMCP can automatically inject a `Context` parameter into tools for advanced capabilities like logging, progress reporting, resource reading, and user interaction: - -```python -from mcp.server.fastmcp import FastMCP, Context - -mcp = FastMCP("example_mcp") - -@mcp.tool() -async def advanced_search(query: str, ctx: Context) -> str: - '''Advanced tool with context access for logging and progress.''' - - # Report progress for long operations - await ctx.report_progress(0.25, "Starting search...") - - # Log information for debugging - await ctx.log_info("Processing query", {"query": query, "timestamp": datetime.now()}) - - # Perform search - results = await search_api(query) - await ctx.report_progress(0.75, "Formatting results...") - - # Access server configuration - server_name = ctx.fastmcp.name - - return format_results(results) - -@mcp.tool() -async def interactive_tool(resource_id: str, ctx: Context) -> str: - '''Tool that can request additional input from users.''' - - # Request sensitive information when needed - api_key = await ctx.elicit( - prompt="Please provide your API key:", - input_type="password" - ) - - # Use the provided key - return await api_call(resource_id, api_key) -``` - -**Context capabilities:** - -- `ctx.report_progress(progress, message)` - Report progress for long operations -- `ctx.log_info(message, data)` / `ctx.log_error()` / `ctx.log_debug()` - Logging -- `ctx.elicit(prompt, input_type)` - Request input from users -- `ctx.fastmcp.name` - Access server configuration -- `ctx.read_resource(uri)` - Read MCP resources - -### Resource Registration - -Expose data as resources for efficient, template-based access: - -```python -@mcp.resource("file://documents/{name}") -async def get_document(name: str) -> str: - '''Expose documents as MCP resources. - - Resources are useful for static or semi-static data that doesn't - require complex parameters. They use URI templates for flexible access. 
- ''' - document_path = f"./docs/{name}" - with open(document_path, "r") as f: - return f.read() - -@mcp.resource("config://settings/{key}") -async def get_setting(key: str, ctx: Context) -> str: - '''Expose configuration as resources with context.''' - settings = await load_settings() - return json.dumps(settings.get(key, {})) -``` - -**When to use Resources vs Tools:** - -- **Resources**: For data access with simple parameters (URI templates) -- **Tools**: For complex operations with validation and business logic - -### Structured Output Types - -FastMCP supports multiple return types beyond strings: - -```python -from typing import TypedDict -from dataclasses import dataclass -from pydantic import BaseModel - -# TypedDict for structured returns -class UserData(TypedDict): - id: str - name: str - email: str - -@mcp.tool() -async def get_user_typed(user_id: str) -> UserData: - '''Returns structured data - FastMCP handles serialization.''' - return {"id": user_id, "name": "John Doe", "email": "john@example.com"} - -# Pydantic models for complex validation -class DetailedUser(BaseModel): - id: str - name: str - email: str - created_at: datetime - metadata: Dict[str, Any] - -@mcp.tool() -async def get_user_detailed(user_id: str) -> DetailedUser: - '''Returns Pydantic model - automatically generates schema.''' - user = await fetch_user(user_id) - return DetailedUser(**user) -``` - -### Lifespan Management - -Initialize resources that persist across requests: - -```python -from contextlib import asynccontextmanager - -@asynccontextmanager -async def app_lifespan(): - '''Manage resources that live for the server's lifetime.''' - # Initialize connections, load config, etc. - db = await connect_to_database() - config = load_configuration() - - # Make available to all tools - yield {"db": db, "config": config} - - # Cleanup on shutdown - await db.close() - -mcp = FastMCP("example_mcp", lifespan=app_lifespan) - -@mcp.tool() -async def query_data(query: str, ctx: Context) -> str: - '''Access lifespan resources through context.''' - db = ctx.request_context.lifespan_state["db"] - results = await db.query(query) - return format_results(results) -``` - -### Transport Options - -FastMCP supports two main transport mechanisms: - -```python -# stdio transport (for local tools) - default -if __name__ == "__main__": - mcp.run() - -# Streamable HTTP transport (for remote servers) -if __name__ == "__main__": - mcp.run(transport="streamable_http", port=8000) -``` - -**Transport selection:** - -- **stdio**: Command-line tools, local integrations, subprocess execution -- **Streamable HTTP**: Web services, remote access, multiple clients - ---- - -## Code Best Practices - -### Code Composability and Reusability - -Your implementation MUST prioritize composability and code reuse: - -1. **Extract Common Functionality**: - - - Create reusable helper functions for operations used across multiple tools - - Build shared API clients for HTTP requests instead of duplicating code - - Centralize error handling logic in utility functions - - Extract business logic into dedicated functions that can be composed - - Extract shared markdown or JSON field selection & formatting functionality - -2. 
**Avoid Duplication**: - - NEVER copy-paste similar code between tools - - If you find yourself writing similar logic twice, extract it into a function - - Common operations like pagination, filtering, field selection, and formatting should be shared - - Authentication/authorization logic should be centralized - -### Python-Specific Best Practices - -1. **Use Type Hints**: Always include type annotations for function parameters and return values -2. **Pydantic Models**: Define clear Pydantic models for all input validation -3. **Avoid Manual Validation**: Let Pydantic handle input validation with constraints -4. **Proper Imports**: Group imports (standard library, third-party, local) -5. **Error Handling**: Use specific exception types (httpx.HTTPStatusError, not generic Exception) -6. **Async Context Managers**: Use `async with` for resources that need cleanup -7. **Constants**: Define module-level constants in UPPER_CASE - -## Quality Checklist - -Before finalizing your Python MCP server implementation, ensure: - -### Strategic Design - -- [ ] Tools enable complete workflows, not just API endpoint wrappers -- [ ] Tool names reflect natural task subdivisions -- [ ] Response formats optimize for agent context efficiency -- [ ] Human-readable identifiers used where appropriate -- [ ] Error messages guide agents toward correct usage - -### Implementation Quality - -- [ ] FOCUSED IMPLEMENTATION: Most important and valuable tools implemented -- [ ] All tools have descriptive names and documentation -- [ ] Return types are consistent across similar operations -- [ ] Error handling is implemented for all external calls -- [ ] Server name follows format: `{service}_mcp` -- [ ] All network operations use async/await -- [ ] Common functionality is extracted into reusable functions -- [ ] Error messages are clear, actionable, and educational -- [ ] Outputs are properly validated and formatted - -### Tool Configuration - -- [ ] All tools implement 'name' and 'annotations' in the decorator -- [ ] Annotations correctly set (readOnlyHint, destructiveHint, idempotentHint, openWorldHint) -- [ ] All tools use Pydantic BaseModel for input validation with Field() definitions -- [ ] All Pydantic Fields have explicit types and descriptions with constraints -- [ ] All tools have comprehensive docstrings with explicit input/output types -- [ ] Docstrings include complete schema structure for dict/JSON returns -- [ ] Pydantic models handle input validation (no manual validation needed) - -### Advanced Features (where applicable) - -- [ ] Context injection used for logging, progress, or elicitation -- [ ] Resources registered for appropriate data endpoints -- [ ] Lifespan management implemented for persistent connections -- [ ] Structured output types used (TypedDict, Pydantic models) -- [ ] Appropriate transport configured (stdio or streamable HTTP) - -### Code Quality - -- [ ] File includes proper imports including Pydantic imports -- [ ] Pagination is properly implemented where applicable -- [ ] Filtering options are provided for potentially large result sets -- [ ] All async functions are properly defined with `async def` -- [ ] HTTP client usage follows async patterns with proper context managers -- [ ] Type hints are used throughout the code -- [ ] Constants are defined at module level in UPPER_CASE - -### Testing - -- [ ] Server runs successfully: `python your_server.py --help` -- [ ] All imports resolve correctly -- [ ] Sample tool calls work as expected -- [ ] Error scenarios handled gracefully diff --git 
a/.roo/skills/mcp-builder/scripts/connections.py b/.roo/skills/mcp-builder/scripts/connections.py deleted file mode 100644 index ffcd0da3fbe..00000000000 --- a/.roo/skills/mcp-builder/scripts/connections.py +++ /dev/null @@ -1,151 +0,0 @@ -"""Lightweight connection handling for MCP servers.""" - -from abc import ABC, abstractmethod -from contextlib import AsyncExitStack -from typing import Any - -from mcp import ClientSession, StdioServerParameters -from mcp.client.sse import sse_client -from mcp.client.stdio import stdio_client -from mcp.client.streamable_http import streamablehttp_client - - -class MCPConnection(ABC): - """Base class for MCP server connections.""" - - def __init__(self): - self.session = None - self._stack = None - - @abstractmethod - def _create_context(self): - """Create the connection context based on connection type.""" - - async def __aenter__(self): - """Initialize MCP server connection.""" - self._stack = AsyncExitStack() - await self._stack.__aenter__() - - try: - ctx = self._create_context() - result = await self._stack.enter_async_context(ctx) - - if len(result) == 2: - read, write = result - elif len(result) == 3: - read, write, _ = result - else: - raise ValueError(f"Unexpected context result: {result}") - - session_ctx = ClientSession(read, write) - self.session = await self._stack.enter_async_context(session_ctx) - await self.session.initialize() - return self - except BaseException: - await self._stack.__aexit__(None, None, None) - raise - - async def __aexit__(self, exc_type, exc_val, exc_tb): - """Clean up MCP server connection resources.""" - if self._stack: - await self._stack.__aexit__(exc_type, exc_val, exc_tb) - self.session = None - self._stack = None - - async def list_tools(self) -> list[dict[str, Any]]: - """Retrieve available tools from the MCP server.""" - response = await self.session.list_tools() - return [ - { - "name": tool.name, - "description": tool.description, - "input_schema": tool.inputSchema, - } - for tool in response.tools - ] - - async def call_tool(self, tool_name: str, arguments: dict[str, Any]) -> Any: - """Call a tool on the MCP server with provided arguments.""" - result = await self.session.call_tool(tool_name, arguments=arguments) - return result.content - - -class MCPConnectionStdio(MCPConnection): - """MCP connection using standard input/output.""" - - def __init__(self, command: str, args: list[str] = None, env: dict[str, str] = None): - super().__init__() - self.command = command - self.args = args or [] - self.env = env - - def _create_context(self): - return stdio_client( - StdioServerParameters(command=self.command, args=self.args, env=self.env) - ) - - -class MCPConnectionSSE(MCPConnection): - """MCP connection using Server-Sent Events.""" - - def __init__(self, url: str, headers: dict[str, str] = None): - super().__init__() - self.url = url - self.headers = headers or {} - - def _create_context(self): - return sse_client(url=self.url, headers=self.headers) - - -class MCPConnectionHTTP(MCPConnection): - """MCP connection using Streamable HTTP.""" - - def __init__(self, url: str, headers: dict[str, str] = None): - super().__init__() - self.url = url - self.headers = headers or {} - - def _create_context(self): - return streamablehttp_client(url=self.url, headers=self.headers) - - -def create_connection( - transport: str, - command: str = None, - args: list[str] = None, - env: dict[str, str] = None, - url: str = None, - headers: dict[str, str] = None, -) -> MCPConnection: - """Factory function to create the 
appropriate MCP connection. - - Args: - transport: Connection type ("stdio", "sse", or "http") - command: Command to run (stdio only) - args: Command arguments (stdio only) - env: Environment variables (stdio only) - url: Server URL (sse and http only) - headers: HTTP headers (sse and http only) - - Returns: - MCPConnection instance - """ - transport = transport.lower() - - if transport == "stdio": - if not command: - raise ValueError("Command is required for stdio transport") - return MCPConnectionStdio(command=command, args=args, env=env) - - elif transport == "sse": - if not url: - raise ValueError("URL is required for sse transport") - return MCPConnectionSSE(url=url, headers=headers) - - elif transport in ["http", "streamable_http", "streamable-http"]: - if not url: - raise ValueError("URL is required for http transport") - return MCPConnectionHTTP(url=url, headers=headers) - - else: - raise ValueError(f"Unsupported transport type: {transport}. Use 'stdio', 'sse', or 'http'") diff --git a/.roo/skills/mcp-builder/scripts/evaluation.py b/.roo/skills/mcp-builder/scripts/evaluation.py deleted file mode 100644 index 41778569c45..00000000000 --- a/.roo/skills/mcp-builder/scripts/evaluation.py +++ /dev/null @@ -1,373 +0,0 @@ -"""MCP Server Evaluation Harness - -This script evaluates MCP servers by running test questions against them using Claude. -""" - -import argparse -import asyncio -import json -import re -import sys -import time -import traceback -import xml.etree.ElementTree as ET -from pathlib import Path -from typing import Any - -from anthropic import Anthropic - -from connections import create_connection - -EVALUATION_PROMPT = """You are an AI assistant with access to tools. - -When given a task, you MUST: -1. Use the available tools to complete the task -2. Provide summary of each step in your approach, wrapped in tags -3. Provide feedback on the tools provided, wrapped in tags -4. Provide your final response, wrapped in tags - -Summary Requirements: -- In your tags, you must explain: - - The steps you took to complete the task - - Which tools you used, in what order, and why - - The inputs you provided to each tool - - The outputs you received from each tool - - A summary for how you arrived at the response - -Feedback Requirements: -- In your tags, provide constructive feedback on the tools: - - Comment on tool names: Are they clear and descriptive? - - Comment on input parameters: Are they well-documented? Are required vs optional parameters clear? - - Comment on descriptions: Do they accurately describe what the tool does? - - Comment on any errors encountered during tool usage: Did the tool fail to execute? Did the tool return too many tokens? 
- - Identify specific areas for improvement and explain WHY they would help - - Be specific and actionable in your suggestions - -Response Requirements: -- Your response should be concise and directly address what was asked -- Always wrap your final response in tags -- If you cannot solve the task return NOT_FOUND -- For numeric responses, provide just the number -- For IDs, provide just the ID -- For names or text, provide the exact text requested -- Your response should go last""" - - -def parse_evaluation_file(file_path: Path) -> list[dict[str, Any]]: - """Parse XML evaluation file with qa_pair elements.""" - try: - tree = ET.parse(file_path) - root = tree.getroot() - evaluations = [] - - for qa_pair in root.findall(".//qa_pair"): - question_elem = qa_pair.find("question") - answer_elem = qa_pair.find("answer") - - if question_elem is not None and answer_elem is not None: - evaluations.append({ - "question": (question_elem.text or "").strip(), - "answer": (answer_elem.text or "").strip(), - }) - - return evaluations - except Exception as e: - print(f"Error parsing evaluation file {file_path}: {e}") - return [] - - -def extract_xml_content(text: str, tag: str) -> str | None: - """Extract content from XML tags.""" - pattern = rf"<{tag}>(.*?)" - matches = re.findall(pattern, text, re.DOTALL) - return matches[-1].strip() if matches else None - - -async def agent_loop( - client: Anthropic, - model: str, - question: str, - tools: list[dict[str, Any]], - connection: Any, -) -> tuple[str, dict[str, Any]]: - """Run the agent loop with MCP tools.""" - messages = [{"role": "user", "content": question}] - - response = await asyncio.to_thread( - client.messages.create, - model=model, - max_tokens=4096, - system=EVALUATION_PROMPT, - messages=messages, - tools=tools, - ) - - messages.append({"role": "assistant", "content": response.content}) - - tool_metrics = {} - - while response.stop_reason == "tool_use": - tool_use = next(block for block in response.content if block.type == "tool_use") - tool_name = tool_use.name - tool_input = tool_use.input - - tool_start_ts = time.time() - try: - tool_result = await connection.call_tool(tool_name, tool_input) - tool_response = json.dumps(tool_result) if isinstance(tool_result, (dict, list)) else str(tool_result) - except Exception as e: - tool_response = f"Error executing tool {tool_name}: {str(e)}\n" - tool_response += traceback.format_exc() - tool_duration = time.time() - tool_start_ts - - if tool_name not in tool_metrics: - tool_metrics[tool_name] = {"count": 0, "durations": []} - tool_metrics[tool_name]["count"] += 1 - tool_metrics[tool_name]["durations"].append(tool_duration) - - messages.append({ - "role": "user", - "content": [{ - "type": "tool_result", - "tool_use_id": tool_use.id, - "content": tool_response, - }] - }) - - response = await asyncio.to_thread( - client.messages.create, - model=model, - max_tokens=4096, - system=EVALUATION_PROMPT, - messages=messages, - tools=tools, - ) - messages.append({"role": "assistant", "content": response.content}) - - response_text = next( - (block.text for block in response.content if hasattr(block, "text")), - None, - ) - return response_text, tool_metrics - - -async def evaluate_single_task( - client: Anthropic, - model: str, - qa_pair: dict[str, Any], - tools: list[dict[str, Any]], - connection: Any, - task_index: int, -) -> dict[str, Any]: - """Evaluate a single QA pair with the given tools.""" - start_time = time.time() - - print(f"Task {task_index + 1}: Running task with question: {qa_pair['question']}") - 
response, tool_metrics = await agent_loop(client, model, qa_pair["question"], tools, connection) - - response_value = extract_xml_content(response, "response") - summary = extract_xml_content(response, "summary") - feedback = extract_xml_content(response, "feedback") - - duration_seconds = time.time() - start_time - - return { - "question": qa_pair["question"], - "expected": qa_pair["answer"], - "actual": response_value, - "score": int(response_value == qa_pair["answer"]) if response_value else 0, - "total_duration": duration_seconds, - "tool_calls": tool_metrics, - "num_tool_calls": sum(len(metrics["durations"]) for metrics in tool_metrics.values()), - "summary": summary, - "feedback": feedback, - } - - -REPORT_HEADER = """ -# Evaluation Report - -## Summary - -- **Accuracy**: {correct}/{total} ({accuracy:.1f}%) -- **Average Task Duration**: {average_duration_s:.2f}s -- **Average Tool Calls per Task**: {average_tool_calls:.2f} -- **Total Tool Calls**: {total_tool_calls} - ---- -""" - -TASK_TEMPLATE = """ -### Task {task_num} - -**Question**: {question} -**Ground Truth Answer**: `{expected_answer}` -**Actual Answer**: `{actual_answer}` -**Correct**: {correct_indicator} -**Duration**: {total_duration:.2f}s -**Tool Calls**: {tool_calls} - -**Summary** -{summary} - -**Feedback** -{feedback} - ---- -""" - - -async def run_evaluation( - eval_path: Path, - connection: Any, - model: str = "claude-3-7-sonnet-20250219", -) -> str: - """Run evaluation with MCP server tools.""" - print("🚀 Starting Evaluation") - - client = Anthropic() - - tools = await connection.list_tools() - print(f"📋 Loaded {len(tools)} tools from MCP server") - - qa_pairs = parse_evaluation_file(eval_path) - print(f"📋 Loaded {len(qa_pairs)} evaluation tasks") - - results = [] - for i, qa_pair in enumerate(qa_pairs): - print(f"Processing task {i + 1}/{len(qa_pairs)}") - result = await evaluate_single_task(client, model, qa_pair, tools, connection, i) - results.append(result) - - correct = sum(r["score"] for r in results) - accuracy = (correct / len(results)) * 100 if results else 0 - average_duration_s = sum(r["total_duration"] for r in results) / len(results) if results else 0 - average_tool_calls = sum(r["num_tool_calls"] for r in results) / len(results) if results else 0 - total_tool_calls = sum(r["num_tool_calls"] for r in results) - - report = REPORT_HEADER.format( - correct=correct, - total=len(results), - accuracy=accuracy, - average_duration_s=average_duration_s, - average_tool_calls=average_tool_calls, - total_tool_calls=total_tool_calls, - ) - - report += "".join([ - TASK_TEMPLATE.format( - task_num=i + 1, - question=qa_pair["question"], - expected_answer=qa_pair["answer"], - actual_answer=result["actual"] or "N/A", - correct_indicator="✅" if result["score"] else "❌", - total_duration=result["total_duration"], - tool_calls=json.dumps(result["tool_calls"], indent=2), - summary=result["summary"] or "N/A", - feedback=result["feedback"] or "N/A", - ) - for i, (qa_pair, result) in enumerate(zip(qa_pairs, results)) - ]) - - return report - - -def parse_headers(header_list: list[str]) -> dict[str, str]: - """Parse header strings in format 'Key: Value' into a dictionary.""" - headers = {} - if not header_list: - return headers - - for header in header_list: - if ":" in header: - key, value = header.split(":", 1) - headers[key.strip()] = value.strip() - else: - print(f"Warning: Ignoring malformed header: {header}") - return headers - - -def parse_env_vars(env_list: list[str]) -> dict[str, str]: - """Parse environment variable 
strings in format 'KEY=VALUE' into a dictionary.""" - env = {} - if not env_list: - return env - - for env_var in env_list: - if "=" in env_var: - key, value = env_var.split("=", 1) - env[key.strip()] = value.strip() - else: - print(f"Warning: Ignoring malformed environment variable: {env_var}") - return env - - -async def main(): - parser = argparse.ArgumentParser( - description="Evaluate MCP servers using test questions", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - # Evaluate a local stdio MCP server - python evaluation.py -t stdio -c python -a my_server.py eval.xml - - # Evaluate an SSE MCP server - python evaluation.py -t sse -u https://example.com/mcp -H "Authorization: Bearer token" eval.xml - - # Evaluate an HTTP MCP server with custom model - python evaluation.py -t http -u https://example.com/mcp -m claude-3-5-sonnet-20241022 eval.xml - """, - ) - - parser.add_argument("eval_file", type=Path, help="Path to evaluation XML file") - parser.add_argument("-t", "--transport", choices=["stdio", "sse", "http"], default="stdio", help="Transport type (default: stdio)") - parser.add_argument("-m", "--model", default="claude-3-7-sonnet-20250219", help="Claude model to use (default: claude-3-7-sonnet-20250219)") - - stdio_group = parser.add_argument_group("stdio options") - stdio_group.add_argument("-c", "--command", help="Command to run MCP server (stdio only)") - stdio_group.add_argument("-a", "--args", nargs="+", help="Arguments for the command (stdio only)") - stdio_group.add_argument("-e", "--env", nargs="+", help="Environment variables in KEY=VALUE format (stdio only)") - - remote_group = parser.add_argument_group("sse/http options") - remote_group.add_argument("-u", "--url", help="MCP server URL (sse/http only)") - remote_group.add_argument("-H", "--header", nargs="+", dest="headers", help="HTTP headers in 'Key: Value' format (sse/http only)") - - parser.add_argument("-o", "--output", type=Path, help="Output file for evaluation report (default: stdout)") - - args = parser.parse_args() - - if not args.eval_file.exists(): - print(f"Error: Evaluation file not found: {args.eval_file}") - sys.exit(1) - - headers = parse_headers(args.headers) if args.headers else None - env_vars = parse_env_vars(args.env) if args.env else None - - try: - connection = create_connection( - transport=args.transport, - command=args.command, - args=args.args, - env=env_vars, - url=args.url, - headers=headers, - ) - except ValueError as e: - print(f"Error: {e}") - sys.exit(1) - - print(f"🔗 Connecting to MCP server via {args.transport}...") - - async with connection: - print("✅ Connected successfully") - report = await run_evaluation(args.eval_file, connection, args.model) - - if args.output: - args.output.write_text(report) - print(f"\n✅ Report saved to {args.output}") - else: - print("\n" + report) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/.roo/skills/mcp-builder/scripts/example_evaluation.xml b/.roo/skills/mcp-builder/scripts/example_evaluation.xml deleted file mode 100644 index 41e4459b5af..00000000000 --- a/.roo/skills/mcp-builder/scripts/example_evaluation.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - Calculate the compound interest on $10,000 invested at 5% annual interest rate, compounded monthly for 3 years. What is the final amount in dollars (rounded to 2 decimal places)? - 11614.72 - - - A projectile is launched at a 45-degree angle with an initial velocity of 50 m/s. 
Calculate the total distance (in meters) it has traveled from the launch point after 2 seconds, assuming g=9.8 m/s². Round to 2 decimal places. - 87.25 - - - A sphere has a volume of 500 cubic meters. Calculate its surface area in square meters. Round to 2 decimal places. - 304.65 - - - Calculate the population standard deviation of this dataset: [12, 15, 18, 22, 25, 30, 35]. Round to 2 decimal places. - 7.61 - - - Calculate the pH of a solution with a hydrogen ion concentration of 3.5 × 10^-5 M. Round to 2 decimal places. - 4.46 - - diff --git a/.roo/skills/mcp-builder/scripts/requirements.txt b/.roo/skills/mcp-builder/scripts/requirements.txt deleted file mode 100644 index e73e5d1e356..00000000000 --- a/.roo/skills/mcp-builder/scripts/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -anthropic>=0.39.0 -mcp>=1.1.0 diff --git a/src/services/skills/__tests__/generate-built-in-skills.spec.ts b/src/services/skills/__tests__/generate-built-in-skills.spec.ts index 0c954953a5f..10b44b87163 100644 --- a/src/services/skills/__tests__/generate-built-in-skills.spec.ts +++ b/src/services/skills/__tests__/generate-built-in-skills.spec.ts @@ -152,7 +152,7 @@ describe("built-in skills integration", () => { // Verify we have the expected skills const skillNames = skills.map((s) => s.name) - expect(skillNames).toContain("mcp-builder") + expect(skillNames).toContain("create-mcp-server") expect(skillNames).toContain("create-mode") // Verify each skill has valid content diff --git a/src/services/skills/built-in-skills.ts b/src/services/skills/built-in-skills.ts index afe479942c6..a47092b38b4 100644 --- a/src/services/skills/built-in-skills.ts +++ b/src/services/skills/built-in-skills.ts @@ -5,7 +5,7 @@ * in the built-in/ directory. To modify built-in skills, edit the corresponding * SKILL.md file and run: pnpm generate:skills * - * Generated at: 2026-01-28T22:42:03.971Z + * Generated at: 2026-01-28T23:09:14.137Z */ import { SkillMetadata, SkillContent } from "../../shared/skills" @@ -17,6 +17,315 @@ interface BuiltInSkillDefinition { } const BUILT_IN_SKILLS: Record = { + "create-mcp-server": { + name: "create-mcp-server", + description: + "Instructions for creating MCP (Model Context Protocol) servers that expose tools and resources for the agent to use. Use when the user asks to create a new MCP server or add MCP capabilities.", + instructions: `You have the ability to create an MCP server and add it to a configuration file that will then expose the tools and resources for you to use with \`use_mcp_tool\` and \`access_mcp_resource\`. + +When creating MCP servers, it's important to understand that they operate in a non-interactive environment. The server cannot initiate OAuth flows, open browser windows, or prompt for user input during runtime. All credentials and authentication tokens must be provided upfront through environment variables in the MCP settings configuration. For example, Spotify's API uses OAuth to get a refresh token for the user, but the MCP server cannot initiate this flow. While you can walk the user through obtaining an application client ID and secret, you may have to create a separate one-time setup script (like get-refresh-token.js) that captures and logs the final piece of the puzzle: the user's refresh token (i.e. you might run the script using execute_command which would open a browser for authentication, and then log the refresh token so that you can see it in the command output for you to use in the MCP settings configuration). 
+ +Unless the user specifies otherwise, new local MCP servers should be created in your MCP servers directory. You can find the path to this directory by checking the MCP settings file, or ask the user where they'd like the server created. + +### MCP Server Types and Configuration + +MCP servers can be configured in two ways in the MCP settings file: + +1. Local (Stdio) Server Configuration: +\`\`\`json +{ + "mcpServers": { + "local-weather": { + "command": "node", + "args": ["/path/to/weather-server/build/index.js"], + "env": { + "OPENWEATHER_API_KEY": "your-api-key" + } + } + } +} +\`\`\` + +2. Remote (SSE) Server Configuration: +\`\`\`json +{ + "mcpServers": { + "remote-weather": { + "url": "https://api.example.com/mcp", + "headers": { + "Authorization": "Bearer your-api-key" + } + } + } +} +\`\`\` + +Common configuration options for both types: +- \`disabled\`: (optional) Set to true to temporarily disable the server +- \`timeout\`: (optional) Maximum time in seconds to wait for server responses (default: 60) +- \`alwaysAllow\`: (optional) Array of tool names that don't require user confirmation +- \`disabledTools\`: (optional) Array of tool names that are not included in the system prompt and won't be used + +### Example Local MCP Server + +For example, if the user wanted to give you the ability to retrieve weather information, you could create an MCP server that uses the OpenWeather API to get weather information, add it to the MCP settings configuration file, and then notice that you now have access to new tools and resources in the system prompt that you might use to show the user your new capabilities. + +The following example demonstrates how to build a local MCP server that provides weather data functionality using the Stdio transport. While this example shows how to implement resources, resource templates, and tools, in practice you should prefer using tools since they are more flexible and can handle dynamic parameters. The resource and resource template implementations are included here mainly for demonstration purposes of the different MCP capabilities, but a real weather server would likely just expose tools for fetching weather data. (The following steps are for macOS) + +1. Use the \`create-typescript-server\` tool to bootstrap a new project in your MCP servers directory: + +\`\`\`bash +cd /path/to/your/mcp-servers +npx @modelcontextprotocol/create-server weather-server +cd weather-server +# Install dependencies +npm install axios zod @modelcontextprotocol/sdk +\`\`\` + +This will create a new project with the following structure: + +\`\`\` +weather-server/ + ├── package.json + { + ... + "type": "module", // added by default, uses ES module syntax (import/export) rather than CommonJS (require/module.exports) (Important to know if you create additional scripts in this server repository like a get-refresh-token.js script) + "scripts": { + "build": "tsc && node -e \\"require('fs').chmodSync('build/index.js', '755')\\"", + ... + } + ... + } + ├── tsconfig.json + └── src/ + └── index.ts # Main server implementation +\`\`\` + +2. 
Replace \`src/index.ts\` with the following:
+
+\`\`\`typescript
+#!/usr/bin/env node
+import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
+import { z } from "zod";
+import axios from 'axios';
+
+const API_KEY = process.env.OPENWEATHER_API_KEY; // provided by MCP config
+if (!API_KEY) {
+  throw new Error('OPENWEATHER_API_KEY environment variable is required');
+}
+
+// Define types for OpenWeather API responses
+interface WeatherData {
+  main: {
+    temp: number;
+    humidity: number;
+  };
+  weather: Array<{
+    description: string;
+  }>;
+  wind: {
+    speed: number;
+  };
+}
+
+interface ForecastData {
+  list: Array<WeatherData & { dt_txt: string }>;
+}
+
+// Create an MCP server
+const server = new McpServer({
+  name: "weather-server",
+  version: "0.1.0"
+});
+
+// Create axios instance for OpenWeather API
+const weatherApi = axios.create({
+  baseURL: 'http://api.openweathermap.org/data/2.5',
+  params: {
+    appid: API_KEY,
+    units: 'metric',
+  },
+});
+
+// Add a tool for getting weather forecasts
+server.tool(
+  "get_forecast",
+  {
+    city: z.string().describe("City name"),
+    days: z.number().min(1).max(5).optional().describe("Number of days (1-5)"),
+  },
+  async ({ city, days = 3 }) => {
+    try {
+      const response = await weatherApi.get('forecast', {
+        params: {
+          q: city,
+          cnt: Math.min(days, 5) * 8,
+        },
+      });
+
+      return {
+        content: [
+          {
+            type: "text",
+            text: JSON.stringify(response.data.list, null, 2),
+          },
+        ],
+      };
+    } catch (error) {
+      if (axios.isAxiosError(error)) {
+        return {
+          content: [
+            {
+              type: "text",
+              text: \`Weather API error: \${
+                error.response?.data.message ?? error.message
+              }\`,
+            },
+          ],
+          isError: true,
+        };
+      }
+      throw error;
+    }
+  }
+);
+
+// Add a resource for current weather in San Francisco
+server.resource(
+  "sf_weather",
+  { uri: "weather://San Francisco/current", list: true },
+  async (uri) => {
+    try {
+      const response = await weatherApi.get('weather', {
+        params: { q: "San Francisco" },
+      });
+
+      return {
+        contents: [
+          {
+            uri: uri.href,
+            mimeType: "application/json",
+            text: JSON.stringify(
+              {
+                temperature: response.data.main.temp,
+                conditions: response.data.weather[0].description,
+                humidity: response.data.main.humidity,
+                wind_speed: response.data.wind.speed,
+                timestamp: new Date().toISOString(),
+              },
+              null,
+              2
+            ),
+          },
+        ],
+      };
+    } catch (error) {
+      if (axios.isAxiosError(error)) {
+        throw new Error(\`Weather API error: \${
+          error.response?.data.message ?? error.message
+        }\`);
+      }
+      throw error;
+    }
+  }
+);
+
+// Add a dynamic resource template for current weather by city
+server.resource(
+  "current_weather",
+  new ResourceTemplate("weather://{city}/current", { list: true }),
+  async (uri, { city }) => {
+    try {
+      const response = await weatherApi.get('weather', {
+        params: { q: city },
+      });
+
+      return {
+        contents: [
+          {
+            uri: uri.href,
+            mimeType: "application/json",
+            text: JSON.stringify(
+              {
+                temperature: response.data.main.temp,
+                conditions: response.data.weather[0].description,
+                humidity: response.data.main.humidity,
+                wind_speed: response.data.wind.speed,
+                timestamp: new Date().toISOString(),
+              },
+              null,
+              2
+            ),
+          },
+        ],
+      };
+    } catch (error) {
+      if (axios.isAxiosError(error)) {
+        throw new Error(\`Weather API error: \${
+          error.response?.data.message ?? 
error.message + }\`); + } + throw error; + } + } +); + +// Start receiving messages on stdin and sending messages on stdout +const transport = new StdioServerTransport(); +await server.connect(transport); +console.error('Weather MCP server running on stdio'); +\`\`\` + +(Remember: This is just an example–you may use different dependencies, break the implementation up into multiple files, etc.) + +3. Build and compile the executable JavaScript file + +\`\`\`bash +npm run build +\`\`\` + +4. Whenever you need an environment variable such as an API key to configure the MCP server, walk the user through the process of getting the key. For example, they may need to create an account and go to a developer dashboard to generate the key. Provide step-by-step instructions and URLs to make it easy for the user to retrieve the necessary information. Then use the ask_followup_question tool to ask the user for the key, in this case the OpenWeather API key. + +5. Install the MCP Server by adding the MCP server configuration to the MCP settings file. On macOS/Linux this is typically at \`~/.roo-code/settings/mcp_settings.json\`, on Windows at \`%APPDATA%\\roo-code\\settings\\mcp_settings.json\`. The settings file may have other MCP servers already configured, so you would read it first and then add your new server to the existing \`mcpServers\` object. + +IMPORTANT: Regardless of what else you see in the MCP settings file, you must default any new MCP servers you create to disabled=false, alwaysAllow=[] and disabledTools=[]. + +\`\`\`json +{ + "mcpServers": { + ..., + "weather": { + "command": "node", + "args": ["/path/to/weather-server/build/index.js"], + "env": { + "OPENWEATHER_API_KEY": "user-provided-api-key" + } + }, + } +} +\`\`\` + +(Note: the user may also ask you to install the MCP server to the Claude desktop app, in which case you would read then modify \`~/Library/Application\\ Support/Claude/claude_desktop_config.json\` on macOS for example. It follows the same format of a top level \`mcpServers\` object.) + +6. After you have edited the MCP settings configuration file, the system will automatically run all the servers and expose the available tools and resources in the 'Connected MCP Servers' section. + +7. Now that you have access to these new tools and resources, you may suggest ways the user can command you to invoke them - for example, with this new weather tool now available, you can invite the user to ask "what's the weather in San Francisco?" + +## Editing MCP Servers + +The user may ask to add tools or resources that may make sense to add to an existing MCP server (listed under 'Connected MCP Servers' in the system prompt), e.g. if it would use the same API. This would be possible if you can locate the MCP server repository on the user's system by looking at the server arguments for a filepath. You might then use list_files and read_file to explore the files in the repository, and use write_to_file or apply_diff to make changes to the files. + +However some MCP servers may be running from installed packages rather than a local repository, in which case it may make more sense to create a new MCP server. + +# MCP Servers Are Not Always Necessary + +The user may not always request the use or creation of MCP servers. Instead, they might provide tasks that can be completed with existing tools. While using the MCP SDK to extend your capabilities can be useful, it's important to understand that this is just one specialized type of task you can accomplish. 
You should only implement MCP servers when the user explicitly requests it (e.g., "add a tool that..."). + +Remember: The MCP documentation and example provided above are to help you understand and work with existing MCP servers or create new ones when requested by the user. You already have access to tools and capabilities that can be used to accomplish a wide range of tasks.`, + }, "create-mode": { name: "create-mode", description: @@ -74,261 +383,6 @@ customModes: - mcp # MCP group (use_mcp_tool, access_mcp_resource) customInstructions: Additional instructions for the Designer mode # Optional`, }, - "mcp-builder": { - name: "mcp-builder", - description: - "Guide for creating high-quality MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. Use when building MCP servers to integrate external APIs or services, whether in Python (FastMCP) or Node/TypeScript (MCP SDK).", - instructions: `# MCP Server Development Guide - -## Overview - -Create MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. The quality of an MCP server is measured by how well it enables LLMs to accomplish real-world tasks. - ---- - -# Process - -## 🚀 High-Level Workflow - -Creating a high-quality MCP server involves four main phases: - -### Phase 1: Deep Research and Planning - -#### 1.1 Understand Modern MCP Design - -**API Coverage vs. Workflow Tools:** -Balance comprehensive API endpoint coverage with specialized workflow tools. Workflow tools can be more convenient for specific tasks, while comprehensive coverage gives agents flexibility to compose operations. Performance varies by client—some clients benefit from code execution that combines basic tools, while others work better with higher-level workflows. When uncertain, prioritize comprehensive API coverage. - -**Tool Naming and Discoverability:** -Clear, descriptive tool names help agents find the right tools quickly. Use consistent prefixes (e.g., \`github_create_issue\`, \`github_list_repos\`) and action-oriented naming. - -**Context Management:** -Agents benefit from concise tool descriptions and the ability to filter/paginate results. Design tools that return focused, relevant data. Some clients support code execution which can help agents filter and process data efficiently. - -**Actionable Error Messages:** -Error messages should guide agents toward solutions with specific suggestions and next steps. - -#### 1.2 Study MCP Protocol Documentation - -**Navigate the MCP specification:** - -Start with the sitemap to find relevant pages: \`https://modelcontextprotocol.io/sitemap.xml\` - -Then fetch specific pages with \`.md\` suffix for markdown format (e.g., \`https://modelcontextprotocol.io/specification/draft.md\`). - -Key pages to review: - -- Specification overview and architecture -- Transport mechanisms (streamable HTTP, stdio) -- Tool, resource, and prompt definitions - -#### 1.3 Study Framework Documentation - -**Recommended stack:** - -- **Language**: TypeScript (high-quality SDK support and good compatibility in many execution environments e.g. MCPB. Plus AI models are good at generating TypeScript code, benefiting from its broad usage, static typing and good linting tools) -- **Transport**: Streamable HTTP for remote servers, using stateless JSON (simpler to scale and maintain, as opposed to stateful sessions and streaming responses). stdio for local servers. 
- -**Load framework documentation:** - -- **MCP Best Practices**: [📋 View Best Practices](./reference/mcp_best_practices.md) - Core guidelines - -**For TypeScript (recommended):** - -- **TypeScript SDK**: Use WebFetch to load \`https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md\` -- [⚡ TypeScript Guide](./reference/node_mcp_server.md) - TypeScript patterns and examples - -**For Python:** - -- **Python SDK**: Use WebFetch to load \`https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md\` -- [🐍 Python Guide](./reference/python_mcp_server.md) - Python patterns and examples - -#### 1.4 Plan Your Implementation - -**Understand the API:** -Review the service's API documentation to identify key endpoints, authentication requirements, and data models. Use web search and WebFetch as needed. - -**Tool Selection:** -Prioritize comprehensive API coverage. List endpoints to implement, starting with the most common operations. - ---- - -### Phase 2: Implementation - -#### 2.1 Set Up Project Structure - -See language-specific guides for project setup: - -- [⚡ TypeScript Guide](./reference/node_mcp_server.md) - Project structure, package.json, tsconfig.json -- [🐍 Python Guide](./reference/python_mcp_server.md) - Module organization, dependencies - -#### 2.2 Implement Core Infrastructure - -Create shared utilities: - -- API client with authentication -- Error handling helpers -- Response formatting (JSON/Markdown) -- Pagination support - -#### 2.3 Implement Tools - -For each tool: - -**Input Schema:** - -- Use Zod (TypeScript) or Pydantic (Python) -- Include constraints and clear descriptions -- Add examples in field descriptions - -**Output Schema:** - -- Define \`outputSchema\` where possible for structured data -- Use \`structuredContent\` in tool responses (TypeScript SDK feature) -- Helps clients understand and process tool outputs - -**Tool Description:** - -- Concise summary of functionality -- Parameter descriptions -- Return type schema - -**Implementation:** - -- Async/await for I/O operations -- Proper error handling with actionable messages -- Support pagination where applicable -- Return both text content and structured data when using modern SDKs - -**Annotations:** - -- \`readOnlyHint\`: true/false -- \`destructiveHint\`: true/false -- \`idempotentHint\`: true/false -- \`openWorldHint\`: true/false - ---- - -### Phase 3: Review and Test - -#### 3.1 Code Quality - -Review for: - -- No duplicated code (DRY principle) -- Consistent error handling -- Full type coverage -- Clear tool descriptions - -#### 3.2 Build and Test - -**TypeScript:** - -- Run \`npm run build\` to verify compilation -- Test with MCP Inspector: \`npx @modelcontextprotocol/inspector\` - -**Python:** - -- Verify syntax: \`python -m py_compile your_server.py\` -- Test with MCP Inspector - -See language-specific guides for detailed testing approaches and quality checklists. - ---- - -### Phase 4: Create Evaluations - -After implementing your MCP server, create comprehensive evaluations to test its effectiveness. - -**Load [✅ Evaluation Guide](./reference/evaluation.md) for complete evaluation guidelines.** - -#### 4.1 Understand Evaluation Purpose - -Use evaluations to test whether LLMs can effectively use your MCP server to answer realistic, complex questions. - -#### 4.2 Create 10 Evaluation Questions - -To create effective evaluations, follow the process outlined in the evaluation guide: - -1. 
**Tool Inspection**: List available tools and understand their capabilities -2. **Content Exploration**: Use READ-ONLY operations to explore available data -3. **Question Generation**: Create 10 complex, realistic questions -4. **Answer Verification**: Solve each question yourself to verify answers - -#### 4.3 Evaluation Requirements - -Ensure each question is: - -- **Independent**: Not dependent on other questions -- **Read-only**: Only non-destructive operations required -- **Complex**: Requiring multiple tool calls and deep exploration -- **Realistic**: Based on real use cases humans would care about -- **Verifiable**: Single, clear answer that can be verified by string comparison -- **Stable**: Answer won't change over time - -#### 4.4 Output Format - -Create an XML file with this structure: - -\`\`\`xml - - - Find discussions about AI model launches with animal codenames. One model needed a specific safety designation that uses the format ASL-X. What number X was being determined for the model named after a spotted wild cat? - 3 - - - -\`\`\` - ---- - -# Reference Files - -## 📚 Documentation Library - -Load these resources as needed during development: - -### Core MCP Documentation (Load First) - -- **MCP Protocol**: Start with sitemap at \`https://modelcontextprotocol.io/sitemap.xml\`, then fetch specific pages with \`.md\` suffix -- [📋 MCP Best Practices](./reference/mcp_best_practices.md) - Universal MCP guidelines including: - - Server and tool naming conventions - - Response format guidelines (JSON vs Markdown) - - Pagination best practices - - Transport selection (streamable HTTP vs stdio) - - Security and error handling standards - -### SDK Documentation (Load During Phase 1/2) - -- **Python SDK**: Fetch from \`https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md\` -- **TypeScript SDK**: Fetch from \`https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md\` - -### Language-Specific Implementation Guides (Load During Phase 2) - -- [🐍 Python Implementation Guide](./reference/python_mcp_server.md) - Complete Python/FastMCP guide with: - - - Server initialization patterns - - Pydantic model examples - - Tool registration with \`@mcp.tool\` - - Complete working examples - - Quality checklist - -- [⚡ TypeScript Implementation Guide](./reference/node_mcp_server.md) - Complete TypeScript guide with: - - Project structure - - Zod schema patterns - - Tool registration with \`server.registerTool\` - - Complete working examples - - Quality checklist - -### Evaluation Guide (Load During Phase 4) - -- [✅ Evaluation Guide](./reference/evaluation.md) - Complete evaluation creation guide with: - - Question creation guidelines - - Answer verification strategies - - XML format specifications - - Example questions and answers - - Running an evaluation with the provided scripts`, - }, } /** diff --git a/src/services/skills/built-in/create-mcp-server/SKILL.md b/src/services/skills/built-in/create-mcp-server/SKILL.md new file mode 100644 index 00000000000..be52e91c890 --- /dev/null +++ b/src/services/skills/built-in/create-mcp-server/SKILL.md @@ -0,0 +1,304 @@ +--- +name: create-mcp-server +description: Instructions for creating MCP (Model Context Protocol) servers that expose tools and resources for the agent to use. Use when the user asks to create a new MCP server or add MCP capabilities. 
+--- + +You have the ability to create an MCP server and add it to a configuration file that will then expose the tools and resources for you to use with `use_mcp_tool` and `access_mcp_resource`. + +When creating MCP servers, it's important to understand that they operate in a non-interactive environment. The server cannot initiate OAuth flows, open browser windows, or prompt for user input during runtime. All credentials and authentication tokens must be provided upfront through environment variables in the MCP settings configuration. For example, Spotify's API uses OAuth to get a refresh token for the user, but the MCP server cannot initiate this flow. While you can walk the user through obtaining an application client ID and secret, you may have to create a separate one-time setup script (like get-refresh-token.js) that captures and logs the final piece of the puzzle: the user's refresh token (i.e. you might run the script using execute_command which would open a browser for authentication, and then log the refresh token so that you can see it in the command output for you to use in the MCP settings configuration). + +Unless the user specifies otherwise, new local MCP servers should be created in your MCP servers directory. You can find the path to this directory by checking the MCP settings file, or ask the user where they'd like the server created. + +### MCP Server Types and Configuration + +MCP servers can be configured in two ways in the MCP settings file: + +1. Local (Stdio) Server Configuration: + +```json +{ + "mcpServers": { + "local-weather": { + "command": "node", + "args": ["/path/to/weather-server/build/index.js"], + "env": { + "OPENWEATHER_API_KEY": "your-api-key" + } + } + } +} +``` + +2. Remote (SSE) Server Configuration: + +```json +{ + "mcpServers": { + "remote-weather": { + "url": "https://api.example.com/mcp", + "headers": { + "Authorization": "Bearer your-api-key" + } + } + } +} +``` + +Common configuration options for both types: + +- `disabled`: (optional) Set to true to temporarily disable the server +- `timeout`: (optional) Maximum time in seconds to wait for server responses (default: 60) +- `alwaysAllow`: (optional) Array of tool names that don't require user confirmation +- `disabledTools`: (optional) Array of tool names that are not included in the system prompt and won't be used + +### Example Local MCP Server + +For example, if the user wanted to give you the ability to retrieve weather information, you could create an MCP server that uses the OpenWeather API to get weather information, add it to the MCP settings configuration file, and then notice that you now have access to new tools and resources in the system prompt that you might use to show the user your new capabilities. + +The following example demonstrates how to build a local MCP server that provides weather data functionality using the Stdio transport. While this example shows how to implement resources, resource templates, and tools, in practice you should prefer using tools since they are more flexible and can handle dynamic parameters. The resource and resource template implementations are included here mainly for demonstration purposes of the different MCP capabilities, but a real weather server would likely just expose tools for fetching weather data. (The following steps are for macOS) + +1. 
Use the `create-typescript-server` tool to bootstrap a new project in your MCP servers directory:
+
+```bash
+cd /path/to/your/mcp-servers
+npx @modelcontextprotocol/create-server weather-server
+cd weather-server
+# Install dependencies
+npm install axios zod @modelcontextprotocol/sdk
+```
+
+This will create a new project with the following structure:
+
+```
+weather-server/
+  ├── package.json
+  {
+    ...
+    "type": "module", // added by default, uses ES module syntax (import/export) rather than CommonJS (require/module.exports) (Important to know if you create additional scripts in this server repository like a get-refresh-token.js script)
+    "scripts": {
+      "build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"",
+      ...
+    }
+    ...
+  }
+  ├── tsconfig.json
+  └── src/
+      └── index.ts # Main server implementation
+```
+
+2. Replace `src/index.ts` with the following:
+
+```typescript
+#!/usr/bin/env node
+import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js"
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
+import { z } from "zod"
+import axios from "axios"
+
+const API_KEY = process.env.OPENWEATHER_API_KEY // provided by MCP config
+if (!API_KEY) {
+	throw new Error("OPENWEATHER_API_KEY environment variable is required")
+}
+
+// Define types for OpenWeather API responses
+interface WeatherData {
+	main: {
+		temp: number
+		humidity: number
+	}
+	weather: Array<{
+		description: string
+	}>
+	wind: {
+		speed: number
+	}
+}
+
+interface ForecastData {
+	list: Array<
+		WeatherData & {
+			dt_txt: string
+		}
+	>
+}
+
+// Create an MCP server
+const server = new McpServer({
+	name: "weather-server",
+	version: "0.1.0",
+})
+
+// Create axios instance for OpenWeather API
+const weatherApi = axios.create({
+	baseURL: "http://api.openweathermap.org/data/2.5",
+	params: {
+		appid: API_KEY,
+		units: "metric",
+	},
+})
+
+// Add a tool for getting weather forecasts
+server.tool(
+	"get_forecast",
+	{
+		city: z.string().describe("City name"),
+		days: z.number().min(1).max(5).optional().describe("Number of days (1-5)"),
+	},
+	async ({ city, days = 3 }) => {
+		try {
+			const response = await weatherApi.get("forecast", {
+				params: {
+					q: city,
+					cnt: Math.min(days, 5) * 8,
+				},
+			})
+
+			return {
+				content: [
+					{
+						type: "text",
+						text: JSON.stringify(response.data.list, null, 2),
+					},
+				],
+			}
+		} catch (error) {
+			if (axios.isAxiosError(error)) {
+				return {
+					content: [
+						{
+							type: "text",
+							text: `Weather API error: ${error.response?.data.message ?? error.message}`,
+						},
+					],
+					isError: true,
+				}
+			}
+			throw error
+		}
+	},
+)
+
+// Add a resource for current weather in San Francisco
+server.resource("sf_weather", { uri: "weather://San Francisco/current", list: true }, async (uri) => {
+	try {
+		const response = await weatherApi.get("weather", {
+			params: { q: "San Francisco" },
+		})
+
+		return {
+			contents: [
+				{
+					uri: uri.href,
+					mimeType: "application/json",
+					text: JSON.stringify(
+						{
+							temperature: response.data.main.temp,
+							conditions: response.data.weather[0].description,
+							humidity: response.data.main.humidity,
+							wind_speed: response.data.wind.speed,
+							timestamp: new Date().toISOString(),
+						},
+						null,
+						2,
+					),
+				},
+			],
+		}
+	} catch (error) {
+		if (axios.isAxiosError(error)) {
+			throw new Error(`Weather API error: ${error.response?.data.message ?? 
error.message}`) + } + throw error + } +}) + +// Add a dynamic resource template for current weather by city +server.resource( + "current_weather", + new ResourceTemplate("weather://{city}/current", { list: true }), + async (uri, { city }) => { + try { + const response = await weatherApi.get("weather", { + params: { q: city }, + }) + + return { + contents: [ + { + uri: uri.href, + mimeType: "application/json", + text: JSON.stringify( + { + temperature: response.data.main.temp, + conditions: response.data.weather[0].description, + humidity: response.data.main.humidity, + wind_speed: response.data.wind.speed, + timestamp: new Date().toISOString(), + }, + null, + 2, + ), + }, + ], + } + } catch (error) { + if (axios.isAxiosError(error)) { + throw new Error(`Weather API error: ${error.response?.data.message ?? error.message}`) + } + throw error + } + }, +) + +// Start receiving messages on stdin and sending messages on stdout +const transport = new StdioServerTransport() +await server.connect(transport) +console.error("Weather MCP server running on stdio") +``` + +(Remember: This is just an example–you may use different dependencies, break the implementation up into multiple files, etc.) + +3. Build and compile the executable JavaScript file + +```bash +npm run build +``` + +4. Whenever you need an environment variable such as an API key to configure the MCP server, walk the user through the process of getting the key. For example, they may need to create an account and go to a developer dashboard to generate the key. Provide step-by-step instructions and URLs to make it easy for the user to retrieve the necessary information. Then use the ask_followup_question tool to ask the user for the key, in this case the OpenWeather API key. + +5. Install the MCP Server by adding the MCP server configuration to the MCP settings file. On macOS/Linux this is typically at `~/.roo-code/settings/mcp_settings.json`, on Windows at `%APPDATA%\roo-code\settings\mcp_settings.json`. The settings file may have other MCP servers already configured, so you would read it first and then add your new server to the existing `mcpServers` object. + +IMPORTANT: Regardless of what else you see in the MCP settings file, you must default any new MCP servers you create to disabled=false, alwaysAllow=[] and disabledTools=[]. + +```json +{ + "mcpServers": { + ..., + "weather": { + "command": "node", + "args": ["/path/to/weather-server/build/index.js"], + "env": { + "OPENWEATHER_API_KEY": "user-provided-api-key" + } + }, + } +} +``` + +(Note: the user may also ask you to install the MCP server to the Claude desktop app, in which case you would read then modify `~/Library/Application\ Support/Claude/claude_desktop_config.json` on macOS for example. It follows the same format of a top level `mcpServers` object.) + +6. After you have edited the MCP settings configuration file, the system will automatically run all the servers and expose the available tools and resources in the 'Connected MCP Servers' section. + +7. Now that you have access to these new tools and resources, you may suggest ways the user can command you to invoke them - for example, with this new weather tool now available, you can invite the user to ask "what's the weather in San Francisco?" + +## Editing MCP Servers + +The user may ask to add tools or resources that may make sense to add to an existing MCP server (listed under 'Connected MCP Servers' in the system prompt), e.g. if it would use the same API. 
This would be possible if you can locate the MCP server repository on the user's system by looking at the server arguments for a filepath. You might then use list_files and read_file to explore the files in the repository, and use write_to_file or apply_diff to make changes to the files. + +However some MCP servers may be running from installed packages rather than a local repository, in which case it may make more sense to create a new MCP server. + +# MCP Servers Are Not Always Necessary + +The user may not always request the use or creation of MCP servers. Instead, they might provide tasks that can be completed with existing tools. While using the MCP SDK to extend your capabilities can be useful, it's important to understand that this is just one specialized type of task you can accomplish. You should only implement MCP servers when the user explicitly requests it (e.g., "add a tool that..."). + +Remember: The MCP documentation and example provided above are to help you understand and work with existing MCP servers or create new ones when requested by the user. You already have access to tools and capabilities that can be used to accomplish a wide range of tasks. diff --git a/src/services/skills/built-in/mcp-builder/LICENSE.txt b/src/services/skills/built-in/mcp-builder/LICENSE.txt deleted file mode 100644 index 7a4a3ea2424..00000000000 --- a/src/services/skills/built-in/mcp-builder/LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. 
For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/src/services/skills/built-in/mcp-builder/SKILL.md b/src/services/skills/built-in/mcp-builder/SKILL.md deleted file mode 100644 index bd45c9c8791..00000000000 --- a/src/services/skills/built-in/mcp-builder/SKILL.md +++ /dev/null @@ -1,256 +0,0 @@ ---- -name: mcp-builder -description: Guide for creating high-quality MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. Use when building MCP servers to integrate external APIs or services, whether in Python (FastMCP) or Node/TypeScript (MCP SDK). -license: Complete terms in LICENSE.txt ---- - -# MCP Server Development Guide - -## Overview - -Create MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. The quality of an MCP server is measured by how well it enables LLMs to accomplish real-world tasks. 
- ---- - -# Process - -## 🚀 High-Level Workflow - -Creating a high-quality MCP server involves four main phases: - -### Phase 1: Deep Research and Planning - -#### 1.1 Understand Modern MCP Design - -**API Coverage vs. Workflow Tools:** -Balance comprehensive API endpoint coverage with specialized workflow tools. Workflow tools can be more convenient for specific tasks, while comprehensive coverage gives agents flexibility to compose operations. Performance varies by client—some clients benefit from code execution that combines basic tools, while others work better with higher-level workflows. When uncertain, prioritize comprehensive API coverage. - -**Tool Naming and Discoverability:** -Clear, descriptive tool names help agents find the right tools quickly. Use consistent prefixes (e.g., `github_create_issue`, `github_list_repos`) and action-oriented naming. - -**Context Management:** -Agents benefit from concise tool descriptions and the ability to filter/paginate results. Design tools that return focused, relevant data. Some clients support code execution which can help agents filter and process data efficiently. - -**Actionable Error Messages:** -Error messages should guide agents toward solutions with specific suggestions and next steps. - -#### 1.2 Study MCP Protocol Documentation - -**Navigate the MCP specification:** - -Start with the sitemap to find relevant pages: `https://modelcontextprotocol.io/sitemap.xml` - -Then fetch specific pages with `.md` suffix for markdown format (e.g., `https://modelcontextprotocol.io/specification/draft.md`). - -Key pages to review: - -- Specification overview and architecture -- Transport mechanisms (streamable HTTP, stdio) -- Tool, resource, and prompt definitions - -#### 1.3 Study Framework Documentation - -**Recommended stack:** - -- **Language**: TypeScript (high-quality SDK support and good compatibility in many execution environments e.g. MCPB. Plus AI models are good at generating TypeScript code, benefiting from its broad usage, static typing and good linting tools) -- **Transport**: Streamable HTTP for remote servers, using stateless JSON (simpler to scale and maintain, as opposed to stateful sessions and streaming responses). stdio for local servers. - -**Load framework documentation:** - -- **MCP Best Practices**: [📋 View Best Practices](./reference/mcp_best_practices.md) - Core guidelines - -**For TypeScript (recommended):** - -- **TypeScript SDK**: Use WebFetch to load `https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md` -- [⚡ TypeScript Guide](./reference/node_mcp_server.md) - TypeScript patterns and examples - -**For Python:** - -- **Python SDK**: Use WebFetch to load `https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md` -- [🐍 Python Guide](./reference/python_mcp_server.md) - Python patterns and examples - -#### 1.4 Plan Your Implementation - -**Understand the API:** -Review the service's API documentation to identify key endpoints, authentication requirements, and data models. Use web search and WebFetch as needed. - -**Tool Selection:** -Prioritize comprehensive API coverage. List endpoints to implement, starting with the most common operations. 
- ---- - -### Phase 2: Implementation - -#### 2.1 Set Up Project Structure - -See language-specific guides for project setup: - -- [⚡ TypeScript Guide](./reference/node_mcp_server.md) - Project structure, package.json, tsconfig.json -- [🐍 Python Guide](./reference/python_mcp_server.md) - Module organization, dependencies - -#### 2.2 Implement Core Infrastructure - -Create shared utilities: - -- API client with authentication -- Error handling helpers -- Response formatting (JSON/Markdown) -- Pagination support - -#### 2.3 Implement Tools - -For each tool: - -**Input Schema:** - -- Use Zod (TypeScript) or Pydantic (Python) -- Include constraints and clear descriptions -- Add examples in field descriptions - -**Output Schema:** - -- Define `outputSchema` where possible for structured data -- Use `structuredContent` in tool responses (TypeScript SDK feature) -- Helps clients understand and process tool outputs - -**Tool Description:** - -- Concise summary of functionality -- Parameter descriptions -- Return type schema - -**Implementation:** - -- Async/await for I/O operations -- Proper error handling with actionable messages -- Support pagination where applicable -- Return both text content and structured data when using modern SDKs - -**Annotations:** - -- `readOnlyHint`: true/false -- `destructiveHint`: true/false -- `idempotentHint`: true/false -- `openWorldHint`: true/false - ---- - -### Phase 3: Review and Test - -#### 3.1 Code Quality - -Review for: - -- No duplicated code (DRY principle) -- Consistent error handling -- Full type coverage -- Clear tool descriptions - -#### 3.2 Build and Test - -**TypeScript:** - -- Run `npm run build` to verify compilation -- Test with MCP Inspector: `npx @modelcontextprotocol/inspector` - -**Python:** - -- Verify syntax: `python -m py_compile your_server.py` -- Test with MCP Inspector - -See language-specific guides for detailed testing approaches and quality checklists. - ---- - -### Phase 4: Create Evaluations - -After implementing your MCP server, create comprehensive evaluations to test its effectiveness. - -**Load [✅ Evaluation Guide](./reference/evaluation.md) for complete evaluation guidelines.** - -#### 4.1 Understand Evaluation Purpose - -Use evaluations to test whether LLMs can effectively use your MCP server to answer realistic, complex questions. - -#### 4.2 Create 10 Evaluation Questions - -To create effective evaluations, follow the process outlined in the evaluation guide: - -1. **Tool Inspection**: List available tools and understand their capabilities -2. **Content Exploration**: Use READ-ONLY operations to explore available data -3. **Question Generation**: Create 10 complex, realistic questions -4. **Answer Verification**: Solve each question yourself to verify answers - -#### 4.3 Evaluation Requirements - -Ensure each question is: - -- **Independent**: Not dependent on other questions -- **Read-only**: Only non-destructive operations required -- **Complex**: Requiring multiple tool calls and deep exploration -- **Realistic**: Based on real use cases humans would care about -- **Verifiable**: Single, clear answer that can be verified by string comparison -- **Stable**: Answer won't change over time - -#### 4.4 Output Format - -Create an XML file with this structure: - -```xml - - - Find discussions about AI model launches with animal codenames. One model needed a specific safety designation that uses the format ASL-X. What number X was being determined for the model named after a spotted wild cat? 
- 3 - - - -``` - ---- - -# Reference Files - -## 📚 Documentation Library - -Load these resources as needed during development: - -### Core MCP Documentation (Load First) - -- **MCP Protocol**: Start with sitemap at `https://modelcontextprotocol.io/sitemap.xml`, then fetch specific pages with `.md` suffix -- [📋 MCP Best Practices](./reference/mcp_best_practices.md) - Universal MCP guidelines including: - - Server and tool naming conventions - - Response format guidelines (JSON vs Markdown) - - Pagination best practices - - Transport selection (streamable HTTP vs stdio) - - Security and error handling standards - -### SDK Documentation (Load During Phase 1/2) - -- **Python SDK**: Fetch from `https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md` -- **TypeScript SDK**: Fetch from `https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md` - -### Language-Specific Implementation Guides (Load During Phase 2) - -- [🐍 Python Implementation Guide](./reference/python_mcp_server.md) - Complete Python/FastMCP guide with: - - - Server initialization patterns - - Pydantic model examples - - Tool registration with `@mcp.tool` - - Complete working examples - - Quality checklist - -- [⚡ TypeScript Implementation Guide](./reference/node_mcp_server.md) - Complete TypeScript guide with: - - Project structure - - Zod schema patterns - - Tool registration with `server.registerTool` - - Complete working examples - - Quality checklist - -### Evaluation Guide (Load During Phase 4) - -- [✅ Evaluation Guide](./reference/evaluation.md) - Complete evaluation creation guide with: - - Question creation guidelines - - Answer verification strategies - - XML format specifications - - Example questions and answers - - Running an evaluation with the provided scripts diff --git a/src/services/skills/built-in/mcp-builder/reference/evaluation.md b/src/services/skills/built-in/mcp-builder/reference/evaluation.md deleted file mode 100644 index c9375b535a3..00000000000 --- a/src/services/skills/built-in/mcp-builder/reference/evaluation.md +++ /dev/null @@ -1,642 +0,0 @@ -# MCP Server Evaluation Guide - -## Overview - -This document provides guidance on creating comprehensive evaluations for MCP servers. Evaluations test whether LLMs can effectively use your MCP server to answer realistic, complex questions using only the tools provided. - ---- - -## Quick Reference - -### Evaluation Requirements - -- Create 10 human-readable questions -- Questions must be READ-ONLY, INDEPENDENT, NON-DESTRUCTIVE -- Each question requires multiple tool calls (potentially dozens) -- Answers must be single, verifiable values -- Answers must be STABLE (won't change over time) - -### Output Format - -```xml - - - Your question here - Single verifiable answer - - -``` - ---- - -## Purpose of Evaluations - -The measure of quality of an MCP server is NOT how well or comprehensively the server implements tools, but how well these implementations (input/output schemas, docstrings/descriptions, functionality) enable LLMs with no other context and access ONLY to the MCP servers to answer realistic and difficult questions. - -## Evaluation Overview - -Create 10 human-readable questions requiring ONLY READ-ONLY, INDEPENDENT, NON-DESTRUCTIVE, and IDEMPOTENT operations to answer. 
Each question should be: - -- Realistic -- Clear and concise -- Unambiguous -- Complex, requiring potentially dozens of tool calls or steps -- Answerable with a single, verifiable value that you identify in advance - -## Question Guidelines - -### Core Requirements - -1. **Questions MUST be independent** - - - Each question should NOT depend on the answer to any other question - - Should not assume prior write operations from processing another question - -2. **Questions MUST require ONLY NON-DESTRUCTIVE AND IDEMPOTENT tool use** - - - Should not instruct or require modifying state to arrive at the correct answer - -3. **Questions must be REALISTIC, CLEAR, CONCISE, and COMPLEX** - - Must require another LLM to use multiple (potentially dozens of) tools or steps to answer - -### Complexity and Depth - -4. **Questions must require deep exploration** - - - Consider multi-hop questions requiring multiple sub-questions and sequential tool calls - - Each step should benefit from information found in previous questions - -5. **Questions may require extensive paging** - - - May need paging through multiple pages of results - - May require querying old data (1-2 years out-of-date) to find niche information - - The questions must be DIFFICULT - -6. **Questions must require deep understanding** - - - Rather than surface-level knowledge - - May pose complex ideas as True/False questions requiring evidence - - May use multiple-choice format where LLM must search different hypotheses - -7. **Questions must not be solvable with straightforward keyword search** - - Do not include specific keywords from the target content - - Use synonyms, related concepts, or paraphrases - - Require multiple searches, analyzing multiple related items, extracting context, then deriving the answer - -### Tool Testing - -8. **Questions should stress-test tool return values** - - - May elicit tools returning large JSON objects or lists, overwhelming the LLM - - Should require understanding multiple modalities of data: - - IDs and names - - Timestamps and datetimes (months, days, years, seconds) - - File IDs, names, extensions, and mimetypes - - URLs, GIDs, etc. - - Should probe the tool's ability to return all useful forms of data - -9. **Questions should MOSTLY reflect real human use cases** - - - The kinds of information retrieval tasks that HUMANS assisted by an LLM would care about - -10. **Questions may require dozens of tool calls** - - - This challenges LLMs with limited context - - Encourages MCP server tools to reduce information returned - -11. **Include ambiguous questions** - - May be ambiguous OR require difficult decisions on which tools to call - - Force the LLM to potentially make mistakes or misinterpret - - Ensure that despite AMBIGUITY, there is STILL A SINGLE VERIFIABLE ANSWER - -### Stability - -12. **Questions must be designed so the answer DOES NOT CHANGE** - - - Do not ask questions that rely on "current state" which is dynamic - - For example, do not count: - - Number of reactions to a post - - Number of replies to a thread - - Number of members in a channel - -13. **DO NOT let the MCP server RESTRICT the kinds of questions you create** - - Create challenging and complex questions - - Some may not be solvable with the available MCP server tools - - Questions may require specific output formats (datetime vs. epoch time, JSON vs. MARKDOWN) - - Questions may require dozens of tool calls to complete - -## Answer Guidelines - -### Verification - -1. 
**Answers must be VERIFIABLE via direct string comparison** - - If the answer can be re-written in many formats, clearly specify the output format in the QUESTION - - Examples: "Use YYYY/MM/DD.", "Respond True or False.", "Answer A, B, C, or D and nothing else." - - Answer should be a single VERIFIABLE value such as: - - User ID, user name, display name, first name, last name - - Channel ID, channel name - - Message ID, string - - URL, title - - Numerical quantity - - Timestamp, datetime - - Boolean (for True/False questions) - - Email address, phone number - - File ID, file name, file extension - - Multiple choice answer - - Answers must not require special formatting or complex, structured output - - Answer will be verified using DIRECT STRING COMPARISON - -### Readability - -2. **Answers should generally prefer HUMAN-READABLE formats** - - Examples: names, first name, last name, datetime, file name, message string, URL, yes/no, true/false, a/b/c/d - - Rather than opaque IDs (though IDs are acceptable) - - The VAST MAJORITY of answers should be human-readable - -### Stability - -3. **Answers must be STABLE/STATIONARY** - - - Look at old content (e.g., conversations that have ended, projects that have launched, questions answered) - - Create QUESTIONS based on "closed" concepts that will always return the same answer - - Questions may ask to consider a fixed time window to insulate from non-stationary answers - - Rely on context UNLIKELY to change - - Example: if finding a paper name, be SPECIFIC enough so answer is not confused with papers published later - -4. **Answers must be CLEAR and UNAMBIGUOUS** - - Questions must be designed so there is a single, clear answer - - Answer can be derived from using the MCP server tools - -### Diversity - -5. **Answers must be DIVERSE** - - - Answer should be a single VERIFIABLE value in diverse modalities and formats - - User concept: user ID, user name, display name, first name, last name, email address, phone number - - Channel concept: channel ID, channel name, channel topic - - Message concept: message ID, message string, timestamp, month, day, year - -6. 
**Answers must NOT be complex structures** - - Not a list of values - - Not a complex object - - Not a list of IDs or strings - - Not natural language text - - UNLESS the answer can be straightforwardly verified using DIRECT STRING COMPARISON - - And can be realistically reproduced - - It should be unlikely that an LLM would return the same list in any other order or format - -## Evaluation Process - -### Step 1: Documentation Inspection - -Read the documentation of the target API to understand: - -- Available endpoints and functionality -- If ambiguity exists, fetch additional information from the web -- Parallelize this step AS MUCH AS POSSIBLE -- Ensure each subagent is ONLY examining documentation from the file system or on the web - -### Step 2: Tool Inspection - -List the tools available in the MCP server: - -- Inspect the MCP server directly -- Understand input/output schemas, docstrings, and descriptions -- WITHOUT calling the tools themselves at this stage - -### Step 3: Developing Understanding - -Repeat steps 1 & 2 until you have a good understanding: - -- Iterate multiple times -- Think about the kinds of tasks you want to create -- Refine your understanding -- At NO stage should you READ the code of the MCP server implementation itself -- Use your intuition and understanding to create reasonable, realistic, but VERY challenging tasks - -### Step 4: Read-Only Content Inspection - -After understanding the API and tools, USE the MCP server tools: - -- Inspect content using READ-ONLY and NON-DESTRUCTIVE operations ONLY -- Goal: identify specific content (e.g., users, channels, messages, projects, tasks) for creating realistic questions -- Should NOT call any tools that modify state -- Will NOT read the code of the MCP server implementation itself -- Parallelize this step with individual sub-agents pursuing independent explorations -- Ensure each subagent is only performing READ-ONLY, NON-DESTRUCTIVE, and IDEMPOTENT operations -- BE CAREFUL: SOME TOOLS may return LOTS OF DATA which would cause you to run out of CONTEXT -- Make INCREMENTAL, SMALL, AND TARGETED tool calls for exploration -- In all tool call requests, use the `limit` parameter to limit results (<10) -- Use pagination - -### Step 5: Task Generation - -After inspecting the content, create 10 human-readable questions: - -- An LLM should be able to answer these with the MCP server -- Follow all question and answer guidelines above - -## Output Format - -Each QA pair consists of a question and an answer. The output should be an XML file with this structure: - -```xml - - - Find the project created in Q2 2024 with the highest number of completed tasks. What is the project name? - Website Redesign - - - Search for issues labeled as "bug" that were closed in March 2024. Which user closed the most issues? Provide their username. - sarah_dev - - - Look for pull requests that modified files in the /api directory and were merged between January 1 and January 31, 2024. How many different contributors worked on these PRs? - 7 - - - Find the repository with the most stars that was created before 2023. What is the repository name? - data-pipeline - - -``` - -## Evaluation Examples - -### Good Questions - -**Example 1: Multi-hop question requiring deep exploration (GitHub MCP)** - -```xml - - Find the repository that was archived in Q3 2023 and had previously been the most forked project in the organization. What was the primary programming language used in that repository? 
- Python - -``` - -This question is good because: - -- Requires multiple searches to find archived repositories -- Needs to identify which had the most forks before archival -- Requires examining repository details for the language -- Answer is a simple, verifiable value -- Based on historical (closed) data that won't change - -**Example 2: Requires understanding context without keyword matching (Project Management MCP)** - -```xml - - Locate the initiative focused on improving customer onboarding that was completed in late 2023. The project lead created a retrospective document after completion. What was the lead's role title at that time? - Product Manager - -``` - -This question is good because: - -- Doesn't use specific project name ("initiative focused on improving customer onboarding") -- Requires finding completed projects from specific timeframe -- Needs to identify the project lead and their role -- Requires understanding context from retrospective documents -- Answer is human-readable and stable -- Based on completed work (won't change) - -**Example 3: Complex aggregation requiring multiple steps (Issue Tracker MCP)** - -```xml - - Among all bugs reported in January 2024 that were marked as critical priority, which assignee resolved the highest percentage of their assigned bugs within 48 hours? Provide the assignee's username. - alex_eng - -``` - -This question is good because: - -- Requires filtering bugs by date, priority, and status -- Needs to group by assignee and calculate resolution rates -- Requires understanding timestamps to determine 48-hour windows -- Tests pagination (potentially many bugs to process) -- Answer is a single username -- Based on historical data from specific time period - -**Example 4: Requires synthesis across multiple data types (CRM MCP)** - -```xml - - Find the account that upgraded from the Starter to Enterprise plan in Q4 2023 and had the highest annual contract value. What industry does this account operate in? - Healthcare - -``` - -This question is good because: - -- Requires understanding subscription tier changes -- Needs to identify upgrade events in specific timeframe -- Requires comparing contract values -- Must access account industry information -- Answer is simple and verifiable -- Based on completed historical transactions - -### Poor Questions - -**Example 1: Answer changes over time** - -```xml - - How many open issues are currently assigned to the engineering team? - 47 - -``` - -This question is poor because: - -- The answer will change as issues are created, closed, or reassigned -- Not based on stable/stationary data -- Relies on "current state" which is dynamic - -**Example 2: Too easy with keyword search** - -```xml - - Find the pull request with title "Add authentication feature" and tell me who created it. - developer123 - -``` - -This question is poor because: - -- Can be solved with a straightforward keyword search for exact title -- Doesn't require deep exploration or understanding -- No synthesis or analysis needed - -**Example 3: Ambiguous answer format** - -```xml - - List all the repositories that have Python as their primary language. 
- repo1, repo2, repo3, data-pipeline, ml-tools - -``` - -This question is poor because: - -- Answer is a list that could be returned in any order -- Difficult to verify with direct string comparison -- LLM might format differently (JSON array, comma-separated, newline-separated) -- Better to ask for a specific aggregate (count) or superlative (most stars) - -## Verification Process - -After creating evaluations: - -1. **Examine the XML file** to understand the schema -2. **Load each task instruction** and in parallel using the MCP server and tools, identify the correct answer by attempting to solve the task YOURSELF -3. **Flag any operations** that require WRITE or DESTRUCTIVE operations -4. **Accumulate all CORRECT answers** and replace any incorrect answers in the document -5. **Remove any ``** that require WRITE or DESTRUCTIVE operations - -Remember to parallelize solving tasks to avoid running out of context, then accumulate all answers and make changes to the file at the end. - -## Tips for Creating Quality Evaluations - -1. **Think Hard and Plan Ahead** before generating tasks -2. **Parallelize Where Opportunity Arises** to speed up the process and manage context -3. **Focus on Realistic Use Cases** that humans would actually want to accomplish -4. **Create Challenging Questions** that test the limits of the MCP server's capabilities -5. **Ensure Stability** by using historical data and closed concepts -6. **Verify Answers** by solving the questions yourself using the MCP server tools -7. **Iterate and Refine** based on what you learn during the process - ---- - -# Running Evaluations - -After creating your evaluation file, you can use the provided evaluation harness to test your MCP server. - -## Setup - -1. **Install Dependencies** - - ```bash - pip install -r scripts/requirements.txt - ``` - - Or install manually: - - ```bash - pip install anthropic mcp - ``` - -2. **Set API Key** - - ```bash - export ANTHROPIC_API_KEY=your_api_key_here - ``` - -## Evaluation File Format - -Evaluation files use XML format with `` elements: - -```xml - - - Find the project created in Q2 2024 with the highest number of completed tasks. What is the project name? - Website Redesign - - - Search for issues labeled as "bug" that were closed in March 2024. Which user closed the most issues? Provide their username. - sarah_dev - - -``` - -## Running Evaluations - -The evaluation script (`scripts/evaluation.py`) supports three transport types: - -**Important:** - -- **stdio transport**: The evaluation script automatically launches and manages the MCP server process for you. Do not run the server manually. -- **sse/http transports**: You must start the MCP server separately before running the evaluation. The script connects to the already-running server at the specified URL. - -### 1. Local STDIO Server - -For locally-run MCP servers (script launches the server automatically): - -```bash -python scripts/evaluation.py \ - -t stdio \ - -c python \ - -a my_mcp_server.py \ - evaluation.xml -``` - -With environment variables: - -```bash -python scripts/evaluation.py \ - -t stdio \ - -c python \ - -a my_mcp_server.py \ - -e API_KEY=abc123 \ - -e DEBUG=true \ - evaluation.xml -``` - -### 2. Server-Sent Events (SSE) - -For SSE-based MCP servers (you must start the server first): - -```bash -python scripts/evaluation.py \ - -t sse \ - -u https://example.com/mcp \ - -H "Authorization: Bearer token123" \ - -H "X-Custom-Header: value" \ - evaluation.xml -``` - -### 3. 
HTTP (Streamable HTTP) - -For HTTP-based MCP servers (you must start the server first): - -```bash -python scripts/evaluation.py \ - -t http \ - -u https://example.com/mcp \ - -H "Authorization: Bearer token123" \ - evaluation.xml -``` - -## Command-Line Options - -``` -usage: evaluation.py [-h] [-t {stdio,sse,http}] [-m MODEL] [-c COMMAND] - [-a ARGS [ARGS ...]] [-e ENV [ENV ...]] [-u URL] - [-H HEADERS [HEADERS ...]] [-o OUTPUT] - eval_file - -positional arguments: - eval_file Path to evaluation XML file - -optional arguments: - -h, --help Show help message - -t, --transport Transport type: stdio, sse, or http (default: stdio) - -m, --model Claude model to use (default: claude-3-7-sonnet-20250219) - -o, --output Output file for report (default: print to stdout) - -stdio options: - -c, --command Command to run MCP server (e.g., python, node) - -a, --args Arguments for the command (e.g., server.py) - -e, --env Environment variables in KEY=VALUE format - -sse/http options: - -u, --url MCP server URL - -H, --header HTTP headers in 'Key: Value' format -``` - -## Output - -The evaluation script generates a detailed report including: - -- **Summary Statistics**: - - - Accuracy (correct/total) - - Average task duration - - Average tool calls per task - - Total tool calls - -- **Per-Task Results**: - - Prompt and expected response - - Actual response from the agent - - Whether the answer was correct (✅/❌) - - Duration and tool call details - - Agent's summary of its approach - - Agent's feedback on the tools - -### Save Report to File - -```bash -python scripts/evaluation.py \ - -t stdio \ - -c python \ - -a my_server.py \ - -o evaluation_report.md \ - evaluation.xml -``` - -## Complete Example Workflow - -Here's a complete example of creating and running an evaluation: - -1. **Create your evaluation file** (`my_evaluation.xml`): - -```xml - - - Find the user who created the most issues in January 2024. What is their username? - alice_developer - - - Among all pull requests merged in Q1 2024, which repository had the highest number? Provide the repository name. - backend-api - - - Find the project that was completed in December 2023 and had the longest duration from start to finish. How many days did it take? - 127 - - -``` - -2. **Install dependencies**: - -```bash -pip install -r scripts/requirements.txt -export ANTHROPIC_API_KEY=your_api_key -``` - -3. **Run evaluation**: - -```bash -python scripts/evaluation.py \ - -t stdio \ - -c python \ - -a github_mcp_server.py \ - -e GITHUB_TOKEN=ghp_xxx \ - -o github_eval_report.md \ - my_evaluation.xml -``` - -4. 
**Review the report** in `github_eval_report.md` to: - - See which questions passed/failed - - Read the agent's feedback on your tools - - Identify areas for improvement - - Iterate on your MCP server design - -## Troubleshooting - -### Connection Errors - -If you get connection errors: - -- **STDIO**: Verify the command and arguments are correct -- **SSE/HTTP**: Check the URL is accessible and headers are correct -- Ensure any required API keys are set in environment variables or headers - -### Low Accuracy - -If many evaluations fail: - -- Review the agent's feedback for each task -- Check if tool descriptions are clear and comprehensive -- Verify input parameters are well-documented -- Consider whether tools return too much or too little data -- Ensure error messages are actionable - -### Timeout Issues - -If tasks are timing out: - -- Use a more capable model (e.g., `claude-3-7-sonnet-20250219`) -- Check if tools are returning too much data -- Verify pagination is working correctly -- Consider simplifying complex questions diff --git a/src/services/skills/built-in/mcp-builder/reference/mcp_best_practices.md b/src/services/skills/built-in/mcp-builder/reference/mcp_best_practices.md deleted file mode 100644 index 428e1e80947..00000000000 --- a/src/services/skills/built-in/mcp-builder/reference/mcp_best_practices.md +++ /dev/null @@ -1,269 +0,0 @@ -# MCP Server Best Practices - -## Quick Reference - -### Server Naming - -- **Python**: `{service}_mcp` (e.g., `slack_mcp`) -- **Node/TypeScript**: `{service}-mcp-server` (e.g., `slack-mcp-server`) - -### Tool Naming - -- Use snake_case with service prefix -- Format: `{service}_{action}_{resource}` -- Example: `slack_send_message`, `github_create_issue` - -### Response Formats - -- Support both JSON and Markdown formats -- JSON for programmatic processing -- Markdown for human readability - -### Pagination - -- Always respect `limit` parameter -- Return `has_more`, `next_offset`, `total_count` -- Default to 20-50 items - -### Transport - -- **Streamable HTTP**: For remote servers, multi-client scenarios -- **stdio**: For local integrations, command-line tools -- Avoid SSE (deprecated in favor of streamable HTTP) - ---- - -## Server Naming Conventions - -Follow these standardized naming patterns: - -**Python**: Use format `{service}_mcp` (lowercase with underscores) - -- Examples: `slack_mcp`, `github_mcp`, `jira_mcp` - -**Node/TypeScript**: Use format `{service}-mcp-server` (lowercase with hyphens) - -- Examples: `slack-mcp-server`, `github-mcp-server`, `jira-mcp-server` - -The name should be general, descriptive of the service being integrated, easy to infer from the task description, and without version numbers. - ---- - -## Tool Naming and Design - -### Tool Naming - -1. **Use snake_case**: `search_users`, `create_project`, `get_channel_info` -2. **Include service prefix**: Anticipate that your MCP server may be used alongside other MCP servers - - Use `slack_send_message` instead of just `send_message` - - Use `github_create_issue` instead of just `create_issue` -3. **Be action-oriented**: Start with verbs (get, list, search, create, etc.) -4. 
**Be specific**: Avoid generic names that could conflict with other servers - -### Tool Design - -- Tool descriptions must narrowly and unambiguously describe functionality -- Descriptions must precisely match actual functionality -- Provide tool annotations (readOnlyHint, destructiveHint, idempotentHint, openWorldHint) -- Keep tool operations focused and atomic - ---- - -## Response Formats - -All tools that return data should support multiple formats: - -### JSON Format (`response_format="json"`) - -- Machine-readable structured data -- Include all available fields and metadata -- Consistent field names and types -- Use for programmatic processing - -### Markdown Format (`response_format="markdown"`, typically default) - -- Human-readable formatted text -- Use headers, lists, and formatting for clarity -- Convert timestamps to human-readable format -- Show display names with IDs in parentheses -- Omit verbose metadata - ---- - -## Pagination - -For tools that list resources: - -- **Always respect the `limit` parameter** -- **Implement pagination**: Use `offset` or cursor-based pagination -- **Return pagination metadata**: Include `has_more`, `next_offset`/`next_cursor`, `total_count` -- **Never load all results into memory**: Especially important for large datasets -- **Default to reasonable limits**: 20-50 items is typical - -Example pagination response: - -```json -{ - "total": 150, - "count": 20, - "offset": 0, - "items": [...], - "has_more": true, - "next_offset": 20 -} -``` - ---- - -## Transport Options - -### Streamable HTTP - -**Best for**: Remote servers, web services, multi-client scenarios - -**Characteristics**: - -- Bidirectional communication over HTTP -- Supports multiple simultaneous clients -- Can be deployed as a web service -- Enables server-to-client notifications - -**Use when**: - -- Serving multiple clients simultaneously -- Deploying as a cloud service -- Integration with web applications - -### stdio - -**Best for**: Local integrations, command-line tools - -**Characteristics**: - -- Standard input/output stream communication -- Simple setup, no network configuration needed -- Runs as a subprocess of the client - -**Use when**: - -- Building tools for local development environments -- Integrating with desktop applications -- Single-user, single-session scenarios - -**Note**: stdio servers should NOT log to stdout (use stderr for logging) - -### Transport Selection - -| Criterion | stdio | Streamable HTTP | -| -------------- | ------ | --------------- | -| **Deployment** | Local | Remote | -| **Clients** | Single | Multiple | -| **Complexity** | Low | Medium | -| **Real-time** | No | Yes | - ---- - -## Security Best Practices - -### Authentication and Authorization - -**OAuth 2.1**: - -- Use secure OAuth 2.1 with certificates from recognized authorities -- Validate access tokens before processing requests -- Only accept tokens specifically intended for your server - -**API Keys**: - -- Store API keys in environment variables, never in code -- Validate keys on server startup -- Provide clear error messages when authentication fails - -### Input Validation - -- Sanitize file paths to prevent directory traversal -- Validate URLs and external identifiers -- Check parameter sizes and ranges -- Prevent command injection in system calls -- Use schema validation (Pydantic/Zod) for all inputs - -### Error Handling - -- Don't expose internal errors to clients -- Log security-relevant errors server-side -- Provide helpful but not revealing error messages -- Clean up 
resources after errors - -### DNS Rebinding Protection - -For streamable HTTP servers running locally: - -- Enable DNS rebinding protection -- Validate the `Origin` header on all incoming connections -- Bind to `127.0.0.1` rather than `0.0.0.0` - ---- - -## Tool Annotations - -Provide annotations to help clients understand tool behavior: - -| Annotation | Type | Default | Description | -| ----------------- | ------- | ------- | ------------------------------------------------------- | -| `readOnlyHint` | boolean | false | Tool does not modify its environment | -| `destructiveHint` | boolean | true | Tool may perform destructive updates | -| `idempotentHint` | boolean | false | Repeated calls with same args have no additional effect | -| `openWorldHint` | boolean | true | Tool interacts with external entities | - -**Important**: Annotations are hints, not security guarantees. Clients should not make security-critical decisions based solely on annotations. - ---- - -## Error Handling - -- Use standard JSON-RPC error codes -- Report tool errors within result objects (not protocol-level errors) -- Provide helpful, specific error messages with suggested next steps -- Don't expose internal implementation details -- Clean up resources properly on errors - -Example error handling: - -```typescript -try { - const result = performOperation() - return { content: [{ type: "text", text: result }] } -} catch (error) { - return { - isError: true, - content: [ - { - type: "text", - text: `Error: ${error.message}. Try using filter='active_only' to reduce results.`, - }, - ], - } -} -``` - ---- - -## Testing Requirements - -Comprehensive testing should cover: - -- **Functional testing**: Verify correct execution with valid/invalid inputs -- **Integration testing**: Test interaction with external systems -- **Security testing**: Validate auth, input sanitization, rate limiting -- **Performance testing**: Check behavior under load, timeouts -- **Error handling**: Ensure proper error reporting and cleanup - ---- - -## Documentation Requirements - -- Provide clear documentation of all tools and capabilities -- Include working examples (at least 3 per major feature) -- Document security considerations -- Specify required permissions and access levels -- Document rate limits and performance characteristics diff --git a/src/services/skills/built-in/mcp-builder/reference/node_mcp_server.md b/src/services/skills/built-in/mcp-builder/reference/node_mcp_server.md deleted file mode 100644 index e645b0291de..00000000000 --- a/src/services/skills/built-in/mcp-builder/reference/node_mcp_server.md +++ /dev/null @@ -1,975 +0,0 @@ -# Node/TypeScript MCP Server Implementation Guide - -## Overview - -This document provides Node/TypeScript-specific best practices and examples for implementing MCP servers using the MCP TypeScript SDK. It covers project structure, server setup, tool registration patterns, input validation with Zod, error handling, and complete working examples. 
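Before the detailed sections below, here is a minimal end-to-end sketch of the pattern this guide builds up to: a stdio server exposing a single Zod-validated tool. The server name, tool name, and schema fields are illustrative placeholders, not SDK requirements.

```typescript
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
import { z } from "zod"

// Hypothetical minimal server exposing one read-only tool.
const server = new McpServer({
    name: "example-mcp-server",
    version: "1.0.0",
})

server.registerTool(
    "example_echo_message",
    {
        title: "Echo Message",
        description: "Returns the provided message prefixed with the server name.",
        // Raw Zod shape: the SDK derives the JSON Schema shown to clients from these fields.
        inputSchema: {
            message: z.string().min(1).max(500).describe("Text to echo back"),
        },
        outputSchema: {
            echoed: z.string().describe("The echoed message"),
        },
        annotations: {
            readOnlyHint: true,
            destructiveHint: false,
            idempotentHint: true,
            openWorldHint: false,
        },
    },
    async ({ message }) => {
        const output = { echoed: `example-mcp-server: ${message}` }
        return {
            content: [{ type: "text", text: JSON.stringify(output) }],
            structuredContent: output,
        }
    },
)

// stdio servers must log to stderr, never stdout.
const transport = new StdioServerTransport()
await server.connect(transport)
console.error("example-mcp-server running via stdio")
```

The Quick Reference below shows these pieces individually; the Complete Example later in this guide applies the same pattern with real validation, pagination, and error handling.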
- ---- - -## Quick Reference - -### Key Imports - -```typescript -import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" -import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js" -import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" -import express from "express" -import { z } from "zod" -``` - -### Server Initialization - -```typescript -const server = new McpServer({ - name: "service-mcp-server", - version: "1.0.0", -}) -``` - -### Tool Registration Pattern - -```typescript -server.registerTool( - "tool_name", - { - title: "Tool Display Name", - description: "What the tool does", - inputSchema: { param: z.string() }, - outputSchema: { result: z.string() }, - }, - async ({ param }) => { - const output = { result: `Processed: ${param}` } - return { - content: [{ type: "text", text: JSON.stringify(output) }], - structuredContent: output, // Modern pattern for structured data - } - }, -) -``` - ---- - -## MCP TypeScript SDK - -The official MCP TypeScript SDK provides: - -- `McpServer` class for server initialization -- `registerTool` method for tool registration -- Zod schema integration for runtime input validation -- Type-safe tool handler implementations - -**IMPORTANT - Use Modern APIs Only:** - -- **DO use**: `server.registerTool()`, `server.registerResource()`, `server.registerPrompt()` -- **DO NOT use**: Old deprecated APIs such as `server.tool()`, `server.setRequestHandler(ListToolsRequestSchema, ...)`, or manual handler registration -- The `register*` methods provide better type safety, automatic schema handling, and are the recommended approach - -See the MCP SDK documentation in the references for complete details. - -## Server Naming Convention - -Node/TypeScript MCP servers must follow this naming pattern: - -- **Format**: `{service}-mcp-server` (lowercase with hyphens) -- **Examples**: `github-mcp-server`, `jira-mcp-server`, `stripe-mcp-server` - -The name should be: - -- General (not tied to specific features) -- Descriptive of the service/API being integrated -- Easy to infer from the task description -- Without version numbers or dates - -## Project Structure - -Create the following structure for Node/TypeScript MCP servers: - -``` -{service}-mcp-server/ -├── package.json -├── tsconfig.json -├── README.md -├── src/ -│ ├── index.ts # Main entry point with McpServer initialization -│ ├── types.ts # TypeScript type definitions and interfaces -│ ├── tools/ # Tool implementations (one file per domain) -│ ├── services/ # API clients and shared utilities -│ ├── schemas/ # Zod validation schemas -│ └── constants.ts # Shared constants (API_URL, CHARACTER_LIMIT, etc.) -└── dist/ # Built JavaScript files (entry point: dist/index.js) -``` - -## Tool Implementation - -### Tool Naming - -Use snake_case for tool names (e.g., "search_users", "create_project", "get_channel_info") with clear, action-oriented names. 
- -**Avoid Naming Conflicts**: Include the service context to prevent overlaps: - -- Use "slack_send_message" instead of just "send_message" -- Use "github_create_issue" instead of just "create_issue" -- Use "asana_list_tasks" instead of just "list_tasks" - -### Tool Structure - -Tools are registered using the `registerTool` method with the following requirements: - -- Use Zod schemas for runtime input validation and type safety -- The `description` field must be explicitly provided - JSDoc comments are NOT automatically extracted -- Explicitly provide `title`, `description`, `inputSchema`, and `annotations` -- The `inputSchema` must be a Zod schema object (not a JSON schema) -- Type all parameters and return values explicitly - -```typescript -import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" -import { z } from "zod" - -const server = new McpServer({ - name: "example-mcp", - version: "1.0.0", -}) - -// Zod schema for input validation -const UserSearchInputSchema = z - .object({ - query: z - .string() - .min(2, "Query must be at least 2 characters") - .max(200, "Query must not exceed 200 characters") - .describe("Search string to match against names/emails"), - limit: z.number().int().min(1).max(100).default(20).describe("Maximum results to return"), - offset: z.number().int().min(0).default(0).describe("Number of results to skip for pagination"), - response_format: z - .nativeEnum(ResponseFormat) - .default(ResponseFormat.MARKDOWN) - .describe("Output format: 'markdown' for human-readable or 'json' for machine-readable"), - }) - .strict() - -// Type definition from Zod schema -type UserSearchInput = z.infer - -server.registerTool( - "example_search_users", - { - title: "Search Example Users", - description: `Search for users in the Example system by name, email, or team. - -This tool searches across all user profiles in the Example platform, supporting partial matches and various search filters. It does NOT create or modify users, only searches existing ones. 
- -Args: - - query (string): Search string to match against names/emails - - limit (number): Maximum results to return, between 1-100 (default: 20) - - offset (number): Number of results to skip for pagination (default: 0) - - response_format ('markdown' | 'json'): Output format (default: 'markdown') - -Returns: - For JSON format: Structured data with schema: - { - "total": number, // Total number of matches found - "count": number, // Number of results in this response - "offset": number, // Current pagination offset - "users": [ - { - "id": string, // User ID (e.g., "U123456789") - "name": string, // Full name (e.g., "John Doe") - "email": string, // Email address - "team": string, // Team name (optional) - "active": boolean // Whether user is active - } - ], - "has_more": boolean, // Whether more results are available - "next_offset": number // Offset for next page (if has_more is true) - } - -Examples: - - Use when: "Find all marketing team members" -> params with query="team:marketing" - - Use when: "Search for John's account" -> params with query="john" - - Don't use when: You need to create a user (use example_create_user instead) - -Error Handling: - - Returns "Error: Rate limit exceeded" if too many requests (429 status) - - Returns "No users found matching ''" if search returns empty`, - inputSchema: UserSearchInputSchema, - annotations: { - readOnlyHint: true, - destructiveHint: false, - idempotentHint: true, - openWorldHint: true, - }, - }, - async (params: UserSearchInput) => { - try { - // Input validation is handled by Zod schema - // Make API request using validated parameters - const data = await makeApiRequest("users/search", "GET", undefined, { - q: params.query, - limit: params.limit, - offset: params.offset, - }) - - const users = data.users || [] - const total = data.total || 0 - - if (!users.length) { - return { - content: [ - { - type: "text", - text: `No users found matching '${params.query}'`, - }, - ], - } - } - - // Prepare structured output - const output = { - total, - count: users.length, - offset: params.offset, - users: users.map((user: any) => ({ - id: user.id, - name: user.name, - email: user.email, - ...(user.team ? { team: user.team } : {}), - active: user.active ?? true, - })), - has_more: total > params.offset + users.length, - ...(total > params.offset + users.length - ? 
{ - next_offset: params.offset + users.length, - } - : {}), - } - - // Format text representation based on requested format - let textContent: string - if (params.response_format === ResponseFormat.MARKDOWN) { - const lines = [ - `# User Search Results: '${params.query}'`, - "", - `Found ${total} users (showing ${users.length})`, - "", - ] - for (const user of users) { - lines.push(`## ${user.name} (${user.id})`) - lines.push(`- **Email**: ${user.email}`) - if (user.team) lines.push(`- **Team**: ${user.team}`) - lines.push("") - } - textContent = lines.join("\n") - } else { - textContent = JSON.stringify(output, null, 2) - } - - return { - content: [{ type: "text", text: textContent }], - structuredContent: output, // Modern pattern for structured data - } - } catch (error) { - return { - content: [ - { - type: "text", - text: handleApiError(error), - }, - ], - } - } - }, -) -``` - -## Zod Schemas for Input Validation - -Zod provides runtime type validation: - -```typescript -import { z } from "zod" - -// Basic schema with validation -const CreateUserSchema = z - .object({ - name: z.string().min(1, "Name is required").max(100, "Name must not exceed 100 characters"), - email: z.string().email("Invalid email format"), - age: z - .number() - .int("Age must be a whole number") - .min(0, "Age cannot be negative") - .max(150, "Age cannot be greater than 150"), - }) - .strict() // Use .strict() to forbid extra fields - -// Enums -enum ResponseFormat { - MARKDOWN = "markdown", - JSON = "json", -} - -const SearchSchema = z.object({ - response_format: z.nativeEnum(ResponseFormat).default(ResponseFormat.MARKDOWN).describe("Output format"), -}) - -// Optional fields with defaults -const PaginationSchema = z.object({ - limit: z.number().int().min(1).max(100).default(20).describe("Maximum results to return"), - offset: z.number().int().min(0).default(0).describe("Number of results to skip"), -}) -``` - -## Response Format Options - -Support multiple output formats for flexibility: - -```typescript -enum ResponseFormat { - MARKDOWN = "markdown", - JSON = "json", -} - -const inputSchema = z.object({ - query: z.string(), - response_format: z - .nativeEnum(ResponseFormat) - .default(ResponseFormat.MARKDOWN) - .describe("Output format: 'markdown' for human-readable or 'json' for machine-readable"), -}) -``` - -**Markdown format**: - -- Use headers, lists, and formatting for clarity -- Convert timestamps to human-readable format -- Show display names with IDs in parentheses -- Omit verbose metadata -- Group related information logically - -**JSON format**: - -- Return complete, structured data suitable for programmatic processing -- Include all available fields and metadata -- Use consistent field names and types - -## Pagination Implementation - -For tools that list resources: - -```typescript -const ListSchema = z.object({ - limit: z.number().int().min(1).max(100).default(20), - offset: z.number().int().min(0).default(0), -}) - -async function listItems(params: z.infer) { - const data = await apiRequest(params.limit, params.offset) - - const response = { - total: data.total, - count: data.items.length, - offset: params.offset, - items: data.items, - has_more: data.total > params.offset + data.items.length, - next_offset: data.total > params.offset + data.items.length ? 
params.offset + data.items.length : undefined, - } - - return JSON.stringify(response, null, 2) -} -``` - -## Character Limits and Truncation - -Add a CHARACTER_LIMIT constant to prevent overwhelming responses: - -```typescript -// At module level in constants.ts -export const CHARACTER_LIMIT = 25000 // Maximum response size in characters - -async function searchTool(params: SearchInput) { - let result = generateResponse(data) - - // Check character limit and truncate if needed - if (result.length > CHARACTER_LIMIT) { - const truncatedData = data.slice(0, Math.max(1, data.length / 2)) - response.data = truncatedData - response.truncated = true - response.truncation_message = - `Response truncated from ${data.length} to ${truncatedData.length} items. ` + - `Use 'offset' parameter or add filters to see more results.` - result = JSON.stringify(response, null, 2) - } - - return result -} -``` - -## Error Handling - -Provide clear, actionable error messages: - -```typescript -import axios, { AxiosError } from "axios" - -function handleApiError(error: unknown): string { - if (error instanceof AxiosError) { - if (error.response) { - switch (error.response.status) { - case 404: - return "Error: Resource not found. Please check the ID is correct." - case 403: - return "Error: Permission denied. You don't have access to this resource." - case 429: - return "Error: Rate limit exceeded. Please wait before making more requests." - default: - return `Error: API request failed with status ${error.response.status}` - } - } else if (error.code === "ECONNABORTED") { - return "Error: Request timed out. Please try again." - } - } - return `Error: Unexpected error occurred: ${error instanceof Error ? error.message : String(error)}` -} -``` - -## Shared Utilities - -Extract common functionality into reusable functions: - -```typescript -// Shared API request function -async function makeApiRequest( - endpoint: string, - method: "GET" | "POST" | "PUT" | "DELETE" = "GET", - data?: any, - params?: any, -): Promise { - try { - const response = await axios({ - method, - url: `${API_BASE_URL}/${endpoint}`, - data, - params, - timeout: 30000, - headers: { - "Content-Type": "application/json", - Accept: "application/json", - }, - }) - return response.data - } catch (error) { - throw error - } -} -``` - -## Async/Await Best Practices - -Always use async/await for network requests and I/O operations: - -```typescript -// Good: Async network request -async function fetchData(resourceId: string): Promise { - const response = await axios.get(`${API_URL}/resource/${resourceId}`) - return response.data -} - -// Bad: Promise chains -function fetchData(resourceId: string): Promise { - return axios.get(`${API_URL}/resource/${resourceId}`).then((response) => response.data) // Harder to read and maintain -} -``` - -## TypeScript Best Practices - -1. **Use Strict TypeScript**: Enable strict mode in tsconfig.json -2. **Define Interfaces**: Create clear interface definitions for all data structures -3. **Avoid `any`**: Use proper types or `unknown` instead of `any` -4. **Zod for Runtime Validation**: Use Zod schemas to validate external data -5. **Type Guards**: Create type guard functions for complex type checking -6. **Error Handling**: Always use try-catch with proper error type checking -7. 
**Null Safety**: Use optional chaining (`?.`) and nullish coalescing (`??`) - -```typescript -// Good: Type-safe with Zod and interfaces -interface UserResponse { - id: string - name: string - email: string - team?: string - active: boolean -} - -const UserSchema = z.object({ - id: z.string(), - name: z.string(), - email: z.string().email(), - team: z.string().optional(), - active: z.boolean(), -}) - -type User = z.infer - -async function getUser(id: string): Promise { - const data = await apiCall(`/users/${id}`) - return UserSchema.parse(data) // Runtime validation -} - -// Bad: Using any -async function getUser(id: string): Promise { - return await apiCall(`/users/${id}`) // No type safety -} -``` - -## Package Configuration - -### package.json - -```json -{ - "name": "{service}-mcp-server", - "version": "1.0.0", - "description": "MCP server for {Service} API integration", - "type": "module", - "main": "dist/index.js", - "scripts": { - "start": "node dist/index.js", - "dev": "tsx watch src/index.ts", - "build": "tsc", - "clean": "rm -rf dist" - }, - "engines": { - "node": ">=18" - }, - "dependencies": { - "@modelcontextprotocol/sdk": "^1.6.1", - "axios": "^1.7.9", - "zod": "^3.23.8" - }, - "devDependencies": { - "@types/node": "^22.10.0", - "tsx": "^4.19.2", - "typescript": "^5.7.2" - } -} -``` - -### tsconfig.json - -```json -{ - "compilerOptions": { - "target": "ES2022", - "module": "Node16", - "moduleResolution": "Node16", - "lib": ["ES2022"], - "outDir": "./dist", - "rootDir": "./src", - "strict": true, - "esModuleInterop": true, - "skipLibCheck": true, - "forceConsistentCasingInFileNames": true, - "declaration": true, - "declarationMap": true, - "sourceMap": true, - "allowSyntheticDefaultImports": true - }, - "include": ["src/**/*"], - "exclude": ["node_modules", "dist"] -} -``` - -## Complete Example - -```typescript -#!/usr/bin/env node -/** - * MCP Server for Example Service. - * - * This server provides tools to interact with Example API, including user search, - * project management, and data export capabilities. 
- */ - -import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" -import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" -import { z } from "zod" -import axios, { AxiosError } from "axios" - -// Constants -const API_BASE_URL = "https://api.example.com/v1" -const CHARACTER_LIMIT = 25000 - -// Enums -enum ResponseFormat { - MARKDOWN = "markdown", - JSON = "json", -} - -// Zod schemas -const UserSearchInputSchema = z - .object({ - query: z - .string() - .min(2, "Query must be at least 2 characters") - .max(200, "Query must not exceed 200 characters") - .describe("Search string to match against names/emails"), - limit: z.number().int().min(1).max(100).default(20).describe("Maximum results to return"), - offset: z.number().int().min(0).default(0).describe("Number of results to skip for pagination"), - response_format: z - .nativeEnum(ResponseFormat) - .default(ResponseFormat.MARKDOWN) - .describe("Output format: 'markdown' for human-readable or 'json' for machine-readable"), - }) - .strict() - -type UserSearchInput = z.infer - -// Shared utility functions -async function makeApiRequest( - endpoint: string, - method: "GET" | "POST" | "PUT" | "DELETE" = "GET", - data?: any, - params?: any, -): Promise { - try { - const response = await axios({ - method, - url: `${API_BASE_URL}/${endpoint}`, - data, - params, - timeout: 30000, - headers: { - "Content-Type": "application/json", - Accept: "application/json", - }, - }) - return response.data - } catch (error) { - throw error - } -} - -function handleApiError(error: unknown): string { - if (error instanceof AxiosError) { - if (error.response) { - switch (error.response.status) { - case 404: - return "Error: Resource not found. Please check the ID is correct." - case 403: - return "Error: Permission denied. You don't have access to this resource." - case 429: - return "Error: Rate limit exceeded. Please wait before making more requests." - default: - return `Error: API request failed with status ${error.response.status}` - } - } else if (error.code === "ECONNABORTED") { - return "Error: Request timed out. Please try again." - } - } - return `Error: Unexpected error occurred: ${error instanceof Error ? 
error.message : String(error)}` -} - -// Create MCP server instance -const server = new McpServer({ - name: "example-mcp", - version: "1.0.0", -}) - -// Register tools -server.registerTool( - "example_search_users", - { - title: "Search Example Users", - description: `[Full description as shown above]`, - inputSchema: UserSearchInputSchema, - annotations: { - readOnlyHint: true, - destructiveHint: false, - idempotentHint: true, - openWorldHint: true, - }, - }, - async (params: UserSearchInput) => { - // Implementation as shown above - }, -) - -// Main function -// For stdio (local): -async function runStdio() { - if (!process.env.EXAMPLE_API_KEY) { - console.error("ERROR: EXAMPLE_API_KEY environment variable is required") - process.exit(1) - } - - const transport = new StdioServerTransport() - await server.connect(transport) - console.error("MCP server running via stdio") -} - -// For streamable HTTP (remote): -async function runHTTP() { - if (!process.env.EXAMPLE_API_KEY) { - console.error("ERROR: EXAMPLE_API_KEY environment variable is required") - process.exit(1) - } - - const app = express() - app.use(express.json()) - - app.post("/mcp", async (req, res) => { - const transport = new StreamableHTTPServerTransport({ - sessionIdGenerator: undefined, - enableJsonResponse: true, - }) - res.on("close", () => transport.close()) - await server.connect(transport) - await transport.handleRequest(req, res, req.body) - }) - - const port = parseInt(process.env.PORT || "3000") - app.listen(port, () => { - console.error(`MCP server running on http://localhost:${port}/mcp`) - }) -} - -// Choose transport based on environment -const transport = process.env.TRANSPORT || "stdio" -if (transport === "http") { - runHTTP().catch((error) => { - console.error("Server error:", error) - process.exit(1) - }) -} else { - runStdio().catch((error) => { - console.error("Server error:", error) - process.exit(1) - }) -} -``` - ---- - -## Advanced MCP Features - -### Resource Registration - -Expose data as resources for efficient, URI-based access: - -```typescript -import { ResourceTemplate } from "@modelcontextprotocol/sdk/types.js" - -// Register a resource with URI template -server.registerResource( - { - uri: "file://documents/{name}", - name: "Document Resource", - description: "Access documents by name", - mimeType: "text/plain", - }, - async (uri: string) => { - // Extract parameter from URI - const match = uri.match(/^file:\/\/documents\/(.+)$/) - if (!match) { - throw new Error("Invalid URI format") - } - - const documentName = match[1] - const content = await loadDocument(documentName) - - return { - contents: [ - { - uri, - mimeType: "text/plain", - text: content, - }, - ], - } - }, -) - -// List available resources dynamically -server.registerResourceList(async () => { - const documents = await getAvailableDocuments() - return { - resources: documents.map((doc) => ({ - uri: `file://documents/${doc.name}`, - name: doc.name, - mimeType: "text/plain", - description: doc.description, - })), - } -}) -``` - -**When to use Resources vs Tools:** - -- **Resources**: For data access with simple URI-based parameters -- **Tools**: For complex operations requiring validation and business logic -- **Resources**: When data is relatively static or template-based -- **Tools**: When operations have side effects or complex workflows - -### Transport Options - -The TypeScript SDK supports two main transport mechanisms: - -#### Streamable HTTP (Recommended for Remote Servers) - -```typescript -import { 
StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js" -import express from "express" - -const app = express() -app.use(express.json()) - -app.post("/mcp", async (req, res) => { - // Create new transport for each request (stateless, prevents request ID collisions) - const transport = new StreamableHTTPServerTransport({ - sessionIdGenerator: undefined, - enableJsonResponse: true, - }) - - res.on("close", () => transport.close()) - - await server.connect(transport) - await transport.handleRequest(req, res, req.body) -}) - -app.listen(3000) -``` - -#### stdio (For Local Integrations) - -```typescript -import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" - -const transport = new StdioServerTransport() -await server.connect(transport) -``` - -**Transport selection:** - -- **Streamable HTTP**: Web services, remote access, multiple clients -- **stdio**: Command-line tools, local development, subprocess integration - -### Notification Support - -Notify clients when server state changes: - -```typescript -// Notify when tools list changes -server.notification({ - method: "notifications/tools/list_changed", -}) - -// Notify when resources change -server.notification({ - method: "notifications/resources/list_changed", -}) -``` - -Use notifications sparingly - only when server capabilities genuinely change. - ---- - -## Code Best Practices - -### Code Composability and Reusability - -Your implementation MUST prioritize composability and code reuse: - -1. **Extract Common Functionality**: - - - Create reusable helper functions for operations used across multiple tools - - Build shared API clients for HTTP requests instead of duplicating code - - Centralize error handling logic in utility functions - - Extract business logic into dedicated functions that can be composed - - Extract shared markdown or JSON field selection & formatting functionality - -2. **Avoid Duplication**: - - NEVER copy-paste similar code between tools - - If you find yourself writing similar logic twice, extract it into a function - - Common operations like pagination, filtering, field selection, and formatting should be shared - - Authentication/authorization logic should be centralized - -## Building and Running - -Always build your TypeScript code before running: - -```bash -# Build the project -npm run build - -# Run the server -npm start - -# Development with auto-reload -npm run dev -``` - -Always ensure `npm run build` completes successfully before considering the implementation complete. 
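To make the composability guidance above concrete, the sketch below shares pagination-footer formatting between two list-style tools instead of duplicating it in each handler. The helper and data shapes are illustrative, not SDK APIs.

```typescript
// Shared shape returned by any paginated API call (illustrative).
interface Paged<T> {
    total: number
    offset: number
    items: T[]
}

// One formatter reused by every list-style tool.
function formatPage<T>(page: Paged<T>, renderItem: (item: T) => string): string {
    const lines = page.items.map(renderItem)
    const shown = page.offset + page.items.length
    if (page.total > shown) {
        lines.push(`...${page.total - shown} more result(s) available. Re-run with offset=${shown}.`)
    }
    return lines.join("\n")
}

// Two different tool handlers call the same helper with their own renderers.
const usersText = formatPage({ total: 42, offset: 0, items: [{ id: "U1", name: "Ada" }] }, (u) => `- ${u.name} (${u.id})`)
const projectsText = formatPage({ total: 7, offset: 0, items: [{ id: "P1", title: "Apollo" }] }, (p) => `- ${p.title} (${p.id})`)
console.error(usersText, projectsText)
```

The same approach applies to shared API clients and error formatting: centralize the logic once, then compose it inside each `registerTool` handler.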
- -## Quality Checklist - -Before finalizing your Node/TypeScript MCP server implementation, ensure: - -### Strategic Design - -- [ ] Tools enable complete workflows, not just API endpoint wrappers -- [ ] Tool names reflect natural task subdivisions -- [ ] Response formats optimize for agent context efficiency -- [ ] Human-readable identifiers used where appropriate -- [ ] Error messages guide agents toward correct usage - -### Implementation Quality - -- [ ] FOCUSED IMPLEMENTATION: Most important and valuable tools implemented -- [ ] All tools registered using `registerTool` with complete configuration -- [ ] All tools include `title`, `description`, `inputSchema`, and `annotations` -- [ ] Annotations correctly set (readOnlyHint, destructiveHint, idempotentHint, openWorldHint) -- [ ] All tools use Zod schemas for runtime input validation with `.strict()` enforcement -- [ ] All Zod schemas have proper constraints and descriptive error messages -- [ ] All tools have comprehensive descriptions with explicit input/output types -- [ ] Descriptions include return value examples and complete schema documentation -- [ ] Error messages are clear, actionable, and educational - -### TypeScript Quality - -- [ ] TypeScript interfaces are defined for all data structures -- [ ] Strict TypeScript is enabled in tsconfig.json -- [ ] No use of `any` type - use `unknown` or proper types instead -- [ ] All async functions have explicit Promise return types -- [ ] Error handling uses proper type guards (e.g., `axios.isAxiosError`, `z.ZodError`) - -### Advanced Features (where applicable) - -- [ ] Resources registered for appropriate data endpoints -- [ ] Appropriate transport configured (stdio or streamable HTTP) -- [ ] Notifications implemented for dynamic server capabilities -- [ ] Type-safe with SDK interfaces - -### Project Configuration - -- [ ] Package.json includes all necessary dependencies -- [ ] Build script produces working JavaScript in dist/ directory -- [ ] Main entry point is properly configured as dist/index.js -- [ ] Server name follows format: `{service}-mcp-server` -- [ ] tsconfig.json properly configured with strict mode - -### Code Quality - -- [ ] Pagination is properly implemented where applicable -- [ ] Large responses check CHARACTER_LIMIT constant and truncate with clear messages -- [ ] Filtering options are provided for potentially large result sets -- [ ] All network operations handle timeouts and connection errors gracefully -- [ ] Common functionality is extracted into reusable functions -- [ ] Return types are consistent across similar operations - -### Testing and Build - -- [ ] `npm run build` completes successfully without errors -- [ ] dist/index.js created and executable -- [ ] Server runs: `node dist/index.js --help` -- [ ] All imports resolve correctly -- [ ] Sample tool calls work as expected diff --git a/src/services/skills/built-in/mcp-builder/reference/python_mcp_server.md b/src/services/skills/built-in/mcp-builder/reference/python_mcp_server.md deleted file mode 100644 index bc6b789546e..00000000000 --- a/src/services/skills/built-in/mcp-builder/reference/python_mcp_server.md +++ /dev/null @@ -1,738 +0,0 @@ -# Python MCP Server Implementation Guide - -## Overview - -This document provides Python-specific best practices and examples for implementing MCP servers using the MCP Python SDK. It covers server setup, tool registration patterns, input validation with Pydantic, error handling, and complete working examples. 
- ---- - -## Quick Reference - -### Key Imports - -```python -from mcp.server.fastmcp import FastMCP -from pydantic import BaseModel, Field, field_validator, ConfigDict -from typing import Optional, List, Dict, Any -from enum import Enum -import httpx -``` - -### Server Initialization - -```python -mcp = FastMCP("service_mcp") -``` - -### Tool Registration Pattern - -```python -@mcp.tool(name="tool_name", annotations={...}) -async def tool_function(params: InputModel) -> str: - # Implementation - pass -``` - ---- - -## MCP Python SDK and FastMCP - -The official MCP Python SDK provides FastMCP, a high-level framework for building MCP servers. It provides: - -- Automatic description and inputSchema generation from function signatures and docstrings -- Pydantic model integration for input validation -- Decorator-based tool registration with `@mcp.tool` - -**For complete SDK documentation, use WebFetch to load:** -`https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md` - -## Server Naming Convention - -Python MCP servers must follow this naming pattern: - -- **Format**: `{service}_mcp` (lowercase with underscores) -- **Examples**: `github_mcp`, `jira_mcp`, `stripe_mcp` - -The name should be: - -- General (not tied to specific features) -- Descriptive of the service/API being integrated -- Easy to infer from the task description -- Without version numbers or dates - -## Tool Implementation - -### Tool Naming - -Use snake_case for tool names (e.g., "search_users", "create_project", "get_channel_info") with clear, action-oriented names. - -**Avoid Naming Conflicts**: Include the service context to prevent overlaps: - -- Use "slack_send_message" instead of just "send_message" -- Use "github_create_issue" instead of just "create_issue" -- Use "asana_list_tasks" instead of just "list_tasks" - -### Tool Structure with FastMCP - -Tools are defined using the `@mcp.tool` decorator with Pydantic models for input validation: - -```python -from pydantic import BaseModel, Field, ConfigDict -from mcp.server.fastmcp import FastMCP - -# Initialize the MCP server -mcp = FastMCP("example_mcp") - -# Define Pydantic model for input validation -class ServiceToolInput(BaseModel): - '''Input model for service tool operation.''' - model_config = ConfigDict( - str_strip_whitespace=True, # Auto-strip whitespace from strings - validate_assignment=True, # Validate on assignment - extra='forbid' # Forbid extra fields - ) - - param1: str = Field(..., description="First parameter description (e.g., 'user123', 'project-abc')", min_length=1, max_length=100) - param2: Optional[int] = Field(default=None, description="Optional integer parameter with constraints", ge=0, le=1000) - tags: Optional[List[str]] = Field(default_factory=list, description="List of tags to apply", max_items=10) - -@mcp.tool( - name="service_tool_name", - annotations={ - "title": "Human-Readable Tool Title", - "readOnlyHint": True, # Tool does not modify environment - "destructiveHint": False, # Tool does not perform destructive operations - "idempotentHint": True, # Repeated calls have no additional effect - "openWorldHint": False # Tool does not interact with external entities - } -) -async def service_tool_name(params: ServiceToolInput) -> str: - '''Tool description automatically becomes the 'description' field. - - This tool performs a specific operation on the service. It validates all inputs - using the ServiceToolInput Pydantic model before processing. 
- - Args: - params (ServiceToolInput): Validated input parameters containing: - - param1 (str): First parameter description - - param2 (Optional[int]): Optional parameter with default - - tags (Optional[List[str]]): List of tags - - Returns: - str: JSON-formatted response containing operation results - ''' - # Implementation here - pass -``` - -## Pydantic v2 Key Features - -- Use `model_config` instead of nested `Config` class -- Use `field_validator` instead of deprecated `validator` -- Use `model_dump()` instead of deprecated `dict()` -- Validators require `@classmethod` decorator -- Type hints are required for validator methods - -```python -from pydantic import BaseModel, Field, field_validator, ConfigDict - -class CreateUserInput(BaseModel): - model_config = ConfigDict( - str_strip_whitespace=True, - validate_assignment=True - ) - - name: str = Field(..., description="User's full name", min_length=1, max_length=100) - email: str = Field(..., description="User's email address", pattern=r'^[\w\.-]+@[\w\.-]+\.\w+$') - age: int = Field(..., description="User's age", ge=0, le=150) - - @field_validator('email') - @classmethod - def validate_email(cls, v: str) -> str: - if not v.strip(): - raise ValueError("Email cannot be empty") - return v.lower() -``` - -## Response Format Options - -Support multiple output formats for flexibility: - -```python -from enum import Enum - -class ResponseFormat(str, Enum): - '''Output format for tool responses.''' - MARKDOWN = "markdown" - JSON = "json" - -class UserSearchInput(BaseModel): - query: str = Field(..., description="Search query") - response_format: ResponseFormat = Field( - default=ResponseFormat.MARKDOWN, - description="Output format: 'markdown' for human-readable or 'json' for machine-readable" - ) -``` - -**Markdown format**: - -- Use headers, lists, and formatting for clarity -- Convert timestamps to human-readable format (e.g., "2024-01-15 10:30:00 UTC" instead of epoch) -- Show display names with IDs in parentheses (e.g., "@john.doe (U123456)") -- Omit verbose metadata (e.g., show only one profile image URL, not all sizes) -- Group related information logically - -**JSON format**: - -- Return complete, structured data suitable for programmatic processing -- Include all available fields and metadata -- Use consistent field names and types - -## Pagination Implementation - -For tools that list resources: - -```python -class ListInput(BaseModel): - limit: Optional[int] = Field(default=20, description="Maximum results to return", ge=1, le=100) - offset: Optional[int] = Field(default=0, description="Number of results to skip for pagination", ge=0) - -async def list_items(params: ListInput) -> str: - # Make API request with pagination - data = await api_request(limit=params.limit, offset=params.offset) - - # Return pagination info - response = { - "total": data["total"], - "count": len(data["items"]), - "offset": params.offset, - "items": data["items"], - "has_more": data["total"] > params.offset + len(data["items"]), - "next_offset": params.offset + len(data["items"]) if data["total"] > params.offset + len(data["items"]) else None - } - return json.dumps(response, indent=2) -``` - -## Error Handling - -Provide clear, actionable error messages: - -```python -def _handle_api_error(e: Exception) -> str: - '''Consistent error formatting across all tools.''' - if isinstance(e, httpx.HTTPStatusError): - if e.response.status_code == 404: - return "Error: Resource not found. Please check the ID is correct." 
- elif e.response.status_code == 403: - return "Error: Permission denied. You don't have access to this resource." - elif e.response.status_code == 429: - return "Error: Rate limit exceeded. Please wait before making more requests." - return f"Error: API request failed with status {e.response.status_code}" - elif isinstance(e, httpx.TimeoutException): - return "Error: Request timed out. Please try again." - return f"Error: Unexpected error occurred: {type(e).__name__}" -``` - -## Shared Utilities - -Extract common functionality into reusable functions: - -```python -# Shared API request function -async def _make_api_request(endpoint: str, method: str = "GET", **kwargs) -> dict: - '''Reusable function for all API calls.''' - async with httpx.AsyncClient() as client: - response = await client.request( - method, - f"{API_BASE_URL}/{endpoint}", - timeout=30.0, - **kwargs - ) - response.raise_for_status() - return response.json() -``` - -## Async/Await Best Practices - -Always use async/await for network requests and I/O operations: - -```python -# Good: Async network request -async def fetch_data(resource_id: str) -> dict: - async with httpx.AsyncClient() as client: - response = await client.get(f"{API_URL}/resource/{resource_id}") - response.raise_for_status() - return response.json() - -# Bad: Synchronous request -def fetch_data(resource_id: str) -> dict: - response = requests.get(f"{API_URL}/resource/{resource_id}") # Blocks - return response.json() -``` - -## Type Hints - -Use type hints throughout: - -```python -from typing import Optional, List, Dict, Any - -async def get_user(user_id: str) -> Dict[str, Any]: - data = await fetch_user(user_id) - return {"id": data["id"], "name": data["name"]} -``` - -## Tool Docstrings - -Every tool must have comprehensive docstrings with explicit type information: - -```python -async def search_users(params: UserSearchInput) -> str: - ''' - Search for users in the Example system by name, email, or team. - - This tool searches across all user profiles in the Example platform, - supporting partial matches and various search filters. It does NOT - create or modify users, only searches existing ones. 
- - Args: - params (UserSearchInput): Validated input parameters containing: - - query (str): Search string to match against names/emails (e.g., "john", "@example.com", "team:marketing") - - limit (Optional[int]): Maximum results to return, between 1-100 (default: 20) - - offset (Optional[int]): Number of results to skip for pagination (default: 0) - - Returns: - str: JSON-formatted string containing search results with the following schema: - - Success response: - { - "total": int, # Total number of matches found - "count": int, # Number of results in this response - "offset": int, # Current pagination offset - "users": [ - { - "id": str, # User ID (e.g., "U123456789") - "name": str, # Full name (e.g., "John Doe") - "email": str, # Email address (e.g., "john@example.com") - "team": str # Team name (e.g., "Marketing") - optional - } - ] - } - - Error response: - "Error: " or "No users found matching ''" - - Examples: - - Use when: "Find all marketing team members" -> params with query="team:marketing" - - Use when: "Search for John's account" -> params with query="john" - - Don't use when: You need to create a user (use example_create_user instead) - - Don't use when: You have a user ID and need full details (use example_get_user instead) - - Error Handling: - - Input validation errors are handled by Pydantic model - - Returns "Error: Rate limit exceeded" if too many requests (429 status) - - Returns "Error: Invalid API authentication" if API key is invalid (401 status) - - Returns formatted list of results or "No users found matching 'query'" - ''' -``` - -## Complete Example - -See below for a complete Python MCP server example: - -```python -#!/usr/bin/env python3 -''' -MCP Server for Example Service. - -This server provides tools to interact with Example API, including user search, -project management, and data export capabilities. 
-''' - -from typing import Optional, List, Dict, Any -from enum import Enum -import httpx -from pydantic import BaseModel, Field, field_validator, ConfigDict -from mcp.server.fastmcp import FastMCP - -# Initialize the MCP server -mcp = FastMCP("example_mcp") - -# Constants -API_BASE_URL = "https://api.example.com/v1" - -# Enums -class ResponseFormat(str, Enum): - '''Output format for tool responses.''' - MARKDOWN = "markdown" - JSON = "json" - -# Pydantic Models for Input Validation -class UserSearchInput(BaseModel): - '''Input model for user search operations.''' - model_config = ConfigDict( - str_strip_whitespace=True, - validate_assignment=True - ) - - query: str = Field(..., description="Search string to match against names/emails", min_length=2, max_length=200) - limit: Optional[int] = Field(default=20, description="Maximum results to return", ge=1, le=100) - offset: Optional[int] = Field(default=0, description="Number of results to skip for pagination", ge=0) - response_format: ResponseFormat = Field(default=ResponseFormat.MARKDOWN, description="Output format") - - @field_validator('query') - @classmethod - def validate_query(cls, v: str) -> str: - if not v.strip(): - raise ValueError("Query cannot be empty or whitespace only") - return v.strip() - -# Shared utility functions -async def _make_api_request(endpoint: str, method: str = "GET", **kwargs) -> dict: - '''Reusable function for all API calls.''' - async with httpx.AsyncClient() as client: - response = await client.request( - method, - f"{API_BASE_URL}/{endpoint}", - timeout=30.0, - **kwargs - ) - response.raise_for_status() - return response.json() - -def _handle_api_error(e: Exception) -> str: - '''Consistent error formatting across all tools.''' - if isinstance(e, httpx.HTTPStatusError): - if e.response.status_code == 404: - return "Error: Resource not found. Please check the ID is correct." - elif e.response.status_code == 403: - return "Error: Permission denied. You don't have access to this resource." - elif e.response.status_code == 429: - return "Error: Rate limit exceeded. Please wait before making more requests." - return f"Error: API request failed with status {e.response.status_code}" - elif isinstance(e, httpx.TimeoutException): - return "Error: Request timed out. Please try again." - return f"Error: Unexpected error occurred: {type(e).__name__}" - -# Tool definitions -@mcp.tool( - name="example_search_users", - annotations={ - "title": "Search Example Users", - "readOnlyHint": True, - "destructiveHint": False, - "idempotentHint": True, - "openWorldHint": True - } -) -async def example_search_users(params: UserSearchInput) -> str: - '''Search for users in the Example system by name, email, or team. 
- - [Full docstring as shown above] - ''' - try: - # Make API request using validated parameters - data = await _make_api_request( - "users/search", - params={ - "q": params.query, - "limit": params.limit, - "offset": params.offset - } - ) - - users = data.get("users", []) - total = data.get("total", 0) - - if not users: - return f"No users found matching '{params.query}'" - - # Format response based on requested format - if params.response_format == ResponseFormat.MARKDOWN: - lines = [f"# User Search Results: '{params.query}'", ""] - lines.append(f"Found {total} users (showing {len(users)})") - lines.append("") - - for user in users: - lines.append(f"## {user['name']} ({user['id']})") - lines.append(f"- **Email**: {user['email']}") - if user.get('team'): - lines.append(f"- **Team**: {user['team']}") - lines.append("") - - return "\n".join(lines) - - else: - # Machine-readable JSON format - import json - response = { - "total": total, - "count": len(users), - "offset": params.offset, - "users": users - } - return json.dumps(response, indent=2) - - except Exception as e: - return _handle_api_error(e) - -if __name__ == "__main__": - mcp.run() -``` - ---- - -## Advanced FastMCP Features - -### Context Parameter Injection - -FastMCP can automatically inject a `Context` parameter into tools for advanced capabilities like logging, progress reporting, resource reading, and user interaction: - -```python -from mcp.server.fastmcp import FastMCP, Context - -mcp = FastMCP("example_mcp") - -@mcp.tool() -async def advanced_search(query: str, ctx: Context) -> str: - '''Advanced tool with context access for logging and progress.''' - - # Report progress for long operations - await ctx.report_progress(0.25, "Starting search...") - - # Log information for debugging - await ctx.log_info("Processing query", {"query": query, "timestamp": datetime.now()}) - - # Perform search - results = await search_api(query) - await ctx.report_progress(0.75, "Formatting results...") - - # Access server configuration - server_name = ctx.fastmcp.name - - return format_results(results) - -@mcp.tool() -async def interactive_tool(resource_id: str, ctx: Context) -> str: - '''Tool that can request additional input from users.''' - - # Request sensitive information when needed - api_key = await ctx.elicit( - prompt="Please provide your API key:", - input_type="password" - ) - - # Use the provided key - return await api_call(resource_id, api_key) -``` - -**Context capabilities:** - -- `ctx.report_progress(progress, message)` - Report progress for long operations -- `ctx.log_info(message, data)` / `ctx.log_error()` / `ctx.log_debug()` - Logging -- `ctx.elicit(prompt, input_type)` - Request input from users -- `ctx.fastmcp.name` - Access server configuration -- `ctx.read_resource(uri)` - Read MCP resources - -### Resource Registration - -Expose data as resources for efficient, template-based access: - -```python -@mcp.resource("file://documents/{name}") -async def get_document(name: str) -> str: - '''Expose documents as MCP resources. - - Resources are useful for static or semi-static data that doesn't - require complex parameters. They use URI templates for flexible access. 
- ''' - document_path = f"./docs/{name}" - with open(document_path, "r") as f: - return f.read() - -@mcp.resource("config://settings/{key}") -async def get_setting(key: str, ctx: Context) -> str: - '''Expose configuration as resources with context.''' - settings = await load_settings() - return json.dumps(settings.get(key, {})) -``` - -**When to use Resources vs Tools:** - -- **Resources**: For data access with simple parameters (URI templates) -- **Tools**: For complex operations with validation and business logic - -### Structured Output Types - -FastMCP supports multiple return types beyond strings: - -```python -from typing import TypedDict -from dataclasses import dataclass -from pydantic import BaseModel - -# TypedDict for structured returns -class UserData(TypedDict): - id: str - name: str - email: str - -@mcp.tool() -async def get_user_typed(user_id: str) -> UserData: - '''Returns structured data - FastMCP handles serialization.''' - return {"id": user_id, "name": "John Doe", "email": "john@example.com"} - -# Pydantic models for complex validation -class DetailedUser(BaseModel): - id: str - name: str - email: str - created_at: datetime - metadata: Dict[str, Any] - -@mcp.tool() -async def get_user_detailed(user_id: str) -> DetailedUser: - '''Returns Pydantic model - automatically generates schema.''' - user = await fetch_user(user_id) - return DetailedUser(**user) -``` - -### Lifespan Management - -Initialize resources that persist across requests: - -```python -from contextlib import asynccontextmanager - -@asynccontextmanager -async def app_lifespan(): - '''Manage resources that live for the server's lifetime.''' - # Initialize connections, load config, etc. - db = await connect_to_database() - config = load_configuration() - - # Make available to all tools - yield {"db": db, "config": config} - - # Cleanup on shutdown - await db.close() - -mcp = FastMCP("example_mcp", lifespan=app_lifespan) - -@mcp.tool() -async def query_data(query: str, ctx: Context) -> str: - '''Access lifespan resources through context.''' - db = ctx.request_context.lifespan_state["db"] - results = await db.query(query) - return format_results(results) -``` - -### Transport Options - -FastMCP supports two main transport mechanisms: - -```python -# stdio transport (for local tools) - default -if __name__ == "__main__": - mcp.run() - -# Streamable HTTP transport (for remote servers) -if __name__ == "__main__": - mcp.run(transport="streamable_http", port=8000) -``` - -**Transport selection:** - -- **stdio**: Command-line tools, local integrations, subprocess execution -- **Streamable HTTP**: Web services, remote access, multiple clients - ---- - -## Code Best Practices - -### Code Composability and Reusability - -Your implementation MUST prioritize composability and code reuse: - -1. **Extract Common Functionality**: - - - Create reusable helper functions for operations used across multiple tools - - Build shared API clients for HTTP requests instead of duplicating code - - Centralize error handling logic in utility functions - - Extract business logic into dedicated functions that can be composed - - Extract shared markdown or JSON field selection & formatting functionality - -2. 
**Avoid Duplication**: - - NEVER copy-paste similar code between tools - - If you find yourself writing similar logic twice, extract it into a function - - Common operations like pagination, filtering, field selection, and formatting should be shared - - Authentication/authorization logic should be centralized - -### Python-Specific Best Practices - -1. **Use Type Hints**: Always include type annotations for function parameters and return values -2. **Pydantic Models**: Define clear Pydantic models for all input validation -3. **Avoid Manual Validation**: Let Pydantic handle input validation with constraints -4. **Proper Imports**: Group imports (standard library, third-party, local) -5. **Error Handling**: Use specific exception types (httpx.HTTPStatusError, not generic Exception) -6. **Async Context Managers**: Use `async with` for resources that need cleanup -7. **Constants**: Define module-level constants in UPPER_CASE - -## Quality Checklist - -Before finalizing your Python MCP server implementation, ensure: - -### Strategic Design - -- [ ] Tools enable complete workflows, not just API endpoint wrappers -- [ ] Tool names reflect natural task subdivisions -- [ ] Response formats optimize for agent context efficiency -- [ ] Human-readable identifiers used where appropriate -- [ ] Error messages guide agents toward correct usage - -### Implementation Quality - -- [ ] FOCUSED IMPLEMENTATION: Most important and valuable tools implemented -- [ ] All tools have descriptive names and documentation -- [ ] Return types are consistent across similar operations -- [ ] Error handling is implemented for all external calls -- [ ] Server name follows format: `{service}_mcp` -- [ ] All network operations use async/await -- [ ] Common functionality is extracted into reusable functions -- [ ] Error messages are clear, actionable, and educational -- [ ] Outputs are properly validated and formatted - -### Tool Configuration - -- [ ] All tools implement 'name' and 'annotations' in the decorator -- [ ] Annotations correctly set (readOnlyHint, destructiveHint, idempotentHint, openWorldHint) -- [ ] All tools use Pydantic BaseModel for input validation with Field() definitions -- [ ] All Pydantic Fields have explicit types and descriptions with constraints -- [ ] All tools have comprehensive docstrings with explicit input/output types -- [ ] Docstrings include complete schema structure for dict/JSON returns -- [ ] Pydantic models handle input validation (no manual validation needed) - -### Advanced Features (where applicable) - -- [ ] Context injection used for logging, progress, or elicitation -- [ ] Resources registered for appropriate data endpoints -- [ ] Lifespan management implemented for persistent connections -- [ ] Structured output types used (TypedDict, Pydantic models) -- [ ] Appropriate transport configured (stdio or streamable HTTP) - -### Code Quality - -- [ ] File includes proper imports including Pydantic imports -- [ ] Pagination is properly implemented where applicable -- [ ] Filtering options are provided for potentially large result sets -- [ ] All async functions are properly defined with `async def` -- [ ] HTTP client usage follows async patterns with proper context managers -- [ ] Type hints are used throughout the code -- [ ] Constants are defined at module level in UPPER_CASE - -### Testing - -- [ ] Server runs successfully: `python your_server.py --help` -- [ ] All imports resolve correctly -- [ ] Sample tool calls work as expected -- [ ] Error scenarios handled gracefully diff --git 
a/src/services/skills/built-in/mcp-builder/scripts/connections.py b/src/services/skills/built-in/mcp-builder/scripts/connections.py deleted file mode 100644 index ffcd0da3fbe..00000000000 --- a/src/services/skills/built-in/mcp-builder/scripts/connections.py +++ /dev/null @@ -1,151 +0,0 @@ -"""Lightweight connection handling for MCP servers.""" - -from abc import ABC, abstractmethod -from contextlib import AsyncExitStack -from typing import Any - -from mcp import ClientSession, StdioServerParameters -from mcp.client.sse import sse_client -from mcp.client.stdio import stdio_client -from mcp.client.streamable_http import streamablehttp_client - - -class MCPConnection(ABC): - """Base class for MCP server connections.""" - - def __init__(self): - self.session = None - self._stack = None - - @abstractmethod - def _create_context(self): - """Create the connection context based on connection type.""" - - async def __aenter__(self): - """Initialize MCP server connection.""" - self._stack = AsyncExitStack() - await self._stack.__aenter__() - - try: - ctx = self._create_context() - result = await self._stack.enter_async_context(ctx) - - if len(result) == 2: - read, write = result - elif len(result) == 3: - read, write, _ = result - else: - raise ValueError(f"Unexpected context result: {result}") - - session_ctx = ClientSession(read, write) - self.session = await self._stack.enter_async_context(session_ctx) - await self.session.initialize() - return self - except BaseException: - await self._stack.__aexit__(None, None, None) - raise - - async def __aexit__(self, exc_type, exc_val, exc_tb): - """Clean up MCP server connection resources.""" - if self._stack: - await self._stack.__aexit__(exc_type, exc_val, exc_tb) - self.session = None - self._stack = None - - async def list_tools(self) -> list[dict[str, Any]]: - """Retrieve available tools from the MCP server.""" - response = await self.session.list_tools() - return [ - { - "name": tool.name, - "description": tool.description, - "input_schema": tool.inputSchema, - } - for tool in response.tools - ] - - async def call_tool(self, tool_name: str, arguments: dict[str, Any]) -> Any: - """Call a tool on the MCP server with provided arguments.""" - result = await self.session.call_tool(tool_name, arguments=arguments) - return result.content - - -class MCPConnectionStdio(MCPConnection): - """MCP connection using standard input/output.""" - - def __init__(self, command: str, args: list[str] = None, env: dict[str, str] = None): - super().__init__() - self.command = command - self.args = args or [] - self.env = env - - def _create_context(self): - return stdio_client( - StdioServerParameters(command=self.command, args=self.args, env=self.env) - ) - - -class MCPConnectionSSE(MCPConnection): - """MCP connection using Server-Sent Events.""" - - def __init__(self, url: str, headers: dict[str, str] = None): - super().__init__() - self.url = url - self.headers = headers or {} - - def _create_context(self): - return sse_client(url=self.url, headers=self.headers) - - -class MCPConnectionHTTP(MCPConnection): - """MCP connection using Streamable HTTP.""" - - def __init__(self, url: str, headers: dict[str, str] = None): - super().__init__() - self.url = url - self.headers = headers or {} - - def _create_context(self): - return streamablehttp_client(url=self.url, headers=self.headers) - - -def create_connection( - transport: str, - command: str = None, - args: list[str] = None, - env: dict[str, str] = None, - url: str = None, - headers: dict[str, str] = None, -) -> 
MCPConnection: - """Factory function to create the appropriate MCP connection. - - Args: - transport: Connection type ("stdio", "sse", or "http") - command: Command to run (stdio only) - args: Command arguments (stdio only) - env: Environment variables (stdio only) - url: Server URL (sse and http only) - headers: HTTP headers (sse and http only) - - Returns: - MCPConnection instance - """ - transport = transport.lower() - - if transport == "stdio": - if not command: - raise ValueError("Command is required for stdio transport") - return MCPConnectionStdio(command=command, args=args, env=env) - - elif transport == "sse": - if not url: - raise ValueError("URL is required for sse transport") - return MCPConnectionSSE(url=url, headers=headers) - - elif transport in ["http", "streamable_http", "streamable-http"]: - if not url: - raise ValueError("URL is required for http transport") - return MCPConnectionHTTP(url=url, headers=headers) - - else: - raise ValueError(f"Unsupported transport type: {transport}. Use 'stdio', 'sse', or 'http'") diff --git a/src/services/skills/built-in/mcp-builder/scripts/evaluation.py b/src/services/skills/built-in/mcp-builder/scripts/evaluation.py deleted file mode 100644 index 41778569c45..00000000000 --- a/src/services/skills/built-in/mcp-builder/scripts/evaluation.py +++ /dev/null @@ -1,373 +0,0 @@ -"""MCP Server Evaluation Harness - -This script evaluates MCP servers by running test questions against them using Claude. -""" - -import argparse -import asyncio -import json -import re -import sys -import time -import traceback -import xml.etree.ElementTree as ET -from pathlib import Path -from typing import Any - -from anthropic import Anthropic - -from connections import create_connection - -EVALUATION_PROMPT = """You are an AI assistant with access to tools. - -When given a task, you MUST: -1. Use the available tools to complete the task -2. Provide summary of each step in your approach, wrapped in tags -3. Provide feedback on the tools provided, wrapped in tags -4. Provide your final response, wrapped in tags - -Summary Requirements: -- In your tags, you must explain: - - The steps you took to complete the task - - Which tools you used, in what order, and why - - The inputs you provided to each tool - - The outputs you received from each tool - - A summary for how you arrived at the response - -Feedback Requirements: -- In your tags, provide constructive feedback on the tools: - - Comment on tool names: Are they clear and descriptive? - - Comment on input parameters: Are they well-documented? Are required vs optional parameters clear? - - Comment on descriptions: Do they accurately describe what the tool does? - - Comment on any errors encountered during tool usage: Did the tool fail to execute? Did the tool return too many tokens? 
- - Identify specific areas for improvement and explain WHY they would help - - Be specific and actionable in your suggestions - -Response Requirements: -- Your response should be concise and directly address what was asked -- Always wrap your final response in tags -- If you cannot solve the task return NOT_FOUND -- For numeric responses, provide just the number -- For IDs, provide just the ID -- For names or text, provide the exact text requested -- Your response should go last""" - - -def parse_evaluation_file(file_path: Path) -> list[dict[str, Any]]: - """Parse XML evaluation file with qa_pair elements.""" - try: - tree = ET.parse(file_path) - root = tree.getroot() - evaluations = [] - - for qa_pair in root.findall(".//qa_pair"): - question_elem = qa_pair.find("question") - answer_elem = qa_pair.find("answer") - - if question_elem is not None and answer_elem is not None: - evaluations.append({ - "question": (question_elem.text or "").strip(), - "answer": (answer_elem.text or "").strip(), - }) - - return evaluations - except Exception as e: - print(f"Error parsing evaluation file {file_path}: {e}") - return [] - - -def extract_xml_content(text: str, tag: str) -> str | None: - """Extract content from XML tags.""" - pattern = rf"<{tag}>(.*?)" - matches = re.findall(pattern, text, re.DOTALL) - return matches[-1].strip() if matches else None - - -async def agent_loop( - client: Anthropic, - model: str, - question: str, - tools: list[dict[str, Any]], - connection: Any, -) -> tuple[str, dict[str, Any]]: - """Run the agent loop with MCP tools.""" - messages = [{"role": "user", "content": question}] - - response = await asyncio.to_thread( - client.messages.create, - model=model, - max_tokens=4096, - system=EVALUATION_PROMPT, - messages=messages, - tools=tools, - ) - - messages.append({"role": "assistant", "content": response.content}) - - tool_metrics = {} - - while response.stop_reason == "tool_use": - tool_use = next(block for block in response.content if block.type == "tool_use") - tool_name = tool_use.name - tool_input = tool_use.input - - tool_start_ts = time.time() - try: - tool_result = await connection.call_tool(tool_name, tool_input) - tool_response = json.dumps(tool_result) if isinstance(tool_result, (dict, list)) else str(tool_result) - except Exception as e: - tool_response = f"Error executing tool {tool_name}: {str(e)}\n" - tool_response += traceback.format_exc() - tool_duration = time.time() - tool_start_ts - - if tool_name not in tool_metrics: - tool_metrics[tool_name] = {"count": 0, "durations": []} - tool_metrics[tool_name]["count"] += 1 - tool_metrics[tool_name]["durations"].append(tool_duration) - - messages.append({ - "role": "user", - "content": [{ - "type": "tool_result", - "tool_use_id": tool_use.id, - "content": tool_response, - }] - }) - - response = await asyncio.to_thread( - client.messages.create, - model=model, - max_tokens=4096, - system=EVALUATION_PROMPT, - messages=messages, - tools=tools, - ) - messages.append({"role": "assistant", "content": response.content}) - - response_text = next( - (block.text for block in response.content if hasattr(block, "text")), - None, - ) - return response_text, tool_metrics - - -async def evaluate_single_task( - client: Anthropic, - model: str, - qa_pair: dict[str, Any], - tools: list[dict[str, Any]], - connection: Any, - task_index: int, -) -> dict[str, Any]: - """Evaluate a single QA pair with the given tools.""" - start_time = time.time() - - print(f"Task {task_index + 1}: Running task with question: {qa_pair['question']}") - 
response, tool_metrics = await agent_loop(client, model, qa_pair["question"], tools, connection) - - response_value = extract_xml_content(response, "response") - summary = extract_xml_content(response, "summary") - feedback = extract_xml_content(response, "feedback") - - duration_seconds = time.time() - start_time - - return { - "question": qa_pair["question"], - "expected": qa_pair["answer"], - "actual": response_value, - "score": int(response_value == qa_pair["answer"]) if response_value else 0, - "total_duration": duration_seconds, - "tool_calls": tool_metrics, - "num_tool_calls": sum(len(metrics["durations"]) for metrics in tool_metrics.values()), - "summary": summary, - "feedback": feedback, - } - - -REPORT_HEADER = """ -# Evaluation Report - -## Summary - -- **Accuracy**: {correct}/{total} ({accuracy:.1f}%) -- **Average Task Duration**: {average_duration_s:.2f}s -- **Average Tool Calls per Task**: {average_tool_calls:.2f} -- **Total Tool Calls**: {total_tool_calls} - ---- -""" - -TASK_TEMPLATE = """ -### Task {task_num} - -**Question**: {question} -**Ground Truth Answer**: `{expected_answer}` -**Actual Answer**: `{actual_answer}` -**Correct**: {correct_indicator} -**Duration**: {total_duration:.2f}s -**Tool Calls**: {tool_calls} - -**Summary** -{summary} - -**Feedback** -{feedback} - ---- -""" - - -async def run_evaluation( - eval_path: Path, - connection: Any, - model: str = "claude-3-7-sonnet-20250219", -) -> str: - """Run evaluation with MCP server tools.""" - print("🚀 Starting Evaluation") - - client = Anthropic() - - tools = await connection.list_tools() - print(f"📋 Loaded {len(tools)} tools from MCP server") - - qa_pairs = parse_evaluation_file(eval_path) - print(f"📋 Loaded {len(qa_pairs)} evaluation tasks") - - results = [] - for i, qa_pair in enumerate(qa_pairs): - print(f"Processing task {i + 1}/{len(qa_pairs)}") - result = await evaluate_single_task(client, model, qa_pair, tools, connection, i) - results.append(result) - - correct = sum(r["score"] for r in results) - accuracy = (correct / len(results)) * 100 if results else 0 - average_duration_s = sum(r["total_duration"] for r in results) / len(results) if results else 0 - average_tool_calls = sum(r["num_tool_calls"] for r in results) / len(results) if results else 0 - total_tool_calls = sum(r["num_tool_calls"] for r in results) - - report = REPORT_HEADER.format( - correct=correct, - total=len(results), - accuracy=accuracy, - average_duration_s=average_duration_s, - average_tool_calls=average_tool_calls, - total_tool_calls=total_tool_calls, - ) - - report += "".join([ - TASK_TEMPLATE.format( - task_num=i + 1, - question=qa_pair["question"], - expected_answer=qa_pair["answer"], - actual_answer=result["actual"] or "N/A", - correct_indicator="✅" if result["score"] else "❌", - total_duration=result["total_duration"], - tool_calls=json.dumps(result["tool_calls"], indent=2), - summary=result["summary"] or "N/A", - feedback=result["feedback"] or "N/A", - ) - for i, (qa_pair, result) in enumerate(zip(qa_pairs, results)) - ]) - - return report - - -def parse_headers(header_list: list[str]) -> dict[str, str]: - """Parse header strings in format 'Key: Value' into a dictionary.""" - headers = {} - if not header_list: - return headers - - for header in header_list: - if ":" in header: - key, value = header.split(":", 1) - headers[key.strip()] = value.strip() - else: - print(f"Warning: Ignoring malformed header: {header}") - return headers - - -def parse_env_vars(env_list: list[str]) -> dict[str, str]: - """Parse environment variable 
strings in format 'KEY=VALUE' into a dictionary.""" - env = {} - if not env_list: - return env - - for env_var in env_list: - if "=" in env_var: - key, value = env_var.split("=", 1) - env[key.strip()] = value.strip() - else: - print(f"Warning: Ignoring malformed environment variable: {env_var}") - return env - - -async def main(): - parser = argparse.ArgumentParser( - description="Evaluate MCP servers using test questions", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - # Evaluate a local stdio MCP server - python evaluation.py -t stdio -c python -a my_server.py eval.xml - - # Evaluate an SSE MCP server - python evaluation.py -t sse -u https://example.com/mcp -H "Authorization: Bearer token" eval.xml - - # Evaluate an HTTP MCP server with custom model - python evaluation.py -t http -u https://example.com/mcp -m claude-3-5-sonnet-20241022 eval.xml - """, - ) - - parser.add_argument("eval_file", type=Path, help="Path to evaluation XML file") - parser.add_argument("-t", "--transport", choices=["stdio", "sse", "http"], default="stdio", help="Transport type (default: stdio)") - parser.add_argument("-m", "--model", default="claude-3-7-sonnet-20250219", help="Claude model to use (default: claude-3-7-sonnet-20250219)") - - stdio_group = parser.add_argument_group("stdio options") - stdio_group.add_argument("-c", "--command", help="Command to run MCP server (stdio only)") - stdio_group.add_argument("-a", "--args", nargs="+", help="Arguments for the command (stdio only)") - stdio_group.add_argument("-e", "--env", nargs="+", help="Environment variables in KEY=VALUE format (stdio only)") - - remote_group = parser.add_argument_group("sse/http options") - remote_group.add_argument("-u", "--url", help="MCP server URL (sse/http only)") - remote_group.add_argument("-H", "--header", nargs="+", dest="headers", help="HTTP headers in 'Key: Value' format (sse/http only)") - - parser.add_argument("-o", "--output", type=Path, help="Output file for evaluation report (default: stdout)") - - args = parser.parse_args() - - if not args.eval_file.exists(): - print(f"Error: Evaluation file not found: {args.eval_file}") - sys.exit(1) - - headers = parse_headers(args.headers) if args.headers else None - env_vars = parse_env_vars(args.env) if args.env else None - - try: - connection = create_connection( - transport=args.transport, - command=args.command, - args=args.args, - env=env_vars, - url=args.url, - headers=headers, - ) - except ValueError as e: - print(f"Error: {e}") - sys.exit(1) - - print(f"🔗 Connecting to MCP server via {args.transport}...") - - async with connection: - print("✅ Connected successfully") - report = await run_evaluation(args.eval_file, connection, args.model) - - if args.output: - args.output.write_text(report) - print(f"\n✅ Report saved to {args.output}") - else: - print("\n" + report) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/src/services/skills/built-in/mcp-builder/scripts/example_evaluation.xml b/src/services/skills/built-in/mcp-builder/scripts/example_evaluation.xml deleted file mode 100644 index 41e4459b5af..00000000000 --- a/src/services/skills/built-in/mcp-builder/scripts/example_evaluation.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - Calculate the compound interest on $10,000 invested at 5% annual interest rate, compounded monthly for 3 years. What is the final amount in dollars (rounded to 2 decimal places)? - 11614.72 - - - A projectile is launched at a 45-degree angle with an initial velocity of 50 m/s. 
Calculate the total distance (in meters) it has traveled from the launch point after 2 seconds, assuming g=9.8 m/s². Round to 2 decimal places. - 87.25 - - - A sphere has a volume of 500 cubic meters. Calculate its surface area in square meters. Round to 2 decimal places. - 304.65 - - - Calculate the population standard deviation of this dataset: [12, 15, 18, 22, 25, 30, 35]. Round to 2 decimal places. - 7.61 - - - Calculate the pH of a solution with a hydrogen ion concentration of 3.5 × 10^-5 M. Round to 2 decimal places. - 4.46 - - diff --git a/src/services/skills/built-in/mcp-builder/scripts/requirements.txt b/src/services/skills/built-in/mcp-builder/scripts/requirements.txt deleted file mode 100644 index e73e5d1e356..00000000000 --- a/src/services/skills/built-in/mcp-builder/scripts/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -anthropic>=0.39.0 -mcp>=1.1.0 From d011fadf529862aad47129d256010f74eaded17e Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 28 Jan 2026 16:20:13 -0700 Subject: [PATCH 08/14] fix: auto-approve skill tool (no user confirmation required) Skills are informational content that don't perform any actions, so they should be auto-approved like updateTodoList. --- src/core/auto-approval/index.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/core/auto-approval/index.ts b/src/core/auto-approval/index.ts index 72d567a96ce..7e9ad9397fc 100644 --- a/src/core/auto-approval/index.ts +++ b/src/core/auto-approval/index.ts @@ -151,6 +151,10 @@ export async function checkAutoApproval({ return { decision: "approve" } } + if (tool.tool === "skill") { + return { decision: "approve" } + } + if (tool?.tool === "switchMode") { return state.alwaysAllowModeSwitch === true ? { decision: "approve" } : { decision: "ask" } } From 6280775a9f633564a8adde5257930f4b0bdd1a0f Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 28 Jan 2026 16:32:56 -0700 Subject: [PATCH 09/14] refactor: remove approval flow from skill tool Skills execute silently without requiring user confirmation, similar to how the old fetch_instructions worked. Removed: - askApproval call from SkillTool - handlePartial method (no streaming UI) - ChatRow UI case for skill - User rejection and partial block tests Skills are read-only informational content that don't perform actions, so they don't need approval. --- src/core/auto-approval/index.ts | 4 -- src/core/tools/SkillTool.ts | 33 +-------- src/core/tools/__tests__/skillTool.spec.ts | 81 ++-------------------- webview-ui/src/components/chat/ChatRow.tsx | 69 ------------------ 4 files changed, 7 insertions(+), 180 deletions(-) diff --git a/src/core/auto-approval/index.ts b/src/core/auto-approval/index.ts index 7e9ad9397fc..72d567a96ce 100644 --- a/src/core/auto-approval/index.ts +++ b/src/core/auto-approval/index.ts @@ -151,10 +151,6 @@ export async function checkAutoApproval({ return { decision: "approve" } } - if (tool.tool === "skill") { - return { decision: "approve" } - } - if (tool?.tool === "switchMode") { return state.alwaysAllowModeSwitch === true ? 
{ decision: "approve" } : { decision: "ask" } } diff --git a/src/core/tools/SkillTool.ts b/src/core/tools/SkillTool.ts index 213cfd91ee8..bdc03b79eaa 100644 --- a/src/core/tools/SkillTool.ts +++ b/src/core/tools/SkillTool.ts @@ -1,7 +1,6 @@ import { Task } from "../task/Task" import { formatResponse } from "../prompts/responses" import { BaseTool, ToolCallbacks } from "./BaseTool" -import type { ToolUse } from "../../shared/tools" interface SkillParams { skill: string @@ -13,7 +12,7 @@ export class SkillTool extends BaseTool<"skill"> { async execute(params: SkillParams, task: Task, callbacks: ToolCallbacks): Promise { const { skill: skillName, args } = params - const { askApproval, handleError, pushToolResult } = callbacks + const { handleError, pushToolResult } = callbacks try { // Validate skill name parameter @@ -60,22 +59,7 @@ export class SkillTool extends BaseTool<"skill"> { return } - // Build approval message - const toolMessage = JSON.stringify({ - tool: "skill", - skill: skillName, - args: args, - source: skillContent.source, - description: skillContent.description, - }) - - const didApprove = await askApproval("tool", toolMessage) - - if (!didApprove) { - return - } - - // Build the result message + // Build the result message - no approval needed, skills just execute let result = `Skill: ${skillName}` if (skillContent.description) { @@ -95,18 +79,7 @@ export class SkillTool extends BaseTool<"skill"> { } } - override async handlePartial(task: Task, block: ToolUse<"skill">): Promise { - const skillName: string | undefined = block.params.skill - const args: string | undefined = block.params.args - - const partialMessage = JSON.stringify({ - tool: "skill", - skill: skillName, - args: args, - }) - - await task.ask("tool", partialMessage, block.partial).catch(() => {}) - } + // No handlePartial - skills execute silently without streaming UI } export const skillTool = new SkillTool() diff --git a/src/core/tools/__tests__/skillTool.spec.ts b/src/core/tools/__tests__/skillTool.spec.ts index fc1b3396e50..9d771b0a8cc 100644 --- a/src/core/tools/__tests__/skillTool.spec.ts +++ b/src/core/tools/__tests__/skillTool.spec.ts @@ -22,7 +22,6 @@ describe("skillTool", () => { recordToolError: vi.fn(), didToolFailInCurrentTurn: false, sayAndCreateMissingParamError: vi.fn().mockResolvedValue("Missing parameter error"), - ask: vi.fn().mockResolvedValue({}), providerRef: { deref: vi.fn().mockReturnValue({ getState: vi.fn().mockResolvedValue({ mode: "code" }), @@ -32,7 +31,6 @@ describe("skillTool", () => { } mockCallbacks = { - askApproval: vi.fn().mockResolvedValue(true), handleError: vi.fn(), pushToolResult: vi.fn(), } @@ -99,7 +97,7 @@ describe("skillTool", () => { ) }) - it("should successfully load built-in skill", async () => { + it("should successfully load built-in skill without approval", async () => { const block: ToolUse<"skill"> = { type: "tool_use" as const, name: "skill" as const, @@ -121,17 +119,7 @@ describe("skillTool", () => { await skillTool.handle(mockTask as Task, block, mockCallbacks) - expect(mockCallbacks.askApproval).toHaveBeenCalledWith( - "tool", - JSON.stringify({ - tool: "skill", - skill: "create-mcp-server", - args: undefined, - source: "built-in", - description: "Instructions for creating MCP servers", - }), - ) - + // Skills execute directly without approval expect(mockCallbacks.pushToolResult).toHaveBeenCalledWith( `Skill: create-mcp-server Description: Instructions for creating MCP servers @@ -178,57 +166,6 @@ Step 1: Create the server...`, ) }) - it("should handle 
user rejection", async () => { - const block: ToolUse<"skill"> = { - type: "tool_use" as const, - name: "skill" as const, - params: {}, - partial: false, - nativeArgs: { - skill: "create-mcp-server", - }, - } - - mockSkillsManager.getSkillContent.mockResolvedValue({ - name: "create-mcp-server", - description: "Test", - source: "built-in", - instructions: "Test instructions", - }) - - mockCallbacks.askApproval.mockResolvedValue(false) - - await skillTool.handle(mockTask as Task, block, mockCallbacks) - - expect(mockCallbacks.pushToolResult).not.toHaveBeenCalled() - }) - - it("should handle partial block", async () => { - const block: ToolUse<"skill"> = { - type: "tool_use" as const, - name: "skill" as const, - params: { - skill: "create-mcp-server", - args: "", - }, - partial: true, - } - - await skillTool.handle(mockTask as Task, block, mockCallbacks) - - expect(mockTask.ask).toHaveBeenCalledWith( - "tool", - JSON.stringify({ - tool: "skill", - skill: "create-mcp-server", - args: "", - }), - true, - ) - - expect(mockCallbacks.pushToolResult).not.toHaveBeenCalled() - }) - it("should handle errors during execution", async () => { const block: ToolUse<"skill"> = { type: "tool_use" as const, @@ -299,7 +236,7 @@ Step 1: Create the server...`, ) }) - it("should load project skill", async () => { + it("should load project skill without approval", async () => { const block: ToolUse<"skill"> = { type: "tool_use" as const, name: "skill" as const, @@ -321,17 +258,7 @@ Step 1: Create the server...`, await skillTool.handle(mockTask as Task, block, mockCallbacks) - expect(mockCallbacks.askApproval).toHaveBeenCalledWith( - "tool", - JSON.stringify({ - tool: "skill", - skill: "my-project-skill", - args: undefined, - source: "project", - description: "A custom project skill", - }), - ) - + // Skills execute directly without approval expect(mockCallbacks.pushToolResult).toHaveBeenCalledWith( `Skill: my-project-skill Description: A custom project skill diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index 25bcd61ee3f..b67bce9d571 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -666,75 +666,6 @@ export const ChatRowContent = ({
) - case "skill": { - const skillInfo = tool - return ( - <> -
- {toolIcon("book")} - - {message.type === "ask" ? t("chat:skill.wantsToLoad") : t("chat:skill.didLoad")} - -
-
- -
- - {skillInfo.skill} - - {skillInfo.source && ( - - {skillInfo.source} - - )} -
- -
- {isExpanded && (skillInfo.args || skillInfo.description) && ( -
- {skillInfo.description && ( -
- {skillInfo.description} -
- )} - {skillInfo.args && ( -
- Arguments: - - {skillInfo.args} - -
- )} -
- )} -
- - ) - } case "listFilesTopLevel": return ( <> From b3bfbb3598080312873c0a71e0c38323adaa4c83 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 28 Jan 2026 16:38:58 -0700 Subject: [PATCH 10/14] Revert "refactor: remove approval flow from skill tool" This reverts commit 40dd577492a4be5b2c920be484d7abf39588909b. --- src/core/auto-approval/index.ts | 4 ++ src/core/tools/SkillTool.ts | 33 ++++++++- src/core/tools/__tests__/skillTool.spec.ts | 81 ++++++++++++++++++++-- webview-ui/src/components/chat/ChatRow.tsx | 69 ++++++++++++++++++ 4 files changed, 180 insertions(+), 7 deletions(-) diff --git a/src/core/auto-approval/index.ts b/src/core/auto-approval/index.ts index 72d567a96ce..7e9ad9397fc 100644 --- a/src/core/auto-approval/index.ts +++ b/src/core/auto-approval/index.ts @@ -151,6 +151,10 @@ export async function checkAutoApproval({ return { decision: "approve" } } + if (tool.tool === "skill") { + return { decision: "approve" } + } + if (tool?.tool === "switchMode") { return state.alwaysAllowModeSwitch === true ? { decision: "approve" } : { decision: "ask" } } diff --git a/src/core/tools/SkillTool.ts b/src/core/tools/SkillTool.ts index bdc03b79eaa..213cfd91ee8 100644 --- a/src/core/tools/SkillTool.ts +++ b/src/core/tools/SkillTool.ts @@ -1,6 +1,7 @@ import { Task } from "../task/Task" import { formatResponse } from "../prompts/responses" import { BaseTool, ToolCallbacks } from "./BaseTool" +import type { ToolUse } from "../../shared/tools" interface SkillParams { skill: string @@ -12,7 +13,7 @@ export class SkillTool extends BaseTool<"skill"> { async execute(params: SkillParams, task: Task, callbacks: ToolCallbacks): Promise { const { skill: skillName, args } = params - const { handleError, pushToolResult } = callbacks + const { askApproval, handleError, pushToolResult } = callbacks try { // Validate skill name parameter @@ -59,7 +60,22 @@ export class SkillTool extends BaseTool<"skill"> { return } - // Build the result message - no approval needed, skills just execute + // Build approval message + const toolMessage = JSON.stringify({ + tool: "skill", + skill: skillName, + args: args, + source: skillContent.source, + description: skillContent.description, + }) + + const didApprove = await askApproval("tool", toolMessage) + + if (!didApprove) { + return + } + + // Build the result message let result = `Skill: ${skillName}` if (skillContent.description) { @@ -79,7 +95,18 @@ export class SkillTool extends BaseTool<"skill"> { } } - // No handlePartial - skills execute silently without streaming UI + override async handlePartial(task: Task, block: ToolUse<"skill">): Promise { + const skillName: string | undefined = block.params.skill + const args: string | undefined = block.params.args + + const partialMessage = JSON.stringify({ + tool: "skill", + skill: skillName, + args: args, + }) + + await task.ask("tool", partialMessage, block.partial).catch(() => {}) + } } export const skillTool = new SkillTool() diff --git a/src/core/tools/__tests__/skillTool.spec.ts b/src/core/tools/__tests__/skillTool.spec.ts index 9d771b0a8cc..fc1b3396e50 100644 --- a/src/core/tools/__tests__/skillTool.spec.ts +++ b/src/core/tools/__tests__/skillTool.spec.ts @@ -22,6 +22,7 @@ describe("skillTool", () => { recordToolError: vi.fn(), didToolFailInCurrentTurn: false, sayAndCreateMissingParamError: vi.fn().mockResolvedValue("Missing parameter error"), + ask: vi.fn().mockResolvedValue({}), providerRef: { deref: vi.fn().mockReturnValue({ getState: vi.fn().mockResolvedValue({ mode: "code" }), @@ -31,6 +32,7 @@ 
describe("skillTool", () => { } mockCallbacks = { + askApproval: vi.fn().mockResolvedValue(true), handleError: vi.fn(), pushToolResult: vi.fn(), } @@ -97,7 +99,7 @@ describe("skillTool", () => { ) }) - it("should successfully load built-in skill without approval", async () => { + it("should successfully load built-in skill", async () => { const block: ToolUse<"skill"> = { type: "tool_use" as const, name: "skill" as const, @@ -119,7 +121,17 @@ describe("skillTool", () => { await skillTool.handle(mockTask as Task, block, mockCallbacks) - // Skills execute directly without approval + expect(mockCallbacks.askApproval).toHaveBeenCalledWith( + "tool", + JSON.stringify({ + tool: "skill", + skill: "create-mcp-server", + args: undefined, + source: "built-in", + description: "Instructions for creating MCP servers", + }), + ) + expect(mockCallbacks.pushToolResult).toHaveBeenCalledWith( `Skill: create-mcp-server Description: Instructions for creating MCP servers @@ -166,6 +178,57 @@ Step 1: Create the server...`, ) }) + it("should handle user rejection", async () => { + const block: ToolUse<"skill"> = { + type: "tool_use" as const, + name: "skill" as const, + params: {}, + partial: false, + nativeArgs: { + skill: "create-mcp-server", + }, + } + + mockSkillsManager.getSkillContent.mockResolvedValue({ + name: "create-mcp-server", + description: "Test", + source: "built-in", + instructions: "Test instructions", + }) + + mockCallbacks.askApproval.mockResolvedValue(false) + + await skillTool.handle(mockTask as Task, block, mockCallbacks) + + expect(mockCallbacks.pushToolResult).not.toHaveBeenCalled() + }) + + it("should handle partial block", async () => { + const block: ToolUse<"skill"> = { + type: "tool_use" as const, + name: "skill" as const, + params: { + skill: "create-mcp-server", + args: "", + }, + partial: true, + } + + await skillTool.handle(mockTask as Task, block, mockCallbacks) + + expect(mockTask.ask).toHaveBeenCalledWith( + "tool", + JSON.stringify({ + tool: "skill", + skill: "create-mcp-server", + args: "", + }), + true, + ) + + expect(mockCallbacks.pushToolResult).not.toHaveBeenCalled() + }) + it("should handle errors during execution", async () => { const block: ToolUse<"skill"> = { type: "tool_use" as const, @@ -236,7 +299,7 @@ Step 1: Create the server...`, ) }) - it("should load project skill without approval", async () => { + it("should load project skill", async () => { const block: ToolUse<"skill"> = { type: "tool_use" as const, name: "skill" as const, @@ -258,7 +321,17 @@ Step 1: Create the server...`, await skillTool.handle(mockTask as Task, block, mockCallbacks) - // Skills execute directly without approval + expect(mockCallbacks.askApproval).toHaveBeenCalledWith( + "tool", + JSON.stringify({ + tool: "skill", + skill: "my-project-skill", + args: undefined, + source: "project", + description: "A custom project skill", + }), + ) + expect(mockCallbacks.pushToolResult).toHaveBeenCalledWith( `Skill: my-project-skill Description: A custom project skill diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index b67bce9d571..25bcd61ee3f 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -666,6 +666,75 @@ export const ChatRowContent = ({ ) + case "skill": { + const skillInfo = tool + return ( + <> +
+ {toolIcon("book")} + + {message.type === "ask" ? t("chat:skill.wantsToLoad") : t("chat:skill.didLoad")} + +
+
+ +
+ + {skillInfo.skill} + + {skillInfo.source && ( + + {skillInfo.source} + + )} +
+ +
+ {isExpanded && (skillInfo.args || skillInfo.description) && ( +
+ {skillInfo.description && ( +
+ {skillInfo.description} +
+ )} + {skillInfo.args && ( +
+ Arguments: + + {skillInfo.args} + +
+ )} +
+ )} +
+ + ) + } case "listFilesTopLevel": return ( <> From 3d07128e36f8c0a4daa63b8acbbdb877c8efe914 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 28 Jan 2026 16:58:14 -0700 Subject: [PATCH 11/14] fix: update CLI tool renderer to recognize skill tool and add auto-approval comment - Replace fetchInstructions/fetch_instructions with skill in CLI types.ts - Update CLI utils.ts display names for skill tool - Add explanatory comment for skill auto-approval policy --- apps/cli/src/ui/components/tools/types.ts | 10 +--------- apps/cli/src/ui/components/tools/utils.ts | 3 +-- src/core/auto-approval/index.ts | 3 +++ 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/apps/cli/src/ui/components/tools/types.ts b/apps/cli/src/ui/components/tools/types.ts index 28a1b5faa02..a16fbd60ea3 100644 --- a/apps/cli/src/ui/components/tools/types.ts +++ b/apps/cli/src/ui/components/tools/types.ts @@ -16,15 +16,7 @@ export type ToolCategory = | "other" export function getToolCategory(toolName: string): ToolCategory { - const fileReadTools = [ - "readFile", - "read_file", - "fetchInstructions", - "fetch_instructions", - "listFilesTopLevel", - "listFilesRecursive", - "list_files", - ] + const fileReadTools = ["readFile", "read_file", "skill", "listFilesTopLevel", "listFilesRecursive", "list_files"] const fileWriteTools = [ "editedExistingFile", diff --git a/apps/cli/src/ui/components/tools/utils.ts b/apps/cli/src/ui/components/tools/utils.ts index 5eaee33b127..01195d9000e 100644 --- a/apps/cli/src/ui/components/tools/utils.ts +++ b/apps/cli/src/ui/components/tools/utils.ts @@ -50,8 +50,7 @@ export function getToolDisplayName(toolName: string): string { // File read operations readFile: "Read", read_file: "Read", - fetchInstructions: "Fetch Instructions", - fetch_instructions: "Fetch Instructions", + skill: "Load Skill", listFilesTopLevel: "List Files", listFilesRecursive: "List Files (Recursive)", list_files: "List Files", diff --git a/src/core/auto-approval/index.ts b/src/core/auto-approval/index.ts index 7e9ad9397fc..f9de2ccfe36 100644 --- a/src/core/auto-approval/index.ts +++ b/src/core/auto-approval/index.ts @@ -151,6 +151,9 @@ export async function checkAutoApproval({ return { decision: "approve" } } + // The skill tool only loads pre-defined instructions from built-in, global, or project skills. + // It does not read arbitrary files - skills must be explicitly installed/defined by the user. + // Auto-approval is intentional to provide a seamless experience when loading task instructions. 
if (tool.tool === "skill") { return { decision: "approve" } } From 2420ff7d029a82fa3e94ce87210bc161231cbedb Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 28 Jan 2026 17:24:26 -0700 Subject: [PATCH 12/14] fix: remove remaining fetchInstructions references in CLI utils --- apps/cli/src/ui/components/tools/utils.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/apps/cli/src/ui/components/tools/utils.ts b/apps/cli/src/ui/components/tools/utils.ts index 01195d9000e..31acf2cccbc 100644 --- a/apps/cli/src/ui/components/tools/utils.ts +++ b/apps/cli/src/ui/components/tools/utils.ts @@ -106,8 +106,7 @@ export function getToolIconName(toolName: string): IconName { // File read operations readFile: "file", read_file: "file", - fetchInstructions: "file", - fetch_instructions: "file", + skill: "file", listFilesTopLevel: "folder", listFilesRecursive: "folder", list_files: "folder", From f9978902c50287dea5518697b6998189d1cfc9b1 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Thu, 29 Jan 2026 06:14:29 +0000 Subject: [PATCH 13/14] Remove all diffEnabled references per review request - Remove diffEnabled from global-settings.ts Zod schema - Remove diffEnabled from ExtensionState Pick type in vscode-extension-host.ts - Remove diffEnabled parameter from SYSTEM_PROMPT function signature in system.ts - Remove diffEnabled from ClineProvider.ts (destructuring, getState, getStateToPostToWebview) - Remove diffEnabled from generateSystemPrompt.ts - Remove diffEnabled from Task.ts getSystemPrompt call - Remove diffEnabled from test files (ClineProvider.spec.ts, system-prompt.spec.ts, add-custom-instructions.spec.ts) - Remove test cases specifically testing diffEnabled behavior --- packages/types/src/global-settings.ts | 5 -- packages/types/src/vscode-extension-host.ts | 1 - .../__tests__/add-custom-instructions.spec.ts | 4 - .../prompts/__tests__/system-prompt.spec.ts | 88 ------------------- src/core/prompts/system.ts | 11 +-- src/core/task/Task.ts | 1 - src/core/webview/ClineProvider.ts | 3 - .../webview/__tests__/ClineProvider.spec.ts | 77 ---------------- src/core/webview/generateSystemPrompt.ts | 1 - 9 files changed, 1 insertion(+), 190 deletions(-) diff --git a/packages/types/src/global-settings.ts b/packages/types/src/global-settings.ts index 75a39b41e65..72c8b8256f8 100644 --- a/packages/types/src/global-settings.ts +++ b/packages/types/src/global-settings.ts @@ -100,11 +100,6 @@ export const globalSettingsSchema = z.object({ alwaysAllowWrite: z.boolean().optional(), alwaysAllowWriteOutsideWorkspace: z.boolean().optional(), alwaysAllowWriteProtected: z.boolean().optional(), - /** - * Whether diff-based editing tools are enabled. - * When disabled, the extension should avoid providing diff strategies / diff-based tooling. 
- */ - diffEnabled: z.boolean().optional(), writeDelayMs: z.number().min(0).optional(), alwaysAllowBrowser: z.boolean().optional(), requestDelaySeconds: z.number().optional(), diff --git a/packages/types/src/vscode-extension-host.ts b/packages/types/src/vscode-extension-host.ts index ad460dd97a4..5670fa1ade9 100644 --- a/packages/types/src/vscode-extension-host.ts +++ b/packages/types/src/vscode-extension-host.ts @@ -280,7 +280,6 @@ export type ExtensionState = Pick< | "alwaysAllowWrite" | "alwaysAllowWriteOutsideWorkspace" | "alwaysAllowWriteProtected" - | "diffEnabled" | "alwaysAllowBrowser" | "alwaysAllowMcp" | "alwaysAllowModeSwitch" diff --git a/src/core/prompts/__tests__/add-custom-instructions.spec.ts b/src/core/prompts/__tests__/add-custom-instructions.spec.ts index 3f7b6e06e43..b7813d0f5b8 100644 --- a/src/core/prompts/__tests__/add-custom-instructions.spec.ts +++ b/src/core/prompts/__tests__/add-custom-instructions.spec.ts @@ -210,7 +210,6 @@ describe("addCustomInstructions", () => { undefined, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions - undefined, // diffEnabled undefined, // experiments undefined, // language undefined, // rooIgnoreInstructions @@ -232,7 +231,6 @@ describe("addCustomInstructions", () => { undefined, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions - undefined, // diffEnabled undefined, // experiments undefined, // language undefined, // rooIgnoreInstructions @@ -256,7 +254,6 @@ describe("addCustomInstructions", () => { undefined, // customModePrompts undefined, // customModes, undefined, // globalCustomInstructions - undefined, // diffEnabled undefined, // experiments undefined, // language undefined, // rooIgnoreInstructions @@ -279,7 +276,6 @@ describe("addCustomInstructions", () => { undefined, // customModePrompts undefined, // customModes, undefined, // globalCustomInstructions - undefined, // diffEnabled undefined, // experiments undefined, // language undefined, // rooIgnoreInstructions diff --git a/src/core/prompts/__tests__/system-prompt.spec.ts b/src/core/prompts/__tests__/system-prompt.spec.ts index bc782930b10..91fb9350b4c 100644 --- a/src/core/prompts/__tests__/system-prompt.spec.ts +++ b/src/core/prompts/__tests__/system-prompt.spec.ts @@ -225,7 +225,6 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions - undefined, // diffEnabled experiments, undefined, // language undefined, // rooIgnoreInstructions @@ -247,7 +246,6 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes, undefined, // globalCustomInstructions - undefined, // diffEnabled experiments, undefined, // language undefined, // rooIgnoreInstructions @@ -271,7 +269,6 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes, undefined, // globalCustomInstructions - undefined, // diffEnabled experiments, undefined, // language undefined, // rooIgnoreInstructions @@ -293,7 +290,6 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes, undefined, // globalCustomInstructions - undefined, // diffEnabled experiments, undefined, // language undefined, // rooIgnoreInstructions @@ -315,7 +311,6 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes, undefined, // globalCustomInstructions - undefined, // diffEnabled experiments, undefined, // language undefined, // 
rooIgnoreInstructions @@ -325,81 +320,6 @@ describe("SYSTEM_PROMPT", () => { expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-different-viewport-size.snap") }) - it("should include diff strategy tool description when diffEnabled is true", async () => { - const prompt = await SYSTEM_PROMPT( - mockContext, - "/test/path", - false, - undefined, // mcpHub - new MultiSearchReplaceDiffStrategy(), // Use actual diff strategy from the codebase - undefined, // browserViewportSize - defaultModeSlug, // mode - undefined, // customModePrompts - undefined, // customModes - undefined, // globalCustomInstructions - true, // diffEnabled - experiments, - undefined, // language - undefined, // rooIgnoreInstructions - undefined, // partialReadsEnabled - ) - - // Native-only: tool catalog isn't embedded in the system prompt anymore. - expect(prompt).not.toContain("# Tools") - expect(prompt).not.toContain("apply_diff") - expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-diff-enabled-true.snap") - }) - - it("should exclude diff strategy tool description when diffEnabled is false", async () => { - const prompt = await SYSTEM_PROMPT( - mockContext, - "/test/path", - false, // supportsImages - undefined, // mcpHub - new MultiSearchReplaceDiffStrategy(), // Use actual diff strategy from the codebase - undefined, // browserViewportSize - defaultModeSlug, // mode - undefined, // customModePrompts - undefined, // customModes - undefined, // globalCustomInstructions - false, // diffEnabled - experiments, - undefined, // language - undefined, // rooIgnoreInstructions - undefined, // partialReadsEnabled - ) - - // Native-only: tool catalog isn't embedded in the system prompt anymore. - expect(prompt).not.toContain("# Tools") - expect(prompt).not.toContain("apply_diff") - expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-diff-enabled-false.snap") - }) - - it("should exclude diff strategy tool description when diffEnabled is undefined", async () => { - const prompt = await SYSTEM_PROMPT( - mockContext, - "/test/path", - false, - undefined, // mcpHub - new MultiSearchReplaceDiffStrategy(), // Use actual diff strategy from the codebase - undefined, // browserViewportSize - defaultModeSlug, // mode - undefined, // customModePrompts - undefined, // customModes - undefined, // globalCustomInstructions - undefined, // diffEnabled - experiments, - undefined, // language - undefined, // rooIgnoreInstructions - undefined, // partialReadsEnabled - ) - - // Native-only: tool catalog isn't embedded in the system prompt anymore. 
- expect(prompt).not.toContain("# Tools") - expect(prompt).not.toContain("apply_diff") - expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-diff-enabled-undefined.snap") - }) - it("should include vscode language in custom instructions", async () => { // Mock vscode.env.language const vscode = vi.mocked(await import("vscode")) as any @@ -439,7 +359,6 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions - undefined, // diffEnabled undefined, // experiments undefined, // language undefined, // rooIgnoreInstructions @@ -499,7 +418,6 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts customModes, // customModes "Global instructions", // globalCustomInstructions - undefined, // diffEnabled experiments, undefined, // language undefined, // rooIgnoreInstructions @@ -536,7 +454,6 @@ describe("SYSTEM_PROMPT", () => { customModePrompts, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions - undefined, // diffEnabled undefined, // experiments undefined, // language undefined, // rooIgnoreInstructions @@ -568,7 +485,6 @@ describe("SYSTEM_PROMPT", () => { customModePrompts, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions - undefined, // diffEnabled undefined, // experiments undefined, // language undefined, // rooIgnoreInstructions @@ -598,7 +514,6 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions - undefined, // diffEnabled experiments, undefined, // language undefined, // rooIgnoreInstructions @@ -630,7 +545,6 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions - undefined, // diffEnabled experiments, undefined, // language undefined, // rooIgnoreInstructions @@ -662,7 +576,6 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions - undefined, // diffEnabled experiments, undefined, // language undefined, // rooIgnoreInstructions @@ -694,7 +607,6 @@ describe("SYSTEM_PROMPT", () => { undefined, // customModePrompts undefined, // customModes undefined, // globalCustomInstructions - undefined, // diffEnabled experiments, undefined, // language undefined, // rooIgnoreInstructions diff --git a/src/core/prompts/system.ts b/src/core/prompts/system.ts index 6e978c2ece4..6404ec990e4 100644 --- a/src/core/prompts/system.ts +++ b/src/core/prompts/system.ts @@ -52,7 +52,6 @@ async function generatePrompt( promptComponent?: PromptComponent, customModeConfigs?: ModeConfig[], globalCustomInstructions?: string, - diffEnabled?: boolean, experiments?: Record, language?: string, rooIgnoreInstructions?: string, @@ -66,9 +65,6 @@ async function generatePrompt( throw new Error("Extension context is required for generating system prompt") } - // If diff is disabled, don't pass the diffStrategy - const effectiveDiffStrategy = diffEnabled ? diffStrategy : undefined - // Get the full mode config to ensure we have the role definition (used for groups, etc.) 
const modeConfig = getModeBySlug(mode, customModeConfigs) || modes.find((m) => m.slug === mode) || modes[0] const { roleDefinition, baseInstructions } = getModeSelection(mode, promptComponent, customModeConfigs) @@ -129,7 +125,6 @@ export const SYSTEM_PROMPT = async ( customModePrompts?: CustomModePrompts, customModes?: ModeConfig[], globalCustomInstructions?: string, - diffEnabled?: boolean, experiments?: Record, language?: string, rooIgnoreInstructions?: string, @@ -187,21 +182,17 @@ ${fileCustomSystemPrompt} ${customInstructions}` } - // If diff is disabled, don't pass the diffStrategy - const effectiveDiffStrategy = diffEnabled ? diffStrategy : undefined - return generatePrompt( context, cwd, supportsComputerUse, currentMode.slug, mcpHub, - effectiveDiffStrategy, + diffStrategy, browserViewportSize, promptComponent, customModes, globalCustomInstructions, - diffEnabled, experiments, language, rooIgnoreInstructions, diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 1e8ecd43a29..0c54e560b89 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -3795,7 +3795,6 @@ export class Task extends EventEmitter implements TaskLike { customModePrompts, customModes, customInstructions, - undefined, // diffEnabled experiments, language, rooIgnoreInstructions, diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 0fa8528913a..e722ce37f85 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -1991,7 +1991,6 @@ export class ClineProvider alwaysAllowWrite, alwaysAllowWriteOutsideWorkspace, alwaysAllowWriteProtected, - diffEnabled, alwaysAllowExecute, allowedCommands, deniedCommands, @@ -2117,7 +2116,6 @@ export class ClineProvider alwaysAllowWrite: alwaysAllowWrite ?? false, alwaysAllowWriteOutsideWorkspace: alwaysAllowWriteOutsideWorkspace ?? false, alwaysAllowWriteProtected: alwaysAllowWriteProtected ?? false, - diffEnabled: diffEnabled ?? true, alwaysAllowExecute: alwaysAllowExecute ?? false, alwaysAllowBrowser: alwaysAllowBrowser ?? false, alwaysAllowMcp: alwaysAllowMcp ?? false, @@ -2368,7 +2366,6 @@ export class ClineProvider alwaysAllowWrite: stateValues.alwaysAllowWrite ?? false, alwaysAllowWriteOutsideWorkspace: stateValues.alwaysAllowWriteOutsideWorkspace ?? false, alwaysAllowWriteProtected: stateValues.alwaysAllowWriteProtected ?? false, - diffEnabled: stateValues.diffEnabled ?? true, alwaysAllowExecute: stateValues.alwaysAllowExecute ?? false, alwaysAllowBrowser: stateValues.alwaysAllowBrowser ?? false, alwaysAllowMcp: stateValues.alwaysAllowMcp ?? 
false, diff --git a/src/core/webview/__tests__/ClineProvider.spec.ts b/src/core/webview/__tests__/ClineProvider.spec.ts index 158dee9db90..c08ff8cad92 100644 --- a/src/core/webview/__tests__/ClineProvider.spec.ts +++ b/src/core/webview/__tests__/ClineProvider.spec.ts @@ -554,7 +554,6 @@ describe("ClineProvider", () => { uriScheme: "vscode", soundEnabled: false, ttsEnabled: false, - diffEnabled: false, enableCheckpoints: false, writeDelayMs: 1000, browserViewportSize: "900x600", @@ -765,7 +764,6 @@ describe("ClineProvider", () => { expect(state).toHaveProperty("taskHistory") expect(state).toHaveProperty("soundEnabled") expect(state).toHaveProperty("ttsEnabled") - expect(state).toHaveProperty("diffEnabled") expect(state).toHaveProperty("writeDelayMs") }) @@ -777,15 +775,6 @@ describe("ClineProvider", () => { expect(state.language).toBe("pt-BR") }) - test("diffEnabled defaults to true when not set", async () => { - // Mock globalState.get to return undefined for diffEnabled - ;(mockContext.globalState.get as any).mockReturnValue(undefined) - - const state = await provider.getState() - - expect(state.diffEnabled).toBe(true) - }) - test("writeDelayMs defaults to 1000ms", async () => { // Mock globalState.get to return undefined for writeDelayMs ;(mockContext.globalState.get as any).mockImplementation((key: string) => @@ -1440,72 +1429,6 @@ describe("ClineProvider", () => { ) }) - test("generates system prompt with diff enabled", async () => { - await provider.resolveWebviewView(mockWebviewView) - - // Mock getState to return diffEnabled: true - vi.spyOn(provider, "getState").mockResolvedValue({ - apiConfiguration: { - apiProvider: "openrouter", - apiModelId: "test-model", - }, - customModePrompts: {}, - mode: "code", - mcpEnabled: false, - browserViewportSize: "900x600", - diffEnabled: true, - fuzzyMatchThreshold: 0.8, - experiments: experimentDefault, - browserToolEnabled: true, - } as any) - - // Trigger getSystemPrompt - const handler = getMessageHandler() - await handler({ type: "getSystemPrompt", mode: "code" }) - - // Verify system prompt was generated and sent - expect(mockPostMessage).toHaveBeenCalledWith( - expect.objectContaining({ - type: "systemPrompt", - text: expect.any(String), - mode: "code", - }), - ) - }) - - test("generates system prompt with diff disabled", async () => { - await provider.resolveWebviewView(mockWebviewView) - - // Mock getState to return diffEnabled: false - vi.spyOn(provider, "getState").mockResolvedValue({ - apiConfiguration: { - apiProvider: "openrouter", - apiModelId: "test-model", - }, - customModePrompts: {}, - mode: "code", - mcpEnabled: false, - browserViewportSize: "900x600", - diffEnabled: false, - fuzzyMatchThreshold: 0.8, - experiments: experimentDefault, - browserToolEnabled: false, - } as any) - - // Trigger getSystemPrompt - const handler = getMessageHandler() - await handler({ type: "getSystemPrompt", mode: "code" }) - - // Verify system prompt was generated and sent - expect(mockPostMessage).toHaveBeenCalledWith( - expect.objectContaining({ - type: "systemPrompt", - text: expect.any(String), - mode: "code", - }), - ) - }) - test("uses correct mode-specific instructions when mode is specified", async () => { await provider.resolveWebviewView(mockWebviewView) diff --git a/src/core/webview/generateSystemPrompt.ts b/src/core/webview/generateSystemPrompt.ts index 100d2976590..b6f77d3842c 100644 --- a/src/core/webview/generateSystemPrompt.ts +++ b/src/core/webview/generateSystemPrompt.ts @@ -67,7 +67,6 @@ export const generateSystemPrompt = async 
(provider: ClineProvider, message: Web customModePrompts, customModes, customInstructions, - undefined, // diffEnabled experiments, language, rooIgnoreInstructions, From 51a199d503763ee44bfca8e91e7596b2ce8dc934 Mon Sep 17 00:00:00 2001 From: "roomote[bot]" <219738659+roomote[bot]@users.noreply.github.com> Date: Thu, 29 Jan 2026 06:42:24 +0000 Subject: [PATCH 14/14] Revert "chore: merge main and resolve conflicts in skills module" This reverts commit 34ef7d34f2e6c64fbb1443e75ef2575dfb0d997b. --- apps/web-roo-code/src/app/linear/page.tsx | 413 ---------------- .../src/components/chromes/nav-bar.tsx | 20 - .../components/linear/linear-issue-demo.tsx | 442 ------------------ packages/types/src/__tests__/skills.test.ts | 144 ------ packages/types/src/index.ts | 1 - packages/types/src/skills.ts | 71 --- packages/types/src/vscode-extension-host.ts | 13 +- .../__tests__/skillsMessageHandler.spec.ts | 334 ------------- src/core/webview/skillsMessageHandler.ts | 133 ------ src/core/webview/webviewMessageHandler.ts | 17 - src/i18n/locales/ca/skills.json | 14 - src/i18n/locales/de/skills.json | 14 - src/i18n/locales/en/skills.json | 14 - src/i18n/locales/es/skills.json | 14 - src/i18n/locales/fr/skills.json | 14 - src/i18n/locales/hi/skills.json | 14 - src/i18n/locales/id/skills.json | 14 - src/i18n/locales/it/skills.json | 14 - src/i18n/locales/ja/skills.json | 14 - src/i18n/locales/ko/skills.json | 14 - src/i18n/locales/nl/skills.json | 14 - src/i18n/locales/pl/skills.json | 14 - src/i18n/locales/pt-BR/skills.json | 14 - src/i18n/locales/ru/skills.json | 14 - src/i18n/locales/tr/skills.json | 14 - src/i18n/locales/vi/skills.json | 14 - src/i18n/locales/zh-CN/skills.json | 14 - src/i18n/locales/zh-TW/skills.json | 14 - src/services/skills/SkillsManager.ts | 169 +------ .../skills/__tests__/SkillsManager.spec.ts | 319 +------------ .../components/settings/CreateSkillDialog.tsx | 254 ---------- .../src/components/settings/SettingsView.tsx | 7 - .../src/components/settings/SkillItem.tsx | 61 --- .../components/settings/SkillsSettings.tsx | 228 --------- .../__tests__/CreateSkillDialog.spec.tsx | 404 ---------------- .../SettingsView.change-detection.spec.tsx | 25 - .../settings/__tests__/SettingsView.spec.tsx | 11 - .../SettingsView.unsaved-changes.spec.tsx | 25 - .../settings/__tests__/SkillItem.spec.tsx | 141 ------ .../__tests__/SkillsSettings.spec.tsx | 436 ----------------- .../src/context/ExtensionStateContext.tsx | 8 - webview-ui/src/i18n/locales/ca/settings.json | 48 +- webview-ui/src/i18n/locales/de/settings.json | 48 +- webview-ui/src/i18n/locales/en/settings.json | 46 -- webview-ui/src/i18n/locales/es/settings.json | 48 +- webview-ui/src/i18n/locales/fr/settings.json | 48 +- webview-ui/src/i18n/locales/hi/settings.json | 48 +- webview-ui/src/i18n/locales/id/settings.json | 48 +- webview-ui/src/i18n/locales/it/settings.json | 48 +- webview-ui/src/i18n/locales/ja/settings.json | 48 +- webview-ui/src/i18n/locales/ko/settings.json | 48 +- webview-ui/src/i18n/locales/nl/settings.json | 48 +- webview-ui/src/i18n/locales/pl/settings.json | 48 +- .../src/i18n/locales/pt-BR/settings.json | 48 +- webview-ui/src/i18n/locales/ru/settings.json | 48 +- webview-ui/src/i18n/locales/tr/settings.json | 48 +- webview-ui/src/i18n/locales/vi/settings.json | 48 +- .../src/i18n/locales/zh-CN/settings.json | 48 +- .../src/i18n/locales/zh-TW/settings.json | 58 +-- 59 files changed, 45 insertions(+), 4755 deletions(-) delete mode 100644 apps/web-roo-code/src/app/linear/page.tsx delete mode 100644 
apps/web-roo-code/src/components/linear/linear-issue-demo.tsx delete mode 100644 packages/types/src/__tests__/skills.test.ts delete mode 100644 packages/types/src/skills.ts delete mode 100644 src/core/webview/__tests__/skillsMessageHandler.spec.ts delete mode 100644 src/core/webview/skillsMessageHandler.ts delete mode 100644 src/i18n/locales/ca/skills.json delete mode 100644 src/i18n/locales/de/skills.json delete mode 100644 src/i18n/locales/en/skills.json delete mode 100644 src/i18n/locales/es/skills.json delete mode 100644 src/i18n/locales/fr/skills.json delete mode 100644 src/i18n/locales/hi/skills.json delete mode 100644 src/i18n/locales/id/skills.json delete mode 100644 src/i18n/locales/it/skills.json delete mode 100644 src/i18n/locales/ja/skills.json delete mode 100644 src/i18n/locales/ko/skills.json delete mode 100644 src/i18n/locales/nl/skills.json delete mode 100644 src/i18n/locales/pl/skills.json delete mode 100644 src/i18n/locales/pt-BR/skills.json delete mode 100644 src/i18n/locales/ru/skills.json delete mode 100644 src/i18n/locales/tr/skills.json delete mode 100644 src/i18n/locales/vi/skills.json delete mode 100644 src/i18n/locales/zh-CN/skills.json delete mode 100644 src/i18n/locales/zh-TW/skills.json delete mode 100644 webview-ui/src/components/settings/CreateSkillDialog.tsx delete mode 100644 webview-ui/src/components/settings/SkillItem.tsx delete mode 100644 webview-ui/src/components/settings/SkillsSettings.tsx delete mode 100644 webview-ui/src/components/settings/__tests__/CreateSkillDialog.spec.tsx delete mode 100644 webview-ui/src/components/settings/__tests__/SkillItem.spec.tsx delete mode 100644 webview-ui/src/components/settings/__tests__/SkillsSettings.spec.tsx diff --git a/apps/web-roo-code/src/app/linear/page.tsx b/apps/web-roo-code/src/app/linear/page.tsx deleted file mode 100644 index 40334e2698a..00000000000 --- a/apps/web-roo-code/src/app/linear/page.tsx +++ /dev/null @@ -1,413 +0,0 @@ -import { - ArrowRight, - CheckCircle, - CreditCard, - Eye, - GitBranch, - GitPullRequest, - Link2, - MessageSquare, - Settings, - Shield, -} from "lucide-react" -import type { LucideIcon } from "lucide-react" -import type { Metadata } from "next" - -import { AnimatedBackground } from "@/components/homepage" -import { LinearIssueDemo } from "@/components/linear/linear-issue-demo" -import { Button } from "@/components/ui" -import { EXTERNAL_LINKS } from "@/lib/constants" -import { SEO } from "@/lib/seo" -import { ogImageUrl } from "@/lib/og" - -const TITLE = "Roo Code for Linear" -const DESCRIPTION = "Assign development work to @Roo Code directly from Linear. Get PRs back without switching tools." 
-const OG_DESCRIPTION = "Turn Linear Issues into Pull Requests" -const PATH = "/linear" - -// Featured Workflow section is temporarily commented out until video is ready -// const LINEAR_DEMO_YOUTUBE_ID = "" - -export const metadata: Metadata = { - title: TITLE, - description: DESCRIPTION, - alternates: { - canonical: `${SEO.url}${PATH}`, - }, - openGraph: { - title: TITLE, - description: DESCRIPTION, - url: `${SEO.url}${PATH}`, - siteName: SEO.name, - images: [ - { - url: ogImageUrl(TITLE, OG_DESCRIPTION), - width: 1200, - height: 630, - alt: TITLE, - }, - ], - locale: SEO.locale, - type: "website", - }, - twitter: { - card: SEO.twitterCard, - title: TITLE, - description: DESCRIPTION, - images: [ogImageUrl(TITLE, OG_DESCRIPTION)], - }, - keywords: [ - ...SEO.keywords, - "linear integration", - "issue to PR", - "AI in Linear", - "engineering workflow automation", - "Roo Code Cloud", - ], -} - -// Invalidate cache when a request comes in, at most once every hour. -export const revalidate = 3600 - -type ValueProp = { - icon: LucideIcon - title: string - description: string -} - -const VALUE_PROPS: ValueProp[] = [ - { - icon: GitBranch, - title: "Work where you already work.", - description: - "Assign development work to @Roo Code directly from Linear. No new tools to learn, no context switching required.", - }, - { - icon: Eye, - title: "Progress is visible.", - description: - "Watch progress unfold in real-time. Roo Code posts updates as comments, so your whole team stays in the loop.", - }, - { - icon: MessageSquare, - title: "Mention for refinement.", - description: - 'Need changes? Just comment "@Roo Code also add dark mode support" and the agent picks up where it left off.', - }, - { - icon: Link2, - title: "Full traceability.", - description: - "Every PR links back to the originating issue. Every issue shows its linked PR. Your audit trail stays clean.", - }, - { - icon: Settings, - title: "Organization-level setup.", - description: - "Connect once, use everywhere. Your team members can assign issues to @Roo Code without individual configuration.", - }, - { - icon: Shield, - title: "Safe by design.", - description: - "Agents never touch main/master directly. They produce branches and PRs. You review and approve before merge.", - }, -] - -// type WorkflowStep = { -// step: number -// title: string -// description: string -// } - -// const WORKFLOW_STEPS: WorkflowStep[] = [ -// { -// step: 1, -// title: "Create an issue", -// description: "Write your issue with acceptance criteria. Be as detailed as you like.", -// }, -// { -// step: 2, -// title: "Call @Roo Code", -// description: "Mention @Roo Code in a comment to start. The agent begins working immediately.", -// }, -// { -// step: 3, -// title: "Watch progress", -// description: "Roo Code posts status updates as comments. Refine with @-mentions if needed.", -// }, -// { -// step: 4, -// title: "Review the PR", -// description: "When ready, the PR link appears in the issue. Review, iterate, and ship.", -// }, -// ] - -type OnboardingStep = { - icon: LucideIcon - title: string - description: string - link?: { - href: string - text: string - } -} - -const ONBOARDING_STEPS: OnboardingStep[] = [ - { - icon: CreditCard, - title: "1. Team Plan", - description: "Linear integration requires a Team plan.", - link: { - href: EXTERNAL_LINKS.CLOUD_APP_TEAM_TRIAL, - text: "Start a free trial", - }, - }, - { - icon: GitPullRequest, - title: "2. 
Connect GitHub", - description: "Link your repositories so Roo Code can open PRs on your behalf.", - }, - { - icon: Settings, - title: "3. Connect Linear", - description: "Authorize via OAuth. No API keys to manage or rotate.", - }, - { - icon: CheckCircle, - title: "4. Link & Start", - description: "Map your Linear project to a repo, then assign or mention @Roo Code.", - }, -] - -function LinearIcon({ className }: { className?: string }) { - return ( - - - - ) -} - -export default function LinearPage(): JSX.Element { - return ( - <> - {/* Hero Section */} -
-      [Deleted JSX of LinearPage: the element tags were lost in extraction. Recoverable content, in order:
-       Hero section: badge "Powered by Roo Code Cloud"; heading "Turn Linear Issues into Pull Requests"; subheading "Assign development work to @Roo Code directly from Linear. Get PRs back without switching tools."
-       Value props section: heading "Why your team will love using Roo Code in Linear"; subheading "AI agents that understand context, keep your team in the loop, and deliver PRs you can review."; a grid mapping over VALUE_PROPS that renders each prop.icon, prop.title, and prop.description.
-       Featured Workflow section (commented out until the demo video is ready): badge "Featured Workflow"; heading "Issue to Shipped Feature"; subheading "Stay in Linear from assignment to review. Roo Code keeps the issue updated and links the PR when it's ready."; a YouTube embed or placeholder gated on LINEAR_DEMO_YOUTUBE_ID.]