Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 69 additions & 16 deletions server/routes/mcp/evals.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,69 @@ import {
transformLLMConfigToLlmsConfig,
} from "../../utils/eval-transformer";
import { ConvexHttpClient } from "convex/browser";
import { generateTestCases } from "../../services/eval-agent";
import {
generateTestCases,
type DiscoveredTool,
} from "../../services/eval-agent";
import type { MCPClientManager } from "@/shared/mcp-client-manager";
import "../../types/hono";

/**
 * Resolves caller-supplied server ids against the ids reported by
 * `clientManager.listServers()`.
 *
 * An exact match wins; otherwise the first listed id that is equal when both
 * sides are lower-cased is used. Every resolved id appears once, in the order
 * it was first requested.
 *
 * @param requestedIds - Server ids as supplied by the caller.
 * @param clientManager - Manager whose listed server ids are searched.
 * @returns The matched ids, spelled as the manager lists them, without duplicates.
 * @throws Error when a requested id matches none of the listed servers.
 */
function resolveServerIdsOrThrow(
  requestedIds: string[],
  clientManager: MCPClientManager,
): string[] {
  const knownIds = clientManager.listServers();
  // A Set keeps insertion order, so it de-duplicates while preserving the
  // order in which ids were first resolved.
  const uniqueMatches = new Set<string>();

  requestedIds.forEach((requestedId) => {
    let matchedId = knownIds.find((candidate) => candidate === requestedId);

    // Only fall back to the case-insensitive search when no exact hit exists.
    if (matchedId == null) {
      matchedId = knownIds.find(
        (candidate) => candidate.toLowerCase() === requestedId.toLowerCase(),
      );
    }

    if (!matchedId) {
      throw new Error(`Server '${requestedId}' not found`);
    }

    uniqueMatches.add(matchedId);
  });

  return Array.from(uniqueMatches);
}

/**
 * Gathers the tools exposed by the given servers into one flat list.
 *
 * Servers are queried concurrently. A server whose connection status is not
 * `"connected"` contributes no tools, and a server whose tool listing fails is
 * logged with `console.warn` and likewise contributes none, so one failing
 * server does not fail the whole call.
 *
 * @param clientManager - Manager used to check connection status and list tools.
 * @param serverIds - Ids of the servers to query.
 * @returns Tools from all queried servers, grouped in `serverIds` order, each
 *   tagged with the id of the server it was listed from.
 */
async function collectToolsForServers(
  clientManager: MCPClientManager,
  serverIds: string[],
): Promise<DiscoveredTool[]> {
  // Lists the tools of a single server; resolves to [] instead of rejecting
  // when the server is not connected or its listing fails.
  const listForServer = async (serverId: string): Promise<DiscoveredTool[]> => {
    const isConnected =
      clientManager.getConnectionStatus(serverId) === "connected";
    if (!isConnected) {
      return [];
    }

    try {
      const listing = await clientManager.listTools(serverId);
      const discovered: DiscoveredTool[] = [];

      for (const tool of listing.tools) {
        discovered.push({
          name: tool.name,
          description: tool.description,
          inputSchema: tool.inputSchema,
          // The cast reads outputSchema without requiring the listed tool's
          // type to declare it.
          outputSchema: (tool as { outputSchema?: unknown }).outputSchema,
          serverId,
        });
      }

      return discovered;
    } catch (error) {
      console.warn(
        `[evals] Failed to list tools for server ${serverId}:`,
        error,
      );
      return [];
    }
  };

  const toolGroups = await Promise.all(
    serverIds.map((serverId) => listForServer(serverId)),
  );

  return toolGroups.flat();
}

const evals = new Hono();

const RunEvalsRequestSchema = z.object({
Expand Down Expand Up @@ -59,10 +119,11 @@ evals.post("/run", async (c) => {
const { tests, serverIds, llmConfig, convexAuthToken } =
validationResult.data as RunEvalsRequest;

const clientManager = c.mcpJamClientManager;
const clientManager = c.mcpClientManager;
const resolvedServerIds = resolveServerIdsOrThrow(serverIds, clientManager);

const environment = transformServerConfigsToEnvironment(
serverIds,
resolvedServerIds,
clientManager,
);
const modelId = tests.length > 0 ? tests[0].model : undefined;
Expand Down Expand Up @@ -134,20 +195,12 @@ evals.post("/generate-tests", async (c) => {
const { serverIds, convexAuthToken } =
validationResult.data as GenerateTestsRequest;

const clientManager = c.mcpJamClientManager;

// Get all available tools
const allTools = clientManager.getAvailableTools();
const clientManager = c.mcpClientManager;
const resolvedServerIds = resolveServerIdsOrThrow(serverIds, clientManager);

// Filter tools by selected servers
const serverIdSet = new Set(
serverIds
.map((name) => clientManager.getServerIdForName(name))
.filter(Boolean),
);

const filteredTools = allTools.filter((tool) =>
serverIdSet.has(tool.serverId),
const filteredTools = await collectToolsForServers(
clientManager,
resolvedServerIds,
);

if (filteredTools.length === 0) {
Expand Down
9 changes: 8 additions & 1 deletion server/services/eval-agent.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import type { DiscoveredTool } from "./mcpjam-client-manager";
import type { ModelMessage } from "ai";

/**
 * Description of a single tool together with the server it belongs to.
 */
export interface DiscoveredTool {
/** Name of the tool. */
name: string;
/** Optional description of the tool. */
description?: string;
/** Schema of the tool's input; untyped here (presumably JSON Schema — TODO confirm). */
inputSchema: any;
/** Schema of the tool's output, when one is provided; untyped here. */
outputSchema?: any;
/** Id of the server associated with this tool. */
serverId: string;
}

export interface GenerateTestsRequest {
serverIds: string[];
tools: DiscoveredTool[];
Expand Down
24 changes: 9 additions & 15 deletions server/utils/eval-transformer.ts
Original file line number Diff line number Diff line change
@@ -1,41 +1,35 @@
import { MCPClientOptions } from "@mastra/mcp";
import { MCPServerConfig } from "@/shared/mcp-client-manager";
import { MCPJamClientManager } from "../services/mcpjam-client-manager";
import { MCPClientManager, MCPServerConfig } from "@/shared/mcp-client-manager";
import {
LlmsConfig,
LlmsConfigSchema,
} from "../../evals-cli/src/utils/validators";
import { isMCPJamProvidedModel } from "../../shared/types";

/**
* Transforms server IDs from MCPJamClientManager to MCPClientOptions format
* Transforms server IDs from MCPClientManager to MCPClientOptions format
* required by runEvals
*/
export function transformServerConfigsToEnvironment(
serverIds: string[],
clientManager: MCPJamClientManager,
clientManager: MCPClientManager,
): MCPClientOptions {
const connectedServers = clientManager.getConnectedServers();
const servers: Record<string, MCPServerConfig> = {};

for (const serverId of serverIds) {
const serverData = connectedServers[serverId];

if (!serverData) {
const config = clientManager.getServerConfig(serverId);
if (!config) {
throw new Error(`Server '${serverId}' not found`);
}

if (serverData.status !== "connected") {
const status = clientManager.getConnectionStatus(serverId);
if (status !== "connected") {
throw new Error(
`Server '${serverId}' is not connected (status: ${serverData.status})`,
`Server '${serverId}' is not connected (status: ${status})`,
);
}

if (!serverData.config) {
throw new Error(`Server '${serverId}' has no configuration`);
}

servers[serverId] = serverData.config;
servers[serverId] = config;
}

if (Object.keys(servers).length === 0) {
Expand Down