diff --git a/apps/web-evals/src/actions/runs.ts b/apps/web-evals/src/actions/runs.ts index 90387d3257b..be4664d4d31 100644 --- a/apps/web-evals/src/actions/runs.ts +++ b/apps/web-evals/src/actions/runs.ts @@ -22,9 +22,10 @@ import { CreateRun } from "@/lib/schemas" const EVALS_REPO_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals") // eslint-disable-next-line @typescript-eslint/no-unused-vars -export async function createRun({ suite, exercises = [], systemPrompt, ...values }: CreateRun) { +export async function createRun({ suite, exercises = [], systemPrompt, timeout, ...values }: CreateRun) { const run = await _createRun({ ...values, + timeout, socketPath: "", // TODO: Get rid of this. }) diff --git a/apps/web-evals/src/app/runs/new/new-run.tsx b/apps/web-evals/src/app/runs/new/new-run.tsx index 444086bd59f..90717d6fec9 100644 --- a/apps/web-evals/src/app/runs/new/new-run.tsx +++ b/apps/web-evals/src/app/runs/new/new-run.tsx @@ -21,6 +21,9 @@ import { CONCURRENCY_MIN, CONCURRENCY_MAX, CONCURRENCY_DEFAULT, + TIMEOUT_MIN, + TIMEOUT_MAX, + TIMEOUT_DEFAULT, } from "@/lib/schemas" import { cn } from "@/lib/utils" import { useOpenRouterModels } from "@/hooks/use-open-router-models" @@ -77,6 +80,7 @@ export function NewRun() { exercises: [], settings: undefined, concurrency: CONCURRENCY_DEFAULT, + timeout: TIMEOUT_DEFAULT, }, }) @@ -341,6 +345,29 @@ export function NewRun() { )} /> + ( + + Timeout (minutes) + +
+ field.onChange(value[0])} + /> +
{field.value} min
+
+
+ +
+ )} + /> + data.suite === "full" || (data.exercises || []).length > 0, { diff --git a/packages/evals/src/cli/redis.ts b/packages/evals/src/cli/redis.ts index 8f2c164e49c..7e6fa77da54 100644 --- a/packages/evals/src/cli/redis.ts +++ b/packages/evals/src/cli/redis.ts @@ -1,7 +1,5 @@ import { createClient, type RedisClientType } from "redis" -import { EVALS_TIMEOUT } from "@roo-code/types" - let redis: RedisClientType | undefined export const redisClient = async () => { @@ -18,11 +16,19 @@ export const getPubSubKey = (runId: number) => `evals:${runId}` export const getRunnersKey = (runId: number) => `runners:${runId}` export const getHeartbeatKey = (runId: number) => `heartbeat:${runId}` -export const registerRunner = async ({ runId, taskId }: { runId: number; taskId: number }) => { +export const registerRunner = async ({ + runId, + taskId, + timeoutSeconds, +}: { + runId: number + taskId: number + timeoutSeconds: number +}) => { const redis = await redisClient() const runnersKey = getRunnersKey(runId) await redis.sAdd(runnersKey, `task-${taskId}:${process.env.HOSTNAME ?? process.pid}`) - await redis.expire(runnersKey, EVALS_TIMEOUT / 1_000) + await redis.expire(runnersKey, timeoutSeconds) } export const deregisterRunner = async ({ runId, taskId }: { runId: number; taskId: number }) => { diff --git a/packages/evals/src/cli/runTask.ts b/packages/evals/src/cli/runTask.ts index 507d614ea5a..0683cd72388 100644 --- a/packages/evals/src/cli/runTask.ts +++ b/packages/evals/src/cli/runTask.ts @@ -5,14 +5,7 @@ import * as os from "node:os" import pWaitFor from "p-wait-for" import { execa } from "execa" -import { - type TaskEvent, - TaskCommandName, - RooCodeEventName, - IpcMessageType, - EVALS_SETTINGS, - EVALS_TIMEOUT, -} from "@roo-code/types" +import { type TaskEvent, TaskCommandName, RooCodeEventName, IpcMessageType, EVALS_SETTINGS } from "@roo-code/types" import { IpcClient } from "@roo-code/ipc" import { @@ -42,7 +35,7 @@ export const processTask = async ({ taskId, logger }: { taskId: number; logger?: const task = await findTask(taskId) const { language, exercise } = task const run = await findRun(task.runId) - await registerRunner({ runId: run.id, taskId }) + await registerRunner({ runId: run.id, taskId, timeoutSeconds: (run.timeout || 5) * 60 }) const containerized = isDockerContainer() @@ -304,9 +297,10 @@ export const runTask = async ({ run, task, publish, logger }: RunTaskOptions) => }) try { + const timeoutMs = (run.timeout || 5) * 60 * 1_000 // Convert minutes to milliseconds await pWaitFor(() => !!taskFinishedAt || !!taskAbortedAt || isClientDisconnected, { interval: 1_000, - timeout: EVALS_TIMEOUT, + timeout: timeoutMs, }) } catch (_error) { taskTimedOut = true diff --git a/packages/evals/src/db/migrations/0001_add_timeout_to_runs.sql b/packages/evals/src/db/migrations/0001_add_timeout_to_runs.sql new file mode 100644 index 00000000000..16d3cc1bddc --- /dev/null +++ b/packages/evals/src/db/migrations/0001_add_timeout_to_runs.sql @@ -0,0 +1 @@ +ALTER TABLE "runs" ADD COLUMN "timeout" integer DEFAULT 5 NOT NULL; \ No newline at end of file diff --git a/packages/evals/src/db/queries/__tests__/copyRun.spec.ts b/packages/evals/src/db/queries/__tests__/copyRun.spec.ts index c693e471db8..079373d568a 100644 --- a/packages/evals/src/db/queries/__tests__/copyRun.spec.ts +++ b/packages/evals/src/db/queries/__tests__/copyRun.spec.ts @@ -23,6 +23,7 @@ describe("copyRun", () => { socketPath: "/tmp/roo.sock", description: "Test run for copying", concurrency: 4, + timeout: 5, }) sourceRunId = run.id @@ -271,7 +272,7 @@ describe("copyRun", () => { }) it("should copy run without task metrics", async () => { - const minimalRun = await createRun({ model: "gpt-3.5-turbo", socketPath: "/tmp/minimal.sock" }) + const minimalRun = await createRun({ model: "gpt-3.5-turbo", socketPath: "/tmp/minimal.sock", timeout: 5 }) const newRunId = await copyRun({ sourceDb: db, targetDb: db, runId: minimalRun.id }) diff --git a/packages/evals/src/db/schema.ts b/packages/evals/src/db/schema.ts index 0338b812e22..73705ac054d 100644 --- a/packages/evals/src/db/schema.ts +++ b/packages/evals/src/db/schema.ts @@ -18,6 +18,7 @@ export const runs = pgTable("runs", { pid: integer(), socketPath: text("socket_path").notNull(), concurrency: integer().default(2).notNull(), + timeout: integer().default(5).notNull(), passed: integer().default(0).notNull(), failed: integer().default(0).notNull(), createdAt: timestamp("created_at").notNull(),