From dd419043a82ee8268def4a6b5a2ebc35bd049dd3 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Fri, 18 Jul 2025 01:57:17 +0000 Subject: [PATCH 1/4] feat: add configurable timeout for evals (5-10 min) - Add timeout field to CreateRun schema with min 5, max 10, default 5 - Add timeout slider UI component to /runs/new page - Update database schema to include timeout column in runs table - Create migration to add timeout column with default value of 5 - Update runTask.ts to use configurable timeout from run settings - Pass timeout parameter through the createRun action --- apps/web-evals/src/actions/runs.ts | 3 ++- apps/web-evals/src/app/runs/new/new-run.tsx | 27 +++++++++++++++++++ apps/web-evals/src/lib/schemas.ts | 5 ++++ packages/evals/src/cli/runTask.ts | 3 ++- .../migrations/0001_add_timeout_to_runs.sql | 1 + packages/evals/src/db/schema.ts | 1 + 6 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 packages/evals/src/db/migrations/0001_add_timeout_to_runs.sql diff --git a/apps/web-evals/src/actions/runs.ts b/apps/web-evals/src/actions/runs.ts index 90387d3257b..be4664d4d31 100644 --- a/apps/web-evals/src/actions/runs.ts +++ b/apps/web-evals/src/actions/runs.ts @@ -22,9 +22,10 @@ import { CreateRun } from "@/lib/schemas" const EVALS_REPO_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals") // eslint-disable-next-line @typescript-eslint/no-unused-vars -export async function createRun({ suite, exercises = [], systemPrompt, ...values }: CreateRun) { +export async function createRun({ suite, exercises = [], systemPrompt, timeout, ...values }: CreateRun) { const run = await _createRun({ ...values, + timeout, socketPath: "", // TODO: Get rid of this. }) diff --git a/apps/web-evals/src/app/runs/new/new-run.tsx b/apps/web-evals/src/app/runs/new/new-run.tsx index 444086bd59f..90717d6fec9 100644 --- a/apps/web-evals/src/app/runs/new/new-run.tsx +++ b/apps/web-evals/src/app/runs/new/new-run.tsx @@ -21,6 +21,9 @@ import { CONCURRENCY_MIN, CONCURRENCY_MAX, CONCURRENCY_DEFAULT, + TIMEOUT_MIN, + TIMEOUT_MAX, + TIMEOUT_DEFAULT, } from "@/lib/schemas" import { cn } from "@/lib/utils" import { useOpenRouterModels } from "@/hooks/use-open-router-models" @@ -77,6 +80,7 @@ export function NewRun() { exercises: [], settings: undefined, concurrency: CONCURRENCY_DEFAULT, + timeout: TIMEOUT_DEFAULT, }, }) @@ -341,6 +345,29 @@ export function NewRun() { )} /> + ( + + Timeout (minutes) + +
+ field.onChange(value[0])} + /> +
{field.value} min
+
+
+ +
+ )} + /> + data.suite === "full" || (data.exercises || []).length > 0, { diff --git a/packages/evals/src/cli/runTask.ts b/packages/evals/src/cli/runTask.ts index 507d614ea5a..c291f7f620a 100644 --- a/packages/evals/src/cli/runTask.ts +++ b/packages/evals/src/cli/runTask.ts @@ -304,9 +304,10 @@ export const runTask = async ({ run, task, publish, logger }: RunTaskOptions) => }) try { + const timeoutMs = (run.timeout || 5) * 60 * 1_000 // Convert minutes to milliseconds await pWaitFor(() => !!taskFinishedAt || !!taskAbortedAt || isClientDisconnected, { interval: 1_000, - timeout: EVALS_TIMEOUT, + timeout: timeoutMs, }) } catch (_error) { taskTimedOut = true diff --git a/packages/evals/src/db/migrations/0001_add_timeout_to_runs.sql b/packages/evals/src/db/migrations/0001_add_timeout_to_runs.sql new file mode 100644 index 00000000000..16d3cc1bddc --- /dev/null +++ b/packages/evals/src/db/migrations/0001_add_timeout_to_runs.sql @@ -0,0 +1 @@ +ALTER TABLE "runs" ADD COLUMN "timeout" integer DEFAULT 5 NOT NULL; \ No newline at end of file diff --git a/packages/evals/src/db/schema.ts b/packages/evals/src/db/schema.ts index 0338b812e22..73705ac054d 100644 --- a/packages/evals/src/db/schema.ts +++ b/packages/evals/src/db/schema.ts @@ -18,6 +18,7 @@ export const runs = pgTable("runs", { pid: integer(), socketPath: text("socket_path").notNull(), concurrency: integer().default(2).notNull(), + timeout: integer().default(5).notNull(), passed: integer().default(0).notNull(), failed: integer().default(0).notNull(), createdAt: timestamp("created_at").notNull(), From 1ef5b6e677e069e2ad4daa2d4cb2799cf0e1cb18 Mon Sep 17 00:00:00 2001 From: hannesrudolph Date: Thu, 17 Jul 2025 20:25:36 -0600 Subject: [PATCH 2/4] fix: remove unused EVALS_TIMEOUT import --- packages/evals/src/cli/runTask.ts | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/packages/evals/src/cli/runTask.ts b/packages/evals/src/cli/runTask.ts index c291f7f620a..780c4a77346 100644 --- a/packages/evals/src/cli/runTask.ts +++ b/packages/evals/src/cli/runTask.ts @@ -5,14 +5,7 @@ import * as os from "node:os" import pWaitFor from "p-wait-for" import { execa } from "execa" -import { - type TaskEvent, - TaskCommandName, - RooCodeEventName, - IpcMessageType, - EVALS_SETTINGS, - EVALS_TIMEOUT, -} from "@roo-code/types" +import { type TaskEvent, TaskCommandName, RooCodeEventName, IpcMessageType, EVALS_SETTINGS } from "@roo-code/types" import { IpcClient } from "@roo-code/ipc" import { From e421de7bb99256d9c4722f2ce5d466a5ec55d32f Mon Sep 17 00:00:00 2001 From: Roo Code Date: Fri, 18 Jul 2025 02:46:47 +0000 Subject: [PATCH 3/4] fix: add timeout field to createRun calls in copyRun test - Added timeout: 5 to both createRun calls in copyRun.spec.ts - This fixes the test failure caused by the new required timeout field in the runs schema - The timeout field was added in the configurable timeout feature but the test was not updated --- packages/evals/src/db/queries/__tests__/copyRun.spec.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/evals/src/db/queries/__tests__/copyRun.spec.ts b/packages/evals/src/db/queries/__tests__/copyRun.spec.ts index c693e471db8..079373d568a 100644 --- a/packages/evals/src/db/queries/__tests__/copyRun.spec.ts +++ b/packages/evals/src/db/queries/__tests__/copyRun.spec.ts @@ -23,6 +23,7 @@ describe("copyRun", () => { socketPath: "/tmp/roo.sock", description: "Test run for copying", concurrency: 4, + timeout: 5, }) sourceRunId = run.id @@ -271,7 +272,7 @@ describe("copyRun", () => { }) it("should copy run without task metrics", async () => { - const minimalRun = await createRun({ model: "gpt-3.5-turbo", socketPath: "/tmp/minimal.sock" }) + const minimalRun = await createRun({ model: "gpt-3.5-turbo", socketPath: "/tmp/minimal.sock", timeout: 5 }) const newRunId = await copyRun({ sourceDb: db, targetDb: db, runId: minimalRun.id }) From 19d0f28205cdeaa506517d53dd4f83a5f38446dd Mon Sep 17 00:00:00 2001 From: Roo Code Date: Fri, 18 Jul 2025 21:46:34 +0000 Subject: [PATCH 4/4] fix: use configurable timeout for Redis key expiration in registerRunner - Updated registerRunner function to accept timeoutSeconds parameter - Modified call in runTask.ts to pass configurable timeout instead of hardcoded EVALS_TIMEOUT - Removed unused EVALS_TIMEOUT import from redis.ts - Ensures Redis keys remain valid for the entire duration of task execution (up to 10 minutes) --- packages/evals/src/cli/redis.ts | 14 ++++++++++---- packages/evals/src/cli/runTask.ts | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/packages/evals/src/cli/redis.ts b/packages/evals/src/cli/redis.ts index 8f2c164e49c..7e6fa77da54 100644 --- a/packages/evals/src/cli/redis.ts +++ b/packages/evals/src/cli/redis.ts @@ -1,7 +1,5 @@ import { createClient, type RedisClientType } from "redis" -import { EVALS_TIMEOUT } from "@roo-code/types" - let redis: RedisClientType | undefined export const redisClient = async () => { @@ -18,11 +16,19 @@ export const getPubSubKey = (runId: number) => `evals:${runId}` export const getRunnersKey = (runId: number) => `runners:${runId}` export const getHeartbeatKey = (runId: number) => `heartbeat:${runId}` -export const registerRunner = async ({ runId, taskId }: { runId: number; taskId: number }) => { +export const registerRunner = async ({ + runId, + taskId, + timeoutSeconds, +}: { + runId: number + taskId: number + timeoutSeconds: number +}) => { const redis = await redisClient() const runnersKey = getRunnersKey(runId) await redis.sAdd(runnersKey, `task-${taskId}:${process.env.HOSTNAME ?? process.pid}`) - await redis.expire(runnersKey, EVALS_TIMEOUT / 1_000) + await redis.expire(runnersKey, timeoutSeconds) } export const deregisterRunner = async ({ runId, taskId }: { runId: number; taskId: number }) => { diff --git a/packages/evals/src/cli/runTask.ts b/packages/evals/src/cli/runTask.ts index 780c4a77346..0683cd72388 100644 --- a/packages/evals/src/cli/runTask.ts +++ b/packages/evals/src/cli/runTask.ts @@ -35,7 +35,7 @@ export const processTask = async ({ taskId, logger }: { taskId: number; logger?: const task = await findTask(taskId) const { language, exercise } = task const run = await findRun(task.runId) - await registerRunner({ runId: run.id, taskId }) + await registerRunner({ runId: run.id, taskId, timeoutSeconds: (run.timeout || 5) * 60 }) const containerized = isDockerContainer()