diff --git a/packages/agent/src/server/agent-server.test.ts b/packages/agent/src/server/agent-server.test.ts index f18e4ebb7..36597c7fc 100644 --- a/packages/agent/src/server/agent-server.test.ts +++ b/packages/agent/src/server/agent-server.test.ts @@ -382,6 +382,88 @@ describe("AgentServer HTTP Mode", () => { }, 20000); }); + describe("set_token command", () => { + // Build an AgentServer that is NOT stored in the outer `server` ref so the + // afterEach hook doesn't try to clean up its (fake) session and cascade + // cleanup failures into the test output. + const buildBareServer = () => { + const bare = new AgentServer({ + port, + jwtPublicKey: TEST_PUBLIC_KEY, + repositoryPath: repo.path, + apiUrl: "http://localhost:8000", + apiKey: "test-api-key", + projectId: 1, + mode: "interactive", + taskId: "test-task-id", + runId: "test-run-id", + }) as unknown as { + session: unknown; + executeCommand( + method: string, + params: Record, + ): Promise; + }; + // Pass executeCommand's "no active session" guard. + bare.session = {}; + return bare; + }; + + it.each([ + { method: "set_token", token: "ghp_refreshed" }, + { method: "posthog/set_token", token: "ghp_aliased" }, + ])( + "updates GH_TOKEN and GITHUB_TOKEN via $method", + async ({ method, token }) => { + const previousGh = process.env.GH_TOKEN; + const previousGithub = process.env.GITHUB_TOKEN; + delete process.env.GH_TOKEN; + delete process.env.GITHUB_TOKEN; + + try { + const result = await buildBareServer().executeCommand(method, { + token, + }); + + expect(result).toEqual({ updated: true }); + expect(process.env.GH_TOKEN).toBe(token); + expect(process.env.GITHUB_TOKEN).toBe(token); + } finally { + if (previousGh === undefined) delete process.env.GH_TOKEN; + else process.env.GH_TOKEN = previousGh; + if (previousGithub === undefined) delete process.env.GITHUB_TOKEN; + else process.env.GITHUB_TOKEN = previousGithub; + } + }, + ); + }); + + describe("POST /gh", () => { + // Each case rejects with 400 (validation), proving 1) the body schema is + // enforced and 2) the route doesn't require a JWT — a 401 would mean the + // route is gated, which would defeat the wrapper-script use case. + it.each([ + { name: "malformed JSON", body: "not-json" }, + { name: "missing args", body: JSON.stringify({ cwd: "/tmp" }) }, + { name: "empty args array", body: JSON.stringify({ args: [] }) }, + { name: "empty body (no JWT required)", body: JSON.stringify({}) }, + ])( + "rejects $name with 400", + async ({ body }) => { + await createServer().start(); + + const response = await fetch(`http://localhost:${port}/gh`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body, + }); + + expect(response.status).toBe(400); + }, + 20000, + ); + }); + describe("session lifecycle", () => { it("emits _posthog/run_started after session initialization", async () => { await createServer().start(); diff --git a/packages/agent/src/server/agent-server.ts b/packages/agent/src/server/agent-server.ts index 8e949b8c8..2836a1372 100644 --- a/packages/agent/src/server/agent-server.ts +++ b/packages/agent/src/server/agent-server.ts @@ -12,6 +12,7 @@ import { PROTOCOL_VERSION, } from "@agentclientprotocol/sdk"; import { type ServerType, serve } from "@hono/node-server"; +import { getConnInfo } from "@hono/node-server/conninfo"; import { getCurrentBranch } from "@posthog/git/queries"; import { Hono } from "hono"; import { z } from "zod"; @@ -53,8 +54,10 @@ import { normalizeCloudPromptContent, promptBlocksToText, } from "./cloud-prompt"; +import { isLoopbackAddress, runGh } from "./gh-exec"; import { type JwtPayload, JwtValidationError, validateJwt } from "./jwt"; import { + ghRequestSchema, handoffLocalGitStateSchema, jsonRpcRequestSchema, validateCommandParams, @@ -450,6 +453,43 @@ export class AgentServer { } }); + // Sandbox-local exec for `gh`. Codex agents run in an isolated child process + // whose environment is captured at spawn time, so refreshing GH_TOKEN in the + // agent server doesn't reach them. Their gh-wrapper script calls this route + // to run gh against the agent server's freshly-refreshed env. Loopback-only; + // intentionally not JWT-protected so the wrapper has nothing to forward. + app.post("/gh", async (c) => { + const remote = getConnInfo(c).remote.address; + if (!isLoopbackAddress(remote)) { + this.logger.warn("Rejected non-loopback /gh request", { remote }); + return c.json({ error: "Forbidden" }, 403); + } + + const rawBody = await c.req.json().catch(() => null); + const parsed = ghRequestSchema.safeParse(rawBody); + if (!parsed.success) { + return c.json({ error: parsed.error.message }, 400); + } + + const { args, cwd, timeoutMs } = parsed.data; + try { + const result = await runGh(args, { + cwd: cwd ?? this.config.repositoryPath ?? process.cwd(), + timeoutMs: timeoutMs ?? 60_000, + logger: this.logger, + }); + return c.json(result); + } catch (error) { + this.logger.error("Failed to run gh", error); + return c.json( + { + error: error instanceof Error ? error.message : "Unknown error", + }, + 500, + ); + } + }); + app.notFound((c) => { return c.json({ error: "Not found" }, 404); }); @@ -694,6 +734,15 @@ export class AgentServer { ); } + case "posthog/set_token": + case "set_token": { + const token = params.token as string; + process.env.GH_TOKEN = token; + process.env.GITHUB_TOKEN = token; + this.logger.info("GH token refreshed"); + return { updated: true }; + } + case POSTHOG_NOTIFICATIONS.PERMISSION_RESPONSE: case "permission_response": { const requestId = params.requestId as string; diff --git a/packages/agent/src/server/gh-exec.test.ts b/packages/agent/src/server/gh-exec.test.ts new file mode 100644 index 000000000..f24fe9500 --- /dev/null +++ b/packages/agent/src/server/gh-exec.test.ts @@ -0,0 +1,90 @@ +import { tmpdir } from "node:os"; +import { describe, expect, it } from "vitest"; +import { isLoopbackAddress, spawnAndCollect } from "./gh-exec"; + +describe("isLoopbackAddress", () => { + it.each([ + ["127.0.0.1", true], + ["127.0.0.2", true], + ["127.1.2.3", true], + ["::1", true], + ["::ffff:127.0.0.1", true], + ["::ffff:127.1.2.3", true], + ["localhost", true], + ["10.0.0.1", false], + ["192.168.1.1", false], + ["::ffff:10.0.0.1", false], + ["8.8.8.8", false], + ["128.0.0.1", false], + ["", false], + [undefined, false], + ])("address %s -> %s", (input, expected) => { + expect(isLoopbackAddress(input)).toBe(expected); + }); +}); + +// Test the spawn/collect plumbing via spawnAndCollect. runGh is a thin wrapper +// that pins the binary to "gh" — verified by inspection, not exercised here so +// tests don't depend on a `gh` install. +describe("spawnAndCollect", () => { + it("captures stdout, stderr, and a zero exit code", async () => { + const result = await spawnAndCollect( + process.execPath, + ["-e", "process.stdout.write('hi'); process.stderr.write('err');"], + { cwd: tmpdir(), timeoutMs: 5_000 }, + ); + + expect(result.stdout).toBe("hi"); + expect(result.stderr).toBe("err"); + expect(result.exitCode).toBe(0); + expect(result.timedOut).toBe(false); + }); + + it("surfaces non-zero exit codes without throwing", async () => { + const result = await spawnAndCollect( + process.execPath, + ["-e", "process.exit(2)"], + { cwd: tmpdir(), timeoutMs: 5_000 }, + ); + + expect(result.exitCode).toBe(2); + expect(result.timedOut).toBe(false); + }); + + it("rejects when the binary is missing", async () => { + await expect( + spawnAndCollect( + "/nonexistent/binary/that/cannot/possibly/exist", + ["--version"], + { cwd: tmpdir(), timeoutMs: 5_000 }, + ), + ).rejects.toThrow(); + }); + + it("kills the process on timeout and reports timedOut: true", async () => { + const start = Date.now(); + const result = await spawnAndCollect( + process.execPath, + ["-e", "setTimeout(() => process.exit(0), 60_000);"], + { cwd: tmpdir(), timeoutMs: 200 }, + ); + + expect(result.timedOut).toBe(true); + // Should be killed well before the 60s sleep would have completed. + expect(Date.now() - start).toBeLessThan(10_000); + }); + + it("respects the cwd option", async () => { + const expected = tmpdir(); + const result = await spawnAndCollect( + process.execPath, + ["-e", "process.stdout.write(process.cwd());"], + { cwd: expected, timeoutMs: 5_000 }, + ); + + // Resolve through realpath-style equivalence: tmpdir on macOS may be a + // symlink (e.g. /var/folders -> /private/var/folders). + expect([expected, `/private${expected}`]).toContain(result.stdout); + expect(result.exitCode).toBe(0); + }); +}); diff --git a/packages/agent/src/server/gh-exec.ts b/packages/agent/src/server/gh-exec.ts new file mode 100644 index 000000000..b07a35b16 --- /dev/null +++ b/packages/agent/src/server/gh-exec.ts @@ -0,0 +1,109 @@ +import { spawn } from "node:child_process"; +import type { Logger } from "../utils/logger"; + +export interface GhExecOptions { + cwd: string; + timeoutMs: number; + logger?: Logger; +} + +export interface GhExecResult { + stdout: string; + stderr: string; + exitCode: number | null; + signal: NodeJS.Signals | null; + timedOut: boolean; +} + +const KILL_GRACE_MS = 2_000; + +export function runGh( + args: string[], + options: GhExecOptions, +): Promise { + // Binary is pinned here. Callers (incl. the HTTP route) cannot override it — + // the body schema strips unknowns and runGh has no binary parameter. + return spawnAndCollect("gh", args, options); +} + +export async function spawnAndCollect( + binary: string, + args: string[], + options: GhExecOptions, +): Promise { + const logger = options.logger; + + logger?.debug("Running command", { + binary, + args, + cwd: options.cwd, + timeoutMs: options.timeoutMs, + }); + + return new Promise((resolve, reject) => { + const child = spawn(binary, args, { + cwd: options.cwd, + env: process.env, + stdio: ["ignore", "pipe", "pipe"], + }); + + let stdout = ""; + let stderr = ""; + let timedOut = false; + let killTimer: NodeJS.Timeout | null = null; + + const timeout = setTimeout(() => { + timedOut = true; + logger?.warn("Command timed out, sending SIGTERM", { + pid: child.pid, + timeoutMs: options.timeoutMs, + }); + child.kill("SIGTERM"); + killTimer = setTimeout(() => { + if (!child.killed) { + logger?.warn("Command did not exit, sending SIGKILL", { + pid: child.pid, + }); + child.kill("SIGKILL"); + } + }, KILL_GRACE_MS); + }, options.timeoutMs); + + child.stdout?.setEncoding("utf8"); + child.stderr?.setEncoding("utf8"); + child.stdout?.on("data", (chunk: string) => { + stdout += chunk; + }); + child.stderr?.on("data", (chunk: string) => { + stderr += chunk; + }); + + child.on("error", (err) => { + clearTimeout(timeout); + if (killTimer) clearTimeout(killTimer); + reject(err); + }); + + child.on("close", (code, signal) => { + clearTimeout(timeout); + if (killTimer) clearTimeout(killTimer); + resolve({ + stdout, + stderr, + exitCode: code, + signal, + timedOut, + }); + }); + }); +} + +export function isLoopbackAddress(address: string | undefined): boolean { + if (!address) return false; + if (address === "::1" || address === "localhost") return true; + // IPv4 reserves the full 127.0.0.0/8 range as loopback, and IPv4-mapped + // IPv6 covers ::ffff:127.0.0.0/104 — match by prefix rather than equality. + if (address.startsWith("127.")) return true; + if (address.startsWith("::ffff:127.")) return true; + return false; +} diff --git a/packages/agent/src/server/schemas.ts b/packages/agent/src/server/schemas.ts index 2dfa791a9..ed1c71310 100644 --- a/packages/agent/src/server/schemas.ts +++ b/packages/agent/src/server/schemas.ts @@ -96,6 +96,18 @@ export const refreshSessionParamsSchema = z.object({ mcpServers: mcpServersSchema, }); +export const setTokenParamsSchema = z.object({ + token: z.string().min(1, "token is required"), +}); + +export const ghRequestSchema = z.object({ + args: z.array(z.string()).min(1, "args is required"), + cwd: z.string().optional(), + timeoutMs: z.number().int().positive().max(600_000).optional(), +}); + +export type GhRequest = z.infer; + export const closeParamsSchema = z .object({ localGitState: handoffLocalGitStateSchema.optional(), @@ -116,6 +128,8 @@ export const commandParamsSchemas = { refresh_session: refreshSessionParamsSchema, "posthog/refresh_session": refreshSessionParamsSchema, "_posthog/refresh_session": refreshSessionParamsSchema, + set_token: setTokenParamsSchema, + "posthog/set_token": setTokenParamsSchema, } as const; export type CommandMethod = keyof typeof commandParamsSchemas;