Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions packages/agent/src/server/agent-server.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,88 @@ describe("AgentServer HTTP Mode", () => {
}, 20000);
});

describe("set_token command", () => {
// Build an AgentServer that is NOT stored in the outer `server` ref so the
// afterEach hook doesn't try to clean up its (fake) session and cascade
// cleanup failures into the test output.
const buildBareServer = () => {
const bare = new AgentServer({
port,
jwtPublicKey: TEST_PUBLIC_KEY,
repositoryPath: repo.path,
apiUrl: "http://localhost:8000",
apiKey: "test-api-key",
projectId: 1,
mode: "interactive",
taskId: "test-task-id",
runId: "test-run-id",
}) as unknown as {
session: unknown;
executeCommand(
method: string,
params: Record<string, unknown>,
): Promise<unknown>;
};
// Pass executeCommand's "no active session" guard.
bare.session = {};
return bare;
};

it.each([
{ method: "set_token", token: "ghp_refreshed" },
{ method: "posthog/set_token", token: "ghp_aliased" },
])(
"updates GH_TOKEN and GITHUB_TOKEN via $method",
async ({ method, token }) => {
const previousGh = process.env.GH_TOKEN;
const previousGithub = process.env.GITHUB_TOKEN;
delete process.env.GH_TOKEN;
delete process.env.GITHUB_TOKEN;

try {
const result = await buildBareServer().executeCommand(method, {
token,
});

expect(result).toEqual({ updated: true });
expect(process.env.GH_TOKEN).toBe(token);
expect(process.env.GITHUB_TOKEN).toBe(token);
} finally {
if (previousGh === undefined) delete process.env.GH_TOKEN;
else process.env.GH_TOKEN = previousGh;
if (previousGithub === undefined) delete process.env.GITHUB_TOKEN;
else process.env.GITHUB_TOKEN = previousGithub;
}
},
);
});

describe("POST /gh", () => {
// Each case rejects with 400 (validation), proving 1) the body schema is
// enforced and 2) the route doesn't require a JWT — a 401 would mean the
// route is gated, which would defeat the wrapper-script use case.
it.each([
{ name: "malformed JSON", body: "not-json" },
{ name: "missing args", body: JSON.stringify({ cwd: "/tmp" }) },
{ name: "empty args array", body: JSON.stringify({ args: [] }) },
{ name: "empty body (no JWT required)", body: JSON.stringify({}) },
])(
"rejects $name with 400",
async ({ body }) => {
await createServer().start();

const response = await fetch(`http://localhost:${port}/gh`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body,
});

expect(response.status).toBe(400);
},
20000,
);
});

describe("session lifecycle", () => {
it("emits _posthog/run_started after session initialization", async () => {
await createServer().start();
Expand Down
49 changes: 49 additions & 0 deletions packages/agent/src/server/agent-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
PROTOCOL_VERSION,
} from "@agentclientprotocol/sdk";
import { type ServerType, serve } from "@hono/node-server";
import { getConnInfo } from "@hono/node-server/conninfo";
import { getCurrentBranch } from "@posthog/git/queries";
import { Hono } from "hono";
import { z } from "zod";
Expand Down Expand Up @@ -53,8 +54,10 @@ import {
normalizeCloudPromptContent,
promptBlocksToText,
} from "./cloud-prompt";
import { isLoopbackAddress, runGh } from "./gh-exec";
import { type JwtPayload, JwtValidationError, validateJwt } from "./jwt";
import {
ghRequestSchema,
handoffLocalGitStateSchema,
jsonRpcRequestSchema,
validateCommandParams,
Expand Down Expand Up @@ -450,6 +453,43 @@ export class AgentServer {
}
});

// Sandbox-local exec for `gh`. Codex agents run in an isolated child process
// whose environment is captured at spawn time, so refreshing GH_TOKEN in the
// agent server doesn't reach them. Their gh-wrapper script calls this route
// to run gh against the agent server's freshly-refreshed env. Loopback-only;
// intentionally not JWT-protected so the wrapper has nothing to forward.
app.post("/gh", async (c) => {
const remote = getConnInfo(c).remote.address;
if (!isLoopbackAddress(remote)) {
this.logger.warn("Rejected non-loopback /gh request", { remote });
return c.json({ error: "Forbidden" }, 403);
}

const rawBody = await c.req.json().catch(() => null);
const parsed = ghRequestSchema.safeParse(rawBody);
if (!parsed.success) {
return c.json({ error: parsed.error.message }, 400);
}

const { args, cwd, timeoutMs } = parsed.data;
try {
const result = await runGh(args, {
cwd: cwd ?? this.config.repositoryPath ?? process.cwd(),
timeoutMs: timeoutMs ?? 60_000,
logger: this.logger,
});
return c.json(result);
} catch (error) {
this.logger.error("Failed to run gh", error);
return c.json(
{
error: error instanceof Error ? error.message : "Unknown error",
},
500,
);
}
});

app.notFound((c) => {
return c.json({ error: "Not found" }, 404);
Comment on lines 453 to 494
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 security /gh executes any gh subcommand, not just token-refresh operations

The args array is unconstrained, so any local process can invoke gh secret set, gh auth logout, gh repo delete, or other destructive subcommands with the agent's inherited credentials. The loopback restriction limits exposure to co-located processes, but it's broader than the stated use case requires. Consider restricting args[0] to an allowlist of safe subcommands (e.g. auth) or documenting why full gh pass-through is intentionally required.

Prompt To Fix With AI
This is a comment left during a code review.
Path: packages/agent/src/server/agent-server.ts
Line: 453-494

Comment:
**`/gh` executes any `gh` subcommand, not just token-refresh operations**

The `args` array is unconstrained, so any local process can invoke `gh secret set`, `gh auth logout`, `gh repo delete`, or other destructive subcommands with the agent's inherited credentials. The loopback restriction limits exposure to co-located processes, but it's broader than the stated use case requires. Consider restricting `args[0]` to an allowlist of safe subcommands (e.g. `auth`) or documenting why full `gh` pass-through is intentionally required.

How can I resolve this? If you propose a fix, please make it concise.

});
Expand Down Expand Up @@ -694,6 +734,15 @@ export class AgentServer {
);
}

case "posthog/set_token":
case "set_token": {
const token = params.token as string;
process.env.GH_TOKEN = token;
process.env.GITHUB_TOKEN = token;
this.logger.info("GH token refreshed");
return { updated: true };
}

case POSTHOG_NOTIFICATIONS.PERMISSION_RESPONSE:
case "permission_response": {
const requestId = params.requestId as string;
Expand Down
90 changes: 90 additions & 0 deletions packages/agent/src/server/gh-exec.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import { tmpdir } from "node:os";
import { describe, expect, it } from "vitest";
import { isLoopbackAddress, spawnAndCollect } from "./gh-exec";

describe("isLoopbackAddress", () => {
it.each([
["127.0.0.1", true],
["127.0.0.2", true],
["127.1.2.3", true],
["::1", true],
["::ffff:127.0.0.1", true],
["::ffff:127.1.2.3", true],
["localhost", true],
["10.0.0.1", false],
["192.168.1.1", false],
["::ffff:10.0.0.1", false],
["8.8.8.8", false],
["128.0.0.1", false],
["", false],
[undefined, false],
])("address %s -> %s", (input, expected) => {
expect(isLoopbackAddress(input)).toBe(expected);
});
});

// Test the spawn/collect plumbing via spawnAndCollect. runGh is a thin wrapper
// that pins the binary to "gh" — verified by inspection, not exercised here so
// tests don't depend on a `gh` install.
describe("spawnAndCollect", () => {
it("captures stdout, stderr, and a zero exit code", async () => {
const result = await spawnAndCollect(
process.execPath,
["-e", "process.stdout.write('hi'); process.stderr.write('err');"],
{ cwd: tmpdir(), timeoutMs: 5_000 },
);

expect(result.stdout).toBe("hi");
expect(result.stderr).toBe("err");
expect(result.exitCode).toBe(0);
expect(result.timedOut).toBe(false);
});

it("surfaces non-zero exit codes without throwing", async () => {
const result = await spawnAndCollect(
process.execPath,
["-e", "process.exit(2)"],
{ cwd: tmpdir(), timeoutMs: 5_000 },
);

expect(result.exitCode).toBe(2);
expect(result.timedOut).toBe(false);
});

it("rejects when the binary is missing", async () => {
await expect(
spawnAndCollect(
"/nonexistent/binary/that/cannot/possibly/exist",
["--version"],
{ cwd: tmpdir(), timeoutMs: 5_000 },
),
).rejects.toThrow();
});

it("kills the process on timeout and reports timedOut: true", async () => {
const start = Date.now();
const result = await spawnAndCollect(
process.execPath,
["-e", "setTimeout(() => process.exit(0), 60_000);"],
{ cwd: tmpdir(), timeoutMs: 200 },
);

expect(result.timedOut).toBe(true);
// Should be killed well before the 60s sleep would have completed.
expect(Date.now() - start).toBeLessThan(10_000);
});

it("respects the cwd option", async () => {
const expected = tmpdir();
const result = await spawnAndCollect(
process.execPath,
["-e", "process.stdout.write(process.cwd());"],
{ cwd: expected, timeoutMs: 5_000 },
);

// Resolve through realpath-style equivalence: tmpdir on macOS may be a
// symlink (e.g. /var/folders -> /private/var/folders).
expect([expected, `/private${expected}`]).toContain(result.stdout);
expect(result.exitCode).toBe(0);
});
});
109 changes: 109 additions & 0 deletions packages/agent/src/server/gh-exec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import { spawn } from "node:child_process";
import type { Logger } from "../utils/logger";

export interface GhExecOptions {
cwd: string;
timeoutMs: number;
logger?: Logger;
}

export interface GhExecResult {
stdout: string;
stderr: string;
exitCode: number | null;
signal: NodeJS.Signals | null;
timedOut: boolean;
}

const KILL_GRACE_MS = 2_000;

export function runGh(
args: string[],
options: GhExecOptions,
): Promise<GhExecResult> {
// Binary is pinned here. Callers (incl. the HTTP route) cannot override it —
// the body schema strips unknowns and runGh has no binary parameter.
return spawnAndCollect("gh", args, options);
}

export async function spawnAndCollect(
binary: string,
args: string[],
options: GhExecOptions,
): Promise<GhExecResult> {
const logger = options.logger;

logger?.debug("Running command", {
binary,
args,
cwd: options.cwd,
timeoutMs: options.timeoutMs,
});

return new Promise<GhExecResult>((resolve, reject) => {
const child = spawn(binary, args, {
cwd: options.cwd,
env: process.env,
stdio: ["ignore", "pipe", "pipe"],
});

let stdout = "";
let stderr = "";
let timedOut = false;
let killTimer: NodeJS.Timeout | null = null;

const timeout = setTimeout(() => {
timedOut = true;
logger?.warn("Command timed out, sending SIGTERM", {
pid: child.pid,
timeoutMs: options.timeoutMs,
});
child.kill("SIGTERM");
killTimer = setTimeout(() => {
if (!child.killed) {
logger?.warn("Command did not exit, sending SIGKILL", {
pid: child.pid,
});
child.kill("SIGKILL");
}
}, KILL_GRACE_MS);
}, options.timeoutMs);

child.stdout?.setEncoding("utf8");
child.stderr?.setEncoding("utf8");
child.stdout?.on("data", (chunk: string) => {
stdout += chunk;
});
child.stderr?.on("data", (chunk: string) => {
stderr += chunk;
});

child.on("error", (err) => {
clearTimeout(timeout);
if (killTimer) clearTimeout(killTimer);
reject(err);
});

child.on("close", (code, signal) => {
clearTimeout(timeout);
if (killTimer) clearTimeout(killTimer);
resolve({
stdout,
stderr,
exitCode: code,
signal,
timedOut,
});
});
});
}

export function isLoopbackAddress(address: string | undefined): boolean {
Comment on lines +96 to +101
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 isLoopbackAddress only recognises a four-value fixed set, not the full loopback range

IPv4 reserves the entire 127.0.0.0/8 subnet as loopback, and IPv4-mapped loopback in IPv6 covers ::ffff:127.0.0.0/104. The set currently contains only 127.0.0.1 and ::ffff:127.0.0.1, so a connection arriving on any other address in those ranges (e.g. 127.0.0.2, ::ffff:127.1.2.3) is silently rejected with 403. A safer check would be a proper prefix test for 127. and ::ffff:127..

Prompt To Fix With AI
This is a comment left during a code review.
Path: packages/agent/src/server/gh-exec.ts
Line: 88-100

Comment:
**`isLoopbackAddress` only recognises a four-value fixed set, not the full loopback range**

IPv4 reserves the entire `127.0.0.0/8` subnet as loopback, and IPv4-mapped loopback in IPv6 covers `::ffff:127.0.0.0/104`. The set currently contains only `127.0.0.1` and `::ffff:127.0.0.1`, so a connection arriving on any other address in those ranges (e.g. `127.0.0.2`, `::ffff:127.1.2.3`) is silently rejected with 403. A safer check would be a proper prefix test for `127.` and `::ffff:127.`.

How can I resolve this? If you propose a fix, please make it concise.

if (!address) return false;
if (address === "::1" || address === "localhost") return true;
// IPv4 reserves the full 127.0.0.0/8 range as loopback, and IPv4-mapped
// IPv6 covers ::ffff:127.0.0.0/104 — match by prefix rather than equality.
if (address.startsWith("127.")) return true;
if (address.startsWith("::ffff:127.")) return true;
return false;
}
14 changes: 14 additions & 0 deletions packages/agent/src/server/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,18 @@ export const refreshSessionParamsSchema = z.object({
mcpServers: mcpServersSchema,
});

export const setTokenParamsSchema = z.object({
token: z.string().min(1, "token is required"),
});

export const ghRequestSchema = z.object({
args: z.array(z.string()).min(1, "args is required"),
cwd: z.string().optional(),
timeoutMs: z.number().int().positive().max(600_000).optional(),
});

export type GhRequest = z.infer<typeof ghRequestSchema>;

export const closeParamsSchema = z
.object({
localGitState: handoffLocalGitStateSchema.optional(),
Expand All @@ -116,6 +128,8 @@ export const commandParamsSchemas = {
refresh_session: refreshSessionParamsSchema,
"posthog/refresh_session": refreshSessionParamsSchema,
"_posthog/refresh_session": refreshSessionParamsSchema,
set_token: setTokenParamsSchema,
"posthog/set_token": setTokenParamsSchema,
} as const;

export type CommandMethod = keyof typeof commandParamsSchemas;
Expand Down
Loading