Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,22 @@ export interface CodeGateConfig {
workflow_audits?: WorkflowAuditConfig;
suppress_findings: string[];
suppression_rules?: SuppressionRule[];
/**
* Timeout (in milliseconds) applied to Layer 3 remote resource fetches
* (npm/PyPI registry lookups, git ls-remote, and any http/sse MCP probes).
* Kept deliberately low so a slow or deliberately stalling host cannot
* hang a scan. Overridable via `CODEGATE_LAYER3_REMOTE_FETCH_TIMEOUT_MS`.
*/
layer3_remote_fetch_timeout_ms: number;
/**
* Maximum response size (in bytes) accepted from a Layer 3 remote fetch.
* A declared `Content-Length` above this value is rejected immediately,
* and the streaming reader aborts once the running byte count exceeds
* this limit (defends against servers that lie about or omit
* `Content-Length`). Overridable via
* `CODEGATE_LAYER3_REMOTE_FETCH_MAX_BYTES`.
*/
layer3_remote_fetch_max_bytes: number;
}

interface PartialTuiConfig {
Expand Down Expand Up @@ -122,6 +138,8 @@ interface PartialCodeGateConfig {
};
suppress_findings?: string[];
suppression_rules?: SuppressionRule[];
layer3_remote_fetch_timeout_ms?: number;
layer3_remote_fetch_max_bytes?: number;
}

export interface CliConfigOverrides {
Expand Down Expand Up @@ -175,8 +193,36 @@ export const DEFAULT_CONFIG: CodeGateConfig = {
workflow_audits: { enabled: false },
suppress_findings: [],
suppression_rules: [],
layer3_remote_fetch_timeout_ms: 5000,
layer3_remote_fetch_max_bytes: 1_048_576,
};

/**
* Name of the environment variable that overrides
* `layer3_remote_fetch_timeout_ms`. The value is read through
* `readEnvOverride`, so it must parse as a positive number to take effect.
*/
export const LAYER3_REMOTE_FETCH_TIMEOUT_ENV = "CODEGATE_LAYER3_REMOTE_FETCH_TIMEOUT_MS";
/**
* Name of the environment variable that overrides
* `layer3_remote_fetch_max_bytes`. The value is read through
* `readEnvOverride`, so it must parse as a positive number to take effect.
*/
export const LAYER3_REMOTE_FETCH_MAX_BYTES_ENV = "CODEGATE_LAYER3_REMOTE_FETCH_MAX_BYTES";

/**
 * Coerce a raw config or environment value into a positive integer.
 *
 * Finite numbers are accepted as-is; non-blank strings are trimmed and parsed
 * with `Number`. The result is rounded down with `Math.floor`.
 *
 * @param value - Raw value of unknown type.
 * @returns The floored value when it is a finite integer of at least 1,
 *   otherwise `undefined`.
 */
function normalizePositiveInteger(value: unknown): number | undefined {
  let candidate: number;
  if (typeof value === "number") {
    candidate = value;
  } else if (typeof value === "string" && value.trim().length > 0) {
    candidate = Number(value.trim());
  } else {
    return undefined;
  }

  if (!Number.isFinite(candidate)) {
    return undefined;
  }

  // Validate AFTER flooring: a fractional input in (0, 1) floors to 0, which
  // is not a positive integer and must not be returned as a limit.
  const floored = Math.floor(candidate);
  return floored >= 1 ? floored : undefined;
}

/**
 * Look up a numeric override in the process environment.
 *
 * @param name - Environment variable to read.
 * @returns `undefined` when the variable is unset; otherwise whatever
 *   `normalizePositiveInteger` yields for its raw string value.
 */
function readEnvOverride(name: string): number | undefined {
  const envValue = process.env[name];
  return envValue === undefined ? undefined : normalizePositiveInteger(envValue);
}

interface PartialRulePolicyConfig {
disable?: boolean;
ignore?: string[];
Expand Down Expand Up @@ -627,6 +673,20 @@ export function resolveEffectiveConfig(options: ResolveConfigOptions): CodeGateC
...(globalConfig.suppression_rules ?? []),
...(projectConfig.suppression_rules ?? []),
],
layer3_remote_fetch_timeout_ms:
pickFirst(
readEnvOverride(LAYER3_REMOTE_FETCH_TIMEOUT_ENV),
normalizePositiveInteger(projectConfig.layer3_remote_fetch_timeout_ms),
normalizePositiveInteger(globalConfig.layer3_remote_fetch_timeout_ms),
DEFAULT_CONFIG.layer3_remote_fetch_timeout_ms,
) ?? DEFAULT_CONFIG.layer3_remote_fetch_timeout_ms,
layer3_remote_fetch_max_bytes:
pickFirst(
readEnvOverride(LAYER3_REMOTE_FETCH_MAX_BYTES_ENV),
normalizePositiveInteger(projectConfig.layer3_remote_fetch_max_bytes),
normalizePositiveInteger(globalConfig.layer3_remote_fetch_max_bytes),
DEFAULT_CONFIG.layer3_remote_fetch_max_bytes,
) ?? DEFAULT_CONFIG.layer3_remote_fetch_max_bytes,
};
}

Expand Down
126 changes: 119 additions & 7 deletions src/layer3-dynamic/resource-fetcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,29 @@ export interface ResourceRequest {
/**
* Optional limits that callers can pass to `fetchResourceMetadata`.
*/
export interface ResourceFetcherOptions {
/**
* Number of retries allowed after the first attempt; the fetch loop keeps
* going while `attempt <= maxRetries`.
*/
maxRetries?: number;
/**
* Milliseconds before the in-flight fetch is aborted. Falls back to
* `DEFAULT_FETCH_TIMEOUT_MS` when omitted.
*/
timeoutMs?: number;
/**
* Maximum number of bytes accepted in the response body. Enforced against
* both the declared `Content-Length` header (if present) and the running
* byte count during streaming read. Defaults to 1 MiB.
*/
maxBytes?: number;
}

/** Fetch timeout in milliseconds (5 seconds) used when `timeoutMs` is not supplied. */
export const DEFAULT_FETCH_TIMEOUT_MS = 5000;
/** Response-size cap in bytes (1 MiB) used when `maxBytes` is not supplied. */
export const DEFAULT_FETCH_MAX_BYTES = 1_048_576;

/**
 * Translate the Layer 3 remote-fetch fields of a resolved CodeGate config
 * into fetcher options, so call sites never spell out the config keys.
 *
 * @param config - Object carrying the two `layer3_remote_fetch_*` limits.
 * @returns Options with `timeoutMs` and `maxBytes` copied from `config`.
 */
export function resourceFetcherOptionsFromConfig(config: {
  layer3_remote_fetch_timeout_ms: number;
  layer3_remote_fetch_max_bytes: number;
}): ResourceFetcherOptions {
  const {
    layer3_remote_fetch_timeout_ms: timeoutMs,
    layer3_remote_fetch_max_bytes: maxBytes,
  } = config;
  return { timeoutMs, maxBytes };
}

export interface ResourceFetcherDeps {
Expand Down Expand Up @@ -58,12 +81,80 @@ function endpointFor(request: ResourceRequest): string {
return request.locator;
}

async function parseResponse(response: Response): Promise<unknown> {
/**
 * Collect a response body as UTF-8 text while refusing to accept more than
 * `maxBytes` bytes.
 *
 * The cap is applied twice: first to the `Content-Length` header (when it
 * parses to a finite number), then to the running total while the body
 * stream is consumed chunk by chunk.
 *
 * @param response - Fetch response whose body should be read.
 * @param maxBytes - Largest body size, in bytes, that is accepted.
 * @returns The decoded body text.
 * @throws Error whose message starts with `response_too_large` when either
 *   the declared or the observed size exceeds `maxBytes`.
 */
async function readBodyWithLimit(response: Response, maxBytes: number): Promise<string> {
  const lengthHeader = response.headers.get("content-length");
  const declaredSize = lengthHeader === null ? Number.NaN : Number(lengthHeader);
  if (Number.isFinite(declaredSize) && declaredSize > maxBytes) {
    // Release the stream without reading it; a failed cancel must not mask
    // the size error raised below.
    try {
      await response.body?.cancel();
    } catch {
      // no-op: cancel failures are non-fatal.
    }
    throw new Error(
      `response_too_large: declared Content-Length ${declaredSize} exceeds limit ${maxBytes}`,
    );
  }

  const stream = response.body;
  if (!stream) {
    // Nothing to stream from: read via text() and measure the encoded size.
    const fallbackText = await response.text();
    const fallbackSize = Buffer.byteLength(fallbackText, "utf8");
    if (fallbackSize > maxBytes) {
      throw new Error(`response_too_large: body ${fallbackSize} > ${maxBytes}`);
    }
    return fallbackText;
  }

  const reader = stream.getReader();
  const pieces: Uint8Array[] = [];
  let received = 0;
  try {
    for (let step = await reader.read(); !step.done; step = await reader.read()) {
      const piece = step.value;
      if (!piece) {
        continue;
      }
      received += piece.byteLength;
      if (received > maxBytes) {
        // Stop the transfer before reporting the breach.
        try {
          await reader.cancel();
        } catch {
          // no-op
        }
        throw new Error(`response_too_large: streamed ${received} > ${maxBytes}`);
      }
      pieces.push(piece);
    }
  } finally {
    try {
      reader.releaseLock();
    } catch {
      // A failure to release the lock is ignored.
    }
  }

  return Buffer.concat(pieces).toString("utf8");
}

async function parseResponse(response: Response, maxBytes: number): Promise<unknown> {
const contentType = response.headers.get("content-type") ?? "";
const text = await readBodyWithLimit(response, maxBytes);
if (contentType.includes("application/json")) {
return (await response.json()) as unknown;
return JSON.parse(text) as unknown;
}
return await response.text();
return text;
}

function timeoutError(error: unknown): boolean {
Expand All @@ -72,6 +163,11 @@ function timeoutError(error: unknown): boolean {
return message.includes("timeout") || message.includes("aborted");
}

/**
 * Tell whether a caught value is the size-limit error, recognised by the
 * `response_too_large` prefix on its message.
 *
 * @param error - Any thrown value; non-`Error` values are stringified first.
 * @returns `true` when the message begins with `response_too_large`.
 */
function isResponseTooLarge(error: unknown): boolean {
  let text: string;
  if (error instanceof Error) {
    text = error.message;
  } else {
    text = String(error);
  }
  return text.startsWith("response_too_large");
}

export async function fetchResourceMetadata(
request: ResourceRequest,
customDeps: ResourceFetcherDeps = defaultDeps(),
Expand Down Expand Up @@ -104,14 +200,19 @@ export async function fetchResourceMetadata(
}

const endpoint = endpointFor(request);
const timeoutMs = options.timeoutMs ?? 5000;
const timeoutMs = options.timeoutMs ?? DEFAULT_FETCH_TIMEOUT_MS;
const maxBytes = options.maxBytes ?? DEFAULT_FETCH_MAX_BYTES;

for (let attempt = 0; attempt <= maxRetries; attempt += 1) {
try {
const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), timeoutMs);
const response = await deps.fetch(endpoint, { signal: controller.signal });
clearTimeout(timer);
let response: Response;
try {
response = await deps.fetch(endpoint, { signal: controller.signal });
} finally {
clearTimeout(timer);
}

if (response.status === 401 || response.status === 403) {
return {
Expand All @@ -135,13 +236,24 @@ export async function fetchResourceMetadata(
};
}

const metadata = await parseResponse(response, maxBytes);
return {
status: "ok",
attempts: attempt + 1,
elapsedMs: deps.now() - startedAt,
metadata: await parseResponse(response),
metadata,
};
} catch (error) {
// Size-limit breaches are deterministic — do not retry, surface as network_error.
if (isResponseTooLarge(error)) {
return {
status: "network_error",
attempts: attempt + 1,
elapsedMs: deps.now() - startedAt,
error: error instanceof Error ? error.message : String(error),
};
}

if (attempt < maxRetries) {
await deps.sleep(100 * (attempt + 1));
continue;
Expand Down
12 changes: 10 additions & 2 deletions src/layer3-dynamic/tool-description-acquisition.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import {
fetchResourceMetadata,
type ResourceFetchResult,
type ResourceFetcherOptions,
type ResourceKind,
type ResourceRequest,
} from "./resource-fetcher.js";
Expand Down Expand Up @@ -37,9 +38,16 @@ export interface ToolDescriptionAcquisitionDeps {
fetchMetadata: (request: ResourceRequest) => Promise<ResourceFetchResult>;
}

function defaultDeps(): ToolDescriptionAcquisitionDeps {
/**
* Options accepted when building the default acquisition dependencies.
*/
export interface ToolDescriptionAcquisitionOptions {
/**
* Fetch limits forwarded as the options argument of
* `fetchResourceMetadata`; when omitted, `undefined` is forwarded.
*/
fetchOptions?: ResourceFetcherOptions;
}

function defaultDeps(
options: ToolDescriptionAcquisitionOptions = {},
): ToolDescriptionAcquisitionDeps {
return {
fetchMetadata: async (request) => fetchResourceMetadata(request),
fetchMetadata: async (request) =>
fetchResourceMetadata(request, undefined, options.fetchOptions),
};
}

Expand Down
93 changes: 93 additions & 0 deletions src/layer3-dynamic/url-validation.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/**
* URL validation and normalisation helpers for Layer 3 remote resource handling.
*
* These helpers guarantee that remote resources (HTTP/SSE MCP endpoints,
* skill-referenced URLs) use a safe, canonical form before they are fed into
* finding `rule_id` / `file_path` fields or into the fetcher.
*
* Historically, L3 resource IDs were composed as `${kind}:${url}` which, for
* http/sse kinds, produced malformed values like `http:https://mcp.linear.app/mcp`
* (the kind collides with the URL's own scheme). `buildResourceId` avoids that
* double-scheme shape by reusing the URL itself as the id for http/sse kinds.
*/
/** URL schemes that `normalizeRemoteUrl` accepts. */
export type RemoteScheme = "http" | "https";

/** Success shape returned by `normalizeRemoteUrl`. */
export interface NormalizeRemoteUrlResult {
ok: true;
/** Canonical URL string, as serialised by the parsed `URL` object. */
url: string;
/** Scheme of the parsed URL, without the trailing colon. */
scheme: RemoteScheme;
}

/** Failure shape returned by `normalizeRemoteUrl`. */
export interface NormalizeRemoteUrlError {
ok: false;
/** Identifies which validation step rejected the input. */
reason: "empty" | "unsupported_scheme" | "missing_host" | "missing_scheme" | "invalid_url";
}

/**
 * Check that `input` is an http(s) URL with a host and return it in
 * canonical form.
 *
 * Surrounding whitespace is ignored. A path that is longer than `/` and ends
 * in one or more slashes has those trailing slashes removed; a root path
 * stays `/`.
 *
 * @param input - Candidate URL text.
 * @returns `{ ok: true, url, scheme }` on success, or `{ ok: false, reason }`
 *   naming the first check that failed.
 */
export function normalizeRemoteUrl(
  input: string,
): NormalizeRemoteUrlResult | NormalizeRemoteUrlError {
  const reject = (reason: NormalizeRemoteUrlError["reason"]): NormalizeRemoteUrlError => ({
    ok: false,
    reason,
  });

  const candidate = typeof input === "string" ? input.trim() : "";
  if (candidate.length === 0) {
    return reject("empty");
  }

  // A bare `http:` / `https:` (optionally with one slash) has no host at all.
  if (/^https?:\/?$/iu.test(candidate)) {
    return reject("missing_host");
  }

  // Anything that does not open with http:// or https:// is refused up front.
  if (!/^https?:\/\//iu.test(candidate)) {
    return reject("missing_scheme");
  }

  let target: URL;
  try {
    target = new URL(candidate);
  } catch {
    return reject("invalid_url");
  }

  const scheme = target.protocol.replace(":", "").toLowerCase();
  if (scheme !== "http" && scheme !== "https") {
    return reject("unsupported_scheme");
  }
  if (target.hostname.length === 0) {
    return reject("missing_host");
  }

  // Root paths keep their single `/`; longer paths lose trailing slashes.
  const currentPath = target.pathname;
  if (currentPath.length > 1 && currentPath.endsWith("/")) {
    target.pathname = currentPath.replace(/\/+$/u, "");
  }

  return { ok: true, url: target.toString(), scheme: scheme as RemoteScheme };
}

/** Resource kinds for which `buildResourceId` can produce an id. */
export type DeepScanResourceKind = "npm" | "pypi" | "git" | "http" | "sse";

/**
 * Produce the canonical id for a deep-scan resource.
 *
 * `http` and `sse` resources are identified by the locator alone, which
 * avoids ids shaped like `http:https://...`. Every other kind is rendered as
 * `<kind>:<locator>`.
 *
 * @param kind - Resource kind.
 * @param locator - URL or package/repository locator for the resource.
 * @returns The id string for the resource.
 */
export function buildResourceId(kind: DeepScanResourceKind, locator: string): string {
  switch (kind) {
    case "http":
    case "sse":
      return locator;
    default:
      return `${kind}:${locator}`;
  }
}
Loading
Loading