From 9a76657431970bf22a190e5e31cb90b241499a38 Mon Sep 17 00:00:00 2001
From: Jonathan Santilli <1774227+jonathansantilli@users.noreply.github.com>
Date: Wed, 22 Apr 2026 12:16:50 +0100
Subject: [PATCH 1/2] fix(layer3): produce clean resource IDs for remote
 HTTP/SSE URLs

Layer 3 resource discovery composed IDs as `${kind}:${url}`, which for
http/sse kinds collided with the URL's own scheme and produced malformed
values like `http:https://mcp.linear.app/mcp`. These IDs leaked into
`rule_id` and `file_path` on Layer 3 findings.

Introduce `url-validation.ts` with `normalizeRemoteUrl` (reject non
http/https schemes, missing hosts, canonicalise trailing slashes) and
`buildResourceId` (use the URL itself for http/sse kinds, preserve the
`<kind>:<locator>` shape for npm/pypi/git). Wire both into the scan
resource collector so IDs are clean and URLs validated before becoming
findings.
---
 src/layer3-dynamic/url-validation.ts         | 93 ++++++++++++++++++++
 src/scan.ts                                  | 38 ++++----
 tests/cli/deep-scan-command.test.ts          | 24 ++---
 tests/commands/clawhub-wrapper.test.ts       |  4 +-
 tests/commands/skills-wrapper.test.ts        |  4 +-
 tests/layer1/user-scope-discovery.test.ts    |  2 +-
 tests/layer3/consent-flow.test.ts            |  6 +-
 tests/layer3/deep-resource-discovery.test.ts | 10 +--
 tests/layer3/layer3-integration.test.ts      |  4 +-
 tests/layer3/no-outbound-calls.test.ts       |  4 +-
 tests/layer3/url-validation.test.ts          | 91 +++++++++++++++++++
 tests/path-display.test.ts                   |  4 +-
 12 files changed, 238 insertions(+), 46 deletions(-)
 create mode 100644 src/layer3-dynamic/url-validation.ts
 create mode 100644 tests/layer3/url-validation.test.ts
diff --git a/src/layer3-dynamic/url-validation.ts b/src/layer3-dynamic/url-validation.ts
new file mode 100644
index 0000000..68d2f03
--- /dev/null
+++ b/src/layer3-dynamic/url-validation.ts
@@ -0,0 +1,93 @@
+/**
+ * URL validation and normalisation helpers for Layer 3 remote resource handling.
+ *
+ * These helpers guarantee that remote resources (HTTP/SSE MCP endpoints,
+ * skill-referenced URLs) use a safe, canonical form before they are fed into
+ * finding `rule_id` / `file_path` fields or into the fetcher.
+ *
+ * Historically, L3 resource IDs were composed as `${kind}:${url}` which, for
+ * http/sse kinds, produced malformed values like `http:https://mcp.linear.app/mcp`
+ * (the kind collides with the URL's own scheme). `buildResourceId` avoids that
+ * double-scheme shape by reusing the URL itself as the id for http/sse kinds.
+ */
+export type RemoteScheme = "http" | "https";
+
+export interface NormalizeRemoteUrlResult {
+  ok: true;
+  url: string;
+  scheme: RemoteScheme;
+}
+
+export interface NormalizeRemoteUrlError {
+  ok: false;
+  reason: "empty" | "unsupported_scheme" | "missing_host" | "missing_scheme" | "invalid_url";
+}
+
+/**
+ * Validate and canonicalise a remote URL. Rejects non http/https schemes,
+ * missing hosts, and malformed inputs. Normalises a bare-host path to a
+ * single trailing slash and strips trailing slashes from longer paths.
+ */
+export function normalizeRemoteUrl(
+  input: string,
+): NormalizeRemoteUrlResult | NormalizeRemoteUrlError {
+  if (typeof input !== "string" || input.trim().length === 0) {
+    return { ok: false, reason: "empty" };
+  }
+
+  const trimmed = input.trim();
+
+  // Quick reject for bare `http:` / `https:` without `//` and host.
+  if (/^https?:\/?$/iu.test(trimmed)) {
+    return { ok: false, reason: "missing_host" };
+  }
+
+  // Must start with http:// or https:// (case-insensitive).
+  if (!/^https?:\/\//iu.test(trimmed)) {
+    return { ok: false, reason: "missing_scheme" };
+  }
+
+  let parsed: URL;
+  try {
+    parsed = new URL(trimmed);
+  } catch {
+    return { ok: false, reason: "invalid_url" };
+  }
+
+  const scheme = parsed.protocol.replace(":", "").toLowerCase();
+  if (scheme !== "http" && scheme !== "https") {
+    return { ok: false, reason: "unsupported_scheme" };
+  }
+
+  if (parsed.hostname.length === 0) {
+    return { ok: false, reason: "missing_host" };
+  }
+
+  // Normalise trailing slashes: keep `/` for root paths, strip for others.
+  if (parsed.pathname.length > 1 && parsed.pathname.endsWith("/")) {
+    parsed.pathname = parsed.pathname.replace(/\/+$/u, "");
+  }
+
+  return {
+    ok: true,
+    url: parsed.toString(),
+    scheme: scheme as RemoteScheme,
+  };
+}
+
+export type DeepScanResourceKind = "npm" | "pypi" | "git" | "http" | "sse";
+
+/**
+ * Build a canonical resource id used for findings (`rule_id`, `file_path`) and
+ * for consent prompts. For http/sse kinds the id is the URL itself (no
+ * `http:` / `sse:` prefix) to avoid the malformed `http:https://...` shape.
+ * For npm/pypi/git, the `<kind>:<locator>` prefix is preserved because those
+ * locators are not URLs and other code (e.g. `isRegistryMetadataResource`)
+ * keys on that prefix.
+ */
+export function buildResourceId(kind: DeepScanResourceKind, locator: string): string {
+  if (kind === "http" || kind === "sse") {
+    return locator;
+  }
+  return `${kind}:${locator}`;
+}
diff --git a/src/scan.ts b/src/scan.ts
index 2ef1e15..6175259 100644
--- a/src/scan.ts
+++ b/src/scan.ts
@@ -5,6 +5,7 @@ import {
   collectLocalTextAnalysisTargets,
   type LocalTextAnalysisTarget,
 } from "./layer3-dynamic/local-text-analysis.js";
+import { buildResourceId, normalizeRemoteUrl } from "./layer3-dynamic/url-validation.js";
 import { runStaticPipeline } from "./pipeline.js";
 import type { StaticFileInput } from "./layer2-static/engine.js";
 import { applyReportSummary } from "./report-summary.js";
@@ -690,18 +691,21 @@ function collectDeepScanResourcesFromParsed(
       }
 
       if (typeof config.url === "string" && isHttpLikeUrl(config.url)) {
-        const kind = inferHttpKind(config.url);
-        const id = `${kind}:${config.url}`;
-        if (!resources.has(id)) {
-          resources.set(id, {
-            id,
-            request: {
+        const normalized = normalizeRemoteUrl(config.url);
+        if (normalized.ok) {
+          const kind = inferHttpKind(normalized.url);
+          const id = buildResourceId(kind, normalized.url);
+          if (!resources.has(id)) {
+            resources.set(id, {
               id,
-              kind,
-              locator: config.url,
-            },
-            commandPreview: `GET ${config.url}  (from ${filePath} -> ${container.key}.${serverName}.url)`,
-          });
+              request: {
+                id,
+                kind,
+                locator: normalized.url,
+              },
+              commandPreview: `GET ${normalized.url}  (from ${filePath} -> ${container.key}.${serverName}.url)`,
+            });
+          }
         }
       }
 
@@ -733,8 +737,12 @@ function collectDeepScanResourcesFromParsed(
       if (typeof config.url !== "string" || !isHttpLikeUrl(config.url)) {
         return;
       }
-      const kind = inferHttpKind(config.url);
-      const id = `${kind}:${config.url}`;
+      const normalized = normalizeRemoteUrl(config.url);
+      if (!normalized.ok) {
+        return;
+      }
+      const kind = inferHttpKind(normalized.url);
+      const id = buildResourceId(kind, normalized.url);
       if (resources.has(id)) {
         return;
       }
@@ -743,9 +751,9 @@ function collectDeepScanResourcesFromParsed(
         request: {
           id,
           kind,
-          locator: config.url,
+          locator: normalized.url,
         },
-        commandPreview: `GET ${config.url}  (from ${filePath} -> ${remoteArray.key}.${index}.url)`,
+        commandPreview: `GET ${normalized.url}  (from ${filePath} -> ${remoteArray.key}.${index}.url)`,
       });
     });
   }
diff --git a/tests/cli/deep-scan-command.test.ts b/tests/cli/deep-scan-command.test.ts
index 0051ec7..988ab1f 100644
--- a/tests/cli/deep-scan-command.test.ts
+++ b/tests/cli/deep-scan-command.test.ts
@@ -63,9 +63,9 @@ describe("scan --deep behavior", () => {
   it("executes deep scan resources with consent and merges findings into exit code", async () => {
     const deepResources: DeepScanResource[] = [
       {
-        id: "http:https://mcp.example/tools",
+        id: "https://mcp.example/tools",
         request: {
-          id: "http:https://mcp.example/tools",
+          id: "https://mcp.example/tools",
           kind: "http",
           locator: "https://mcp.example/tools",
         },
@@ -115,9 +115,9 @@ describe("scan --deep behavior", () => {
       async () =>
         [
           {
-            id: "http:https://mcp.example/tools",
+            id: "https://mcp.example/tools",
             request: {
-              id: "http:https://mcp.example/tools",
+              id: "https://mcp.example/tools",
               kind: "http",
               locator: "https://mcp.example/tools",
             },
@@ -190,9 +190,9 @@ describe("scan --deep behavior", () => {
     const stdout: string[] = [];
     const deepResources: DeepScanResource[] = [
       {
-        id: "http:https://mcp.example/tools",
+        id: "https://mcp.example/tools",
         request: {
-          id: "http:https://mcp.example/tools",
+          id: "https://mcp.example/tools",
           kind: "http",
           locator: "https://mcp.example/tools",
         },
@@ -240,9 +240,9 @@ describe("scan --deep behavior", () => {
   it("allows choosing a deep agent and runs approved meta-agent commands", async () => {
     const deepResources: DeepScanResource[] = [
       {
-        id: "http:https://mcp.example/tools",
+        id: "https://mcp.example/tools",
         request: {
-          id: "http:https://mcp.example/tools",
+          id: "https://mcp.example/tools",
           kind: "http",
           locator: "https://mcp.example/tools",
         },
@@ -327,9 +327,9 @@ describe("scan --deep behavior", () => {
   it("unwraps Claude JSON result envelopes from meta-agent output", async () => {
     const deepResources: DeepScanResource[] = [
       {
-        id: "http:https://mcp.example/tools",
+        id: "https://mcp.example/tools",
         request: {
-          id: "http:https://mcp.example/tools",
+          id: "https://mcp.example/tools",
           kind: "http",
           locator: "https://mcp.example/tools",
         },
@@ -406,9 +406,9 @@ describe("scan --deep behavior", () => {
   it("disables deep-scan prompts when --no-tui is passed", async () => {
     const deepResources: DeepScanResource[] = [
       {
-        id: "http:https://mcp.example/tools",
+        id: "https://mcp.example/tools",
         request: {
-          id: "http:https://mcp.example/tools",
+          id: "https://mcp.example/tools",
           kind: "http",
           locator: "https://mcp.example/tools",
         },
diff --git a/tests/commands/clawhub-wrapper.test.ts b/tests/commands/clawhub-wrapper.test.ts
index 37c73d3..7e6180d 100644
--- a/tests/commands/clawhub-wrapper.test.ts
+++ b/tests/commands/clawhub-wrapper.test.ts
@@ -456,9 +456,9 @@ describe("clawhub wrapper execution", () => {
   it("does not invoke deep consent callbacks in non-interactive mode", async () => {
     const discoverDeepResources = vi.fn(async () => [
       {
-        id: "http:https://mcp.example/tools",
+        id: "https://mcp.example/tools",
         request: {
-          id: "http:https://mcp.example/tools",
+          id: "https://mcp.example/tools",
           kind: "http",
           locator: "https://mcp.example/tools",
         },
diff --git a/tests/commands/skills-wrapper.test.ts b/tests/commands/skills-wrapper.test.ts
index 1070c2c..6873a23 100644
--- a/tests/commands/skills-wrapper.test.ts
+++ b/tests/commands/skills-wrapper.test.ts
@@ -683,9 +683,9 @@ describe("skills wrapper execution", () => {
   it("does not invoke deep consent callbacks in non-interactive mode", async () => {
     const discoverDeepResources = vi.fn(async () => [
       {
-        id: "http:https://mcp.example/tools",
+        id: "https://mcp.example/tools",
         request: {
-          id: "http:https://mcp.example/tools",
+          id: "https://mcp.example/tools",
           kind: "http",
           locator: "https://mcp.example/tools",
         },
diff --git a/tests/layer1/user-scope-discovery.test.ts b/tests/layer1/user-scope-discovery.test.ts
index 7a3c1c4..96f5519 100644
--- a/tests/layer1/user-scope-discovery.test.ts
+++ b/tests/layer1/user-scope-discovery.test.ts
@@ -100,7 +100,7 @@ describe("user-scope discovery", () => {
       includeUserScope: true,
       homeDir: home,
     });
-    expect(resources.map((resource) => resource.id)).toEqual(["sse:https://example.com/sse"]);
+    expect(resources.map((resource) => resource.id)).toEqual(["https://example.com/sse"]);
   });
 
   it("includes user-scope paths for additional tools (roo)", async () => {
diff --git a/tests/layer3/consent-flow.test.ts b/tests/layer3/consent-flow.test.ts
index c602e48..4b4677a 100644
--- a/tests/layer3/consent-flow.test.ts
+++ b/tests/layer3/consent-flow.test.ts
@@ -8,8 +8,8 @@ const RESOURCES: DeepScanResource[] = [
     commandPreview: "fetch npm metadata",
   },
   {
-    id: "http:https://example.com",
-    request: { id: "http:https://example.com", kind: "http", locator: "https://example.com" },
+    id: "https://example.com",
+    request: { id: "https://example.com", kind: "http", locator: "https://example.com" },
     commandPreview: "fetch remote metadata",
   },
 ];
@@ -25,7 +25,7 @@ describe("task 26 deep scan consent flow", () => {
 
     const outcomes = await runDeepScanWithConsent(
       RESOURCES,
-      async (resource) => resource.id === "http:https://example.com",
+      async (resource) => resource.id === "https://example.com",
       execute,
     );
 
diff --git a/tests/layer3/deep-resource-discovery.test.ts b/tests/layer3/deep-resource-discovery.test.ts
index 32a8f7e..56860b8 100644
--- a/tests/layer3/deep-resource-discovery.test.ts
+++ b/tests/layer3/deep-resource-discovery.test.ts
@@ -36,10 +36,10 @@ describe("deep resource discovery", () => {
 
     const resources = discoverDeepScanResources(root);
     expect(resources.map((resource) => resource.id)).toEqual([
-      "http:https://example.com/mcp/tools",
+      "https://example.com/mcp/tools",
+      "https://example.com/sse",
       "npm:@example/local-mcp",
       "pypi:example-python-mcp",
-      "sse:https://example.com/sse",
     ]);
     expect(
       resources.every(
@@ -80,8 +80,8 @@ describe("deep resource discovery", () => {
 
     const resources = discoverDeepScanResources(root);
     expect(resources.map((resource) => resource.id)).toEqual([
+      "https://example.com/eventstream",
       "npm:@example/alias-mcp",
-      "sse:https://example.com/eventstream",
     ]);
   });
 
@@ -117,8 +117,8 @@ describe("deep resource discovery", () => {
     });
 
     expect(resources.map((resource) => resource.id)).toEqual([
-      "http:https://internal.example.com/mcp",
-      "sse:https://internal.example.com/eventstream",
+      "https://internal.example.com/eventstream",
+      "https://internal.example.com/mcp",
     ]);
   });
 });
diff --git a/tests/layer3/layer3-integration.test.ts b/tests/layer3/layer3-integration.test.ts
index 7339701..e0faaa3 100644
--- a/tests/layer3/layer3-integration.test.ts
+++ b/tests/layer3/layer3-integration.test.ts
@@ -27,7 +27,7 @@ describe("task 28 layer3 integration", () => {
         },
       },
       {
-        resourceId: "http:https://example.invalid/schema",
+        resourceId: "https://example.invalid/schema",
         approved: true,
         status: "ok",
         result: {
@@ -121,7 +121,7 @@ describe("task 28 layer3 integration", () => {
   it("derives tool-description and toxic-flow findings from metadata tools", () => {
     const outcomes: DeepScanOutcome[] = [
       {
-        resourceId: "http:https://mcp.example/tools",
+        resourceId: "https://mcp.example/tools",
         approved: true,
         status: "ok",
         result: {
diff --git a/tests/layer3/no-outbound-calls.test.ts b/tests/layer3/no-outbound-calls.test.ts
index 608c238..e5dfa8c 100644
--- a/tests/layer3/no-outbound-calls.test.ts
+++ b/tests/layer3/no-outbound-calls.test.ts
@@ -37,11 +37,11 @@ describe("deep scan makes no outbound HTTP calls", () => {
 
     const testCases = [
       {
-        id: "http:https://mcp.evil.com/tools",
+        id: "https://mcp.evil.com/tools",
         request: { kind: "http", locator: "https://mcp.evil.com/tools" },
       },
       {
-        id: "sse:https://mcp.evil.com/sse",
+        id: "https://mcp.evil.com/sse",
         request: { kind: "sse", locator: "https://mcp.evil.com/sse" },
       },
       { id: "npm:@evil/backdoor", request: { kind: "npm", locator: "@evil/backdoor" } },
diff --git a/tests/layer3/url-validation.test.ts b/tests/layer3/url-validation.test.ts
new file mode 100644
index 0000000..c22eca7
--- /dev/null
+++ b/tests/layer3/url-validation.test.ts
@@ -0,0 +1,91 @@
+import { describe, expect, it } from "vitest";
+import { buildResourceId, normalizeRemoteUrl } from "../../src/layer3-dynamic/url-validation";
+
+describe("normalizeRemoteUrl", () => {
+  it("accepts http and https URLs and returns them in canonical form", () => {
+    const http = normalizeRemoteUrl("http://0.0.0.0:8007/mcp");
+    expect(http.ok).toBe(true);
+    if (http.ok) {
+      expect(http.url).toBe("http://0.0.0.0:8007/mcp");
+      expect(http.scheme).toBe("http");
+    }
+
+    const https = normalizeRemoteUrl("https://mcp.linear.app/mcp");
+    expect(https.ok).toBe(true);
+    if (https.ok) {
+      expect(https.url).toBe("https://mcp.linear.app/mcp");
+      expect(https.scheme).toBe("https");
+    }
+  });
+
+  it("rejects non http/https schemes", () => {
+    expect(normalizeRemoteUrl("file:///etc/passwd").ok).toBe(false);
+    expect(normalizeRemoteUrl("ftp://ftp.example.com/pub").ok).toBe(false);
+    expect(normalizeRemoteUrl("ssh://user@example.com").ok).toBe(false);
+    const result = normalizeRemoteUrl("javascript:alert(1)");
+    expect(result.ok).toBe(false);
+    if (!result.ok) {
+      expect(result.reason).toBe("missing_scheme");
+    }
+  });
+
+  it("rejects URLs without a host", () => {
+    const bare = normalizeRemoteUrl("http://");
+    expect(bare.ok).toBe(false);
+    const justScheme = normalizeRemoteUrl("http:");
+    expect(justScheme.ok).toBe(false);
+    if (!justScheme.ok) {
+      expect(justScheme.reason).toBe("missing_host");
+    }
+  });
+
+  it("rejects empty / whitespace input", () => {
+    expect(normalizeRemoteUrl("").ok).toBe(false);
+    expect(normalizeRemoteUrl("   ").ok).toBe(false);
+  });
+
+  it("strips trailing slashes from non-root paths and preserves root slash", () => {
+    const withSlash = normalizeRemoteUrl("https://example.com/api/v1/");
+    expect(withSlash.ok).toBe(true);
+    if (withSlash.ok) {
+      expect(withSlash.url).toBe("https://example.com/api/v1");
+    }
+
+    // Root path: URL class already canonicalises to trailing `/` which we keep.
+    const root = normalizeRemoteUrl("https://example.com");
+    expect(root.ok).toBe(true);
+    if (root.ok) {
+      expect(root.url).toBe("https://example.com/");
+    }
+  });
+});
+
+describe("buildResourceId", () => {
+  it("uses the URL itself (no double scheme) for http and sse kinds", () => {
+    expect(buildResourceId("http", "https://mcp.linear.app/mcp")).toBe(
+      "https://mcp.linear.app/mcp",
+    );
+    expect(buildResourceId("http", "http://0.0.0.0:8007/mcp")).toBe("http://0.0.0.0:8007/mcp");
+    expect(buildResourceId("sse", "https://example.com/sse")).toBe("https://example.com/sse");
+  });
+
+  it("preserves the kind prefix for registry metadata kinds", () => {
+    expect(buildResourceId("npm", "@org/pkg")).toBe("npm:@org/pkg");
+    expect(buildResourceId("pypi", "requests")).toBe("pypi:requests");
+    expect(buildResourceId("git", "https://github.com/org/repo")).toBe(
+      "git:https://github.com/org/repo",
+    );
+  });
+
+  it("produces resource IDs that do not start with http:http or http:https", () => {
+    for (const url of [
+      "http://0.0.0.0:8007/mcp",
+      "https://mcp.linear.app/mcp",
+      "https://example.com/sse",
+    ]) {
+      const id = buildResourceId("http", url);
+      expect(id.startsWith("http:http://")).toBe(false);
+      expect(id.startsWith("http:https://")).toBe(false);
+    }
+  });
+});
diff --git a/tests/path-display.test.ts b/tests/path-display.test.ts
index f5fe522..f3dcea3 100644
--- a/tests/path-display.test.ts
+++ b/tests/path-display.test.ts
@@ -9,8 +9,8 @@ describe("path display", () => {
   });
 
   it("keeps URI-like resource identifiers unchanged", () => {
-    expect(toAbsoluteDisplayPath("/tmp/codegate-case3", "http:https://mcp.linear.app/mcp")).toBe(
-      "http:https://mcp.linear.app/mcp",
+    expect(toAbsoluteDisplayPath("/tmp/codegate-case3", "https://mcp.linear.app/mcp")).toBe(
+      "https://mcp.linear.app/mcp",
     );
   });
 });

From 846398905d0b6a9280d5470d155ef7db357a8e99 Mon Sep 17 00:00:00 2001
From: Jonathan Santilli <1774227+jonathansantilli@users.noreply.github.com>
Date: Wed, 22 Apr 2026 12:21:07 +0100
Subject: [PATCH 2/2] fix(layer3): add configurable timeout and byte-size guard
 for remote fetches
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A slow or malicious remote host could previously hang a scan or flood it
with gigabytes of data through a Layer 3 resource probe. Harden the
remote fetcher with two new limits:

* `layer3_remote_fetch_timeout_ms` (default 5000) — per-attempt timeout
  enforced via AbortController. Overridable by
  `CODEGATE_LAYER3_REMOTE_FETCH_TIMEOUT_MS`.
* `layer3_remote_fetch_max_bytes` (default 1_048_576) — rejects declared
  Content-Length above the cap immediately, and aborts the streaming
  read once bytes exceed it (so servers that lie about / omit the header
  cannot bypass the guard). Overridable by
  `CODEGATE_LAYER3_REMOTE_FETCH_MAX_BYTES`.

Both limits are exposed only via config file and env var — no CLI flag.
The fetcher exports `resourceFetcherOptionsFromConfig` as a small helper
for callers plumbing the limits from `CodeGateConfig`.
---
 src/config.ts                                 |  60 +++++++++
 src/layer3-dynamic/resource-fetcher.ts        | 126 ++++++++++++++++-
 .../tool-description-acquisition.ts           |  12 +-
 .../config/layer3-remote-fetch-limits.test.ts | 101 ++++++++++++++
 tests/layer3/resource-fetcher-limits.test.ts  | 127 ++++++++++++++++++
 5 files changed, 417 insertions(+), 9 deletions(-)
 create mode 100644 tests/config/layer3-remote-fetch-limits.test.ts
 create mode 100644 tests/layer3/resource-fetcher-limits.test.ts

diff --git a/src/config.ts b/src/config.ts
index 685774a..adab95c 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -76,6 +76,22 @@ export interface CodeGateConfig {
   workflow_audits?: WorkflowAuditConfig;
   suppress_findings: string[];
   suppression_rules?: SuppressionRule[];
+  /**
+   * Timeout (in milliseconds) applied to Layer 3 remote resource fetches
+   * (npm/PyPI registry lookups, git ls-remote, and any http/sse MCP probes).
+   * Kept deliberately low so a slow or deliberately stalling host cannot
+   * hang a scan. Overridable via `CODEGATE_LAYER3_REMOTE_FETCH_TIMEOUT_MS`.
+   */
+  layer3_remote_fetch_timeout_ms: number;
+  /**
+   * Maximum response size (in bytes) accepted from a Layer 3 remote fetch.
+   * A declared `Content-Length` above this value is rejected immediately,
+   * and the streaming reader aborts once the running byte count exceeds
+   * this limit (defends against servers that lie about or omit
+   * `Content-Length`). Overridable via
+   * `CODEGATE_LAYER3_REMOTE_FETCH_MAX_BYTES`.
+   */
+  layer3_remote_fetch_max_bytes: number;
 }
 
 interface PartialTuiConfig {
@@ -122,6 +138,8 @@ interface PartialCodeGateConfig {
   };
   suppress_findings?: string[];
   suppression_rules?: SuppressionRule[];
+  layer3_remote_fetch_timeout_ms?: number;
+  layer3_remote_fetch_max_bytes?: number;
 }
 
 export interface CliConfigOverrides {
@@ -175,8 +193,36 @@ export const DEFAULT_CONFIG: CodeGateConfig = {
   workflow_audits: { enabled: false },
   suppress_findings: [],
   suppression_rules: [],
+  layer3_remote_fetch_timeout_ms: 5000,
+  layer3_remote_fetch_max_bytes: 1_048_576,
 };
 
+/** Env var name that overrides `layer3_remote_fetch_timeout_ms`. */
+export const LAYER3_REMOTE_FETCH_TIMEOUT_ENV = "CODEGATE_LAYER3_REMOTE_FETCH_TIMEOUT_MS";
+/** Env var name that overrides `layer3_remote_fetch_max_bytes`. */
+export const LAYER3_REMOTE_FETCH_MAX_BYTES_ENV = "CODEGATE_LAYER3_REMOTE_FETCH_MAX_BYTES";
+
+function normalizePositiveInteger(value: unknown): number | undefined {
+  if (typeof value === "number" && Number.isFinite(value) && value > 0) {
+    return Math.floor(value);
+  }
+  if (typeof value === "string" && value.trim().length > 0) {
+    const parsed = Number(value.trim());
+    if (Number.isFinite(parsed) && parsed > 0) {
+      return Math.floor(parsed);
+    }
+  }
+  return undefined;
+}
+
+function readEnvOverride(name: string): number | undefined {
+  const raw = process.env[name];
+  if (raw === undefined) {
+    return undefined;
+  }
+  return normalizePositiveInteger(raw);
+}
+
 interface PartialRulePolicyConfig {
   disable?: boolean;
   ignore?: string[];
@@ -627,6 +673,20 @@ export function resolveEffectiveConfig(options: ResolveConfigOptions): CodeGateC
       ...(globalConfig.suppression_rules ?? []),
       ...(projectConfig.suppression_rules ?? []),
     ],
+    layer3_remote_fetch_timeout_ms:
+      pickFirst(
+        readEnvOverride(LAYER3_REMOTE_FETCH_TIMEOUT_ENV),
+        normalizePositiveInteger(projectConfig.layer3_remote_fetch_timeout_ms),
+        normalizePositiveInteger(globalConfig.layer3_remote_fetch_timeout_ms),
+        DEFAULT_CONFIG.layer3_remote_fetch_timeout_ms,
+      ) ?? DEFAULT_CONFIG.layer3_remote_fetch_timeout_ms,
+    layer3_remote_fetch_max_bytes:
+      pickFirst(
+        readEnvOverride(LAYER3_REMOTE_FETCH_MAX_BYTES_ENV),
+        normalizePositiveInteger(projectConfig.layer3_remote_fetch_max_bytes),
+        normalizePositiveInteger(globalConfig.layer3_remote_fetch_max_bytes),
+        DEFAULT_CONFIG.layer3_remote_fetch_max_bytes,
+      ) ?? DEFAULT_CONFIG.layer3_remote_fetch_max_bytes,
   };
 }
 
diff --git a/src/layer3-dynamic/resource-fetcher.ts b/src/layer3-dynamic/resource-fetcher.ts
index d1f3275..d6a3a74 100644
--- a/src/layer3-dynamic/resource-fetcher.ts
+++ b/src/layer3-dynamic/resource-fetcher.ts
@@ -11,6 +11,29 @@ export interface ResourceRequest {
 export interface ResourceFetcherOptions {
   maxRetries?: number;
   timeoutMs?: number;
+  /**
+   * Maximum number of bytes accepted in the response body. Enforced against
+   * both the declared `Content-Length` header (if present) and the running
+   * byte count during streaming read. Defaults to 1 MiB.
+   */
+  maxBytes?: number;
+}
+
+export const DEFAULT_FETCH_TIMEOUT_MS = 5000;
+export const DEFAULT_FETCH_MAX_BYTES = 1_048_576;
+
+/**
+ * Extract Layer 3 remote-fetch limits from the resolved CodeGate config.
+ * Kept here so callers don't have to remember the config field names.
+ */
+export function resourceFetcherOptionsFromConfig(config: {
+  layer3_remote_fetch_timeout_ms: number;
+  layer3_remote_fetch_max_bytes: number;
+}): ResourceFetcherOptions {
+  return {
+    timeoutMs: config.layer3_remote_fetch_timeout_ms,
+    maxBytes: config.layer3_remote_fetch_max_bytes,
+  };
 }
 
 export interface ResourceFetcherDeps {
@@ -58,12 +81,80 @@ function endpointFor(request: ResourceRequest): string {
   return request.locator;
 }
 
-async function parseResponse(response: Response): Promise<unknown> {
+/**
+ * Read a response body while enforcing `maxBytes`. Returns the collected
+ * string, or throws a tagged error if the declared `Content-Length` or the
+ * streamed size exceeds the cap.
+ */
+async function readBodyWithLimit(response: Response, maxBytes: number): Promise<string> {
+  const declared = response.headers.get("content-length");
+  if (declared !== null) {
+    const parsed = Number(declared);
+    if (Number.isFinite(parsed) && parsed > maxBytes) {
+      // Drain & release the stream without reading bytes.
+      try {
+        await response.body?.cancel();
+      } catch {
+        // no-op: cancel failures are non-fatal.
+      }
+      throw new Error(
+        `response_too_large: declared Content-Length ${parsed} exceeds limit ${maxBytes}`,
+      );
+    }
+  }
+
+  const body = response.body;
+  if (!body) {
+    // No stream (e.g., HEAD or empty body): fall back to text().
+    const text = await response.text();
+    if (Buffer.byteLength(text, "utf8") > maxBytes) {
+      throw new Error(`response_too_large: body ${Buffer.byteLength(text, "utf8")} > ${maxBytes}`);
+    }
+    return text;
+  }
+
+  const reader = body.getReader();
+  const chunks: Uint8Array[] = [];
+  let total = 0;
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) {
+        break;
+      }
+      if (!value) {
+        continue;
+      }
+      total += value.byteLength;
+      if (total > maxBytes) {
+        try {
+          await reader.cancel();
+        } catch {
+          // no-op
+        }
+        throw new Error(`response_too_large: streamed ${total} > ${maxBytes}`);
+      }
+      chunks.push(value);
+    }
+  } finally {
+    try {
+      reader.releaseLock();
+    } catch {
+      // releaseLock throws if the reader was already cancelled; ignore.
+    }
+  }
+
+  const buffer = Buffer.concat(chunks.map((chunk) => Buffer.from(chunk)));
+  return buffer.toString("utf8");
+}
+
+async function parseResponse(response: Response, maxBytes: number): Promise<unknown> {
   const contentType = response.headers.get("content-type") ?? "";
+  const text = await readBodyWithLimit(response, maxBytes);
   if (contentType.includes("application/json")) {
-    return (await response.json()) as unknown;
+    return JSON.parse(text) as unknown;
   }
-  return await response.text();
+  return text;
 }
 
 function timeoutError(error: unknown): boolean {
@@ -72,6 +163,11 @@ function timeoutError(error: unknown): boolean {
   return message.includes("timeout") || message.includes("aborted");
 }
 
+function isResponseTooLarge(error: unknown): boolean {
+  const message = error instanceof Error ? error.message : String(error);
+  return message.startsWith("response_too_large");
+}
+
 export async function fetchResourceMetadata(
   request: ResourceRequest,
   customDeps: ResourceFetcherDeps = defaultDeps(),
@@ -104,14 +200,19 @@ export async function fetchResourceMetadata(
   }
 
   const endpoint = endpointFor(request);
-  const timeoutMs = options.timeoutMs ?? 5000;
+  const timeoutMs = options.timeoutMs ?? DEFAULT_FETCH_TIMEOUT_MS;
+  const maxBytes = options.maxBytes ?? DEFAULT_FETCH_MAX_BYTES;
 
   for (let attempt = 0; attempt <= maxRetries; attempt += 1) {
     try {
       const controller = new AbortController();
       const timer = setTimeout(() => controller.abort(), timeoutMs);
-      const response = await deps.fetch(endpoint, { signal: controller.signal });
-      clearTimeout(timer);
+      let response: Response;
+      try {
+        response = await deps.fetch(endpoint, { signal: controller.signal });
+      } finally {
+        clearTimeout(timer);
+      }
 
       if (response.status === 401 || response.status === 403) {
         return {
@@ -135,13 +236,24 @@ export async function fetchResourceMetadata(
         };
       }
 
+      const metadata = await parseResponse(response, maxBytes);
       return {
         status: "ok",
         attempts: attempt + 1,
         elapsedMs: deps.now() - startedAt,
-        metadata: await parseResponse(response),
+        metadata,
       };
     } catch (error) {
+      // Size-limit breaches are deterministic — do not retry, surface as network_error.
+      if (isResponseTooLarge(error)) {
+        return {
+          status: "network_error",
+          attempts: attempt + 1,
+          elapsedMs: deps.now() - startedAt,
+          error: error instanceof Error ? error.message : String(error),
+        };
+      }
+
       if (attempt < maxRetries) {
         await deps.sleep(100 * (attempt + 1));
         continue;
diff --git a/src/layer3-dynamic/tool-description-acquisition.ts b/src/layer3-dynamic/tool-description-acquisition.ts
index 7566b71..e9a435d 100644
--- a/src/layer3-dynamic/tool-description-acquisition.ts
+++ b/src/layer3-dynamic/tool-description-acquisition.ts
@@ -1,6 +1,7 @@
 import {
   fetchResourceMetadata,
   type ResourceFetchResult,
+  type ResourceFetcherOptions,
   type ResourceKind,
   type ResourceRequest,
 } from "./resource-fetcher.js";
@@ -37,9 +38,16 @@ export interface ToolDescriptionAcquisitionDeps {
   fetchMetadata: (request: ResourceRequest) => Promise<ResourceFetchResult>;
 }
 
-function defaultDeps(): ToolDescriptionAcquisitionDeps {
+export interface ToolDescriptionAcquisitionOptions {
+  fetchOptions?: ResourceFetcherOptions;
+}
+
+function defaultDeps(
+  options: ToolDescriptionAcquisitionOptions = {},
+): ToolDescriptionAcquisitionDeps {
   return {
-    fetchMetadata: async (request) => fetchResourceMetadata(request),
+    fetchMetadata: async (request) =>
+      fetchResourceMetadata(request, undefined, options.fetchOptions),
   };
 }
 
diff --git a/tests/config/layer3-remote-fetch-limits.test.ts b/tests/config/layer3-remote-fetch-limits.test.ts
new file mode 100644
index 0000000..16c2056
--- /dev/null
+++ b/tests/config/layer3-remote-fetch-limits.test.ts
@@ -0,0 +1,101 @@
+import { mkdtempSync, mkdirSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, describe, expect, it } from "vitest";
+import {
+  DEFAULT_CONFIG,
+  LAYER3_REMOTE_FETCH_MAX_BYTES_ENV,
+  LAYER3_REMOTE_FETCH_TIMEOUT_ENV,
+  resolveEffectiveConfig,
+} from "../../src/config";
+
+const tempDirs: string[] = [];
+
+function makeTempDir(prefix: string): string {
+  const dir = mkdtempSync(join(tmpdir(), prefix));
+  tempDirs.push(dir);
+  return dir;
+}
+
+afterEach(() => {
+  delete process.env[LAYER3_REMOTE_FETCH_TIMEOUT_ENV];
+  delete process.env[LAYER3_REMOTE_FETCH_MAX_BYTES_ENV];
+  tempDirs.splice(0);
+});
+
+describe("layer3 remote fetch limits config", () => {
+  it("defaults to 5000ms / 1 MiB", () => {
+    const home = makeTempDir("codegate-l3-defaults-home-");
+    const project = makeTempDir("codegate-l3-defaults-project-");
+    const resolved = resolveEffectiveConfig({ scanTarget: project, homeDir: home });
+    expect(resolved.layer3_remote_fetch_timeout_ms).toBe(5000);
+    expect(resolved.layer3_remote_fetch_max_bytes).toBe(1_048_576);
+    expect(DEFAULT_CONFIG.layer3_remote_fetch_timeout_ms).toBe(5000);
+    expect(DEFAULT_CONFIG.layer3_remote_fetch_max_bytes).toBe(1_048_576);
+  });
+
+  it("honours project config > global config > defaults", () => {
+    const home = makeTempDir("codegate-l3-files-home-");
+    const project = makeTempDir("codegate-l3-files-project-");
+    mkdirSync(join(home, ".codegate"), { recursive: true });
+    writeFileSync(
+      join(home, ".codegate", "config.json"),
+      JSON.stringify({
+        layer3_remote_fetch_timeout_ms: 7000,
+        layer3_remote_fetch_max_bytes: 2048,
+      }),
+      "utf8",
+    );
+    writeFileSync(
+      join(project, ".codegate.json"),
+      JSON.stringify({ layer3_remote_fetch_timeout_ms: 2500 }),
+      "utf8",
+    );
+
+    const resolved = resolveEffectiveConfig({ scanTarget: project, homeDir: home });
+    // Project override wins on timeout.
+    expect(resolved.layer3_remote_fetch_timeout_ms).toBe(2500);
+    // No project override on max_bytes => global value applies.
+    expect(resolved.layer3_remote_fetch_max_bytes).toBe(2048);
+  });
+
+  it("env var takes precedence over project and global config", () => {
+    const home = makeTempDir("codegate-l3-env-home-");
+    const project = makeTempDir("codegate-l3-env-project-");
+    mkdirSync(join(home, ".codegate"), { recursive: true });
+    writeFileSync(
+      join(home, ".codegate", "config.json"),
+      JSON.stringify({
+        layer3_remote_fetch_timeout_ms: 4000,
+        layer3_remote_fetch_max_bytes: 2048,
+      }),
+      "utf8",
+    );
+    writeFileSync(
+      join(project, ".codegate.json"),
+      JSON.stringify({
+        layer3_remote_fetch_timeout_ms: 3000,
+        layer3_remote_fetch_max_bytes: 4096,
+      }),
+      "utf8",
+    );
+
+    process.env[LAYER3_REMOTE_FETCH_TIMEOUT_ENV] = "1500";
+    process.env[LAYER3_REMOTE_FETCH_MAX_BYTES_ENV] = "16384";
+
+    const resolved = resolveEffectiveConfig({ scanTarget: project, homeDir: home });
+    expect(resolved.layer3_remote_fetch_timeout_ms).toBe(1500);
+    expect(resolved.layer3_remote_fetch_max_bytes).toBe(16384);
+  });
+
+  it("ignores invalid env var values and falls back to config/defaults", () => {
+    const home = makeTempDir("codegate-l3-bad-env-home-");
+    const project = makeTempDir("codegate-l3-bad-env-project-");
+    process.env[LAYER3_REMOTE_FETCH_TIMEOUT_ENV] = "not-a-number";
+    process.env[LAYER3_REMOTE_FETCH_MAX_BYTES_ENV] = "-42";
+
+    const resolved = resolveEffectiveConfig({ scanTarget: project, homeDir: home });
+    expect(resolved.layer3_remote_fetch_timeout_ms).toBe(5000);
+    expect(resolved.layer3_remote_fetch_max_bytes).toBe(1_048_576);
+  });
+});
diff --git a/tests/layer3/resource-fetcher-limits.test.ts b/tests/layer3/resource-fetcher-limits.test.ts
new file mode 100644
index 0000000..a5ed6d2
--- /dev/null
+++ b/tests/layer3/resource-fetcher-limits.test.ts
@@ -0,0 +1,127 @@
+import { describe, expect, it, vi } from "vitest";
+import {
+  DEFAULT_FETCH_MAX_BYTES,
+  DEFAULT_FETCH_TIMEOUT_MS,
+  fetchResourceMetadata,
+  resourceFetcherOptionsFromConfig,
+  type ResourceFetcherDeps,
+} from "../../src/layer3-dynamic/resource-fetcher";
+
+function depsWithFetch(fetchImpl: ResourceFetcherDeps["fetch"]): ResourceFetcherDeps {
+  return {
+    fetch: fetchImpl,
+    runCommand: async () => ({ code: 0, stdout: "ok", stderr: "" }),
+    sleep: async () => {},
+    now: () => Date.now(),
+  };
+}
+
+describe("resource-fetcher size limits", () => {
+  it("rejects responses whose declared Content-Length exceeds maxBytes", async () => {
+    const fetch = vi.fn(async () => {
+      // Body that would exceed the limit; the fetcher should bail out before reading.
+      return new Response("x".repeat(10), {
+        status: 200,
+        headers: {
+          "content-type": "application/json",
+          "content-length": "5000000",
+        },
+      });
+    });
+
+    const result = await fetchResourceMetadata(
+      { id: "npm:big", kind: "npm", locator: "big-package" },
+      depsWithFetch(fetch),
+      { maxBytes: 1024, maxRetries: 0 },
+    );
+
+    expect(result.status).toBe("network_error");
+    expect(result.error).toContain("response_too_large");
+  });
+
+  it("aborts mid-stream when bytes exceed maxBytes despite a missing Content-Length", async () => {
+    // A stream that emits chunks larger than the configured limit.
+    const body = new ReadableStream<Uint8Array>({
+      start(controller) {
+        controller.enqueue(new TextEncoder().encode("aaaaaaaaaa")); // 10 bytes
+        controller.enqueue(new TextEncoder().encode("bbbbbbbbbb")); // 20 bytes total
+        controller.close();
+      },
+    });
+    const fetch = vi.fn(async () => {
+      return new Response(body, {
+        status: 200,
+        headers: { "content-type": "application/json" },
+      });
+    });
+
+    const result = await fetchResourceMetadata(
+      { id: "http:stream", kind: "http", locator: "https://example.com/stream" },
+      depsWithFetch(fetch),
+      { maxBytes: 15, maxRetries: 0 },
+    );
+
+    expect(result.status).toBe("network_error");
+    expect(result.error).toContain("response_too_large");
+  });
+
+  it("accepts responses within the size limit and returns parsed metadata", async () => {
+    const payload = JSON.stringify({ name: "@org/pkg", version: "1.0.0" });
+    const fetch = vi.fn(async () => {
+      return new Response(payload, {
+        status: 200,
+        headers: {
+          "content-type": "application/json",
+          "content-length": String(Buffer.byteLength(payload, "utf8")),
+        },
+      });
+    });
+
+    const result = await fetchResourceMetadata(
+      { id: "npm:pkg", kind: "npm", locator: "@org/pkg" },
+      depsWithFetch(fetch),
+      { maxBytes: 4096 },
+    );
+
+    expect(result.status).toBe("ok");
+    expect(result.metadata).toEqual({ name: "@org/pkg", version: "1.0.0" });
+  });
+
+  it("passes the configured timeout to AbortController", async () => {
+    let receivedSignal: AbortSignal | null = null;
+    const fetch = vi.fn(async (_input, init?: RequestInit) => {
+      receivedSignal = init?.signal ?? null;
+      return new Response("{}", {
+        status: 200,
+        headers: { "content-type": "application/json" },
+      });
+    });
+
+    await fetchResourceMetadata(
+      { id: "npm:ok", kind: "npm", locator: "ok" },
+      depsWithFetch(fetch),
+      { timeoutMs: 123, maxRetries: 0 },
+    );
+
+    // We cannot observe the timeout directly, but we can verify the abort
+    // signal plumbed through the fetch call.
+    expect(receivedSignal).not.toBeNull();
+  });
+});
+
+describe("resourceFetcherOptionsFromConfig", () => {
+  it("maps CodeGate config fields to fetcher options", () => {
+    const options = resourceFetcherOptionsFromConfig({
+      layer3_remote_fetch_timeout_ms: 7500,
+      layer3_remote_fetch_max_bytes: 2048,
+    });
+    expect(options).toEqual({ timeoutMs: 7500, maxBytes: 2048 });
+  });
+});
+
+describe("fetcher defaults", () => {
+  it("exposes sensible defaults", () => {
+    expect(DEFAULT_FETCH_TIMEOUT_MS).toBe(5000);
+    expect(DEFAULT_FETCH_MAX_BYTES).toBe(1_048_576);
+  });
+});