From 976f410de17b2d48c5717087766fd72efc1efc51 Mon Sep 17 00:00:00 2001 From: Landon Cox Date: Tue, 14 Apr 2026 13:31:55 -0700 Subject: [PATCH 1/2] feat: add upstream corporate proxy support for self-hosted runners Add --upstream-proxy flag and auto-detection from host https_proxy/ http_proxy/no_proxy environment variables. When configured, Squid chains outbound traffic through the corporate proxy via cache_peer. Key changes: - New upstream-proxy.ts with parseProxyUrl(), parseNoProxy(), detectUpstreamProxy(), and PROXY_ENV_VARS constant - UpstreamProxyConfig interface in types.ts - generateUpstreamProxySection() in squid-config.ts for cache_peer, always_direct (no_proxy bypass), and never_direct directives - CLI auto-detection with --upstream-proxy explicit override - Host proxy env vars excluded from --env-all passthrough - Security: reject credentials, loopback, HTTPS scheme, injection chars - 35 new tests across upstream-proxy, squid-config, docker-manager - Documentation in docs/environment.md Closes #1975 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/environment.md | 40 +++++++ src/cli.ts | 34 ++++++ src/docker-manager.test.ts | 29 +++++ src/docker-manager.ts | 6 + src/squid-config.test.ts | 51 ++++++++ src/squid-config.ts | 43 ++++++- src/types.ts | 40 +++++++ src/upstream-proxy.test.ts | 232 +++++++++++++++++++++++++++++++++++++ src/upstream-proxy.ts | 197 +++++++++++++++++++++++++++++++ 9 files changed, 669 insertions(+), 3 deletions(-) create mode 100644 src/upstream-proxy.test.ts create mode 100644 src/upstream-proxy.ts diff --git a/docs/environment.md b/docs/environment.md index 03432151..b1965463 100644 --- a/docs/environment.md +++ b/docs/environment.md @@ -246,6 +246,46 @@ The DinD TCP address (e.g., `tcp://localhost:2375`) typically refers to the runn - **`--enable-host-access`** — allows the agent to reach `host.docker.internal` and set `DOCKER_HOST=tcp://host.docker.internal:2375` inside the agent. 
- **`--enable-dind`** — mounts the local Docker socket (`/var/run/docker.sock`) directly into the agent container (only works when using the local daemon, not a remote DinD TCP socket). +## Upstream (Corporate) Proxy Support + +When running on self-hosted runners behind a corporate proxy, AWF can chain Squid +through the upstream proxy using the `cache_peer` directive. + +### Auto-detection + +If the host has `https_proxy`/`HTTPS_PROXY` or `http_proxy`/`HTTP_PROXY` set, AWF +automatically configures Squid to route outbound traffic through that proxy. +`no_proxy`/`NO_PROXY` domain suffixes are honored as bypass rules (`always_direct`). + +```bash +# Auto-detected — no flags needed when host proxy env vars are set +export https_proxy=http://proxy.corp.com:3128 +export no_proxy=.internal.corp.com,localhost +awf --allow-domains github.com 'curl https://api.github.com' +``` + +### Explicit override + +Use `--upstream-proxy <url>` to specify the proxy explicitly (overrides auto-detection): + +```bash +awf --upstream-proxy http://proxy.corp.com:3128 --allow-domains github.com 'curl https://api.github.com' +``` + +### Limitations (v1) + +- **HTTP proxies only** — the proxy URL must use the `http://` scheme (Squid `cache_peer` talks plain HTTP to the parent and tunnels HTTPS traffic through it with CONNECT) +- **No proxy credentials** — `user:pass@proxy` URLs are rejected; configure auth on the proxy server +- **No loopback** — `localhost`/`127.0.0.1` proxies are rejected (Squid is in a container) +- **Single proxy** — If `http_proxy` and `https_proxy` differ, use `--upstream-proxy` to disambiguate +- **Domain-only bypass** — `no_proxy` IPs, CIDRs, and wildcards are ignored (only domain suffixes work) + +### Proxy environment variable exclusion + +Host proxy environment variables (`HTTP_PROXY`, `HTTPS_PROXY`, `http_proxy`, `https_proxy`, +`ALL_PROXY`, `NO_PROXY`, etc.) are **always excluded** from container passthrough, even with +`--env-all`. AWF sets its own proxy variables pointing to Squid (`172.30.0.10:3128`). 
+ ## Troubleshooting **Variable not accessible:** Use `sudo -E` or pass explicitly with `--env VAR="$VAR"` diff --git a/src/cli.ts b/src/cli.ts index a7b35332..f8b0141f 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -28,6 +28,7 @@ import { redactSecrets } from './redact-secrets'; import { validateDomainOrPattern, SQUID_DANGEROUS_CHARS } from './domain-patterns'; import { loadAndMergeDomains } from './rules'; import { detectHostDnsServers } from './dns-resolver'; +import { detectUpstreamProxy, parseProxyUrl, parseNoProxy } from './upstream-proxy'; import { OutputFormat } from './types'; import { version } from '../package.json'; @@ -1236,6 +1237,7 @@ const optionGroupHeaders: Record = { 'build-local': 'Image Management:', 'env': 'Container Configuration:', 'dns-servers': 'Network & Security:', + 'upstream-proxy': 'Network & Security:', 'enable-api-proxy': 'API Proxy:', 'log-level': 'Logging & Debug:', }; @@ -1430,6 +1432,12 @@ program '--dns-over-https [resolver-url]', 'Enable DNS-over-HTTPS via sidecar proxy (default: https://dns.google/dns-query)' ) + .option( + '--upstream-proxy ', + 'Upstream (corporate) proxy URL for Squid to chain through.\n' + + ' Auto-detected from host https_proxy/http_proxy if not set.\n' + + ' Example: http://proxy.corp.com:3128' + ) .option( '--enable-host-access', 'Enable access to host services via host.docker.internal', @@ -1785,6 +1793,31 @@ program logger.info(`DNS-over-HTTPS enabled: ${dnsOverHttps}`); } + // Detect or parse upstream proxy configuration + let upstreamProxy: import('./types').UpstreamProxyConfig | undefined; + if (options.upstreamProxy) { + // Explicit --upstream-proxy flag + try { + const { host, port } = parseProxyUrl(options.upstreamProxy); + // Parse no_proxy from environment even when --upstream-proxy is explicit + const noProxyStr = (process.env.no_proxy || process.env.NO_PROXY || '').trim(); + const noProxy = noProxyStr ? parseNoProxy(noProxyStr) : []; + upstreamProxy = { host, port, ...(noProxy.length > 0 ? 
{ noProxy } : {}) }; + logger.info(`Upstream proxy (explicit): ${host}:${port}`); + } catch (error) { + logger.error(`Invalid --upstream-proxy: ${error instanceof Error ? error.message : error}`); + process.exit(1); + } + } else { + // Auto-detect from host environment variables + try { + upstreamProxy = detectUpstreamProxy(); + } catch (error) { + logger.error(`Upstream proxy auto-detection failed: ${error instanceof Error ? error.message : error}`); + process.exit(1); + } + } + // Parse --allow-urls for SSL Bump mode let allowedUrls: string[] | undefined; if (options.allowUrls) { @@ -1919,6 +1952,7 @@ program githubToken: process.env.GITHUB_TOKEN || process.env.GH_TOKEN, diagnosticLogs: options.diagnosticLogs || false, awfDockerHost: options.dockerHost, + upstreamProxy, }; // Apply --docker-host override for AWF's own container operations. diff --git a/src/docker-manager.test.ts b/src/docker-manager.test.ts index 88e99a31..e3d74ec7 100644 --- a/src/docker-manager.test.ts +++ b/src/docker-manager.test.ts @@ -1616,6 +1616,35 @@ describe('docker-manager', () => { } }); + it('should exclude host proxy env vars from env-all passthrough to prevent routing conflicts', () => { + const saved: Record = {}; + const proxyVars = ['HTTP_PROXY', 'HTTPS_PROXY', 'http_proxy', 'https_proxy', 'NO_PROXY', 'no_proxy']; + + for (const v of proxyVars) { + saved[v] = process.env[v]; + process.env[v] = `http://host-proxy.corp.com:3128`; + } + + try { + const configWithEnvAll = { ...mockConfig, envAll: true }; + const result = generateDockerCompose(configWithEnvAll, mockNetworkConfig); + const env = result.services.agent.environment as Record; + + // Host proxy vars must not leak — AWF sets its own proxy vars pointing to Squid + for (const v of proxyVars) { + // The value should either be absent or overwritten to Squid's address + if (env[v] !== undefined) { + expect(env[v]).not.toBe('http://host-proxy.corp.com:3128'); + } + } + } finally { + for (const v of proxyVars) { + if (saved[v] 
!== undefined) process.env[v] = saved[v]; + else delete process.env[v]; + } + } + }); + it('should auto-inject GH_HOST from GITHUB_SERVER_URL when envAll is true', () => { const prevServerUrl = process.env.GITHUB_SERVER_URL; const prevGhHost = process.env.GH_HOST; diff --git a/src/docker-manager.ts b/src/docker-manager.ts index 33648876..802abd61 100644 --- a/src/docker-manager.ts +++ b/src/docker-manager.ts @@ -8,6 +8,7 @@ import { logger } from './logger'; import { generateSquidConfig, generatePolicyManifest } from './squid-config'; import { generateSessionCa, initSslDb, CaFiles, parseUrlPatterns, cleanupSslKeyMaterial, unmountSslTmpfs } from './ssl-bump'; import { DEFAULT_DNS_SERVERS } from './dns-resolver'; +import { PROXY_ENV_VARS } from './upstream-proxy'; const SQUID_PORT = 3128; @@ -640,6 +641,10 @@ export function generateDockerCompose( // Actions runner itself, not by the agent. 'ACTIONS_RUNTIME_TOKEN', 'ACTIONS_RESULTS_URL', + // Proxy environment variables — excluded to prevent host proxy settings from + // conflicting with AWF's internal routing (agent → Squid → internet). + // AWF sets its own HTTP_PROXY/HTTPS_PROXY pointing to Squid. 
+ ...PROXY_ENV_VARS, ]); // When api-proxy is enabled, exclude API keys from agent environment @@ -2132,6 +2137,7 @@ export async function writeConfigs(config: WrapperConfig): Promise { allowHostPorts: config.allowHostPorts, enableDlp: config.enableDlp, dnsServers: config.dnsServers, + upstreamProxy: config.upstreamProxy, }); const squidConfigPath = path.join(config.workDir, 'squid.conf'); fs.writeFileSync(squidConfigPath, squidConfig, { mode: 0o644 }); diff --git a/src/squid-config.test.ts b/src/squid-config.test.ts index 0a52ac6a..3a3d3f7c 100644 --- a/src/squid-config.test.ts +++ b/src/squid-config.test.ts @@ -1868,4 +1868,55 @@ describe('generatePolicyManifest', () => { const denyRule = manifest.rules.find(r => r.id === 'deny-default'); expect(httpRule!.order).toBeLessThan(denyRule!.order); }); + + describe('Upstream Proxy Configuration', () => { + it('generates cache_peer directive for upstream proxy', () => { + const config: SquidConfig = { + domains: ['github.com'], + port: defaultPort, + upstreamProxy: { host: 'proxy.corp.com', port: 3128 }, + }; + const result = generateSquidConfig(config); + expect(result).toContain('cache_peer proxy.corp.com parent 3128 0 no-query default'); + expect(result).toContain('never_direct allow all'); + }); + + it('generates always_direct bypass for noProxy domains', () => { + const config: SquidConfig = { + domains: ['github.com'], + port: defaultPort, + upstreamProxy: { + host: 'proxy.corp.com', + port: 3128, + noProxy: ['.corp.com', 'internal.example.com'], + }, + }; + const result = generateSquidConfig(config); + expect(result).toContain('acl upstream_bypass dstdomain .corp.com'); + expect(result).toContain('acl upstream_bypass dstdomain internal.example.com'); + expect(result).toContain('acl upstream_bypass dstdomain .internal.example.com'); + expect(result).toContain('always_direct allow upstream_bypass'); + expect(result).toContain('never_direct allow all'); + }); + + it('omits upstream proxy section when not 
configured', () => { + const config: SquidConfig = { + domains: ['github.com'], + port: defaultPort, + }; + const result = generateSquidConfig(config); + expect(result).not.toContain('cache_peer'); + expect(result).not.toContain('never_direct'); + }); + + it('generates upstream proxy with custom port', () => { + const config: SquidConfig = { + domains: ['github.com'], + port: defaultPort, + upstreamProxy: { host: '10.0.0.50', port: 8080 }, + }; + const result = generateSquidConfig(config); + expect(result).toContain('cache_peer 10.0.0.50 parent 8080 0 no-query default'); + }); + }); }); diff --git a/src/squid-config.ts b/src/squid-config.ts index 6419672f..f5c0839b 100644 --- a/src/squid-config.ts +++ b/src/squid-config.ts @@ -1,4 +1,4 @@ -import { SquidConfig, PolicyManifest, PolicyRule } from './types'; +import { SquidConfig, PolicyManifest, PolicyRule, UpstreamProxyConfig } from './types'; import { parseDomainList, isDomainMatchedByPattern, @@ -9,6 +9,43 @@ import { import { generateDlpSquidConfig } from './dlp'; import { DEFAULT_DNS_SERVERS } from './dns-resolver'; +/** + * Generates Squid cache_peer / always_direct / never_direct directives for + * upstream (corporate) proxy chaining. + * + * When an upstream proxy is configured, ALL outbound traffic goes through + * the parent proxy except domains in the no_proxy bypass list. 
/**
 * Generates the Squid directives that chain outbound traffic through an
 * upstream (corporate) parent proxy.
 *
 * Emitted in order:
 *   1. two explanatory comment lines and a `cache_peer` line for the parent,
 *   2. when `upstream.noProxy` has entries: a blank line, a comment, one
 *      `acl upstream_bypass dstdomain …` line per bypass value and
 *      `always_direct allow upstream_bypass`,
 *   3. `never_direct allow all`, forcing everything else through the parent.
 *
 * Every value is interpolated verbatim into squid.conf, so the function
 * re-validates its input instead of trusting that the caller went through
 * `parseProxyUrl()` / `parseNoProxy()`.
 *
 * @param upstream - Parent proxy host/port and optional bypass domain list
 * @returns The directives joined with `\n` (no trailing newline)
 * @throws Error if the host, port, or a bypass entry is empty or contains
 *         characters that could start a comment or a new directive
 */
function generateUpstreamProxySection(upstream: UpstreamProxyConfig): string {
  // Whitespace, comment markers, quotes and backslashes can all alter how
  // Squid tokenises a config line.
  const unsafeForSquidConf = /[\s#;'"\\]/;

  if (!upstream.host || unsafeForSquidConf.test(upstream.host)) {
    throw new Error(`Upstream proxy hostname contains invalid characters: ${upstream.host}`);
  }
  if (!Number.isInteger(upstream.port) || upstream.port < 1 || upstream.port > 65535) {
    throw new Error(`Invalid upstream proxy port: ${upstream.port}`);
  }

  const lines: string[] = [
    '# Upstream corporate proxy — route outbound traffic through parent proxy',
    '# Required for self-hosted runners where direct egress is blocked',
    `cache_peer ${upstream.host} parent ${upstream.port} 0 no-query default`,
  ];

  // Collect ACL values in a Set so repeated or overlapping no_proxy entries
  // (e.g. "corp.com" and ".corp.com") do not produce duplicate ACL lines.
  // Insertion order is preserved, so output for already-unique input is unchanged.
  const bypassValues = new Set<string>();
  for (const domain of upstream.noProxy ? upstream.noProxy : []) {
    if (!domain || unsafeForSquidConf.test(domain)) {
      throw new Error(`Upstream proxy bypass domain contains invalid characters: "${domain}"`);
    }
    // All entries are treated as suffix matches (domain + subdomains),
    // matching standard no_proxy semantics:
    //   .corp.com         → *.corp.com
    //   internal.corp.com → internal.corp.com AND *.internal.corp.com
    const isSuffix = domain.startsWith('.');
    bypassValues.add(isSuffix ? domain : `.${domain}`);
    // For non-dot entries the bare domain is emitted as well.
    // NOTE(review): Squid's dstdomain treats ".x" as matching "x" too, so this
    // second value looks redundant — kept because existing tests assert it.
    if (!isSuffix) {
      bypassValues.add(domain);
    }
  }

  if (bypassValues.size > 0) {
    lines.push('');
    lines.push('# Bypass upstream proxy for these domains (from host no_proxy)');
    for (const value of bypassValues) {
      lines.push(`acl upstream_bypass dstdomain ${value}`);
    }
    lines.push('always_direct allow upstream_bypass');
  }

  // Force all non-bypass traffic through the parent proxy
  lines.push('never_direct allow all');

  return lines.join('\n');
}
parseDomainConfig(domains); @@ -609,7 +646,7 @@ cache deny all # DNS settings - Squid resolves all domains for HTTP/HTTPS traffic dns_nameservers ${(dnsServers && dnsServers.length > 0) ? dnsServers.join(' ') : DEFAULT_DNS_SERVERS.join(' ')} - +${upstreamProxy ? '\n' + generateUpstreamProxySection(upstreamProxy) : ''} # Forwarded headers forwarded_for delete via off diff --git a/src/types.ts b/src/types.ts index 6ae22ba4..3098af27 100644 --- a/src/types.ts +++ b/src/types.ts @@ -920,6 +920,38 @@ export interface WrapperConfig { * @example 45 */ agentTimeout?: number; + + /** + * Upstream (corporate) proxy for Squid to route outbound traffic through. + * + * When set, Squid uses `cache_peer` to forward all outbound HTTP/HTTPS + * traffic through this parent proxy instead of connecting directly to the + * internet. This is required on self-hosted runners behind corporate proxies + * where direct egress is blocked. + * + * Auto-detected from host `https_proxy`/`HTTPS_PROXY`/`http_proxy`/`HTTP_PROXY` + * environment variables, or explicitly set via `--upstream-proxy `. + * + * @example { host: 'proxy.corp.com', port: 3128 } + */ + upstreamProxy?: UpstreamProxyConfig; +} + +/** + * Upstream proxy configuration for Squid cache_peer routing + */ +export interface UpstreamProxyConfig { + /** Hostname or IP of the upstream proxy (e.g., 'proxy.corp.com') */ + host: string; + /** Port of the upstream proxy (e.g., 3128) */ + port: number; + /** + * Domains that should bypass the upstream proxy and connect directly. + * Parsed from host `no_proxy`/`NO_PROXY`. Only domain suffixes are + * supported (e.g., '.corp.com', 'internal.example.com'). + * IPs, CIDRs, and wildcards are ignored with a warning. + */ + noProxy?: string[]; } /** @@ -1067,6 +1099,14 @@ export interface SquidConfig { * @default ['8.8.8.8', '8.8.4.4'] */ dnsServers?: string[]; + + /** + * Upstream (corporate) proxy for Squid to chain outbound traffic through. 
+ * + * When set, generates `cache_peer` / `never_direct` / `always_direct` + * directives so Squid forwards traffic through the parent proxy. + */ + upstreamProxy?: UpstreamProxyConfig; } /** diff --git a/src/upstream-proxy.test.ts b/src/upstream-proxy.test.ts new file mode 100644 index 00000000..386e630b --- /dev/null +++ b/src/upstream-proxy.test.ts @@ -0,0 +1,232 @@ +import { parseProxyUrl, parseNoProxy, detectUpstreamProxy, PROXY_ENV_VARS } from './upstream-proxy'; + +// Suppress logger output in tests +jest.mock('./logger', () => ({ + logger: { + info: jest.fn(), + warn: jest.fn(), + debug: jest.fn(), + error: jest.fn(), + }, +})); + +describe('PROXY_ENV_VARS', () => { + it('includes all standard proxy environment variable names', () => { + expect(PROXY_ENV_VARS).toContain('HTTP_PROXY'); + expect(PROXY_ENV_VARS).toContain('HTTPS_PROXY'); + expect(PROXY_ENV_VARS).toContain('http_proxy'); + expect(PROXY_ENV_VARS).toContain('https_proxy'); + expect(PROXY_ENV_VARS).toContain('NO_PROXY'); + expect(PROXY_ENV_VARS).toContain('no_proxy'); + expect(PROXY_ENV_VARS).toContain('ALL_PROXY'); + expect(PROXY_ENV_VARS).toContain('all_proxy'); + }); +}); + +describe('parseProxyUrl', () => { + it('parses a standard HTTP proxy URL', () => { + expect(parseProxyUrl('http://proxy.corp.com:3128')).toEqual({ + host: 'proxy.corp.com', + port: 3128, + }); + }); + + it('defaults port to 3128 when omitted', () => { + expect(parseProxyUrl('http://proxy.corp.com')).toEqual({ + host: 'proxy.corp.com', + port: 3128, + }); + }); + + it('handles URL without scheme', () => { + expect(parseProxyUrl('proxy.corp.com:8080')).toEqual({ + host: 'proxy.corp.com', + port: 8080, + }); + }); + + it('handles bare hostname without scheme or port', () => { + expect(parseProxyUrl('proxy.corp.com')).toEqual({ + host: 'proxy.corp.com', + port: 3128, + }); + }); + + it('trims whitespace', () => { + expect(parseProxyUrl(' http://proxy.corp.com:3128 ')).toEqual({ + host: 'proxy.corp.com', + port: 3128, + }); + 
}); + + it('rejects empty URL', () => { + expect(() => parseProxyUrl('')).toThrow('empty'); + expect(() => parseProxyUrl(' ')).toThrow('empty'); + }); + + it('rejects URLs with credentials', () => { + expect(() => parseProxyUrl('http://user:pass@proxy.corp.com:3128')).toThrow('credentials'); + expect(() => parseProxyUrl('http://user@proxy.corp.com:3128')).toThrow('credentials'); + }); + + it('rejects HTTPS scheme', () => { + expect(() => parseProxyUrl('https://proxy.corp.com:3128')).toThrow('unsupported scheme'); + }); + + it('rejects loopback addresses', () => { + expect(() => parseProxyUrl('http://localhost:3128')).toThrow('loopback'); + expect(() => parseProxyUrl('http://127.0.0.1:3128')).toThrow('loopback'); + expect(() => parseProxyUrl('http://0.0.0.0:3128')).toThrow('loopback'); + }); + + it('rejects hostnames with squid.conf injection characters', () => { + expect(() => parseProxyUrl('http://proxy host.com:3128')).toThrow(); + expect(() => parseProxyUrl("http://proxy'host.com:3128")).toThrow('invalid characters'); + }); + + it('accepts valid IP addresses', () => { + expect(parseProxyUrl('http://10.0.0.1:3128')).toEqual({ + host: '10.0.0.1', + port: 3128, + }); + expect(parseProxyUrl('http://192.168.1.1:8080')).toEqual({ + host: '192.168.1.1', + port: 8080, + }); + }); +}); + +describe('parseNoProxy', () => { + it('parses comma-separated domain suffixes', () => { + expect(parseNoProxy('.corp.com,internal.example.com')).toEqual([ + '.corp.com', + 'internal.example.com', + ]); + }); + + it('returns empty array for empty input', () => { + expect(parseNoProxy('')).toEqual([]); + expect(parseNoProxy(' ')).toEqual([]); + }); + + it('skips loopback entries', () => { + expect(parseNoProxy('localhost,127.0.0.1,.corp.com')).toEqual(['.corp.com']); + }); + + it('skips wildcard *', () => { + expect(parseNoProxy('*,.corp.com')).toEqual(['.corp.com']); + }); + + it('skips IP addresses', () => { + 
expect(parseNoProxy('10.0.0.0/8,.corp.com,192.168.1.1')).toEqual(['.corp.com']); + }); + + it('skips IPv6 entries', () => { + expect(parseNoProxy('::1,[::1],.corp.com')).toEqual(['.corp.com']); + }); + + it('skips entries with ports', () => { + expect(parseNoProxy('host:8080,.corp.com')).toEqual(['.corp.com']); + }); + + it('skips entries with injection characters', () => { + expect(parseNoProxy('.corp.com,bad domain.com')).toEqual(['.corp.com']); + }); + + it('handles whitespace around entries', () => { + expect(parseNoProxy(' .corp.com , internal.example.com ')).toEqual([ + '.corp.com', + 'internal.example.com', + ]); + }); +}); + +describe('detectUpstreamProxy', () => { + it('returns undefined when no proxy env vars are set', () => { + expect(detectUpstreamProxy({})).toBeUndefined(); + }); + + it('detects from https_proxy', () => { + const result = detectUpstreamProxy({ + https_proxy: 'http://proxy.corp.com:3128', + }); + expect(result).toEqual({ + host: 'proxy.corp.com', + port: 3128, + }); + }); + + it('detects from HTTPS_PROXY', () => { + const result = detectUpstreamProxy({ + HTTPS_PROXY: 'http://proxy.corp.com:3128', + }); + expect(result).toEqual({ + host: 'proxy.corp.com', + port: 3128, + }); + }); + + it('prefers lowercase https_proxy over uppercase', () => { + const result = detectUpstreamProxy({ + https_proxy: 'http://lowercase.corp.com:3128', + HTTPS_PROXY: 'http://uppercase.corp.com:3128', + }); + expect(result).toEqual({ + host: 'lowercase.corp.com', + port: 3128, + }); + }); + + it('falls back to http_proxy when https_proxy is absent', () => { + const result = detectUpstreamProxy({ + http_proxy: 'http://proxy.corp.com:8080', + }); + expect(result).toEqual({ + host: 'proxy.corp.com', + port: 8080, + }); + }); + + it('throws when http_proxy and https_proxy differ', () => { + expect(() => + detectUpstreamProxy({ + http_proxy: 'http://proxy1.corp.com:3128', + https_proxy: 'http://proxy2.corp.com:3128', + }) + ).toThrow('different http_proxy and 
https_proxy'); + }); + + it('succeeds when http_proxy and https_proxy are the same', () => { + const result = detectUpstreamProxy({ + http_proxy: 'http://proxy.corp.com:3128', + https_proxy: 'http://proxy.corp.com:3128', + }); + expect(result).toEqual({ + host: 'proxy.corp.com', + port: 3128, + }); + }); + + it('includes no_proxy domains', () => { + const result = detectUpstreamProxy({ + https_proxy: 'http://proxy.corp.com:3128', + no_proxy: 'localhost,.corp.com,internal.example.com', + }); + expect(result).toEqual({ + host: 'proxy.corp.com', + port: 3128, + noProxy: ['.corp.com', 'internal.example.com'], + }); + }); + + it('omits noProxy when all entries are filtered out', () => { + const result = detectUpstreamProxy({ + https_proxy: 'http://proxy.corp.com:3128', + no_proxy: 'localhost,127.0.0.1', + }); + expect(result).toEqual({ + host: 'proxy.corp.com', + port: 3128, + }); + }); +}); diff --git a/src/upstream-proxy.ts b/src/upstream-proxy.ts new file mode 100644 index 00000000..8aa6f22e --- /dev/null +++ b/src/upstream-proxy.ts @@ -0,0 +1,197 @@ +/** + * Upstream proxy auto-detection and validation. + * + * Reads host http_proxy/https_proxy/no_proxy environment variables and + * produces a validated UpstreamProxyConfig for Squid cache_peer chaining. + */ + +import { UpstreamProxyConfig } from './types'; +import { logger } from './logger'; + +/** + * All proxy-related environment variable names that should be excluded + * from container passthrough to prevent conflicts with AWF's internal routing. + */ +export const PROXY_ENV_VARS = [ + 'HTTP_PROXY', + 'HTTPS_PROXY', + 'http_proxy', + 'https_proxy', + 'ALL_PROXY', + 'all_proxy', + 'FTP_PROXY', + 'ftp_proxy', + 'NO_PROXY', + 'no_proxy', +] as const; + +/** + * Parses a proxy URL into host and port. Rejects unsupported features. 
/**
 * Returns true when `host` names a loopback endpoint: `localhost` (optionally
 * with a trailing dot) and `*.localhost`, the IPv4 127.0.0.0/8 range,
 * `0.0.0.0`, IPv6 `::1` (bracketed or not), and IPv4-mapped IPv6 loopback.
 */
function isLoopbackHost(host: string): boolean {
  // URL.hostname keeps the brackets around IPv6 literals; strip them and any
  // trailing root dot before comparing.
  const bare = host.toLowerCase().replace(/^\[|\]$/g, '').replace(/\.$/, '');
  if (bare === 'localhost' || bare.endsWith('.localhost') || bare === '0.0.0.0') return true;
  if (bare === '::1' || bare === '0:0:0:0:0:0:0:1') return true;
  // ::ffff:127.x.y.z — URL serialises the dotted tail as two hex groups (7fXX:XXXX)
  if (/^::ffff:(?:7f[0-9a-f]{2}:[0-9a-f]{1,4}|127(?:\.\d{1,3}){3})$/.test(bare)) return true;
  const ipv4 = /^(\d{1,3})\.\d{1,3}\.\d{1,3}\.\d{1,3}$/.exec(bare);
  return ipv4 !== null && parseInt(ipv4[1], 10) === 127;
}

/**
 * Extracts the port exactly as written in the authority part of `normalized`.
 * Needed because `URL.port` is the empty string when the port equals the
 * scheme default (80 for http), which would otherwise be indistinguishable
 * from "no port given".
 */
function explicitPortText(normalized: string): string | undefined {
  const authority = /^[a-z][a-z0-9+.-]*:\/\/([^/\\?#]*)/i.exec(normalized);
  if (!authority) return undefined;
  const port = /:(\d+)$/.exec(authority[1]);
  return port ? port[1] : undefined;
}

/**
 * Parses a proxy URL into host and port. Rejects unsupported features.
 *
 * A value without a scheme (e.g. "proxy:3128") is treated as `http://…`.
 * When no port is written at all the port defaults to 3128; an explicitly
 * written port — including `:80` — is always honoured.
 *
 * @param url - Proxy URL (e.g., "http://proxy.corp.com:3128")
 * @returns Parsed host and port
 * @throws Error if the URL is empty or malformed, contains credentials, uses a
 *         scheme other than http, names a loopback host, or has an invalid port
 */
export function parseProxyUrl(url: string): { host: string; port: number } {
  const trimmed = url.trim();
  if (!trimmed) {
    throw new Error('Upstream proxy URL is empty');
  }

  // Normalize: add scheme only if NONE is present (common for proxy env vars
  // like "proxy:3128"). Values that already carry a scheme are left alone so
  // that e.g. "socks5://…" reaches the scheme check below instead of being
  // misread as host "socks5".
  const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed);
  const normalized = hasScheme ? trimmed : `http://${trimmed}`;

  let parsed: URL;
  try {
    parsed = new URL(normalized);
  } catch {
    throw new Error(`Invalid upstream proxy URL: ${trimmed}`);
  }

  // Reject credentials — they would leak into audit artifacts (squid.conf is not redacted)
  if (parsed.username || parsed.password) {
    throw new Error(
      'Upstream proxy URL contains credentials (user:pass@), which are not supported in v1. ' +
      'Configure proxy authentication on the proxy server itself, or use a proxy that does not require auth.'
    );
  }

  // Only HTTP scheme is supported for cache_peer (Squid uses HTTP CONNECT for HTTPS tunnels)
  if (parsed.protocol !== 'http:') {
    throw new Error(
      `Upstream proxy URL uses unsupported scheme "${parsed.protocol}". ` +
      'Only HTTP proxies are supported (Squid uses HTTP CONNECT for HTTPS tunnels).'
    );
  }

  const host = parsed.hostname;
  if (!host) {
    throw new Error(`Upstream proxy URL has no hostname: ${trimmed}`);
  }

  // Sanitize: reject values that could inject into squid.conf
  if (/[\s#;'"\\]/.test(host)) {
    throw new Error(`Upstream proxy hostname contains invalid characters: ${host}`);
  }

  // Reject loopback addresses — Squid runs in a container and localhost != host localhost
  if (isLoopbackHost(host)) {
    throw new Error(
      `Upstream proxy "${host}" is a loopback address. Squid runs in a Docker container ` +
      'where localhost refers to the container, not the host. ' +
      'Use the host machine\'s network IP or configure --enable-host-access with host.docker.internal.'
    );
  }

  // parsed.port is '' both when the port is omitted and when it equals the
  // scheme default, so fall back to the literal text before defaulting to 3128.
  const portText = parsed.port || explicitPortText(normalized);
  const port = portText ? parseInt(portText, 10) : 3128;
  if (isNaN(port) || port < 1 || port > 65535) {
    throw new Error(`Invalid upstream proxy port: ${portText}`);
  }

  return { host, port };
}

/**
 * Parses a no_proxy string into validated domain suffixes.
 * Loopback entries are dropped silently; other non-domain entries (wildcards,
 * IPs, CIDRs, host:port pairs, IPv6 literals, entries with unsafe characters)
 * are dropped with a warning.
 *
 * @param noProxy - Comma-separated no_proxy value (e.g., "localhost,.corp.com,10.0.0.0/8")
 * @returns Array of validated domain suffixes, in input order
 */
export function parseNoProxy(noProxy: string): string[] {
  if (!noProxy.trim()) return [];

  const entries = noProxy.split(',').map(e => e.trim()).filter(e => e.length > 0);
  const domains: string[] = [];

  for (const entry of entries) {
    // Skip loopback (irrelevant for Squid upstream bypass)
    if (isLoopbackHost(entry)) {
      continue;
    }

    // Skip wildcard '*' (means "bypass everything" — contradicts having an upstream proxy)
    if (entry === '*') {
      logger.warn('Ignoring no_proxy wildcard "*" — it would bypass the upstream proxy for all traffic');
      continue;
    }

    // Skip glob-style patterns such as "*.corp.com": passing them on would
    // produce an invalid Squid dstdomain value (".*.corp.com").
    if (entry.includes('*')) {
      logger.warn(`Ignoring no_proxy wildcard pattern "${entry}" — only domain suffixes are supported for upstream proxy bypass`);
      continue;
    }

    // Skip IP addresses (v4)
    if (/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(\/\d{1,2})?$/.test(entry)) {
      logger.warn(`Ignoring no_proxy IP/CIDR "${entry}" — only domain suffixes are supported for upstream proxy bypass`);
      continue;
    }

    // Skip entries with ports (e.g., "host:8080"). Checked before the IPv6
    // test and limited to a single colon so that host:port pairs and IPv6
    // literals each get the right warning.
    if (/^[^:[\]]+:\d+$/.test(entry)) {
      logger.warn(`Ignoring no_proxy entry with port "${entry}" — port-based bypass is not supported for upstream proxy`);
      continue;
    }

    // Skip IPv6 addresses
    if (entry.includes(':') || entry.startsWith('[')) {
      logger.warn(`Ignoring no_proxy IPv6 entry "${entry}" — only domain suffixes are supported for upstream proxy bypass`);
      continue;
    }

    // Sanitize: reject values that could inject into squid.conf
    if (/[\s#;'"\\]/.test(entry)) {
      logger.warn(`Ignoring no_proxy entry with invalid characters: "${entry}"`);
      continue;
    }

    // Valid domain suffix (e.g., ".corp.com" or "internal.example.com")
    domains.push(entry);
  }

  return domains;
}

/**
 * Returns true when both proxy URLs parse successfully to the same host and
 * port; any parse failure counts as "not the same".
 */
function isSameProxyEndpoint(first: string, second: string): boolean {
  try {
    const a = parseProxyUrl(first);
    const b = parseProxyUrl(second);
    return a.host === b.host && a.port === b.port;
  } catch {
    return false;
  }
}

/**
 * Auto-detects upstream proxy configuration from host environment variables.
 *
 * Reads https_proxy/HTTPS_PROXY (preferred) or http_proxy/HTTP_PROXY.
 * If both are set and point at different endpoints, throws an error requiring
 * --upstream-proxy. Values that differ only textually (trailing slash, missing
 * scheme) but resolve to the same host:port are accepted.
 *
 * @param env - Environment variables to inspect (defaults to process.env)
 * @returns Upstream proxy config, or undefined if no proxy is detected
 * @throws Error if the two proxy variables disagree or the chosen URL is invalid
 */
export function detectUpstreamProxy(
  env: Record<string, string | undefined> = process.env
): UpstreamProxyConfig | undefined {
  // Read proxy URLs (prefer lowercase per convention, then uppercase)
  const httpsProxy = (env.https_proxy || env.HTTPS_PROXY || '').trim();
  const httpProxy = (env.http_proxy || env.HTTP_PROXY || '').trim();

  // No proxy configured
  if (!httpsProxy && !httpProxy) {
    return undefined;
  }

  // If both are set and resolve to different endpoints, we can't determine which to use
  if (httpsProxy && httpProxy && httpsProxy !== httpProxy && !isSameProxyEndpoint(httpsProxy, httpProxy)) {
    throw new Error(
      'Host has different http_proxy and https_proxy values. ' +
      'AWF cannot determine which upstream proxy to use. ' +
      `Use --upstream-proxy to specify explicitly.\n` +
      `  http_proxy:  ${httpProxy}\n` +
      `  https_proxy: ${httpsProxy}`
    );
  }

  const proxyUrl = httpsProxy || httpProxy;
  const { host, port } = parseProxyUrl(proxyUrl);

  // Parse no_proxy
  const noProxyStr = (env.no_proxy || env.NO_PROXY || '').trim();
  const noProxy = parseNoProxy(noProxyStr);

  logger.info(`Detected upstream proxy: ${host}:${port}`);
  if (noProxy.length > 0) {
    logger.debug(`Upstream proxy bypass domains: ${noProxy.join(', ')}`);
  }

  // Omit the noProxy key entirely when nothing survived filtering
  return { host, port, ...(noProxy.length > 0 ? { noProxy } : {}) };
}
{ noProxy } : {}) }; +} From 7facedb4bc84ec8e2462d899ad8aafc2cc6c8313 Mon Sep 17 00:00:00 2001 From: Landon Cox Date: Tue, 14 Apr 2026 13:50:00 -0700 Subject: [PATCH 2/2] fix: address PR review feedback for upstream proxy support - Robust loopback detection: check full 127.0.0.0/8 range and IPv6 variants via isLoopback() helper instead of exact-match list - Fix misleading comments in squid-config.ts: non-dot no_proxy entries are treated as suffix matches (domain + subdomains), not exact-only - Update docs/environment.md: clarify that host proxy vars are excluded from container passthrough but are read for upstream proxy detection Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/environment.md | 2 +- src/squid-config.ts | 8 +++++--- src/upstream-proxy.test.ts | 6 ++++-- src/upstream-proxy.ts | 24 +++++++++++++++++++++--- 4 files changed, 31 insertions(+), 9 deletions(-) diff --git a/docs/environment.md b/docs/environment.md index b1965463..946f42af 100644 --- a/docs/environment.md +++ b/docs/environment.md @@ -40,7 +40,7 @@ Using `--env-all` passes all host environment variables to the container, which **Excluded variables** (even with `--env-all`): `PATH`, `PWD`, `OLDPWD`, `SHLVL`, `_`, `SUDO_*` -**Proxy variables:** `HTTP_PROXY`, `HTTPS_PROXY`, `https_proxy` (and their lowercase/uppercase variants) from the host are ignored when using `--env-all` because the firewall always sets these to point to Squid. Host proxy settings cannot be passed through as they would conflict with the firewall's traffic routing. +**Proxy variables:** `HTTP_PROXY`, `HTTPS_PROXY`, `http_proxy`, `https_proxy`, `NO_PROXY`, `no_proxy`, `ALL_PROXY`, and `FTP_PROXY` (all case variants) from the host are **excluded from container passthrough** when using `--env-all`. The firewall sets its own proxy variables pointing to Squid inside the container. 
However, host proxy variables **are read** for upstream proxy auto-detection — if the host has `https_proxy`/`http_proxy` set, AWF configures Squid to chain outbound traffic through that corporate proxy (see [Upstream Proxy Support](#upstream-corporate-proxy-support)). ## `--env-file` Support diff --git a/src/squid-config.ts b/src/squid-config.ts index f5c0839b..1452866f 100644 --- a/src/squid-config.ts +++ b/src/squid-config.ts @@ -28,11 +28,13 @@ function generateUpstreamProxySection(upstream: UpstreamProxyConfig): string { lines.push(''); lines.push('# Bypass upstream proxy for these domains (from host no_proxy)'); for (const domain of upstream.noProxy) { - // Domain suffixes: .corp.com matches *.corp.com - // Exact domains: internal.corp.com matches only that host + // All entries are treated as suffix matches (domain + subdomains), + // matching standard no_proxy semantics: + // .corp.com → *.corp.com + // internal.corp.com → internal.corp.com AND *.internal.corp.com const squidDomain = domain.startsWith('.') ? 
domain : `.${domain}`; lines.push(`acl upstream_bypass dstdomain ${squidDomain}`); - // Also add exact match for non-wildcard domains + // For non-dot entries, also add the exact domain for Squid dstdomain matching if (!domain.startsWith('.')) { lines.push(`acl upstream_bypass dstdomain ${domain}`); } diff --git a/src/upstream-proxy.test.ts b/src/upstream-proxy.test.ts index 386e630b..3b2a3248 100644 --- a/src/upstream-proxy.test.ts +++ b/src/upstream-proxy.test.ts @@ -76,6 +76,8 @@ describe('parseProxyUrl', () => { it('rejects loopback addresses', () => { expect(() => parseProxyUrl('http://localhost:3128')).toThrow('loopback'); expect(() => parseProxyUrl('http://127.0.0.1:3128')).toThrow('loopback'); + expect(() => parseProxyUrl('http://127.0.1.1:3128')).toThrow('loopback'); + expect(() => parseProxyUrl('http://127.255.255.255:3128')).toThrow('loopback'); expect(() => parseProxyUrl('http://0.0.0.0:3128')).toThrow('loopback'); }); @@ -109,8 +111,8 @@ describe('parseNoProxy', () => { expect(parseNoProxy(' ')).toEqual([]); }); - it('skips loopback entries', () => { - expect(parseNoProxy('localhost,127.0.0.1,.corp.com')).toEqual(['.corp.com']); + it('skips loopback entries including full 127.x range', () => { + expect(parseNoProxy('localhost,127.0.0.1,127.0.1.1,.corp.com')).toEqual(['.corp.com']); }); it('skips wildcard *', () => { diff --git a/src/upstream-proxy.ts b/src/upstream-proxy.ts index 8aa6f22e..fd51a63f 100644 --- a/src/upstream-proxy.ts +++ b/src/upstream-proxy.ts @@ -25,6 +25,25 @@ export const PROXY_ENV_VARS = [ 'no_proxy', ] as const; +/** + * Checks whether an address is a loopback address. + * Covers localhost, the full 127.0.0.0/8 range, IPv6 ::1 variants, and 0.0.0.0. 
+ */ +function isLoopback(host: string): boolean { + const lower = host.toLowerCase(); + if (lower === 'localhost' || lower === '0.0.0.0') return true; + + // IPv6 loopback — strip a leading '[' and a trailing ']', then match ::1 or the fully expanded 0:0:0:0:0:0:0:1 (other spellings, e.g. ::ffff:127.0.0.1, are not recognized) + const bare = lower.replace(/^\[|\]$/g, ''); + if (bare === '::1' || bare === '0:0:0:0:0:0:0:1') return true; + + // IPv4 127.0.0.0/8 — any dotted quad whose first octet is 127 (octets are not range-checked, so 127.999.0.1 also matches) + const ipv4Match = bare.match(/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/); + if (ipv4Match && parseInt(ipv4Match[1], 10) === 127) return true; + + return false; +} + /** * Parses a proxy URL into host and port. Rejects unsupported features. * @@ -78,8 +97,7 @@ export function parseProxyUrl(url: string): { host: string; port: number } { } // Reject loopback addresses — Squid runs in a container and localhost != host localhost - const loopbackPatterns = ['localhost', '127.0.0.1', '::1', '0.0.0.0']; - if (loopbackPatterns.includes(host.toLowerCase())) { + if (isLoopback(host)) { throw new Error( `Upstream proxy "${host}" is a loopback address. Squid runs in a Docker container ` + 'where localhost refers to the container, not the host. ' + @@ -110,7 +128,7 @@ export function parseNoProxy(noProxy: string): string[] { for (const entry of entries) { // Skip loopback (irrelevant for Squid upstream bypass) - if (['localhost', '127.0.0.1', '::1', '0.0.0.0'].includes(entry.toLowerCase())) { + if (isLoopback(entry)) { continue; }