diff --git a/assets/gep/failed_capsules.json b/assets/gep/failed_capsules.json new file mode 100644 index 00000000..76dff646 --- /dev/null +++ b/assets/gep/failed_capsules.json @@ -0,0 +1,4 @@ +{ + "version": 1, + "failed_capsules": [] +} diff --git a/assets/gep/genes.jsonl b/assets/gep/genes.jsonl new file mode 100644 index 00000000..e69de29b diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 00000000..5456a46a --- /dev/null +++ b/package-lock.json @@ -0,0 +1,13 @@ +{ + "name": "evolver", + "version": "1.20.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "evolver", + "version": "1.20.0", + "license": "MIT" + } + } +} diff --git a/src/gep/sanitize.js b/src/gep/sanitize.js index 8e4a628b..595aa8dd 100644 --- a/src/gep/sanitize.js +++ b/src/gep/sanitize.js @@ -4,12 +4,30 @@ // Patterns to redact (replaced with placeholder) const REDACT_PATTERNS = [ - // API keys & tokens + // API keys & tokens (generic) /Bearer\s+[A-Za-z0-9\-._~+\/]+=*/g, /sk-[A-Za-z0-9]{20,}/g, /token[=:]\s*["']?[A-Za-z0-9\-._~+\/]{16,}["']?/gi, /api[_-]?key[=:]\s*["']?[A-Za-z0-9\-._~+\/]{16,}["']?/gi, /secret[=:]\s*["']?[A-Za-z0-9\-._~+\/]{16,}["']?/gi, + /password[=:]\s*["']?[^\s"',;)}\]]{6,}["']?/gi, + // GitHub tokens (ghp_, gho_, ghu_, ghs_, github_pat_) + /ghp_[A-Za-z0-9]{36,}/g, + /gho_[A-Za-z0-9]{36,}/g, + /ghu_[A-Za-z0-9]{36,}/g, + /ghs_[A-Za-z0-9]{36,}/g, + /github_pat_[A-Za-z0-9_]{22,}/g, + // AWS access keys + /AKIA[0-9A-Z]{16}/g, + // OpenAI / Anthropic tokens + /sk-proj-[A-Za-z0-9\-_]{20,}/g, + /sk-ant-[A-Za-z0-9\-_]{20,}/g, + // npm tokens + /npm_[A-Za-z0-9]{36,}/g, + // Private keys + /-----BEGIN\s+(?:RSA\s+|EC\s+|DSA\s+|OPENSSH\s+)?PRIVATE\s+KEY-----[\s\S]*?-----END\s+(?:RSA\s+|EC\s+|DSA\s+|OPENSSH\s+)?PRIVATE\s+KEY-----/g, + // Basic auth in URLs (redact only credentials, keep :// and @) + /(?<=:\/\/)[^@\s]+:[^@\s]+(?=@)/g, // Local filesystem paths /\/home\/[^\s"',;)}\]]+/g, /\/Users\/[^\s"',;)}\]]+/g, diff --git a/test/sanitize.test.js b/test/sanitize.test.js new file mode 100644 index 00000000..7df23ed0 --- /dev/null +++ b/test/sanitize.test.js @@ -0,0 +1,90 @@ +const assert = require('assert'); +const { sanitizePayload, redactString } = require('../src/gep/sanitize'); + +const REDACTED = '[REDACTED]'; + +// --- redactString --- + +// Existing patterns (regression) +assert.strictEqual(redactString('Bearer abc123def456ghi789jkl0'), REDACTED); +assert.strictEqual(redactString('sk-abcdefghijklmnopqrstuvwxyz'), REDACTED); +assert.strictEqual(redactString('token=abcdefghijklmnop1234'), REDACTED); +assert.strictEqual(redactString('api_key=abcdefghijklmnop1234'), REDACTED); +assert.strictEqual(redactString('secret: abcdefghijklmnop1234'), REDACTED); +assert.strictEqual(redactString('/home/user/secret/file.txt'), REDACTED); +assert.strictEqual(redactString('/Users/admin/docs'), REDACTED); +assert.strictEqual(redactString('user@example.com'), REDACTED); + +// GitHub tokens (bare, without token= prefix) +assert.ok(redactString('ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234').includes(REDACTED), + 'bare ghp_ token should be redacted'); +assert.ok(redactString('gho_abcdefghijklmnopqrstuvwxyz1234567890').includes(REDACTED), + 'bare gho_ token should be redacted'); +assert.ok(redactString('github_pat_abcdefghijklmnopqrstuvwxyz123456').includes(REDACTED), + 'github_pat_ token should be redacted'); +assert.ok(redactString('use ghs_abcdefghijklmnopqrstuvwxyz1234567890 for auth').includes(REDACTED), + 'ghs_ in sentence should be redacted'); + +// AWS keys +assert.ok(redactString('AKIAIOSFODNN7EXAMPLE').includes(REDACTED), + 'AWS access key should be redacted'); + +// OpenAI project tokens +assert.ok(redactString('sk-proj-bxOCXoWsaPj0IDE1yqlXCXIkWO1f').includes(REDACTED), + 'sk-proj- token should be redacted'); + +// Anthropic tokens +assert.ok(redactString('sk-ant-api03-abcdefghijklmnopqrst').includes(REDACTED), + 'sk-ant- token should be redacted'); + +// npm tokens +assert.ok(redactString('npm_abcdefghijklmnopqrstuvwxyz1234567890').includes(REDACTED), + 'npm token should be redacted'); + +// Private keys +assert.ok(redactString('-----BEGIN RSA PRIVATE KEY-----\nabc\n-----END RSA PRIVATE KEY-----').includes(REDACTED), + 'RSA private key should be redacted'); +assert.ok(redactString('-----BEGIN PRIVATE KEY-----\ndata\n-----END PRIVATE KEY-----').includes(REDACTED), + 'generic private key should be redacted'); + +// Password fields +assert.ok(redactString('password=mysecretpassword123').includes(REDACTED), + 'password= should be redacted'); +assert.ok(redactString('PASSWORD: "hunter2xyz"').includes(REDACTED), + 'PASSWORD: should be redacted'); + +// Basic auth in URLs (should preserve scheme and @) +var urlResult = redactString('https://user:pass123@github.com/repo'); +assert.ok(urlResult.includes(REDACTED), 'basic auth in URL should be redacted'); +assert.ok(urlResult.startsWith('https://'), 'URL scheme should be preserved'); +assert.ok(urlResult.includes('@github.com'), '@ and host should be preserved'); + +// Safe strings should NOT be redacted +assert.strictEqual(redactString('hello world'), 'hello world'); +assert.strictEqual(redactString('error: something failed'), 'error: something failed'); +assert.strictEqual(redactString('fix the bug in parser'), 'fix the bug in parser'); + +// --- sanitizePayload --- + +// Deep sanitization +var payload = { + summary: 'Fixed auth using ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx5678', + nested: { + path: '/home/user/.ssh/id_rsa', + email: 'admin@internal.corp', + safe: 'this is fine', + }, +}; +var sanitized = sanitizePayload(payload); +assert.ok(sanitized.summary.includes(REDACTED), 'ghp token in summary'); +assert.ok(sanitized.nested.path.includes(REDACTED), 'path in nested'); +assert.ok(sanitized.nested.email.includes(REDACTED), 'email in nested'); +assert.strictEqual(sanitized.nested.safe, 'this is fine'); + +// Null/undefined/number inputs +assert.strictEqual(sanitizePayload(null), null); +assert.strictEqual(sanitizePayload(undefined), undefined); +assert.strictEqual(redactString(null), null); +assert.strictEqual(redactString(123), 123); + +console.log('All sanitize tests passed (34 assertions)');