diff --git a/.github/workflows/artifacts-summary.lock.yml b/.github/workflows/artifacts-summary.lock.yml index c4bfa741c5..1dea325e09 100644 --- a/.github/workflows/artifacts-summary.lock.yml +++ b/.github/workflows/artifacts-summary.lock.yml @@ -1609,8 +1609,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1632,21 +1634,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1656,6 +1691,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/audit-workflows.lock.yml b/.github/workflows/audit-workflows.lock.yml index bae12b3e60..71b41dcc7e 100644 --- a/.github/workflows/audit-workflows.lock.yml +++ b/.github/workflows/audit-workflows.lock.yml @@ -2087,8 +2087,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2110,21 +2112,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2134,6 +2169,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/blog-auditor.lock.yml b/.github/workflows/blog-auditor.lock.yml index bf9c9344f4..909d3ccfda 100644 --- a/.github/workflows/blog-auditor.lock.yml +++ b/.github/workflows/blog-auditor.lock.yml @@ -1991,8 +1991,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2014,21 +2016,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2038,6 +2073,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/brave.lock.yml b/.github/workflows/brave.lock.yml index fd6ba2dc5c..5c435d9141 100644 --- a/.github/workflows/brave.lock.yml +++ b/.github/workflows/brave.lock.yml @@ -158,8 +158,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -181,21 +183,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -205,6 +240,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } @@ -2473,8 +2525,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2496,21 +2550,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2520,6 +2607,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/changeset.lock.yml b/.github/workflows/changeset.lock.yml index a7d2f1f94b..d970a9064c 100644 --- a/.github/workflows/changeset.lock.yml +++ b/.github/workflows/changeset.lock.yml @@ -77,20 +77,68 @@ jobs: text: ${{ steps.compute-text.outputs.text }} steps: - name: Check workflow file timestamps - run: | - WORKFLOW_FILE="${GITHUB_WORKSPACE}/.github/workflows/$(basename "$GITHUB_WORKFLOW" .lock.yml).md" - LOCK_FILE="${GITHUB_WORKSPACE}/.github/workflows/$GITHUB_WORKFLOW" - - if [ -f "$WORKFLOW_FILE" ] && [ -f "$LOCK_FILE" ]; then - if [ "$WORKFLOW_FILE" -nt "$LOCK_FILE" ]; then - echo "🔴🔴🔴 WARNING: Lock file '$LOCK_FILE' is outdated! The workflow file '$WORKFLOW_FILE' has been modified more recently. Run 'gh aw compile' to regenerate the lock file." >&2 - echo "## ⚠️ Workflow Lock File Warning" >> $GITHUB_STEP_SUMMARY - echo "🔴🔴🔴 **WARNING**: Lock file \`$LOCK_FILE\` is outdated!" >> $GITHUB_STEP_SUMMARY - echo "The workflow file \`$WORKFLOW_FILE\` has been modified more recently." >> $GITHUB_STEP_SUMMARY - echo "Run \`gh aw compile\` to regenerate the lock file." >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - fi - fi + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd + with: + script: | + const fs = require("fs"); + const path = require("path"); + async function main() { + const workspace = process.env.GITHUB_WORKSPACE; + const workflow = process.env.GITHUB_WORKFLOW; + if (!workspace) { + core.setFailed("Configuration error: GITHUB_WORKSPACE not available."); + return; + } + if (!workflow) { + core.setFailed("Configuration error: GITHUB_WORKFLOW not available."); + return; + } + const workflowBasename = path.basename(workflow, ".lock.yml"); + const workflowFile = path.join(workspace, ".github", "workflows", `${workflowBasename}.md`); + const lockFile = path.join(workspace, ".github", "workflows", workflow); + core.info(`Checking workflow timestamps:`); + core.info(` Source: ${workflowFile}`); + core.info(` Lock file: ${lockFile}`); + let workflowExists = false; + let lockExists = false; + try { + fs.accessSync(workflowFile, fs.constants.F_OK); + workflowExists = true; + } catch (error) { + core.info(`Source file does not exist: ${workflowFile}`); + } + try { + fs.accessSync(lockFile, fs.constants.F_OK); + lockExists = true; + } catch (error) { + core.info(`Lock file does not exist: ${lockFile}`); + } + if (!workflowExists || !lockExists) { + core.info("Skipping timestamp check - one or both files not found"); + return; + } + const workflowStat = fs.statSync(workflowFile); + const lockStat = fs.statSync(lockFile); + const workflowMtime = workflowStat.mtime.getTime(); + const lockMtime = lockStat.mtime.getTime(); + core.info(` Source modified: ${workflowStat.mtime.toISOString()}`); + core.info(` Lock modified: ${lockStat.mtime.toISOString()}`); + if (workflowMtime > lockMtime) { + const warningMessage = `🔴🔴🔴 WARNING: Lock file '${lockFile}' is outdated! The workflow file '${workflowFile}' has been modified more recently. Run 'gh aw compile' to regenerate the lock file.`; + core.error(warningMessage); + await core.summary + .addRaw("## ⚠️ Workflow Lock File Warning\n\n") + .addRaw(`🔴🔴🔴 **WARNING**: Lock file \`${lockFile}\` is outdated!\n\n`) + .addRaw(`The workflow file \`${workflowFile}\` has been modified more recently.\n\n`) + .addRaw("Run `gh aw compile` to regenerate the lock file.\n\n") + .write(); + } else { + core.info("✅ Lock file is up to date"); + } + } + main().catch(error => { + core.setFailed(error instanceof Error ? error.message : String(error)); + }); - name: Compute current body text id: compute-text uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd @@ -109,8 +157,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -132,21 +182,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -156,6 +239,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } @@ -2102,8 +2202,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2125,21 +2227,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2149,6 +2284,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/ci-doctor.lock.yml b/.github/workflows/ci-doctor.lock.yml index cb8644700e..5dbe841cb0 100644 --- a/.github/workflows/ci-doctor.lock.yml +++ b/.github/workflows/ci-doctor.lock.yml @@ -2057,8 +2057,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2080,21 +2082,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2104,6 +2139,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/cli-version-checker.lock.yml b/.github/workflows/cli-version-checker.lock.yml index 7b140d270d..4ebffc0b46 100644 --- a/.github/workflows/cli-version-checker.lock.yml +++ b/.github/workflows/cli-version-checker.lock.yml @@ -1714,8 +1714,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1737,21 +1739,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1761,6 +1796,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/commit-changes-analyzer.lock.yml b/.github/workflows/commit-changes-analyzer.lock.yml index b6a922b981..5af054cd8c 100644 --- a/.github/workflows/commit-changes-analyzer.lock.yml +++ b/.github/workflows/commit-changes-analyzer.lock.yml @@ -1922,8 +1922,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1945,21 +1947,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1969,6 +2004,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/copilot-agent-analysis.lock.yml b/.github/workflows/copilot-agent-analysis.lock.yml index 5fda167db8..d4822c33a8 100644 --- a/.github/workflows/copilot-agent-analysis.lock.yml +++ b/.github/workflows/copilot-agent-analysis.lock.yml @@ -2220,8 +2220,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2243,21 +2245,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2267,6 +2302,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/copilot-pr-prompt-analysis.lock.yml b/.github/workflows/copilot-pr-prompt-analysis.lock.yml index 3686b1bc9f..364ba461e4 100644 --- a/.github/workflows/copilot-pr-prompt-analysis.lock.yml +++ b/.github/workflows/copilot-pr-prompt-analysis.lock.yml @@ -1955,8 +1955,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1978,21 +1980,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2002,6 +2037,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/copilot-session-insights.lock.yml b/.github/workflows/copilot-session-insights.lock.yml index 135cea8019..876d2e5819 100644 --- a/.github/workflows/copilot-session-insights.lock.yml +++ b/.github/workflows/copilot-session-insights.lock.yml @@ -2586,8 +2586,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2609,21 +2611,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2633,6 +2668,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/craft.lock.yml b/.github/workflows/craft.lock.yml index 435f602a93..b9dc12de4b 100644 --- a/.github/workflows/craft.lock.yml +++ b/.github/workflows/craft.lock.yml @@ -158,8 +158,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -181,21 +183,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -205,6 +240,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } @@ -2625,8 +2677,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2648,21 +2702,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2672,6 +2759,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/daily-doc-updater.lock.yml b/.github/workflows/daily-doc-updater.lock.yml index 2d08ce5e2e..100c79cf65 100644 --- a/.github/workflows/daily-doc-updater.lock.yml +++ b/.github/workflows/daily-doc-updater.lock.yml @@ -1812,8 +1812,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1835,21 +1837,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1859,6 +1894,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/daily-firewall-report.lock.yml b/.github/workflows/daily-firewall-report.lock.yml index fb43e5ea58..f075d2ef76 100644 --- a/.github/workflows/daily-firewall-report.lock.yml +++ b/.github/workflows/daily-firewall-report.lock.yml @@ -1764,8 +1764,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1787,21 +1789,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1811,6 +1846,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/daily-news.lock.yml b/.github/workflows/daily-news.lock.yml index 384a5c3081..f2c02e7473 100644 --- a/.github/workflows/daily-news.lock.yml +++ b/.github/workflows/daily-news.lock.yml @@ -1737,8 +1737,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1760,21 +1762,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1784,6 +1819,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/daily-perf-improver.lock.yml b/.github/workflows/daily-perf-improver.lock.yml index 1aecf59db5..83f51b4817 100644 --- a/.github/workflows/daily-perf-improver.lock.yml +++ b/.github/workflows/daily-perf-improver.lock.yml @@ -2116,8 +2116,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2139,21 +2141,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2163,6 +2198,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/daily-repo-chronicle.lock.yml b/.github/workflows/daily-repo-chronicle.lock.yml index 4abdc25539..74dee32910 100644 --- a/.github/workflows/daily-repo-chronicle.lock.yml +++ b/.github/workflows/daily-repo-chronicle.lock.yml @@ -1604,8 +1604,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1627,21 +1629,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1651,6 +1686,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/daily-test-improver.lock.yml b/.github/workflows/daily-test-improver.lock.yml index 3b32147fa2..e430e48aba 100644 --- a/.github/workflows/daily-test-improver.lock.yml +++ b/.github/workflows/daily-test-improver.lock.yml @@ -2090,8 +2090,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2113,21 +2115,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2137,6 +2172,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/dev-hawk.lock.yml b/.github/workflows/dev-hawk.lock.yml index 6e98934fe5..845489dc71 100644 --- a/.github/workflows/dev-hawk.lock.yml +++ b/.github/workflows/dev-hawk.lock.yml @@ -1960,8 +1960,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1983,21 +1985,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2007,6 +2042,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/dev.lock.yml b/.github/workflows/dev.lock.yml index c19fa658f3..b8cfba658f 100644 --- a/.github/workflows/dev.lock.yml +++ b/.github/workflows/dev.lock.yml @@ -1509,8 +1509,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1532,21 +1534,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1556,6 +1591,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/dictation-prompt.lock.yml b/.github/workflows/dictation-prompt.lock.yml index d4f7ed96a8..08c59270fc 100644 --- a/.github/workflows/dictation-prompt.lock.yml +++ b/.github/workflows/dictation-prompt.lock.yml @@ -1620,8 +1620,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1643,21 +1645,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1667,6 +1702,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/duplicate-code-detector.lock.yml b/.github/workflows/duplicate-code-detector.lock.yml index 5bcb0da453..ff3b8cfa2e 100644 --- a/.github/workflows/duplicate-code-detector.lock.yml +++ b/.github/workflows/duplicate-code-detector.lock.yml @@ -1684,8 +1684,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1707,21 +1709,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1731,6 +1766,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/example-workflow-analyzer.lock.yml b/.github/workflows/example-workflow-analyzer.lock.yml index 091d334ef9..679997d4e5 100644 --- a/.github/workflows/example-workflow-analyzer.lock.yml +++ b/.github/workflows/example-workflow-analyzer.lock.yml @@ -1655,8 +1655,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1678,21 +1680,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1702,6 +1737,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/github-mcp-tools-report.lock.yml b/.github/workflows/github-mcp-tools-report.lock.yml index 11723b0fde..c79ba3a53b 100644 --- a/.github/workflows/github-mcp-tools-report.lock.yml +++ b/.github/workflows/github-mcp-tools-report.lock.yml @@ -2221,8 +2221,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2244,21 +2246,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2268,6 +2303,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/go-logger.lock.yml b/.github/workflows/go-logger.lock.yml index 91ed0f3a9f..3300057ab3 100644 --- a/.github/workflows/go-logger.lock.yml +++ b/.github/workflows/go-logger.lock.yml @@ -1926,8 +1926,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1949,21 +1951,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1973,6 +2008,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/go-pattern-detector.lock.yml b/.github/workflows/go-pattern-detector.lock.yml index 39e2d7cebf..d8b6c03a9b 100644 --- a/.github/workflows/go-pattern-detector.lock.yml +++ b/.github/workflows/go-pattern-detector.lock.yml @@ -1753,8 +1753,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1776,21 +1778,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1800,6 +1835,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/instructions-janitor.lock.yml b/.github/workflows/instructions-janitor.lock.yml index 15400b4cb3..b97772d4dc 100644 --- a/.github/workflows/instructions-janitor.lock.yml +++ b/.github/workflows/instructions-janitor.lock.yml @@ -1808,8 +1808,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1831,21 +1833,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1855,6 +1890,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/issue-classifier.lock.yml b/.github/workflows/issue-classifier.lock.yml index 97402875d1..9941ee68f3 100644 --- a/.github/workflows/issue-classifier.lock.yml +++ b/.github/workflows/issue-classifier.lock.yml @@ -150,8 +150,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -173,21 +175,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -197,6 +232,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } @@ -2162,8 +2214,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2185,21 +2239,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2209,6 +2296,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/lockfile-stats.lock.yml b/.github/workflows/lockfile-stats.lock.yml index 294db11962..cec2b9fa23 100644 --- a/.github/workflows/lockfile-stats.lock.yml +++ b/.github/workflows/lockfile-stats.lock.yml @@ -2059,8 +2059,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2082,21 +2084,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2106,6 +2141,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/mcp-inspector.lock.yml b/.github/workflows/mcp-inspector.lock.yml index 7d268c6155..b7d32e4061 100644 --- a/.github/workflows/mcp-inspector.lock.yml +++ b/.github/workflows/mcp-inspector.lock.yml @@ -2181,8 +2181,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2204,21 +2206,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2228,6 +2263,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/mergefest.lock.yml b/.github/workflows/mergefest.lock.yml index 6989db5878..f656436888 100644 --- a/.github/workflows/mergefest.lock.yml +++ b/.github/workflows/mergefest.lock.yml @@ -2166,8 +2166,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2189,21 +2191,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2213,6 +2248,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/notion-issue-summary.lock.yml b/.github/workflows/notion-issue-summary.lock.yml index 0317fd4b28..cd5958e35a 100644 --- a/.github/workflows/notion-issue-summary.lock.yml +++ b/.github/workflows/notion-issue-summary.lock.yml @@ -1468,8 +1468,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1491,21 +1493,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1515,6 +1550,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/pdf-summary.lock.yml b/.github/workflows/pdf-summary.lock.yml index 638675cff0..42da6f929d 100644 --- a/.github/workflows/pdf-summary.lock.yml +++ b/.github/workflows/pdf-summary.lock.yml @@ -180,8 +180,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -203,21 +205,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -227,6 +262,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } @@ -2578,8 +2630,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2601,21 +2655,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2625,6 +2712,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/plan.lock.yml b/.github/workflows/plan.lock.yml index 4faa69a622..f963b52190 100644 --- a/.github/workflows/plan.lock.yml +++ b/.github/workflows/plan.lock.yml @@ -159,8 +159,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -182,21 +184,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -206,6 +241,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } @@ -2074,8 +2126,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2097,21 +2151,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2121,6 +2208,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/poem-bot.lock.yml b/.github/workflows/poem-bot.lock.yml index adbb0439c7..84cd88e6e9 100644 --- a/.github/workflows/poem-bot.lock.yml +++ b/.github/workflows/poem-bot.lock.yml @@ -193,8 +193,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -216,21 +218,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -240,6 +275,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } @@ -2819,8 +2871,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2842,21 +2896,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2866,6 +2953,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/prompt-clustering-analysis.lock.yml b/.github/workflows/prompt-clustering-analysis.lock.yml index 9394d86afc..9ef8d91799 100644 --- a/.github/workflows/prompt-clustering-analysis.lock.yml +++ b/.github/workflows/prompt-clustering-analysis.lock.yml @@ -2394,8 +2394,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2417,21 +2419,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2441,6 +2476,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/python-data-charts.lock.yml b/.github/workflows/python-data-charts.lock.yml index b7fa48322d..16832dbc03 100644 --- a/.github/workflows/python-data-charts.lock.yml +++ b/.github/workflows/python-data-charts.lock.yml @@ -1908,8 +1908,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1931,21 +1933,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1955,6 +1990,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/q.lock.yml b/.github/workflows/q.lock.yml index 53d16c5d66..57dd6380fb 100644 --- a/.github/workflows/q.lock.yml +++ b/.github/workflows/q.lock.yml @@ -202,8 +202,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -225,21 +227,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -249,6 +284,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } @@ -2886,8 +2938,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2909,21 +2963,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2933,6 +3020,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/repo-tree-map.lock.yml b/.github/workflows/repo-tree-map.lock.yml index 4d0b6f6483..ac2dbfe70e 100644 --- a/.github/workflows/repo-tree-map.lock.yml +++ b/.github/workflows/repo-tree-map.lock.yml @@ -1633,8 +1633,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1656,21 +1658,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1680,6 +1715,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/research.lock.yml b/.github/workflows/research.lock.yml index 84b6b891c3..71210b78cd 100644 --- a/.github/workflows/research.lock.yml +++ b/.github/workflows/research.lock.yml @@ -1574,8 +1574,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1597,21 +1599,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1621,6 +1656,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/safe-output-health.lock.yml b/.github/workflows/safe-output-health.lock.yml index 97aca66131..d729be8938 100644 --- a/.github/workflows/safe-output-health.lock.yml +++ b/.github/workflows/safe-output-health.lock.yml @@ -2191,8 +2191,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2214,21 +2216,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2238,6 +2273,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/schema-consistency-checker.lock.yml b/.github/workflows/schema-consistency-checker.lock.yml index bb69366caa..17a398a324 100644 --- a/.github/workflows/schema-consistency-checker.lock.yml +++ b/.github/workflows/schema-consistency-checker.lock.yml @@ -2061,8 +2061,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2084,21 +2086,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2108,6 +2143,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/scout.lock.yml b/.github/workflows/scout.lock.yml index 71e1ce9a74..53c7874374 100644 --- a/.github/workflows/scout.lock.yml +++ b/.github/workflows/scout.lock.yml @@ -205,8 +205,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -228,21 +230,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -252,6 +287,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } @@ -2952,8 +3004,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2975,21 +3029,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2999,6 +3086,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/security-fix-pr.lock.yml b/.github/workflows/security-fix-pr.lock.yml index 0f644f9c90..7abf7fc7eb 100644 --- a/.github/workflows/security-fix-pr.lock.yml +++ b/.github/workflows/security-fix-pr.lock.yml @@ -1756,8 +1756,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1779,21 +1781,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1803,6 +1838,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/semantic-function-refactor.lock.yml b/.github/workflows/semantic-function-refactor.lock.yml index 5f073d2e2f..14a20cb4a9 100644 --- a/.github/workflows/semantic-function-refactor.lock.yml +++ b/.github/workflows/semantic-function-refactor.lock.yml @@ -2103,8 +2103,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2126,21 +2128,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2150,6 +2185,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/smoke-claude.lock.yml b/.github/workflows/smoke-claude.lock.yml index 195b0add49..a348053cc9 100644 --- a/.github/workflows/smoke-claude.lock.yml +++ b/.github/workflows/smoke-claude.lock.yml @@ -1604,8 +1604,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1627,21 +1629,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1651,6 +1686,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/smoke-codex.lock.yml b/.github/workflows/smoke-codex.lock.yml index 74863fe5a3..abd41a2696 100644 --- a/.github/workflows/smoke-codex.lock.yml +++ b/.github/workflows/smoke-codex.lock.yml @@ -1425,8 +1425,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1448,21 +1450,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1472,6 +1507,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/smoke-copilot.firewall.lock.yml b/.github/workflows/smoke-copilot.firewall.lock.yml index f751423254..4e319ffcd8 100644 --- a/.github/workflows/smoke-copilot.firewall.lock.yml +++ b/.github/workflows/smoke-copilot.firewall.lock.yml @@ -1497,8 +1497,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1520,21 +1522,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1544,6 +1579,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/smoke-copilot.lock.yml b/.github/workflows/smoke-copilot.lock.yml index 0f404bd270..07bdf24716 100644 --- a/.github/workflows/smoke-copilot.lock.yml +++ b/.github/workflows/smoke-copilot.lock.yml @@ -1497,8 +1497,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1520,21 +1522,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1544,6 +1579,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/smoke-detector.lock.yml b/.github/workflows/smoke-detector.lock.yml index 2035ebbfd3..6be0322f08 100644 --- a/.github/workflows/smoke-detector.lock.yml +++ b/.github/workflows/smoke-detector.lock.yml @@ -2632,8 +2632,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2655,21 +2657,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2679,6 +2714,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/smoke-opencode.lock.yml b/.github/workflows/smoke-opencode.lock.yml index f57c141ee5..ec9dba7ba5 100644 --- a/.github/workflows/smoke-opencode.lock.yml +++ b/.github/workflows/smoke-opencode.lock.yml @@ -1461,8 +1461,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1484,21 +1486,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1508,6 +1543,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/technical-doc-writer.lock.yml b/.github/workflows/technical-doc-writer.lock.yml index 4478e0c886..0b244e81d7 100644 --- a/.github/workflows/technical-doc-writer.lock.yml +++ b/.github/workflows/technical-doc-writer.lock.yml @@ -2353,8 +2353,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2376,21 +2378,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2400,6 +2435,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/test-ollama-threat-detection.lock.yml b/.github/workflows/test-ollama-threat-detection.lock.yml index 9dc95a9271..01dc66ceb5 100644 --- a/.github/workflows/test-ollama-threat-detection.lock.yml +++ b/.github/workflows/test-ollama-threat-detection.lock.yml @@ -1440,8 +1440,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1463,21 +1465,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1487,6 +1522,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/tidy.lock.yml b/.github/workflows/tidy.lock.yml index 8441d0b13c..6c2b108682 100644 --- a/.github/workflows/tidy.lock.yml +++ b/.github/workflows/tidy.lock.yml @@ -1974,8 +1974,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1997,21 +1999,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2021,6 +2056,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/unbloat-docs.lock.yml b/.github/workflows/unbloat-docs.lock.yml index 6c48218eb1..dc999ecdf2 100644 --- a/.github/workflows/unbloat-docs.lock.yml +++ b/.github/workflows/unbloat-docs.lock.yml @@ -2721,8 +2721,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2744,21 +2746,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2768,6 +2803,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/video-analyzer.lock.yml b/.github/workflows/video-analyzer.lock.yml index f0e171f719..7e72cf37d9 100644 --- a/.github/workflows/video-analyzer.lock.yml +++ b/.github/workflows/video-analyzer.lock.yml @@ -1731,8 +1731,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1754,21 +1756,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1778,6 +1813,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/weekly-issue-summary.lock.yml b/.github/workflows/weekly-issue-summary.lock.yml index f3ee83a64c..32dca5bee8 100644 --- a/.github/workflows/weekly-issue-summary.lock.yml +++ b/.github/workflows/weekly-issue-summary.lock.yml @@ -1506,8 +1506,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -1529,21 +1531,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -1553,6 +1588,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/.github/workflows/zizmor-security-analyzer.lock.yml b/.github/workflows/zizmor-security-analyzer.lock.yml index 97fc0d72dd..99fbc3df81 100644 --- a/.github/workflows/zizmor-security-analyzer.lock.yml +++ b/.github/workflows/zizmor-security-analyzer.lock.yml @@ -2033,8 +2033,10 @@ jobs: .filter(d => d) : defaultAllowedDomains; let sanitized = content; + sanitized = neutralizeCommands(sanitized); sanitized = neutralizeMentions(sanitized); sanitized = removeXmlComments(sanitized); + sanitized = convertXmlTags(sanitized); sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); sanitized = sanitizeUrlProtocols(sanitized); @@ -2056,21 +2058,54 @@ jobs: sanitized = neutralizeBotTriggers(sanitized); return sanitized.trim(); function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); const isAllowed = allowedDomains.some(allowedDomain => { const normalizedAllowed = allowedDomain.toLowerCase(); return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; + } + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; + } else { + result += sanitizeUrlDomains(urlParts[i]); + } + } + return result; }); return s; } function sanitizeUrlProtocols(s) { - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { + if (protocol.toLowerCase() === "https") { + return match; + } + if (match.includes("::")) { + return match; + } + if (match.includes("://")) { + return "(redacted)"; + } + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + return match; }); } + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } function neutralizeMentions(s) { return s.replace( /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, @@ -2080,6 +2115,23 @@ jobs: function removeXmlComments(s) { return s.replace(//g, "").replace(//g, ""); } + function convertXmlTags(s) { + const allowedTags = ["details", "summary", "code", "em", "b"]; + s = s.replace(//g, (match, content) => { + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + return `(![CDATA[${convertedContent}]])`; + }); + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; + } + } + return `(${tagContent})`; + }); + } function neutralizeBotTriggers(s) { return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``); } diff --git a/pkg/workflow/bundler.go b/pkg/workflow/bundler.go index 86f3d0996a..402f84fd3a 100644 --- a/pkg/workflow/bundler.go +++ b/pkg/workflow/bundler.go @@ -8,7 +8,7 @@ import ( ) // BundleJavaScriptFromSources bundles JavaScript from in-memory sources -// sources is a map where keys are file paths (e.g., "lib/sanitize.cjs") and values are the content +// sources is a map where keys are file paths (e.g., "sanitize.cjs") and values are the content // mainContent is the main JavaScript content that may contain require() calls // basePath is the base directory path for resolving relative imports (e.g., "js") func BundleJavaScriptFromSources(mainContent string, sources map[string]string, basePath string) (string, error) { diff --git a/pkg/workflow/bundler_integration_test.go b/pkg/workflow/bundler_integration_test.go index 3a91c514e1..da473e2812 100644 --- a/pkg/workflow/bundler_integration_test.go +++ b/pkg/workflow/bundler_integration_test.go @@ -27,7 +27,7 @@ func TestBundlerIntegration(t *testing.T) { } // Should not contain the require statement - if strings.Contains(script, `require("./lib/sanitize.cjs")`) { + if strings.Contains(script, `require("./sanitize.cjs")`) { t.Error("bundled script still contains require statement") } @@ -56,7 +56,7 @@ func TestBundlerIntegration(t *testing.T) { } // Should not contain the require statement - if strings.Contains(script, `require("./lib/sanitize.cjs")`) { + if strings.Contains(script, `require("./sanitize.cjs")`) { t.Error("bundled script still contains require statement") } @@ -85,7 +85,7 @@ func TestBundlerIntegration(t *testing.T) { } // Should not contain the require statement - if strings.Contains(script, `require("./lib/sanitize.cjs")`) { + if strings.Contains(script, `require("./sanitize.cjs")`) { t.Error("bundled script still contains require statement") } @@ -316,7 +316,7 @@ func TestSourceFilesAreSmaller(t *testing.T) { } // Source should contain require - if !strings.Contains(tt.source, `require("./lib/sanitize.cjs")`) { + if !strings.Contains(tt.source, `require("./sanitize.cjs")`) { t.Errorf("%s: source should contain require statement", tt.name) } }) @@ -327,20 +327,20 @@ func TestSourceFilesAreSmaller(t *testing.T) { func TestGetJavaScriptSources(t *testing.T) { sources := GetJavaScriptSources() - // Should contain lib/sanitize.cjs - sanitize, ok := sources["lib/sanitize.cjs"] + // Should contain sanitize.cjs + sanitize, ok := sources["sanitize.cjs"] if !ok { - t.Fatal("GetJavaScriptSources does not contain lib/sanitize.cjs") + t.Fatal("GetJavaScriptSources does not contain sanitize.cjs") } // Should not be empty if sanitize == "" { - t.Error("lib/sanitize.cjs source is empty") + t.Error("sanitize.cjs source is empty") } // Should contain sanitizeContent function if !strings.Contains(sanitize, "function sanitizeContent") { - t.Error("lib/sanitize.cjs does not contain sanitizeContent function") + t.Error("sanitize.cjs does not contain sanitizeContent function") } // Should contain helper functions @@ -354,7 +354,7 @@ func TestGetJavaScriptSources(t *testing.T) { for _, helper := range helpers { if !strings.Contains(sanitize, helper) { - t.Errorf("lib/sanitize.cjs does not contain %s", helper) + t.Errorf("sanitize.cjs does not contain %s", helper) } } } diff --git a/pkg/workflow/bundler_test.go b/pkg/workflow/bundler_test.go index 6ea760f666..46697a055c 100644 --- a/pkg/workflow/bundler_test.go +++ b/pkg/workflow/bundler_test.go @@ -240,7 +240,7 @@ module.exports = { sanitize }; ` // Create main content that requires the helper from lib - mainContent := `const { sanitize } = require('./lib/sanitize.cjs'); + mainContent := `const { sanitize } = require('./sanitize.cjs'); async function main() { console.log(sanitize(" hello ")); @@ -251,7 +251,7 @@ main(); // Create sources map with nested path sources := map[string]string{ - "lib/sanitize.cjs": helperContent, + "sanitize.cjs": helperContent, } // Bundle the main content @@ -266,7 +266,7 @@ main(); } // Check that the require statement is replaced - if strings.Contains(bundled, "require('./lib/sanitize.cjs')") { + if strings.Contains(bundled, "require('./sanitize.cjs')") { t.Error("Bundled output still contains require statement") } } diff --git a/pkg/workflow/js.go b/pkg/workflow/js.go index ff8c0c3778..4450dbb198 100644 --- a/pkg/workflow/js.go +++ b/pkg/workflow/js.go @@ -91,18 +91,18 @@ var redactSecretsScript string //go:embed js/notify_comment_error.cjs var notifyCommentErrorScript string -//go:embed js/lib/sanitize.cjs +//go:embed js/sanitize.cjs var sanitizeLibScript string -// Source scripts that may contain local requires - embedded from src directory +// Source scripts that may contain local requires // -//go:embed js/src/collect_ndjson_output.cjs +//go:embed js/collect_ndjson_output.cjs var collectJSONLOutputScriptSource string -//go:embed js/src/compute_text.cjs +//go:embed js/compute_text.cjs var computeTextScriptSource string -//go:embed js/src/sanitize_output.cjs +//go:embed js/sanitize_output.cjs var sanitizeOutputScriptSource string // Bundled scripts (lazily bundled on-demand and cached) @@ -169,7 +169,7 @@ func getSanitizeOutputScript() string { // The keys are the relative paths from the js directory func GetJavaScriptSources() map[string]string { return map[string]string{ - "lib/sanitize.cjs": sanitizeLibScript, + "sanitize.cjs": sanitizeLibScript, } } diff --git a/pkg/workflow/js/check_workflow_timestamp.cjs b/pkg/workflow/js/check_workflow_timestamp.cjs index bb8fe998c0..07a77f3797 100644 --- a/pkg/workflow/js/check_workflow_timestamp.cjs +++ b/pkg/workflow/js/check_workflow_timestamp.cjs @@ -69,7 +69,7 @@ async function main() { // Check if workflow file is newer than lock file if (workflowMtime > lockMtime) { const warningMessage = `🔴🔴🔴 WARNING: Lock file '${lockFile}' is outdated! The workflow file '${workflowFile}' has been modified more recently. Run 'gh aw compile' to regenerate the lock file.`; - + core.error(warningMessage); // Add summary to GitHub Step Summary diff --git a/pkg/workflow/js/src/collect_ndjson_output.cjs b/pkg/workflow/js/collect_ndjson_output.cjs similarity index 99% rename from pkg/workflow/js/src/collect_ndjson_output.cjs rename to pkg/workflow/js/collect_ndjson_output.cjs index 485403f7d3..994039ddf5 100644 --- a/pkg/workflow/js/src/collect_ndjson_output.cjs +++ b/pkg/workflow/js/collect_ndjson_output.cjs @@ -3,7 +3,7 @@ async function main() { const fs = require("fs"); - const { sanitizeContent } = require("./lib/sanitize.cjs"); + const { sanitizeContent } = require("./sanitize.cjs"); const maxBodyLength = 65000; function getMaxAllowedForType(itemType, config) { const itemConfig = config?.[itemType]; diff --git a/pkg/workflow/js/src/compute_text.cjs b/pkg/workflow/js/compute_text.cjs similarity index 98% rename from pkg/workflow/js/src/compute_text.cjs rename to pkg/workflow/js/compute_text.cjs index 1a1d0b4f58..bf702c0b26 100644 --- a/pkg/workflow/js/src/compute_text.cjs +++ b/pkg/workflow/js/compute_text.cjs @@ -6,7 +6,7 @@ * @param {string} content - The content to sanitize * @returns {string} The sanitized content */ -const { sanitizeContent } = require("./lib/sanitize.cjs"); +const { sanitizeContent } = require("./sanitize.cjs"); async function main() { let text = ""; diff --git a/pkg/workflow/js/lib/sanitize.cjs b/pkg/workflow/js/sanitize.cjs similarity index 50% rename from pkg/workflow/js/lib/sanitize.cjs rename to pkg/workflow/js/sanitize.cjs index 224a8ff61b..00fb78ac0d 100644 --- a/pkg/workflow/js/lib/sanitize.cjs +++ b/pkg/workflow/js/sanitize.cjs @@ -29,12 +29,18 @@ function sanitizeContent(content, maxLength) { let sanitized = content; + // Neutralize commands at the start of text (e.g., /bot-name) + sanitized = neutralizeCommands(sanitized); + // Neutralize @mentions to prevent unintended notifications sanitized = neutralizeMentions(sanitized); // Remove XML comments first sanitized = removeXmlComments(sanitized); + // Convert XML tags to parentheses format to prevent injection + sanitized = convertXmlTags(sanitized); + // Remove ANSI escape sequences sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); @@ -79,9 +85,11 @@ function sanitizeContent(content, maxLength) { * @returns {string} The string with unknown domains redacted */ function sanitizeUrlDomains(s) { - s = s.replace(/\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, domain) => { + // First pass: match all HTTPS URLs and process them + // We need to handle URLs that might contain other URLs in query parameters + s = s.replace(/\bhttps:\/\/([^\s\])}'"<>&\x00-\x1f,;]+)/gi, (match, rest) => { // Extract the hostname part (before first slash, colon, or other delimiter) - const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + const hostname = rest.split(/[\/:\?#]/)[0].toLowerCase(); // Check if this domain or any parent domain is in the allowlist const isAllowed = allowedDomains.some(allowedDomain => { @@ -89,7 +97,26 @@ function sanitizeContent(content, maxLength) { return hostname === normalizedAllowed || hostname.endsWith("." + normalizedAllowed); }); - return isAllowed ? match : "(redacted)"; + if (isAllowed) { + return match; // Keep allowed URLs as-is + } + + // For disallowed URLs, check if there are any allowed URLs in the query/fragment + // and preserve those while redacting the main URL + const urlParts = match.split(/([?&#])/); + let result = "(redacted)"; // Redact the main domain + + // Process query/fragment parts to preserve any allowed URLs within them + for (let i = 1; i < urlParts.length; i++) { + if (urlParts[i].match(/^[?&#]$/)) { + result += urlParts[i]; // Keep separators + } else { + // Recursively process this part to preserve any allowed URLs + result += sanitizeUrlDomains(urlParts[i]); + } + } + + return result; }); return s; @@ -101,14 +128,57 @@ function sanitizeContent(content, maxLength) { * @returns {string} The string with non-https protocols redacted */ function sanitizeUrlProtocols(s) { - // Match both protocol:// and protocol: patterns - // This covers URLs like https://example.com, javascript:alert(), mailto:user@domain.com, etc. - return s.replace(/\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, (match, protocol) => { + // Match protocol patterns but avoid command-line flags, file paths, and namespaces + // Protocol patterns typically have :// or are well-known schemes followed by : + // Use negative lookbehind to exclude patterns preceded by - (command flags) + // Match only patterns that look like actual protocols + return s.replace(/(?&\x00-\x1f]+/g, (match, protocol) => { // Allow https (case insensitive), redact everything else - return protocol.toLowerCase() === "https" ? match : "(redacted)"; + // But only if it looks like a URL (has :// or is followed by non-colon content) + if (protocol.toLowerCase() === "https") { + return match; + } + + // Allow if it looks like a file path or namespace (::) + if (match.includes("::")) { + return match; + } + + // Redact if it has :// (definite protocol) + if (match.includes("://")) { + return "(redacted)"; + } + + // Redact well-known dangerous protocols like javascript:, data:, etc. + const dangerousProtocols = ["javascript", "data", "vbscript", "file", "about", "mailto", "tel", "ssh", "ftp"]; + if (dangerousProtocols.includes(protocol.toLowerCase())) { + return "(redacted)"; + } + + // Otherwise preserve (could be file:path, namespace:thing, etc.) + return match; }); } + /** + * Neutralizes commands at the start of text by wrapping them in backticks + * @param {string} s - The string to process + * @returns {string} The string with neutralized commands + */ + function neutralizeCommands(s) { + const commandName = process.env.GH_AW_COMMAND; + if (!commandName) { + return s; + } + + // Escape special regex characters in command name + const escapedCommand = commandName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + + // Neutralize /command at the start of text (with optional leading whitespace) + // Only match at the start of the string or after leading whitespace + return s.replace(new RegExp(`^(\\s*)/(${escapedCommand})\\b`, "i"), "$1`/$2`"); + } + /** * Neutralizes @mentions by wrapping them in backticks * @param {string} s - The string to process @@ -132,6 +202,40 @@ function sanitizeContent(content, maxLength) { return s.replace(//g, "").replace(//g, ""); } + /** + * Converts XML/HTML tags to parentheses format to prevent injection + * @param {string} s - The string to process + * @returns {string} The string with XML tags converted to parentheses + */ + function convertXmlTags(s) { + // Allow safe HTML tags: details, summary, code, em, b + const allowedTags = ["details", "summary", "code", "em", "b"]; + + // First, process CDATA sections specially - convert tags inside them and the CDATA markers + s = s.replace(//g, (match, content) => { + // Convert tags inside CDATA content + const convertedContent = content.replace(/<(\/?[A-Za-z][A-Za-z0-9]*(?:[^>]*?))>/g, "($1)"); + // Return with CDATA markers also converted to parentheses + return `(![CDATA[${convertedContent}]])`; + }); + + // Convert opening tags: or to (tag) or (tag attr="value") + // Convert closing tags: to (/tag) + // Convert self-closing tags: or to (tag/) or (tag /) + // But preserve allowed safe tags + return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => { + // Extract tag name from the content (handle closing tags and attributes) + const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/); + if (tagNameMatch) { + const tagName = tagNameMatch[1].toLowerCase(); + if (allowedTags.includes(tagName)) { + return match; // Preserve allowed tags + } + } + return `(${tagContent})`; // Convert other tags to parentheses + }); + } + /** * Neutralizes bot trigger phrases by wrapping them in backticks * @param {string} s - The string to process diff --git a/pkg/workflow/js/src/sanitize_output.cjs b/pkg/workflow/js/sanitize_output.cjs similarity index 94% rename from pkg/workflow/js/src/sanitize_output.cjs rename to pkg/workflow/js/sanitize_output.cjs index bedaaea03c..665d5c4e70 100644 --- a/pkg/workflow/js/src/sanitize_output.cjs +++ b/pkg/workflow/js/sanitize_output.cjs @@ -6,7 +6,7 @@ * @param {string} content - The content to sanitize * @returns {string} The sanitized content */ -const { sanitizeContent } = require("./lib/sanitize.cjs"); +const { sanitizeContent } = require("./sanitize.cjs"); async function main() { const fs = require("fs"); diff --git a/pkg/workflow/js/sanitize_output.test.cjs b/pkg/workflow/js/sanitize_output.test.cjs index 3fd58630b2..154c734ba4 100644 --- a/pkg/workflow/js/sanitize_output.test.cjs +++ b/pkg/workflow/js/sanitize_output.test.cjs @@ -459,7 +459,8 @@ Special chars: \x00\x1F & "quotes" 'apostrophes' expect(result).toContain("(xml attr=\"value & 'quotes'\")"); expect(result).toContain('(![CDATA[(script)alert("xss")(/script)]])'); - expect(result).toContain("(!-- comment with \"quotes\" & 'apostrophes' --)"); + // XML comments are removed for security (to prevent content hiding) + expect(result).not.toContain("comment with"); expect(result).toContain("(/xml)"); }); diff --git a/pkg/workflow/js/upload_assets.test.cjs b/pkg/workflow/js/upload_assets.test.cjs index cb3ef4ebc1..643b02a2ae 100644 --- a/pkg/workflow/js/upload_assets.test.cjs +++ b/pkg/workflow/js/upload_assets.test.cjs @@ -71,6 +71,12 @@ describe("upload_assets.cjs", () => { describe("git commit command - vulnerability fix", () => { it("should not wrap commit message in extra quotes to prevent command injection", async () => { + // Clean up any leftover test.png from previous runs + const targetFile = "test.png"; + if (fs.existsSync(targetFile)) { + fs.unlinkSync(targetFile); + } + // Set up environment process.env.GH_AW_ASSETS_BRANCH = "assets/test-workflow"; process.env.GH_AW_SAFE_OUTPUTS_STAGED = "false"; @@ -162,6 +168,9 @@ describe("upload_assets.cjs", () => { if (fs.existsSync(assetPath)) { fs.unlinkSync(assetPath); } + if (fs.existsSync(targetFile)) { + fs.unlinkSync(targetFile); + } }); }); diff --git a/pkg/workflow/mcp_servers.go b/pkg/workflow/mcp_servers.go index 63e9c0750d..899f6ffd14 100644 --- a/pkg/workflow/mcp_servers.go +++ b/pkg/workflow/mcp_servers.go @@ -426,7 +426,7 @@ func replaceExpressionsInPlaywrightArgs(args []string, expressions map[string]st // Create a temporary extractor with the same mappings combined := strings.Join(args, "\n") extractor := NewExpressionExtractor() - extractor.ExtractExpressions(combined) + _, _ = extractor.ExtractExpressions(combined) // Replace expressions in the combined string replaced := extractor.ReplaceExpressionsWithEnvVars(combined)