From af861a7ff7a7b23582a60079db2af2552ba9656f Mon Sep 17 00:00:00 2001 From: Klappy Date: Sat, 18 Apr 2026 01:23:07 +0000 Subject: [PATCH 1/3] =?UTF-8?q?feat(telemetry=5Fpolicy):=20canary=20refact?= =?UTF-8?q?or=20=E2=80=94=20headers=20from=20canon=20at=20runtime?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the hardcoded self_report_headers dictionary with a runtime parse of canon/constraints/telemetry-governance.md #### Self-Report Fields table. Response envelope now declares governance_source: 'canon' when the fetch succeeds and the table parses, 'minimal' when it falls back to the shipped baseline. This is the canary refactor for the governance anti-pattern sweep (docs/oddkit/audit/governance-anti-pattern-sweep-2026-04-17). It conforms to the three-tier resolution contract drafted in klappy/klappy.dev#101 (canon/constraints/core-governance-baseline), exercising tiers 1 (live canon) and 3 (minimal baseline in code). Tier 2 (bundled baseline directory with manifest) and the build-time schema check arrive in follow-up work once the contract graduates from status:draft to status:active. Implementation: - New helper parseSelfReportHeadersTable in index.ts parses the '### Self-Report Fields' table section from the canon doc. - Parser is permissive (whitespace + backticks) and fails closed to null so the caller falls back to the minimal baseline rather than hiding the degradation. - Minimal baseline remains the 8 stable headers; canon controls the descriptions once live. 
Verified: - npm run typecheck: clean - Parser unit-tested against live canon content: 8/8 headers parsed - Parser degradation paths (no section, empty table) return null Refactor discipline this commit follows (from PR #100 post-mortem): - Single feature PR, single site touched - Public contract (MCP tool response) changes are additive (governance_source field added; self_report_headers keys unchanged) - Preview smoke against live prod will verify canon-tier response before promotion --- workers/src/index.ts | 98 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 83 insertions(+), 15 deletions(-) diff --git a/workers/src/index.ts b/workers/src/index.ts index 1ac23a6..68b9a4e 100644 --- a/workers/src/index.ts +++ b/workers/src/index.ts @@ -28,6 +28,47 @@ export type { Env }; const BUILD_VERSION = pkg.version; +// ────────────────────────────────────────────────────────────────────────────── +// Canon-table parsing helper (local copy — mirrors orchestrate.ts parseTableRow). +// +// parseSelfReportHeadersTable extracts the self-report header contract from +// canon/constraints/telemetry-governance.md. The table format is governed by +// the canon doc itself; this parser is deliberately permissive (whitespace, +// backticks around header name) and fails closed to null so the caller can +// fall back to the minimal baseline without hiding the degradation. +// ────────────────────────────────────────────────────────────────────────────── + +function parseTableRow(row: string): string[] { + const parts = row.split("|"); + if (parts.length > 0 && parts[0].trim() === "") parts.shift(); + if (parts.length > 0 && parts[parts.length - 1].trim() === "") parts.pop(); + return parts.map((c) => c.trim()); +} + +function parseSelfReportHeadersTable(markdown: string): Record | null { + // Target section: "### Self-Report Fields" — grab the table that follows. + // Stop at the next `###` or `##` heading, whichever comes first. 
+ const section = markdown.match( + /###\s+Self-Report Fields[^\n]*\n([\s\S]*?)(?=\n###|\n##|$)/, + ); + if (!section) return null; + + const headers: Record = {}; + for (const raw of section[1].split("\n")) { + if (!raw.includes("|")) continue; + const cols = parseTableRow(raw); + // Expected layout: | Field | Header | Source | + // Skip header row, separator row, and any malformed row. + if (cols.length < 2) continue; + const fieldDescription = cols[0]; + const headerName = cols[1].replace(/`/g, "").trim(); + if (!headerName.startsWith("x-oddkit-")) continue; // skip header/separator + headers[headerName] = fieldDescription; + } + + return Object.keys(headers).length > 0 ? headers : null; +} + // ────────────────────────────────────────────────────────────────────────────── // Consumer identification nudge // @@ -451,7 +492,7 @@ Time filter example: WHERE timestamp > NOW() - INTERVAL '30' DAY`, server.tool( "telemetry_policy", - "Return oddkit telemetry and sharing policy guidance. What is tracked, what is excluded, and why. Fetched from canonical governance document at runtime.", + "Return oddkit telemetry and sharing policy guidance. What is tracked, what is excluded, and why. Fetched from canonical governance document at runtime. Response envelope declares governance_source (canon|baseline|minimal) per canon/constraints/core-governance-baseline.", {}, { readOnlyHint: true, @@ -460,15 +501,50 @@ Time filter example: WHERE timestamp > NOW() - INTERVAL '30' DAY`, openWorldHint: true, }, async () => { - // Fetch the governance doc from canon + // Governance resolution per canon/constraints/core-governance-baseline: + // 1. Live canon fetch (preferred) → governance_source: "canon" + // 3. Minimal baseline (shipped in code) → governance_source: "minimal" + // + // This canary refactor implements tiers 1 and 3 only. The bundled + // baseline tier (2) and the build-time schema check arrive in follow-up + // work; the manifest + baseline directory are not yet in place. 
const fetcher = new ZipBaselineFetcher(env); - let policyContent = "Governance document not found. See https://github.com/klappy/klappy.dev/blob/main/canon/constraints/telemetry-governance.md"; + let policyContent: string | null = null; + let selfReportHeaders: Record | null = null; + let governanceSource: "canon" | "baseline" | "minimal" = "minimal"; try { const content = await fetcher.getFile("canon/constraints/telemetry-governance.md"); - if (content) policyContent = content; + if (content) { + policyContent = content; + const parsed = parseSelfReportHeadersTable(content); + if (parsed && Object.keys(parsed).length > 0) { + selfReportHeaders = parsed; + governanceSource = "canon"; + } + } } catch { - // Fall through to default message + // Fall through to minimal tier below + } + + if (governanceSource === "minimal") { + // Minimal baseline — the tool remains useful when canon is unreachable + // or the table cannot be parsed. These eight headers are the stable + // self-report contract; if canon adds a 9th, the "canon" tier delivers + // it and this list stays as the floor. + selfReportHeaders = { + "x-oddkit-client": "Your client name (highest priority identifier)", + "x-oddkit-client-version": "Your client version", + "x-oddkit-agent-name": "The AI agent name", + "x-oddkit-agent-version": "The AI agent version", + "x-oddkit-surface": "Where this is running (e.g. claude.ai, vscode)", + "x-oddkit-contact-url": "URL for your project or org", + "x-oddkit-policy-url": "Your privacy/telemetry policy URL", + "x-oddkit-capabilities": "Comma-separated capability list", + }; + if (!policyContent) { + policyContent = "Governance document not reachable. 
See https://github.com/klappy/klappy.dev/blob/main/canon/constraints/telemetry-governance.md"; + } } return { @@ -479,16 +555,8 @@ Time filter example: WHERE timestamp > NOW() - INTERVAL '30' DAY`, result: { policy: policyContent, governance_uri: "klappy://canon/constraints/telemetry-governance", - self_report_headers: { - "x-oddkit-client": "Your client name (highest priority identifier)", - "x-oddkit-client-version": "Your client version", - "x-oddkit-agent-name": "The AI agent name", - "x-oddkit-agent-version": "The AI agent version", - "x-oddkit-surface": "Where this is running (e.g. claude.ai, vscode)", - "x-oddkit-contact-url": "URL for your project or org", - "x-oddkit-policy-url": "Your privacy/telemetry policy URL", - "x-oddkit-capabilities": "Comma-separated capability list", - }, + governance_source: governanceSource, + self_report_headers: selfReportHeaders, generated_at: new Date().toISOString(), }, }, null, 2), From 7b2d68bcfb844f6e4b89844677f951fd712651ff Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 18 Apr 2026 01:30:19 +0000 Subject: [PATCH 2/3] refactor(workers): extract parseTableRow to shared markdown-utils --- workers/src/index.ts | 10 ++-------- workers/src/markdown-utils.ts | 24 ++++++++++++++++++++++++ workers/src/orchestrate.ts | 22 +--------------------- 3 files changed, 27 insertions(+), 29 deletions(-) create mode 100644 workers/src/markdown-utils.ts diff --git a/workers/src/index.ts b/workers/src/index.ts index 68b9a4e..db9268d 100644 --- a/workers/src/index.ts +++ b/workers/src/index.ts @@ -22,6 +22,7 @@ import { ZipBaselineFetcher } from "./zip-baseline-fetcher"; import { RequestTracer } from "./tracing"; import { parseConsumerLabel } from "./telemetry"; import { renderNotFoundPage } from "./not-found-ui"; +import { parseTableRow } from "./markdown-utils"; import pkg from "../package.json"; export type { Env }; @@ -29,7 +30,7 @@ export type { Env }; const BUILD_VERSION = pkg.version; // 
────────────────────────────────────────────────────────────────────────────── -// Canon-table parsing helper (local copy — mirrors orchestrate.ts parseTableRow). +// Canon-table parsing helper. // // parseSelfReportHeadersTable extracts the self-report header contract from // canon/constraints/telemetry-governance.md. The table format is governed by @@ -38,13 +39,6 @@ const BUILD_VERSION = pkg.version; // fall back to the minimal baseline without hiding the degradation. // ────────────────────────────────────────────────────────────────────────────── -function parseTableRow(row: string): string[] { - const parts = row.split("|"); - if (parts.length > 0 && parts[0].trim() === "") parts.shift(); - if (parts.length > 0 && parts[parts.length - 1].trim() === "") parts.pop(); - return parts.map((c) => c.trim()); -} - function parseSelfReportHeadersTable(markdown: string): Record | null { // Target section: "### Self-Report Fields" — grab the table that follows. // Stop at the next `###` or `##` heading, whichever comes first. diff --git a/workers/src/markdown-utils.ts b/workers/src/markdown-utils.ts new file mode 100644 index 0000000..4158202 --- /dev/null +++ b/workers/src/markdown-utils.ts @@ -0,0 +1,24 @@ +/** + * Shared markdown parsing helpers. + * + * Keep this module dependency-free so it can be imported from any code path + * (orchestrate, index, future canon readers) without pulling in unrelated + * state. Every helper here must be pure and stateless. + */ + +/** + * Parse a single markdown table row into trimmed cell values, preserving + * legitimately-empty middle cells. Only the leading and trailing empty strings + * produced by splitting a `| a | b |`-style row are stripped — a prior + * `.filter(c => c.length > 0)` approach also dropped empty interior cells, + * which silently collapsed the column count and caused `cols.length >= N` + * guards to misfire (e.g. a voice-dump row with an empty tiers cell). 
+ */ +export function parseTableRow(row: string): string[] { + const parts = row.split("|"); + // Strip the leading empty produced by a leading `|`, if present + if (parts.length > 0 && parts[0].trim() === "") parts.shift(); + // Strip the trailing empty produced by a trailing `|`, if present + if (parts.length > 0 && parts[parts.length - 1].trim() === "") parts.pop(); + return parts.map((c) => c.trim()); +} diff --git a/workers/src/orchestrate.ts b/workers/src/orchestrate.ts index 401529a..d50ba86 100644 --- a/workers/src/orchestrate.ts +++ b/workers/src/orchestrate.ts @@ -18,6 +18,7 @@ import { type SectionResult, } from "./zip-baseline-fetcher"; import { buildBM25Index, searchBM25, type BM25Index } from "./bm25"; +import { parseTableRow } from "./markdown-utils"; import type { RequestTracer } from "./tracing"; import pkg from "../package.json"; @@ -154,27 +155,6 @@ export interface OrchestrateOptions { canonUrl?: string; } -// ────────────────────────────────────────────────────────────────────────────── -// Markdown table helpers -// ────────────────────────────────────────────────────────────────────────────── - -/** - * Parse a single markdown table row into trimmed cell values, preserving - * legitimately-empty middle cells. Only the leading and trailing empty strings - * produced by splitting a `| a | b |`-style row are stripped — a prior - * `.filter(c => c.length > 0)` approach also dropped empty interior cells, - * which silently collapsed the column count and caused `cols.length >= N` - * guards to misfire (e.g. a voice-dump row with an empty tiers cell). 
- */ -function parseTableRow(row: string): string[] { - const parts = row.split("|"); - // Strip the leading empty produced by a leading `|`, if present - if (parts.length > 0 && parts[0].trim() === "") parts.shift(); - // Strip the trailing empty produced by a trailing `|`, if present - if (parts.length > 0 && parts[parts.length - 1].trim() === "") parts.pop(); - return parts.map((c) => c.trim()); -} - // ────────────────────────────────────────────────────────────────────────────── // BM25 Index Cache (per-request, lazy) // ────────────────────────────────────────────────────────────────────────────── From db1936d775b5339b43e9faa3d4a8cad564ac0f4e Mon Sep 17 00:00:00 2001 From: Klappy Date: Sat, 18 Apr 2026 01:43:21 +0000 Subject: [PATCH 3/3] fix(telemetry_policy canary): read Description column + add parser tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses execution-mode challenge gaps on PR #106: 1. Information regression fixed: parser now reads canon's fourth column (Description, cols[3]) instead of the first column (Field label, cols[0]). Canon was extended with richer per-header descriptions in klappy/klappy.dev#102; this commit updates the parser to consume that new column. 2. Tests committed: Test 8 added to workers/test/governance-parser.test.mjs covering 8/8 header extraction, non-trivial description lengths, and degradation paths (no section, empty table). All 105 tests pass against the unmerged klappy.dev branch via KLAPPYDEV_RAW override. Still outstanding (follow-up work, not blocking the canary): - parseSelfReportHeadersTable is mirrored inline by Test 8 in workers/test/governance-parser.test.mjs (parseTableRow itself is no longer duplicated: PATCH 2/3 moved it to workers/src/markdown-utils.ts, shared by workers/src/index.ts and workers/src/orchestrate.ts). Accepted duplication for now, flagged at the test site; export-and-share refactor lands when the sweep surfaces more duplication candidates. - Preview smoke against Cloudflare preview with the extended canon loaded but no worker redeploy — run manually after this PR deploys. Companion PR: klappy/klappy.dev#102 (canon extension). 
This worker change is backward-compatible with the old 3-column table: the parser requires 4 cols, so against the old canon it falls through to the minimal baseline tier. Once klappy.dev#102 merges, canon tier takes over. --- workers/src/index.ts | 18 ++++++++--- workers/test/governance-parser.test.mjs | 42 +++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/workers/src/index.ts b/workers/src/index.ts index db9268d..999e3da 100644 --- a/workers/src/index.ts +++ b/workers/src/index.ts @@ -42,6 +42,15 @@ const BUILD_VERSION = pkg.version; function parseSelfReportHeadersTable(markdown: string): Record | null { // Target section: "### Self-Report Fields" — grab the table that follows. // Stop at the next `###` or `##` heading, whichever comes first. + // + // Expected table schema (governed by canon/constraints/telemetry-governance): + // | Field | Header | Source | Description | + // cols[0] cols[1] cols[2] cols[3] + // + // We key on the Header (col 1, with backticks stripped) and use the + // Description (col 3) as the value. The parser is deliberately permissive + // on whitespace and fails closed to null so the caller falls back to the + // minimal baseline rather than hiding the degradation. const section = markdown.match( /###\s+Self-Report Fields[^\n]*\n([\s\S]*?)(?=\n###|\n##|$)/, ); @@ -51,13 +60,14 @@ function parseSelfReportHeadersTable(markdown: string): Record | for (const raw of section[1].split("\n")) { if (!raw.includes("|")) continue; const cols = parseTableRow(raw); - // Expected layout: | Field | Header | Source | + // Need at least 4 cols (Field, Header, Source, Description). // Skip header row, separator row, and any malformed row. 
- if (cols.length < 2) continue; - const fieldDescription = cols[0]; + if (cols.length < 4) continue; const headerName = cols[1].replace(/`/g, "").trim(); if (!headerName.startsWith("x-oddkit-")) continue; // skip header/separator - headers[headerName] = fieldDescription; + const description = cols[3].trim(); + if (!description) continue; + headers[headerName] = description; } return Object.keys(headers).length > 0 ? headers : null; diff --git a/workers/test/governance-parser.test.mjs b/workers/test/governance-parser.test.mjs index 4ae40c5..a3cc414 100644 --- a/workers/test/governance-parser.test.mjs +++ b/workers/test/governance-parser.test.mjs @@ -36,6 +36,7 @@ const ARTICLE_PATHS = { basePrerequisites: "odd/challenge/base-prerequisites.md", normativeVocabulary: "odd/challenge/normative-vocabulary.md", stakesCalibration: "odd/challenge/stakes-calibration.md", + telemetryGovernance: "canon/constraints/telemetry-governance.md", }; async function fetchArticle(path) { @@ -339,6 +340,47 @@ async function run() { ok("planning has baseline+elevated", calib.get("planning")?.tiers.length === 2); ok("execution has all three tiers", calib.get("execution")?.tiers.length === 3); + console.log("\n─── Test 8: Self-report headers table (telemetry_policy canary) ───"); + // Mirrors parseSelfReportHeadersTable in workers/src/index.ts. If either + // parser changes, both must change — tracked as a known duplication per + // PR #106 discussion. 
+ const parseHeaders = (md) => { + const section = md.match(/###\s+Self-Report Fields[^\n]*\n([\s\S]*?)(?=\n###|\n##|$)/); + if (!section) return null; + const out = {}; + for (const raw of section[1].split("\n")) { + if (!raw.includes("|")) continue; + const parts = raw.split("|"); + if (parts.length > 0 && parts[0].trim() === "") parts.shift(); + if (parts.length > 0 && parts[parts.length - 1].trim() === "") parts.pop(); + const cols = parts.map((c) => c.trim()); + if (cols.length < 4) continue; + const headerName = cols[1].replace(/`/g, "").trim(); + if (!headerName.startsWith("x-oddkit-")) continue; + const description = cols[3].trim(); + if (!description) continue; + out[headerName] = description; + } + return Object.keys(out).length > 0 ? out : null; + }; + + const headers = parseHeaders(articles.telemetryGovernance); + ok("self-report headers parse", headers !== null); + ok("eight headers extracted", headers && Object.keys(headers).length === 8, `got ${headers ? Object.keys(headers).length : 0}`); + ok("x-oddkit-client present", headers && typeof headers["x-oddkit-client"] === "string" && headers["x-oddkit-client"].length > 0); + ok("x-oddkit-surface present", headers && typeof headers["x-oddkit-surface"] === "string" && headers["x-oddkit-surface"].length > 0); + ok("x-oddkit-capabilities present", headers && typeof headers["x-oddkit-capabilities"] === "string" && headers["x-oddkit-capabilities"].length > 0); + ok( + "descriptions are non-trivial (canon Description column, not Field label)", + headers && Object.values(headers).every((d) => d.length > 15), + `shortest: ${headers ? 
Math.min(...Object.values(headers).map((d) => d.length)) : 0} chars`, + ); + + // Degradation: missing section → null + ok("no section returns null", parseHeaders("# No section here\n") === null); + // Degradation: section present but no table rows → null + ok("empty section returns null", parseHeaders("### Self-Report Fields\n\n(no table)\n") === null); + console.log(`\n${passed} passed, ${failed} failed`); process.exit(failed === 0 ? 0 : 1); }