diff --git a/workers/src/index.ts b/workers/src/index.ts index 1ac23a6..999e3da 100644 --- a/workers/src/index.ts +++ b/workers/src/index.ts @@ -22,12 +22,57 @@ import { ZipBaselineFetcher } from "./zip-baseline-fetcher"; import { RequestTracer } from "./tracing"; import { parseConsumerLabel } from "./telemetry"; import { renderNotFoundPage } from "./not-found-ui"; +import { parseTableRow } from "./markdown-utils"; import pkg from "../package.json"; export type { Env }; const BUILD_VERSION = pkg.version; +// ────────────────────────────────────────────────────────────────────────────── +// Canon-table parsing helper. +// +// parseSelfReportHeadersTable extracts the self-report header contract from +// canon/constraints/telemetry-governance.md. The table format is governed by +// the canon doc itself; this parser is deliberately permissive (whitespace, +// backticks around header name) and fails closed to null so the caller can +// fall back to the minimal baseline without hiding the degradation. +// ────────────────────────────────────────────────────────────────────────────── + +function parseSelfReportHeadersTable(markdown: string): Record<string, string> | null { + // Target section: "### Self-Report Fields" — grab the table that follows. + // Stop at the next `###` or `##` heading, whichever comes first. + // + // Expected table schema (governed by canon/constraints/telemetry-governance): + // | Field | Header | Source | Description | + // cols[0] cols[1] cols[2] cols[3] + // + // We key on the Header (col 1, with backticks stripped) and use the + // Description (col 3) as the value. The parser is deliberately permissive + // on whitespace and fails closed to null so the caller falls back to the + // minimal baseline rather than hiding the degradation. 
+ const section = markdown.match( + /###\s+Self-Report Fields[^\n]*\n([\s\S]*?)(?=\n###|\n##|$)/, + ); + if (!section) return null; + + const headers: Record<string, string> = {}; + for (const raw of section[1].split("\n")) { + if (!raw.includes("|")) continue; + const cols = parseTableRow(raw); + // Need at least 4 cols (Field, Header, Source, Description). + // Skip header row, separator row, and any malformed row. + if (cols.length < 4) continue; + const headerName = cols[1].replace(/`/g, "").trim(); + if (!headerName.startsWith("x-oddkit-")) continue; // skip header/separator + const description = cols[3].trim(); + if (!description) continue; + headers[headerName] = description; + } + + return Object.keys(headers).length > 0 ? headers : null; +} + // ────────────────────────────────────────────────────────────────────────────── // Consumer identification nudge // @@ -451,7 +496,7 @@ Time filter example: WHERE timestamp > NOW() - INTERVAL '30' DAY`, server.tool( "telemetry_policy", - "Return oddkit telemetry and sharing policy guidance. What is tracked, what is excluded, and why. Fetched from canonical governance document at runtime.", + "Return oddkit telemetry and sharing policy guidance. What is tracked, what is excluded, and why. Fetched from canonical governance document at runtime. Response envelope declares governance_source (canon|baseline|minimal) per canon/constraints/core-governance-baseline.", {}, { readOnlyHint: true, @@ -460,15 +505,50 @@ Time filter example: WHERE timestamp > NOW() - INTERVAL '30' DAY`, openWorldHint: true, }, async () => { - // Fetch the governance doc from canon + // Governance resolution per canon/constraints/core-governance-baseline: + // 1. Live canon fetch (preferred) → governance_source: "canon" + // 3. Minimal baseline (shipped in code) → governance_source: "minimal" + // + // This canary refactor implements tiers 1 and 3 only. 
The bundled + // baseline tier (2) and the build-time schema check arrive in follow-up + // work; the manifest + baseline directory are not yet in place. const fetcher = new ZipBaselineFetcher(env); - let policyContent = "Governance document not found. See https://github.com/klappy/klappy.dev/blob/main/canon/constraints/telemetry-governance.md"; + let policyContent: string | null = null; + let selfReportHeaders: Record<string, string> | null = null; + let governanceSource: "canon" | "baseline" | "minimal" = "minimal"; try { const content = await fetcher.getFile("canon/constraints/telemetry-governance.md"); - if (content) policyContent = content; + if (content) { + policyContent = content; + const parsed = parseSelfReportHeadersTable(content); + if (parsed && Object.keys(parsed).length > 0) { + selfReportHeaders = parsed; + governanceSource = "canon"; + } + } } catch { - // Fall through to default message + // Fall through to minimal tier below + } + + if (governanceSource === "minimal") { + // Minimal baseline — the tool remains useful when canon is unreachable + // or the table cannot be parsed. These eight headers are the stable + // self-report contract; if canon adds a 9th, the "canon" tier delivers + // it and this list stays as the floor. + selfReportHeaders = { + "x-oddkit-client": "Your client name (highest priority identifier)", + "x-oddkit-client-version": "Your client version", + "x-oddkit-agent-name": "The AI agent name", + "x-oddkit-agent-version": "The AI agent version", + "x-oddkit-surface": "Where this is running (e.g. claude.ai, vscode)", + "x-oddkit-contact-url": "URL for your project or org", + "x-oddkit-policy-url": "Your privacy/telemetry policy URL", + "x-oddkit-capabilities": "Comma-separated capability list", + }; + if (!policyContent) { + policyContent = "Governance document not reachable. 
See https://github.com/klappy/klappy.dev/blob/main/canon/constraints/telemetry-governance.md"; + } } return { @@ -479,16 +559,8 @@ Time filter example: WHERE timestamp > NOW() - INTERVAL '30' DAY`, result: { policy: policyContent, governance_uri: "klappy://canon/constraints/telemetry-governance", - self_report_headers: { - "x-oddkit-client": "Your client name (highest priority identifier)", - "x-oddkit-client-version": "Your client version", - "x-oddkit-agent-name": "The AI agent name", - "x-oddkit-agent-version": "The AI agent version", - "x-oddkit-surface": "Where this is running (e.g. claude.ai, vscode)", - "x-oddkit-contact-url": "URL for your project or org", - "x-oddkit-policy-url": "Your privacy/telemetry policy URL", - "x-oddkit-capabilities": "Comma-separated capability list", - }, + governance_source: governanceSource, + self_report_headers: selfReportHeaders, generated_at: new Date().toISOString(), }, }, null, 2), diff --git a/workers/src/markdown-utils.ts b/workers/src/markdown-utils.ts new file mode 100644 index 0000000..4158202 --- /dev/null +++ b/workers/src/markdown-utils.ts @@ -0,0 +1,24 @@ +/** + * Shared markdown parsing helpers. + * + * Keep this module dependency-free so it can be imported from any code path + * (orchestrate, index, future canon readers) without pulling in unrelated + * state. Every helper here must be pure and stateless. + */ + +/** + * Parse a single markdown table row into trimmed cell values, preserving + * legitimately-empty middle cells. Only the leading and trailing empty strings + * produced by splitting a `| a | b |`-style row are stripped — a prior + * `.filter(c => c.length > 0)` approach also dropped empty interior cells, + * which silently collapsed the column count and caused `cols.length >= N` + * guards to misfire (e.g. a voice-dump row with an empty tiers cell). 
+ */ +export function parseTableRow(row: string): string[] { + const parts = row.split("|"); + // Strip the leading empty produced by a leading `|`, if present + if (parts.length > 0 && parts[0].trim() === "") parts.shift(); + // Strip the trailing empty produced by a trailing `|`, if present + if (parts.length > 0 && parts[parts.length - 1].trim() === "") parts.pop(); + return parts.map((c) => c.trim()); +} diff --git a/workers/src/orchestrate.ts b/workers/src/orchestrate.ts index 401529a..d50ba86 100644 --- a/workers/src/orchestrate.ts +++ b/workers/src/orchestrate.ts @@ -18,6 +18,7 @@ import { type SectionResult, } from "./zip-baseline-fetcher"; import { buildBM25Index, searchBM25, type BM25Index } from "./bm25"; +import { parseTableRow } from "./markdown-utils"; import type { RequestTracer } from "./tracing"; import pkg from "../package.json"; @@ -154,27 +155,6 @@ export interface OrchestrateOptions { canonUrl?: string; } -// ────────────────────────────────────────────────────────────────────────────── -// Markdown table helpers -// ────────────────────────────────────────────────────────────────────────────── - -/** - * Parse a single markdown table row into trimmed cell values, preserving - * legitimately-empty middle cells. Only the leading and trailing empty strings - * produced by splitting a `| a | b |`-style row are stripped — a prior - * `.filter(c => c.length > 0)` approach also dropped empty interior cells, - * which silently collapsed the column count and caused `cols.length >= N` - * guards to misfire (e.g. a voice-dump row with an empty tiers cell). 
- */ -function parseTableRow(row: string): string[] { - const parts = row.split("|"); - // Strip the leading empty produced by a leading `|`, if present - if (parts.length > 0 && parts[0].trim() === "") parts.shift(); - // Strip the trailing empty produced by a trailing `|`, if present - if (parts.length > 0 && parts[parts.length - 1].trim() === "") parts.pop(); - return parts.map((c) => c.trim()); -} - // ────────────────────────────────────────────────────────────────────────────── // BM25 Index Cache (per-request, lazy) // ────────────────────────────────────────────────────────────────────────────── diff --git a/workers/test/governance-parser.test.mjs b/workers/test/governance-parser.test.mjs index 4ae40c5..a3cc414 100644 --- a/workers/test/governance-parser.test.mjs +++ b/workers/test/governance-parser.test.mjs @@ -36,6 +36,7 @@ const ARTICLE_PATHS = { basePrerequisites: "odd/challenge/base-prerequisites.md", normativeVocabulary: "odd/challenge/normative-vocabulary.md", stakesCalibration: "odd/challenge/stakes-calibration.md", + telemetryGovernance: "canon/constraints/telemetry-governance.md", }; async function fetchArticle(path) { @@ -339,6 +340,47 @@ async function run() { ok("planning has baseline+elevated", calib.get("planning")?.tiers.length === 2); ok("execution has all three tiers", calib.get("execution")?.tiers.length === 3); + console.log("\n─── Test 8: Self-report headers table (telemetry_policy canary) ───"); + // Mirrors parseSelfReportHeadersTable in workers/src/index.ts. If either + // parser changes, both must change — tracked as a known duplication per + // PR #106 discussion. 
+ const parseHeaders = (md) => { + const section = md.match(/###\s+Self-Report Fields[^\n]*\n([\s\S]*?)(?=\n###|\n##|$)/); + if (!section) return null; + const out = {}; + for (const raw of section[1].split("\n")) { + if (!raw.includes("|")) continue; + const parts = raw.split("|"); + if (parts.length > 0 && parts[0].trim() === "") parts.shift(); + if (parts.length > 0 && parts[parts.length - 1].trim() === "") parts.pop(); + const cols = parts.map((c) => c.trim()); + if (cols.length < 4) continue; + const headerName = cols[1].replace(/`/g, "").trim(); + if (!headerName.startsWith("x-oddkit-")) continue; + const description = cols[3].trim(); + if (!description) continue; + out[headerName] = description; + } + return Object.keys(out).length > 0 ? out : null; + }; + + const headers = parseHeaders(articles.telemetryGovernance); + ok("self-report headers parse", headers !== null); + ok("eight headers extracted", headers && Object.keys(headers).length === 8, `got ${headers ? Object.keys(headers).length : 0}`); + ok("x-oddkit-client present", headers && typeof headers["x-oddkit-client"] === "string" && headers["x-oddkit-client"].length > 0); + ok("x-oddkit-surface present", headers && typeof headers["x-oddkit-surface"] === "string" && headers["x-oddkit-surface"].length > 0); + ok("x-oddkit-capabilities present", headers && typeof headers["x-oddkit-capabilities"] === "string" && headers["x-oddkit-capabilities"].length > 0); + ok( + "descriptions are non-trivial (canon Description column, not Field label)", + headers && Object.values(headers).every((d) => d.length > 15), + `shortest: ${headers ? 
Math.min(...Object.values(headers).map((d) => d.length)) : 0} chars`, + ); + + // Degradation: missing section → null + ok("no section returns null", parseHeaders("# No section here\n") === null); + // Degradation: section present but no table rows → null + ok("empty section returns null", parseHeaders("### Self-Report Fields\n\n(no table)\n") === null); + console.log(`\n${passed} passed, ${failed} failed`); process.exit(failed === 0 ? 0 : 1); }