Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 87 additions & 15 deletions workers/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,57 @@ import { ZipBaselineFetcher } from "./zip-baseline-fetcher";
import { RequestTracer } from "./tracing";
import { parseConsumerLabel } from "./telemetry";
import { renderNotFoundPage } from "./not-found-ui";
import { parseTableRow } from "./markdown-utils";
import pkg from "../package.json";

export type { Env };

const BUILD_VERSION = pkg.version;

// ──────────────────────────────────────────────────────────────────────────────
// Canon-table parsing helper.
//
// parseSelfReportHeadersTable extracts the self-report header contract from
// canon/constraints/telemetry-governance.md. The table format is governed by
// the canon doc itself; this parser is deliberately permissive (whitespace,
// backticks around header name) and fails closed to null so the caller can
// fall back to the minimal baseline without hiding the degradation.
// ──────────────────────────────────────────────────────────────────────────────

function parseSelfReportHeadersTable(markdown: string): Record<string, string> | null {
  // Find the "### Self-Report Fields" heading and lazily capture the text
  // after its line, up to the next `##`/`###` heading or the end of the
  // document, whichever comes first.
  const sectionMatch =
    /###\s+Self-Report Fields[^\n]*\n([\s\S]*?)(?=\n###|\n##|$)/.exec(markdown);
  if (sectionMatch === null) return null;

  // Table layout (governed by canon/constraints/telemetry-governance):
  //   | Field | Header | Source | Description |
  //      [0]     [1]      [2]       [3]
  // The result maps Header (backticks removed) -> Description.
  const HEADER_COL = 1;
  const DESCRIPTION_COL = 3;
  const MIN_COLS = 4;

  const collected: Record<string, string> = {};
  let accepted = 0;

  // Only lines containing a pipe can be table rows.
  const candidateLines = sectionMatch[1].split("\n").filter((line) => line.includes("|"));
  for (const line of candidateLines) {
    const cells = parseTableRow(line);
    // Rows with fewer than four cells are malformed for this schema.
    if (cells.length < MIN_COLS) continue;

    const name = cells[HEADER_COL].replace(/`/g, "").trim();
    const description = cells[DESCRIPTION_COL].trim();

    // The x-oddkit- prefix check also rejects the column-title row and the
    // `---` separator row; rows with an empty description are dropped too.
    if (name.startsWith("x-oddkit-") && description !== "") {
      collected[name] = description;
      accepted += 1;
    }
  }

  // Fail closed: an empty result is reported as null so the caller can fall
  // back to its minimal baseline.
  return accepted > 0 ? collected : null;
}

// ──────────────────────────────────────────────────────────────────────────────
// Consumer identification nudge
//
Expand Down Expand Up @@ -451,7 +496,7 @@ Time filter example: WHERE timestamp > NOW() - INTERVAL '30' DAY`,

server.tool(
"telemetry_policy",
"Return oddkit telemetry and sharing policy guidance. What is tracked, what is excluded, and why. Fetched from canonical governance document at runtime.",
"Return oddkit telemetry and sharing policy guidance. What is tracked, what is excluded, and why. Fetched from canonical governance document at runtime. Response envelope declares governance_source (canon|baseline|minimal) per canon/constraints/core-governance-baseline.",
{},
{
readOnlyHint: true,
Expand All @@ -460,15 +505,50 @@ Time filter example: WHERE timestamp > NOW() - INTERVAL '30' DAY`,
openWorldHint: true,
},
async () => {
// Fetch the governance doc from canon
// Governance resolution per canon/constraints/core-governance-baseline:
// 1. Live canon fetch (preferred) → governance_source: "canon"
// 2. Bundled baseline (not yet implemented) → governance_source: "baseline"
// 3. Minimal baseline (shipped in code) → governance_source: "minimal"
//
// This canary refactor implements tiers 1 and 3 only. The bundled
// baseline tier (2) and the build-time schema check arrive in follow-up
// work; the manifest + baseline directory are not yet in place.
const fetcher = new ZipBaselineFetcher(env);
let policyContent = "Governance document not found. See https://github.com/klappy/klappy.dev/blob/main/canon/constraints/telemetry-governance.md";
let policyContent: string | null = null;
let selfReportHeaders: Record<string, string> | null = null;
let governanceSource: "canon" | "baseline" | "minimal" = "minimal";

try {
const content = await fetcher.getFile("canon/constraints/telemetry-governance.md");
if (content) policyContent = content;
if (content) {
policyContent = content;
const parsed = parseSelfReportHeadersTable(content);
if (parsed && Object.keys(parsed).length > 0) {
selfReportHeaders = parsed;
governanceSource = "canon";
}
}
} catch {
// Fall through to default message
// Fall through to minimal tier below
}

if (governanceSource === "minimal") {
// Minimal baseline — the tool remains useful when canon is unreachable
// or the table cannot be parsed. These eight headers are the stable
// self-report contract; if canon adds a 9th, the "canon" tier delivers
// it and this list stays as the floor.
selfReportHeaders = {
"x-oddkit-client": "Your client name (highest priority identifier)",
"x-oddkit-client-version": "Your client version",
"x-oddkit-agent-name": "The AI agent name",
"x-oddkit-agent-version": "The AI agent version",
"x-oddkit-surface": "Where this is running (e.g. claude.ai, vscode)",
"x-oddkit-contact-url": "URL for your project or org",
"x-oddkit-policy-url": "Your privacy/telemetry policy URL",
"x-oddkit-capabilities": "Comma-separated capability list",
};
if (!policyContent) {
policyContent = "Governance document not reachable. See https://github.com/klappy/klappy.dev/blob/main/canon/constraints/telemetry-governance.md";
}
}

return {
Expand All @@ -479,16 +559,8 @@ Time filter example: WHERE timestamp > NOW() - INTERVAL '30' DAY`,
result: {
policy: policyContent,
governance_uri: "klappy://canon/constraints/telemetry-governance",
self_report_headers: {
"x-oddkit-client": "Your client name (highest priority identifier)",
"x-oddkit-client-version": "Your client version",
"x-oddkit-agent-name": "The AI agent name",
"x-oddkit-agent-version": "The AI agent version",
"x-oddkit-surface": "Where this is running (e.g. claude.ai, vscode)",
"x-oddkit-contact-url": "URL for your project or org",
"x-oddkit-policy-url": "Your privacy/telemetry policy URL",
"x-oddkit-capabilities": "Comma-separated capability list",
},
governance_source: governanceSource,
self_report_headers: selfReportHeaders,
generated_at: new Date().toISOString(),
},
}, null, 2),
Expand Down
24 changes: 24 additions & 0 deletions workers/src/markdown-utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/**
* Shared markdown parsing helpers.
*
* Keep this module dependency-free so it can be imported from any code path
* (orchestrate, index, future canon readers) without pulling in unrelated
* state. Every helper here must be pure and stateless.
*/

/**
 * Split one markdown table row on `|` and return its cells, each trimmed.
 *
 * At most one blank cell is removed from each end of the row: the artefacts
 * of the leading and trailing `|` in a `| a | b |`-style row. Blank cells in
 * the interior are kept, so the column count stays stable. An earlier
 * `.filter(c => c.length > 0)` approach dropped those interior cells as well,
 * which silently collapsed the column count and made `cols.length >= N`
 * guards misfire (e.g. a voice-dump row with an empty tiers cell).
 *
 * @param row - A single line of a markdown table.
 * @returns The trimmed cell values, in order.
 */
export function parseTableRow(row: string): string[] {
  const cells = row.split("|").map((cell) => cell.trim());

  // Skip the first cell when it is blank (produced by a leading `|`).
  const start = cells[0] === "" ? 1 : 0;

  // Drop the last remaining cell when it is blank (produced by a trailing `|`).
  let end = cells.length;
  if (end > start && cells[end - 1] === "") end -= 1;

  return cells.slice(start, end);
}
22 changes: 1 addition & 21 deletions workers/src/orchestrate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import {
type SectionResult,
} from "./zip-baseline-fetcher";
import { buildBM25Index, searchBM25, type BM25Index } from "./bm25";
import { parseTableRow } from "./markdown-utils";
import type { RequestTracer } from "./tracing";
import pkg from "../package.json";

Expand Down Expand Up @@ -154,27 +155,6 @@ export interface OrchestrateOptions {
canonUrl?: string;
}

// ──────────────────────────────────────────────────────────────────────────────
// Markdown table helpers
// ──────────────────────────────────────────────────────────────────────────────

/**
 * Turn a single markdown table row into its trimmed cell values.
 *
 * Splitting `| a | b |` on `|` yields a blank first and last segment; only
 * those two edge segments are discarded. Blank segments in the middle of the
 * row are kept as empty strings. A previous `.filter(c => c.length > 0)`
 * version removed them too, which silently shrank the column count and broke
 * `cols.length >= N` guards (e.g. a voice-dump row with an empty tiers cell).
 */
function parseTableRow(row: string): string[] {
  const segments = row.split("|");
  const isBlank = (segment: string | undefined): boolean =>
    segment !== undefined && segment.trim() === "";

  // Blank segment caused by a leading `|`.
  if (isBlank(segments[0])) segments.splice(0, 1);
  // Blank segment caused by a trailing `|` (no-op when nothing is left).
  if (isBlank(segments[segments.length - 1])) segments.splice(segments.length - 1, 1);

  return segments.map((segment) => segment.trim());
}

// ──────────────────────────────────────────────────────────────────────────────
// BM25 Index Cache (per-request, lazy)
// ──────────────────────────────────────────────────────────────────────────────
Expand Down
42 changes: 42 additions & 0 deletions workers/test/governance-parser.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ const ARTICLE_PATHS = {
basePrerequisites: "odd/challenge/base-prerequisites.md",
normativeVocabulary: "odd/challenge/normative-vocabulary.md",
stakesCalibration: "odd/challenge/stakes-calibration.md",
telemetryGovernance: "canon/constraints/telemetry-governance.md",
};

async function fetchArticle(path) {
Expand Down Expand Up @@ -339,6 +340,47 @@ async function run() {
ok("planning has baseline+elevated", calib.get("planning")?.tiers.length === 2);
ok("execution has all three tiers", calib.get("execution")?.tiers.length === 3);

console.log("\n─── Test 8: Self-report headers table (telemetry_policy canary) ───");
// Mirrors parseSelfReportHeadersTable in workers/src/index.ts. If either
// parser changes, both must change — tracked as a known duplication per
// PR #106 discussion.
const parseHeaders = (md) => {
  // Capture the body of the "### Self-Report Fields" section, up to the next
  // `##`/`###` heading or the end of the document.
  const found = /###\s+Self-Report Fields[^\n]*\n([\s\S]*?)(?=\n###|\n##|$)/.exec(md);
  if (found === null) return null;

  // Split a row on `|`, trim every cell, and discard at most one blank cell
  // at each end (the artefacts of the leading/trailing pipe).
  const splitRow = (line) => {
    const cells = line.split("|").map((c) => c.trim());
    const start = cells[0] === "" ? 1 : 0;
    let end = cells.length;
    if (end > start && cells[end - 1] === "") end -= 1;
    return cells.slice(start, end);
  };

  const out = {};
  found[1]
    .split("\n")
    .filter((line) => line.includes("|"))
    .map(splitRow)
    // Field, Header, Source, Description: anything shorter is malformed.
    .filter((cols) => cols.length >= 4)
    .forEach((cols) => {
      const headerName = cols[1].replace(/`/g, "").trim();
      const description = cols[3].trim();
      // The prefix check also rejects the title and separator rows.
      if (headerName.startsWith("x-oddkit-") && description) {
        out[headerName] = description;
      }
    });

  return Object.keys(out).length > 0 ? out : null;
};

const headers = parseHeaders(articles.telemetryGovernance);
ok("self-report headers parse", headers !== null);
ok("eight headers extracted", headers && Object.keys(headers).length === 8, `got ${headers ? Object.keys(headers).length : 0}`);
ok("x-oddkit-client present", headers && typeof headers["x-oddkit-client"] === "string" && headers["x-oddkit-client"].length > 0);
ok("x-oddkit-surface present", headers && typeof headers["x-oddkit-surface"] === "string" && headers["x-oddkit-surface"].length > 0);
ok("x-oddkit-capabilities present", headers && typeof headers["x-oddkit-capabilities"] === "string" && headers["x-oddkit-capabilities"].length > 0);
ok(
"descriptions are non-trivial (canon Description column, not Field label)",
headers && Object.values(headers).every((d) => d.length > 15),
`shortest: ${headers ? Math.min(...Object.values(headers).map((d) => d.length)) : 0} chars`,
);

// Degradation: missing section → null
ok("no section returns null", parseHeaders("# No section here\n") === null);
// Degradation: section present but no table rows → null
ok("empty section returns null", parseHeaders("### Self-Report Fields\n\n(no table)\n") === null);

console.log(`\n${passed} passed, ${failed} failed`);
process.exit(failed === 0 ? 0 : 1);
}
Expand Down
Loading