From e331c5bd626ee7f6af5f96b18d21a2d4b089aeac Mon Sep 17 00:00:00 2001 From: Klappy Date: Fri, 3 Apr 2026 15:49:24 +0000 Subject: [PATCH 01/13] =?UTF-8?q?E0007:=20proactive=20posture=20=E2=80=94?= =?UTF-8?q?=20catalog=20metadata=20exposure,=20full=20frontmatter=20indexi?= =?UTF-8?q?ng,=20proactive=20tool=20hints?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Catalog temporal discovery (new params): - sort_by: 'date' returns articles sorted newest-first with full metadata - limit: cap article count (1-100, default 10) - filter_epoch: server-side deterministic filtering by epoch Full frontmatter indexing: - IndexEntry now stores complete parsed frontmatter (was cherry-picking 6 fields) - parseFrontmatter rewritten as generic YAML parser — captures all top-level fields - Enables date, epoch, audience, tier, stability, and all custom fields Proactive tool descriptions (Phase 3): - orient: 'Call proactively whenever context shifts' - search: 'Search before claiming — not just when asked' - challenge: 'Challenge proactively before encoding consequential decisions' - gate: 'Gate at every implicit mode transition' - validate: 'Validate proactively before claiming any task complete' - preflight: 'Preflight before any execution that produces an artifact' - encode: Full rewrite with persistence warning + OLDC+H vocabulary Response format changes: - Orient: adds proactive OLDC+H tracking instruction - Encode: adds persist_required: true and next_action All changes typecheck clean. 
--- workers/package-lock.json | 4 +- workers/src/index.ts | 28 +++++++--- workers/src/orchestrate.ts | 83 ++++++++++++++++++++++++----- workers/src/zip-baseline-fetcher.ts | 54 +++++++++++++------ 4 files changed, 130 insertions(+), 39 deletions(-) diff --git a/workers/package-lock.json b/workers/package-lock.json index 3b7c775..d9ab280 100644 --- a/workers/package-lock.json +++ b/workers/package-lock.json @@ -1,12 +1,12 @@ { "name": "oddkit-mcp-worker", - "version": "0.15.0", + "version": "0.15.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "oddkit-mcp-worker", - "version": "0.15.0", + "version": "0.15.1", "dependencies": { "agents": "^0.4.1", "fflate": "^0.8.2", diff --git a/workers/src/index.ts b/workers/src/index.ts index cabc817..c9c9b1e 100644 --- a/workers/src/index.ts +++ b/workers/src/index.ts @@ -145,6 +145,9 @@ Use when: canon_url: z.string().optional().describe("Optional GitHub repo URL for canon override."), include_metadata: z.boolean().optional().describe("When true, search/get responses include a metadata object with full parsed frontmatter. Default: false."), section: z.string().optional().describe("For action='get': extract only the named ## section from the document. Returns section content or available sections if not found."), + sort_by: z.enum(["date"]).optional().describe("For action='catalog': sort articles by frontmatter field. 'date' returns newest first with full metadata."), + limit: z.number().min(1).max(100).optional().describe("For action='catalog': max articles to return when sort_by is provided. Default: 10."), + filter_epoch: z.string().optional().describe("For action='catalog': filter to articles with this epoch value in frontmatter (e.g. 
'E0007')."), state: z.record(z.string(), z.unknown()).optional().describe("Optional client-side conversation state, passed back and forth."), }, { @@ -162,6 +165,9 @@ Use when: canon_url: args.canon_url, include_metadata: args.include_metadata, section: args.section, + sort_by: args.sort_by, + limit: args.limit, + filter_epoch: args.filter_epoch, state: args.state as any, env, }); @@ -180,7 +186,7 @@ Use when: }> = [ { name: "oddkit_orient", - description: "Assess a goal, idea, or situation against epistemic modes (exploration/planning/execution). Surfaces unresolved items, assumptions, and questions.", + description: "Assess a goal, idea, or situation against epistemic modes (exploration/planning/execution). Surfaces unresolved items, assumptions, and questions. Call proactively whenever context shifts, not just at session start.", action: "orient", schema: { input: z.string().describe("A goal, idea, or situation description to orient against."), @@ -190,7 +196,7 @@ Use when: }, { name: "oddkit_challenge", - description: "Pressure-test a claim, assumption, or proposal against canon constraints. Surfaces tensions, missing evidence, and contradictions.", + description: "Pressure-test a claim, assumption, or proposal against canon constraints. Surfaces tensions, missing evidence, and contradictions. Challenge proactively before encoding consequential decisions.", action: "challenge", schema: { input: z.string().describe("A claim, assumption, or proposal to challenge."), @@ -201,7 +207,7 @@ Use when: }, { name: "oddkit_gate", - description: "Check transition prerequisites before changing epistemic modes. Validates readiness and blocks premature convergence.", + description: "Check transition prerequisites before changing epistemic modes. Validates readiness and blocks premature convergence. 
Gate at every implicit mode transition, not just formal ones.", action: "gate", schema: { input: z.string().describe("The proposed transition (e.g., 'ready to build', 'moving to planning')."), @@ -212,7 +218,7 @@ Use when: }, { name: "oddkit_encode", - description: "Structure a decision, insight, or boundary as a durable record. Assesses quality and suggests improvements.", + description: "Structure a decision, insight, or boundary as a durable record. IMPORTANT: This tool returns the structured artifact in the response — it does NOT persist or save it. The caller must save the output to storage. Standard artifact types: Observations (O), Learnings (L), Decisions (D), Constraints (C), Handoffs (H) — OLDC+H. Track OLDC+H continuously — encode what the user shared, encode what you did. Persist at natural breakpoints.", action: "encode", schema: { input: z.string().describe("A decision, insight, or boundary to capture."), @@ -223,7 +229,7 @@ Use when: }, { name: "oddkit_search", - description: "Search canon and baseline docs by natural language query or tags. Returns ranked results with citations and excerpts.", + description: "Search canon and baseline docs by natural language query or tags. Returns ranked results with citations and excerpts. Search before claiming — not just when asked.", action: "search", schema: { input: z.string().describe("Natural language query or tags to search for."), @@ -246,16 +252,19 @@ Use when: }, { name: "oddkit_catalog", - description: "Lists available documentation with categories, counts, and start-here suggestions.", + description: "Lists available documentation with categories, counts, and start-here suggestions. Supports temporal discovery: use sort_by='date' to get recent articles with full frontmatter metadata.", action: "catalog", schema: { canon_url: z.string().optional().describe("Optional: GitHub repo URL for canon override."), + sort_by: z.enum(["date"]).optional().describe("Sort articles by frontmatter field. 
'date' returns newest first with full metadata."), + limit: z.number().min(1).max(100).optional().describe("Max articles to return when sort_by is provided. Default: 10."), + filter_epoch: z.string().optional().describe("Filter to articles with this epoch value in frontmatter (e.g. 'E0007')."), }, annotations: { readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true }, }, { name: "oddkit_validate", - description: "Validates completion claims against required artifacts. Returns VERIFIED or NEEDS_ARTIFACTS.", + description: "Validates completion claims against required artifacts. Returns VERIFIED or NEEDS_ARTIFACTS. Validate proactively before claiming any task complete.", action: "validate", schema: { input: z.string().describe("The completion claim with artifact references."), @@ -264,7 +273,7 @@ Use when: }, { name: "oddkit_preflight", - description: "Pre-implementation check. Returns relevant docs, constraints, definition of done, and pitfalls.", + description: "Pre-implementation check. Returns relevant docs, constraints, definition of done, and pitfalls. 
Preflight before any execution that produces an artifact.", action: "preflight", schema: { input: z.string().describe("Description of what you're about to implement."), @@ -307,6 +316,9 @@ Use when: canon_url: args.canon_url as string | undefined, include_metadata: args.include_metadata as boolean | undefined, section: args.section as string | undefined, + sort_by: args.sort_by as string | undefined, + limit: args.limit as number | undefined, + filter_epoch: args.filter_epoch as string | undefined, env, }); return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] }; diff --git a/workers/src/orchestrate.ts b/workers/src/orchestrate.ts index bcf5d09..f28c126 100644 --- a/workers/src/orchestrate.ts +++ b/workers/src/orchestrate.ts @@ -57,6 +57,9 @@ export interface UnifiedParams { canon_url?: string; include_metadata?: boolean; section?: string; + sort_by?: string; + limit?: number; + filter_epoch?: string; state?: OddkitState; env: Env; } @@ -862,9 +865,12 @@ async function runCatalog( fetcher: ZipBaselineFetcher, canonUrl?: string, state?: OddkitState, + options?: { sort_by?: string; limit?: number; filter_epoch?: string }, ): Promise { const startMs = Date.now(); const index = await fetcher.getIndex(canonUrl); + const { sort_by, limit: rawLimit, filter_epoch } = options || {}; + const effectiveLimit = Math.min(Math.max(rawLimit || 10, 1), 100); const byTag: Record = {}; for (const entry of index.entries) { @@ -889,7 +895,35 @@ async function runCatalog( .sort((a, b) => b[1].length - a[1].length) .slice(0, 5); - const assistantText = [ + // Build articles list when sort_by is provided + let articles: Array<{ path: string; uri: string; metadata: Record }> | undefined; + if (sort_by === "date") { + let candidates = index.entries.filter((e) => e.frontmatter); + + // Server-side epoch filter — deterministic, cheap, correct + if (filter_epoch) { + candidates = candidates.filter( + (e) => e.frontmatter && (e.frontmatter as Record).epoch === 
filter_epoch, + ); + } + + // Server-side date sort — deterministic, cheap, correct + candidates.sort((a, b) => { + const da = ((a.frontmatter as Record)?.date as string) || ""; + const db = ((b.frontmatter as Record)?.date as string) || ""; + if (db && !da) return 1; // docs without dates sort last + if (da && !db) return -1; + return db.localeCompare(da); // newest first + }); + + articles = candidates.slice(0, effectiveLimit).map((e) => ({ + path: e.path, + uri: e.uri, + metadata: (e.frontmatter as Record) || {}, + })); + } + + const assistantTextParts = [ `ODD Documentation Catalog`, ``, `Total: ${index.stats.total} docs (${index.stats.canon} canon, ${index.stats.baseline} baseline)`, @@ -907,20 +941,38 @@ async function runCatalog( .map((e) => e.title) .join(", ")}`, ), - ] - .filter(Boolean) - .join("\n") - .trim(); + ]; + + // Append articles listing to assistant_text when present + if (articles && articles.length > 0) { + assistantTextParts.push( + ``, + `Recent articles${filter_epoch ? ` (${filter_epoch})` : ""}:`, + ...articles.map((a) => { + const date = (a.metadata.date as string) || "no date"; + return `- \`${a.path}\` — ${a.metadata.title || "Untitled"} (${date})`; + }), + ); + } + + const assistantText = assistantTextParts.filter(Boolean).join("\n").trim(); + + const result: Record = { + total: index.stats.total, + canon: index.stats.canon, + baseline: index.stats.baseline, + categories: Object.keys(byTag), + start_here: startHere.map((e) => e.path), + }; + + // Include articles array only when sort_by is provided + if (articles) { + result.articles = articles; + } return { action: "catalog", - result: { - total: index.stats.total, - canon: index.stats.canon, - baseline: index.stats.baseline, - categories: Object.keys(byTag), - start_here: startHere.map((e) => e.path), - }, + result, state: state ? 
initState(state) : undefined, assistant_text: assistantText, debug: { @@ -1100,6 +1152,7 @@ async function runOrientAction( lines.push(""); } lines.push(`Orientation: ${mode} mode (${confidence} confidence)`, ""); + lines.push("Proactive posture: Track OLDC+H continuously throughout this session. Encode what the user shares and what you do at every exchange. Resurface the creed when confidence outpaces evidence. Persist to project storage at natural breakpoints.", ""); if (assumptions.length > 0) { lines.push("Assumptions detected:"); for (const a of assumptions.slice(0, 3)) lines.push(` - ${a}`); @@ -1492,6 +1545,8 @@ async function runEncodeAction( status: "ENCODED", artifact, quality: { level: qualityLevel, score, max_score: 5, gaps, suggestions }, + persist_required: true, + next_action: "Save this artifact to the project's storage (project journal, file, database). Encode does NOT persist.", }, state: updatedState, assistant_text: lines.join("\n").trim(), @@ -1518,7 +1573,7 @@ const VALID_ACTIONS = [ ] as const; export async function handleUnifiedAction(params: UnifiedParams): Promise { - const { action, input, context, mode, canon_url, include_metadata, section, state, env } = params; + const { action, input, context, mode, canon_url, include_metadata, section, sort_by, limit, filter_epoch, state, env } = params; if (!VALID_ACTIONS.includes(action as (typeof VALID_ACTIONS)[number])) { return { @@ -1546,7 +1601,7 @@ export async function handleUnifiedAction(params: UnifiedParams): Promise; } export interface BaselineIndex { @@ -69,6 +70,7 @@ interface FrontmatterResult { tags?: string[]; uri?: string; exposure?: string; + [key: string]: unknown; // Full frontmatter passthrough for metadata exposure } // ────────────────────────────────────────────────────────────────────────────── @@ -88,26 +90,47 @@ function parseFrontmatter(content: string): FrontmatterResult { const yaml = match[1]; const result: FrontmatterResult = {}; - // Simple YAML parsing for common 
fields - const titleMatch = yaml.match(/^title:\s*["']?(.+?)["']?\s*$/m); - if (titleMatch) result.title = titleMatch[1]; + // Parse all top-level YAML key-value pairs generically + for (const line of yaml.split("\n")) { + // Skip empty lines, comments, and continuation lines + if (!line.trim() || line.trim().startsWith("#") || line.startsWith(" ") || line.startsWith("\t")) continue; + + const kvMatch = line.match(/^([a-zA-Z_][a-zA-Z0-9_-]*)\s*:\s*(.*)/); + if (!kvMatch) continue; + + const key = kvMatch[1]; + let value = kvMatch[2].trim(); + + // Inline array: tags: ["a", "b", "c"] + if (value.startsWith("[") && value.endsWith("]")) { + result[key] = value + .slice(1, -1) + .split(",") + .map((t) => t.trim().replace(/["']/g, "")) + .filter(Boolean); + continue; + } - const intentMatch = yaml.match(/^intent:\s*["']?(.+?)["']?\s*$/m); - if (intentMatch) result.intent = intentMatch[1]; + // Boolean + if (value === "true") { result[key] = true; continue; } + if (value === "false") { result[key] = false; continue; } - const bandMatch = yaml.match(/^authority_band:\s*["']?(.+?)["']?\s*$/m); - if (bandMatch) result.authority_band = bandMatch[1]; + // Numeric (integers and simple decimals) + if (/^-?\d+(\.\d+)?$/.test(value)) { + result[key] = Number(value); + continue; + } - const uriMatch = yaml.match(/^uri:\s*["']?(.+?)["']?\s*$/m); - if (uriMatch) result.uri = uriMatch[1]; + // Null + if (value === "null" || value === "~" || value === "") { continue; } - const tagsMatch = yaml.match(/^tags:\s*\[(.+?)\]/m); - if (tagsMatch) { - result.tags = tagsMatch[1].split(",").map((t) => t.trim().replace(/["']/g, "")); - } + // String — strip surrounding quotes + if ((value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"))) { + value = value.slice(1, -1); + } - const exposureMatch = yaml.match(/^exposure:\s*["']?(.+?)["']?\s*$/m); - if (exposureMatch) result.exposure = exposureMatch[1]; + result[key] = value; + } return result; } @@ -501,6 
+524,7 @@ export class ZipBaselineFetcher { excerpt: extractExcerpt(content), content_hash: hashContent(content), source, + frontmatter: Object.keys(frontmatter).length > 0 ? frontmatter : undefined, }; entries.push(entry); From 0db2f6b9795df3de3bcd360c328ccd568179268e Mon Sep 17 00:00:00 2001 From: Klappy Date: Fri, 3 Apr 2026 15:52:45 +0000 Subject: [PATCH 02/13] =?UTF-8?q?Bump=20INDEX=5FVERSION=20to=202.2=20?= =?UTF-8?q?=E2=80=94=20force=20cache=20rebuild=20for=20frontmatter=20index?= =?UTF-8?q?ing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- workers/src/zip-baseline-fetcher.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/src/zip-baseline-fetcher.ts b/workers/src/zip-baseline-fetcher.ts index 6c7842f..567fc57 100644 --- a/workers/src/zip-baseline-fetcher.ts +++ b/workers/src/zip-baseline-fetcher.ts @@ -18,7 +18,7 @@ import { unzipSync } from "fflate"; // to the indexing pipeline (filters, fields, scoring) invalidate stale indexes. // Bump when indexing logic changes. Without this, a cached index built by // old code persists until the repo's commit SHA changes. -const INDEX_VERSION = "2.1"; // 2.1: version-keyed cache invalidation +const INDEX_VERSION = "2.2"; // 2.2: full frontmatter indexing for metadata exposure (E0007) export interface Env { BASELINE_URL: string; From 80fd8c4c1187ebde7d48b26be253b57346e67bca Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 3 Apr 2026 16:03:18 +0000 Subject: [PATCH 03/13] fix: normalize bare-string tags to array in parseFrontmatter When frontmatter contains tags: single-value without bracket syntax, the generic parser assigned a plain string to result.tags. Downstream consumers iterate with for...of which on a string yields individual characters, producing garbled single-character tag categories in catalog and corrupted BM25 search index data. Normalize to a single-element array after parsing. 
--- workers/src/zip-baseline-fetcher.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/workers/src/zip-baseline-fetcher.ts b/workers/src/zip-baseline-fetcher.ts index 567fc57..d601ffc 100644 --- a/workers/src/zip-baseline-fetcher.ts +++ b/workers/src/zip-baseline-fetcher.ts @@ -132,6 +132,10 @@ function parseFrontmatter(content: string): FrontmatterResult { result[key] = value; } + if (typeof result.tags === "string") { + result.tags = [result.tags]; + } + return result; } From b50b65c54560fdbea6a83dd0453c09dbd3cd802d Mon Sep 17 00:00:00 2001 From: Klappy Date: Fri, 3 Apr 2026 16:08:24 +0000 Subject: [PATCH 04/13] Fix: extract branch ref from raw.githubusercontent.com canon_url MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit getZipUrl was discarding the branch name from canon_url and always fetching main.zip. e.g. canon_url with e0007-proactive-posture branch was downloading main branch ZIP — branch-specific articles never appeared. Now extracts parts[2] as the ref when converting raw.githubusercontent.com URLs to GitHub archive URLs. 
--- workers/src/zip-baseline-fetcher.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/workers/src/zip-baseline-fetcher.ts b/workers/src/zip-baseline-fetcher.ts index d601ffc..48bba0c 100644 --- a/workers/src/zip-baseline-fetcher.ts +++ b/workers/src/zip-baseline-fetcher.ts @@ -230,16 +230,20 @@ function hashContent(content: string): string { function getZipUrl(repoUrl: string, ref: string = "main"): string { // Handle various URL formats // https://github.com/owner/repo -> https://github.com/owner/repo/archive/main.zip - // https://raw.githubusercontent.com/owner/repo/main -> https://github.com/owner/repo/archive/main.zip + // https://raw.githubusercontent.com/owner/repo/branch -> https://github.com/owner/repo/archive/branch.zip let cleanUrl = repoUrl .replace(/\.git$/, "") .replace(/\/$/, ""); if (cleanUrl.includes("raw.githubusercontent.com")) { - // Convert raw URL to repo URL + // Convert raw URL to repo URL, extracting branch ref if present const parts = cleanUrl.replace("https://raw.githubusercontent.com/", "").split("/"); cleanUrl = `https://github.com/${parts[0]}/${parts[1]}`; + // parts[2] is the branch ref (e.g., "e0007-proactive-posture" or "main") + if (parts[2]) { + ref = parts[2]; + } } return `${cleanUrl}/archive/${ref}.zip`; From 6ddd34977177dd936b2efc28aa7b9540a0d49808 Mon Sep 17 00:00:00 2001 From: Klappy Date: Fri, 3 Apr 2026 16:10:41 +0000 Subject: [PATCH 05/13] =?UTF-8?q?Bump=20INDEX=5FVERSION=20to=202.3=20?= =?UTF-8?q?=E2=80=94=20cache=20bust=20for=20branch=20ref=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- workers/src/zip-baseline-fetcher.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/src/zip-baseline-fetcher.ts b/workers/src/zip-baseline-fetcher.ts index 48bba0c..10e7dfe 100644 --- a/workers/src/zip-baseline-fetcher.ts +++ b/workers/src/zip-baseline-fetcher.ts @@ -18,7 +18,7 @@ import { unzipSync } from "fflate"; // to the 
indexing pipeline (filters, fields, scoring) invalidate stale indexes. // Bump when indexing logic changes. Without this, a cached index built by // old code persists until the repo's commit SHA changes. -const INDEX_VERSION = "2.2"; // 2.2: full frontmatter indexing for metadata exposure (E0007) +const INDEX_VERSION = "2.3"; // 2.3: branch ref extraction fix + full frontmatter (E0007) export interface Env { BASELINE_URL: string; From 3a7000b5af9c4ee3ff2744e24f2ae6e60525e87c Mon Sep 17 00:00:00 2001 From: Klappy Date: Fri, 3 Apr 2026 16:15:11 +0000 Subject: [PATCH 06/13] v0.16.0: changelog, version bump, session journal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - CHANGELOG.md: 0.16.0 entry with all E0007 changes - package.json + workers/package.json: 0.15.1 → 0.16.0 - odd/ledger/journal/2026-04-03.md: session OLDC+H --- CHANGELOG.md | 22 ++++++++++++ odd/ledger/journal/2026-04-03.md | 62 ++++++++++++++++++++++++++++++++ package.json | 2 +- workers/package.json | 2 +- 4 files changed, 86 insertions(+), 2 deletions(-) create mode 100644 odd/ledger/journal/2026-04-03.md diff --git a/CHANGELOG.md b/CHANGELOG.md index e5ba8a6..ac1b855 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.16.0] - 2026-04-03 + +### Added + +- **Catalog temporal discovery** — New `sort_by`, `limit`, and `filter_epoch` parameters on `oddkit_catalog`. `sort_by: "date"` returns articles sorted newest-first with full frontmatter metadata. `filter_epoch` provides server-side deterministic filtering. Addresses the "what's new?" discoverability gap — no new tools added, extending catalog as the discovery tool. + +- **Full frontmatter indexing** — `IndexEntry` now stores complete parsed frontmatter on every document (previously cherry-picked 6 fields). Generic YAML parser replaces field-specific regex extraction. 
Enables `date`, `epoch`, `audience`, `tier`, `stability`, and all custom fields in metadata responses. + +- **Proactive tool descriptions (E0007)** — The orient, search, challenge, gate, validate, and preflight tool descriptions now end with a proactive usage hint (paraphrased): orient ("call at every context shift"), search ("search before claiming"), challenge ("challenge before encoding"), gate ("gate at every implicit transition"), validate ("validate before claiming done"), preflight ("preflight before every execution task"). The encode description was rewritten to state that encode does not persist its output and to introduce the OLDC+H vocabulary. + +- **Encode persistence warning** — Encode responses now include `persist_required: true` and `next_action` instructing the caller to save the output. Addresses the silent data loss pattern where operators assumed encode persisted. + +- **Orient OLDC+H instruction** — Orient responses now include a proactive posture instruction: "Track OLDC+H continuously throughout this session." + +### Fixed + +- **Branch ref extraction from canon_url** — `getZipUrl` was discarding the branch name from `raw.githubusercontent.com` URLs, always fetching `main.zip`. Branch-specific articles never appeared in canon_url overrides. Now correctly extracts `parts[2]` as the branch ref. + +### Changed + +- **Index version bumped to 2.3** — Reflects full frontmatter indexing, branch ref fix, and cache invalidation. + ## [0.15.1] - 2026-03-14 ### Added diff --git a/odd/ledger/journal/2026-04-03.md b/odd/ledger/journal/2026-04-03.md new file mode 100644 index 0000000..c1be5d6 --- /dev/null +++ b/odd/ledger/journal/2026-04-03.md @@ -0,0 +1,62 @@ +## 2026-04-03 Session — E0007 Implementation + +> Epistemic ledger entry for the session that implemented E0007 proactive posture changes in the oddkit Worker: catalog temporal discovery, full frontmatter indexing, proactive tool descriptions, encode persistence warning, and the branch ref extraction bug fix. + +### Session Context + +Continuation of E0007 epoch work. 
Governance articles were written first on the klappy.dev repo (PR #72), then implementation moved to the oddkit Worker codebase. The session produced catalog metadata exposure, proactive tool description rewrites, encode response changes, and a critical bug fix in branch ref handling. + +### Observations + +**O1: The frontmatter was already parsed — it was just discarded.** +`parseFrontmatter` in the Worker cherry-picked 6 specific fields via regex and threw away everything else. The `date`, `epoch`, `audience`, `tier`, and `stability` fields were parsed but never stored on `IndexEntry`. The data was there; the code didn't keep it. + +**O2: `getZipUrl` was silently fetching main for every branch override.** +The function received `raw.githubusercontent.com/owner/repo/branch` URLs, extracted `parts[0]` (owner) and `parts[1]` (repo), and discarded `parts[2]` (branch). Every `canon_url` branch override was downloading `main.zip`. Branch-specific articles never appeared. The bug was silent — the index looked plausible because baseline articles filled the gap. + +**O3: INDEX_VERSION is the cache invalidation mechanism — must bump on schema changes.** +The Worker caches indexes keyed to `INDEX_VERSION + commit SHA`. Changing what fields are stored on `IndexEntry` without bumping INDEX_VERSION means stale cached indexes (without the new fields) are served until the commit SHA changes. Bumped 2.1 → 2.2 → 2.3 across the session. + +### Learnings + +**L1: Deterministic work belongs server-side, not in the LLM.** +Sort and filter are deterministic operations — cheap and correct on the server, slow and error-prone in the LLM. Metadata exposure enables LLM judgment (synthesis, recommendation). Server-side sort/filter prevents the LLM from burning tokens on arithmetic. Both are required. + +**L2: Adding tools dilutes them all.** +New features should be params on existing tools, not new tools. Every MCP tool competes for attention in tool selection. 
Catalog was the natural home for temporal discovery because it's already the discovery tool. + +**L3: "Cache issue" is a hypothesis, not a diagnosis.** +When articles didn't appear after the first deploy, the assumption was cache timing. The operator correctly pushed back: "It could be a code issue." It was. The `getZipUrl` bug was the root cause — not cache propagation delay. + +### Decisions + +**D1: Catalog gets sort_by, limit, filter_epoch — no new tools.** +Temporal discovery is params on the existing catalog tool. Rationale: adding tools dilutes the set. + +**D2: Full frontmatter passthrough on IndexEntry.** +No cherry-picking. The `frontmatter` field stores the complete parsed YAML. Consumers decide what to use. + +**D3: Generic YAML parser replaces field-specific regex.** +`parseFrontmatter` rewritten to capture all top-level YAML fields automatically. No more adding a regex each time a new field is needed. + +**D4: Proactive session close should be a governance article.** +The operator's frustration at remembering to request journal/changelog/version bump is the E0007 signal. Written as `docs/oddkit/proactive/proactive-session-close.md`. + +### Constraints + +**C1: INDEX_VERSION must be bumped whenever IndexEntry schema changes.** +Stale cached indexes will serve old field shapes until the version key changes. + +**C2: Governance articles before code changes — always.** +This session followed the pattern: IMPL-catalog-recent.md was written and committed before any oddkit code was modified. + +### Handoffs + +**H1: oddkit PR #67 ready for review and merge.** +Branch: `e0007-proactive-posture`. All changes typecheck clean. Preview verified at `e0007-proactive-posture-oddkit.klappy.workers.dev`. + +**H2: klappy.dev PR #72 needs session close artifacts.** +Journal entry, version bump, proactive-session-close.md governance article. + +**H3: Phase 4 (A/B testing) is next after merge.** +Both PRs need merge before testing proactive behavior in fresh sessions. 
diff --git a/package.json b/package.json index 8329c67..27f2400 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "oddkit", - "version": "0.15.1", + "version": "0.16.0", "description": "Agent-first CLI for ODD-governed repos. Epistemic terrain rendering with portable baseline.", "type": "module", "bin": { diff --git a/workers/package.json b/workers/package.json index 2339146..394effa 100644 --- a/workers/package.json +++ b/workers/package.json @@ -1,6 +1,6 @@ { "name": "oddkit-mcp-worker", - "version": "0.15.1", + "version": "0.16.0", "private": true, "type": "module", "scripts": { From d301b01c0f6698bcc4816a36b3e70452222312b2 Mon Sep 17 00:00:00 2001 From: Klappy Date: Fri, 3 Apr 2026 16:20:21 +0000 Subject: [PATCH 07/13] =?UTF-8?q?Commit=20hygiene=20in=20oddkit=20?= =?UTF-8?q?=E2=80=94=20orient=20instruction=20+=20validate=20gate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Orient: proactive posture now includes commit hygiene instruction — journal, changelog, version bump at every commit/PR/merge. Validate: when completion claims mention git lifecycle events, checks for journal, changelog, and version bump. Surfaces gaps before merge. Every agent that calls oddkit_orient or oddkit_validate gets this — no memory required. --- CHANGELOG.md | 4 +++- workers/src/orchestrate.ts | 14 +++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ac1b855..8ef29f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Encode persistence warning** — Encode responses now include `persist_required: true` and `next_action` instructing the caller to save the output. Addresses the silent data loss pattern where operators assumed encode persisted. 
-- **Orient OLDC+H instruction** — Orient responses now include a proactive posture instruction: "Track OLDC+H continuously throughout this session." +- **Orient OLDC+H instruction** — Orient responses now include a proactive posture instruction: "Track OLDC+H continuously throughout this session." Includes commit hygiene gate: journal, changelog, version bump at every commit, PR, and merge. + +- **Validate commit hygiene gate** — When completion claims mention git lifecycle events (commit, PR, merge, ship, deploy, release), validate checks for journal entry, changelog update, and version bump. Surfaces gaps before merge — the most critical gate. ### Fixed diff --git a/workers/src/orchestrate.ts b/workers/src/orchestrate.ts index f28c126..f5229b2 100644 --- a/workers/src/orchestrate.ts +++ b/workers/src/orchestrate.ts @@ -837,6 +837,18 @@ async function runValidate(message: string, state?: OddkitState): Promise 0) { return { action: "validate", @@ -1152,7 +1164,7 @@ async function runOrientAction( lines.push(""); } lines.push(`Orientation: ${mode} mode (${confidence} confidence)`, ""); - lines.push("Proactive posture: Track OLDC+H continuously throughout this session. Encode what the user shares and what you do at every exchange. Resurface the creed when confidence outpaces evidence. Persist to project storage at natural breakpoints.", ""); + lines.push("Proactive posture: Track OLDC+H continuously throughout this session. Encode what the user shares and what you do at every exchange. Resurface the creed when confidence outpaces evidence. Persist to project storage at natural breakpoints. COMMIT HYGIENE: At every commit, before every PR, and before every merge — produce journal entry, changelog update, and version bump. Before merge is the most critical gate. 
Do not wait to be asked.", ""); if (assumptions.length > 0) { lines.push("Assumptions detected:"); for (const a of assumptions.slice(0, 3)) lines.push(` - ${a}`); From c8f4016647a36d4292e557702f52ef0450ba4c7d Mon Sep 17 00:00:00 2001 From: Klappy Date: Fri, 3 Apr 2026 16:23:04 +0000 Subject: [PATCH 08/13] =?UTF-8?q?Fix:=20commit=20hygiene=20=E2=86=92=20art?= =?UTF-8?q?ifact=20provenance=20=E2=80=94=20domain-agnostic?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Orient instruction and validate gate now use domain-agnostic language. 'Artifact provenance' replaces 'commit hygiene.' Triggers: milestone, review, finalization — not git-specific events. Applies to code, writing, planning, or any domain that produces durable artifacts. --- CHANGELOG.md | 4 ++-- workers/src/orchestrate.ts | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ef29f8..ee8a959 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,9 +19,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Encode persistence warning** — Encode responses now include `persist_required: true` and `next_action` instructing the caller to save the output. Addresses the silent data loss pattern where operators assumed encode persisted. -- **Orient OLDC+H instruction** — Orient responses now include a proactive posture instruction: "Track OLDC+H continuously throughout this session." Includes commit hygiene gate: journal, changelog, version bump at every commit, PR, and merge. +- **Orient OLDC+H instruction** — Orient responses now include a proactive posture instruction: "Track OLDC+H continuously throughout this session." Includes artifact provenance gate: capture what happened (journal), what changed (summary), and what version — at every milestone, before every review, and before finalizing. 
-- **Validate commit hygiene gate** — When completion claims mention git lifecycle events (commit, PR, merge, ship, deploy, release), validate checks for journal entry, changelog update, and version bump. Surfaces gaps before merge — the most critical gate. +- **Validate artifact provenance gate** — When completion claims mention finalizing work (commit, merge, publish, submit, deliver, etc.), validate checks for session capture (OLDC+H), change summary, and version tracking. Domain-agnostic — applies to code, writing, planning, or any domain. ### Fixed diff --git a/workers/src/orchestrate.ts b/workers/src/orchestrate.ts index f5229b2..5792620 100644 --- a/workers/src/orchestrate.ts +++ b/workers/src/orchestrate.ts @@ -837,16 +837,16 @@ async function runValidate(message: string, state?: OddkitState): Promise 0) { @@ -1164,7 +1164,7 @@ async function runOrientAction( lines.push(""); } lines.push(`Orientation: ${mode} mode (${confidence} confidence)`, ""); - lines.push("Proactive posture: Track OLDC+H continuously throughout this session. Encode what the user shares and what you do at every exchange. Resurface the creed when confidence outpaces evidence. Persist to project storage at natural breakpoints. COMMIT HYGIENE: At every commit, before every PR, and before every merge — produce journal entry, changelog update, and version bump. Before merge is the most critical gate. Do not wait to be asked.", ""); + lines.push("Proactive posture: Track OLDC+H continuously throughout this session. Encode what the user shares and what you do at every exchange. Resurface the creed when confidence outpaces evidence. Persist to project storage at natural breakpoints. ARTIFACT PROVENANCE: When work produces durable artifacts, capture what happened (journal), what changed (changelog/summary), and what version (if applicable). Do this at every milestone, before every review, and before finalizing — not at session end. 
Do not wait to be asked.", ""); if (assumptions.length > 0) { lines.push("Assumptions detected:"); for (const a of assumptions.slice(0, 3)) lines.push(` - ${a}`); From 0b751408521d64c600b337e686af6b1287ad3cca Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 3 Apr 2026 16:40:44 +0000 Subject: [PATCH 09/13] fix: cache key mismatch, lock file sync, unified YAML parser, numeric date sort safety - Extract branch ref for getLatestCommitSha so cache key matches ZIP content - Regenerate package-lock.json to match package.json version 0.16.0 - Unify YAML frontmatter parsing into single shared parser in zip-baseline-fetcher - Use String() coercion in catalog date sort to prevent TypeError on numeric dates --- workers/package-lock.json | 4 +- workers/src/orchestrate.ts | 256 +--------------------------- workers/src/zip-baseline-fetcher.ts | 224 +++++++++++++++++++----- 3 files changed, 187 insertions(+), 297 deletions(-) diff --git a/workers/package-lock.json b/workers/package-lock.json index d9ab280..d5ca82b 100644 --- a/workers/package-lock.json +++ b/workers/package-lock.json @@ -1,12 +1,12 @@ { "name": "oddkit-mcp-worker", - "version": "0.15.1", + "version": "0.16.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "oddkit-mcp-worker", - "version": "0.15.1", + "version": "0.16.0", "dependencies": { "agents": "^0.4.1", "fflate": "^0.8.2", diff --git a/workers/src/orchestrate.ts b/workers/src/orchestrate.ts index 5792620..59059d6 100644 --- a/workers/src/orchestrate.ts +++ b/workers/src/orchestrate.ts @@ -11,6 +11,7 @@ import { ZipBaselineFetcher, extractSection, + parseFullFrontmatter, type Env, type BaselineIndex, type IndexEntry, @@ -292,257 +293,6 @@ function scoreEntries(entries: IndexEntry[], query: string): Array b.score - a.score); } -// ────────────────────────────────────────────────────────────────────────────── -// Full frontmatter parser for include_metadata support -// Handles common YAML patterns used in klappy.dev frontmatter 
without -// requiring a full YAML library (keeps worker bundle small). -// ────────────────────────────────────────────────────────────────────────────── - -function parseFullFrontmatter(content: string): Record | null { - const match = content.match(/^---\n([\s\S]*?)\n---/); - if (!match) return null; - - const yaml = match[1]; - const result: Record = {}; - const lines = yaml.split("\n"); - let i = 0; - - while (i < lines.length) { - const line = lines[i]; - const trimmed = line.trim(); - - // Skip empty lines and comments - if (!trimmed || trimmed.startsWith("#")) { - i++; - continue; - } - - const colonIdx = trimmed.indexOf(":"); - if (colonIdx === -1) { - i++; - continue; - } - - const key = trimmed.slice(0, colonIdx).trim(); - const rawValue = trimmed.slice(colonIdx + 1).trim(); - - if (!key) { - i++; - continue; - } - - if (!rawValue) { - // Value is on next lines — collect indented block - i++; - const items: string[] = []; - while (i < lines.length) { - const nextLine = lines[i]; - const nextTrimmed = nextLine.trim(); - if (!nextTrimmed) { - i++; - continue; - } - // Stop if not indented (new top-level key or comment) - if (!nextLine.startsWith(" ") && !nextLine.startsWith("\t")) break; - if (nextTrimmed.startsWith("#")) { - i++; - continue; - } - items.push(nextLine); - i++; - } - - if (items.length > 0) { - if (items[0].trim().startsWith("- ")) { - result[key] = parseYamlList(items); - } else { - result[key] = parseYamlObject(items); - } - } - } else if (rawValue.startsWith("[")) { - // Inline array - result[key] = parseInlineArray(rawValue); - i++; - } else { - // Scalar value - result[key] = parseScalarValue(rawValue); - i++; - } - } - - return Object.keys(result).length > 0 ? 
result : null; -} - -function parseYamlList(lines: string[]): unknown[] { - const items: unknown[] = []; - let i = 0; - - while (i < lines.length) { - const trimmed = lines[i].trim(); - if (!trimmed.startsWith("- ")) { - i++; - continue; - } - - const value = trimmed.slice(2).trim(); - - // Check if next lines are indented properties (object in list) - const objectProps: string[] = []; - let j = i + 1; - while (j < lines.length) { - const nextLine = lines[j]; - const nextTrimmed = nextLine.trim(); - if (!nextTrimmed || nextTrimmed.startsWith("- ")) break; - // Must be more deeply indented than the list item - const itemIndent = lines[i].search(/\S/); - const nextIndent = nextLine.search(/\S/); - if (nextIndent <= itemIndent) break; - objectProps.push(nextTrimmed); - j++; - } - - if (objectProps.length > 0) { - // This list item is an object — first property may be in the `- key: val` line - const obj: Record = {}; - // Parse the first line (e.g., "uri: klappy://...") - const firstColonIdx = value.indexOf(":"); - if (firstColonIdx !== -1) { - const k = value.slice(0, firstColonIdx).trim(); - const v = value.slice(firstColonIdx + 1).trim(); - if (k) obj[k] = parseScalarValue(v); - } - // Parse remaining properties - for (const prop of objectProps) { - const propColonIdx = prop.indexOf(":"); - if (propColonIdx !== -1) { - const k = prop.slice(0, propColonIdx).trim(); - const v = prop.slice(propColonIdx + 1).trim(); - if (k) obj[k] = parseScalarValue(v); - } - } - items.push(obj); - i = j; - } else { - items.push(parseScalarValue(value)); - i++; - } - } - - return items; -} - -function parseYamlObject(lines: string[]): Record { - const obj: Record = {}; - if (lines.length === 0) return obj; - - // Determine the base indentation level from the first non-empty line - const baseIndent = lines[0].search(/\S/); - let i = 0; - - while (i < lines.length) { - const line = lines[i]; - const trimmed = line.trim(); - - // Skip empty lines and comments - if (!trimmed || 
trimmed.startsWith("#")) { - i++; - continue; - } - - // Only process lines at the base indentation level - const currentIndent = line.search(/\S/); - if (currentIndent > baseIndent) { - // Stray deeper-indented line without a parent key — skip - i++; - continue; - } - if (currentIndent < baseIndent) { - // De-indented past our block — stop - break; - } - - const colonIdx = trimmed.indexOf(":"); - if (colonIdx === -1) { - i++; - continue; - } - - const key = trimmed.slice(0, colonIdx).trim(); - const rawValue = trimmed.slice(colonIdx + 1).trim(); - - if (!key) { - i++; - continue; - } - - if (!rawValue) { - // Collect deeper-indented block - i++; - const nested: string[] = []; - while (i < lines.length) { - const nextLine = lines[i]; - const nextTrimmed = nextLine.trim(); - if (!nextTrimmed) { - i++; - continue; - } - const nextIndent = nextLine.search(/\S/); - if (nextIndent <= baseIndent) break; - if (nextTrimmed.startsWith("#")) { - i++; - continue; - } - nested.push(nextLine); - i++; - } - - if (nested.length > 0) { - if (nested[0].trim().startsWith("- ")) { - obj[key] = parseYamlList(nested); - } else { - obj[key] = parseYamlObject(nested); - } - } - } else if (rawValue.startsWith("[")) { - obj[key] = parseInlineArray(rawValue); - i++; - } else { - obj[key] = parseScalarValue(rawValue); - i++; - } - } - - return obj; -} - -function parseInlineArray(raw: string): unknown[] { - const inner = raw.slice(1, raw.lastIndexOf("]")).trim(); - if (!inner) return []; - return inner.split(",").map((item) => parseScalarValue(item.trim())); -} - -function parseScalarValue(raw: string): unknown { - if (!raw) return ""; - - // Remove surrounding quotes - if ((raw.startsWith('"') && raw.endsWith('"')) || (raw.startsWith("'") && raw.endsWith("'"))) { - return raw.slice(1, -1); - } - - // Booleans - if (raw === "true") return true; - if (raw === "false") return false; - - // Null - if (raw === "null" || raw === "~") return null; - - // Numbers - if (/^-?\d+$/.test(raw)) return 
parseInt(raw, 10); - if (/^-?\d+\.\d+$/.test(raw)) return parseFloat(raw); - - return raw; -} - // ────────────────────────────────────────────────────────────────────────────── // Individual action handlers // ────────────────────────────────────────────────────────────────────────────── @@ -921,8 +671,8 @@ async function runCatalog( // Server-side date sort — deterministic, cheap, correct candidates.sort((a, b) => { - const da = ((a.frontmatter as Record)?.date as string) || ""; - const db = ((b.frontmatter as Record)?.date as string) || ""; + const da = String((a.frontmatter as Record)?.date ?? ""); + const db = String((b.frontmatter as Record)?.date ?? ""); if (db && !da) return 1; // docs without dates sort last if (da && !db) return -1; return db.localeCompare(da); // newest first diff --git a/workers/src/zip-baseline-fetcher.ts b/workers/src/zip-baseline-fetcher.ts index 10e7dfe..d217a61 100644 --- a/workers/src/zip-baseline-fetcher.ts +++ b/workers/src/zip-baseline-fetcher.ts @@ -80,57 +80,181 @@ interface FrontmatterResult { // No staleness window. No manual flush for correctness. // ────────────────────────────────────────────────────────────────────────────── -/** - * Parse YAML frontmatter from markdown content - */ -function parseFrontmatter(content: string): FrontmatterResult { - const match = content.match(/^---\n([\s\S]*?)\n---/); - if (!match) return {}; - - const yaml = match[1]; - const result: FrontmatterResult = {}; +// ────────────────────────────────────────────────────────────────────────────── +// Shared YAML frontmatter parser — used at index time AND request time so that +// metadata is consistent across all APIs (catalog, search, get). 
+// ────────────────────────────────────────────────────────────────────────────── - // Parse all top-level YAML key-value pairs generically - for (const line of yaml.split("\n")) { - // Skip empty lines, comments, and continuation lines - if (!line.trim() || line.trim().startsWith("#") || line.startsWith(" ") || line.startsWith("\t")) continue; +function fmParseScalarValue(raw: string): unknown { + if (!raw) return ""; + if ((raw.startsWith('"') && raw.endsWith('"')) || (raw.startsWith("'") && raw.endsWith("'"))) { + return raw.slice(1, -1); + } + if (raw === "true") return true; + if (raw === "false") return false; + if (raw === "null" || raw === "~") return null; + if (/^-?\d+$/.test(raw)) return parseInt(raw, 10); + if (/^-?\d+\.\d+$/.test(raw)) return parseFloat(raw); + return raw; +} - const kvMatch = line.match(/^([a-zA-Z_][a-zA-Z0-9_-]*)\s*:\s*(.*)/); - if (!kvMatch) continue; +function fmParseInlineArray(raw: string): unknown[] { + const inner = raw.slice(1, raw.lastIndexOf("]")).trim(); + if (!inner) return []; + return inner.split(",").map((item) => fmParseScalarValue(item.trim())); +} - const key = kvMatch[1]; - let value = kvMatch[2].trim(); +function fmParseYamlList(lines: string[]): unknown[] { + const items: unknown[] = []; + let i = 0; + while (i < lines.length) { + const trimmed = lines[i].trim(); + if (!trimmed.startsWith("- ")) { i++; continue; } + const value = trimmed.slice(2).trim(); + const objectProps: string[] = []; + let j = i + 1; + while (j < lines.length) { + const nextLine = lines[j]; + const nextTrimmed = nextLine.trim(); + if (!nextTrimmed || nextTrimmed.startsWith("- ")) break; + const itemIndent = lines[i].search(/\S/); + const nextIndent = nextLine.search(/\S/); + if (nextIndent <= itemIndent) break; + objectProps.push(nextTrimmed); + j++; + } + if (objectProps.length > 0) { + const obj: Record = {}; + const firstColonIdx = value.indexOf(":"); + if (firstColonIdx !== -1) { + const k = value.slice(0, firstColonIdx).trim(); + const 
v = value.slice(firstColonIdx + 1).trim(); + if (k) obj[k] = fmParseScalarValue(v); + } + for (const prop of objectProps) { + const propColonIdx = prop.indexOf(":"); + if (propColonIdx !== -1) { + const k = prop.slice(0, propColonIdx).trim(); + const v = prop.slice(propColonIdx + 1).trim(); + if (k) obj[k] = fmParseScalarValue(v); + } + } + items.push(obj); + i = j; + } else { + items.push(fmParseScalarValue(value)); + i++; + } + } + return items; +} - // Inline array: tags: ["a", "b", "c"] - if (value.startsWith("[") && value.endsWith("]")) { - result[key] = value - .slice(1, -1) - .split(",") - .map((t) => t.trim().replace(/["']/g, "")) - .filter(Boolean); - continue; +function fmParseYamlObject(lines: string[]): Record { + const obj: Record = {}; + if (lines.length === 0) return obj; + const baseIndent = lines[0].search(/\S/); + let i = 0; + while (i < lines.length) { + const line = lines[i]; + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith("#")) { i++; continue; } + const currentIndent = line.search(/\S/); + if (currentIndent > baseIndent) { i++; continue; } + if (currentIndent < baseIndent) break; + const colonIdx = trimmed.indexOf(":"); + if (colonIdx === -1) { i++; continue; } + const key = trimmed.slice(0, colonIdx).trim(); + const rawValue = trimmed.slice(colonIdx + 1).trim(); + if (!key) { i++; continue; } + if (!rawValue) { + i++; + const nested: string[] = []; + while (i < lines.length) { + const nextLine = lines[i]; + const nextTrimmed = nextLine.trim(); + if (!nextTrimmed) { i++; continue; } + const nextIndent = nextLine.search(/\S/); + if (nextIndent <= baseIndent) break; + if (nextTrimmed.startsWith("#")) { i++; continue; } + nested.push(nextLine); + i++; + } + if (nested.length > 0) { + if (nested[0].trim().startsWith("- ")) { + obj[key] = fmParseYamlList(nested); + } else { + obj[key] = fmParseYamlObject(nested); + } + } + } else if (rawValue.startsWith("[")) { + obj[key] = fmParseInlineArray(rawValue); + i++; + } else { + 
obj[key] = fmParseScalarValue(rawValue); + i++; } + } + return obj; +} - // Boolean - if (value === "true") { result[key] = true; continue; } - if (value === "false") { result[key] = false; continue; } +export function parseFullFrontmatter(content: string): Record | null { + const match = content.match(/^---\n([\s\S]*?)\n---/); + if (!match) return null; - // Numeric (integers and simple decimals) - if (/^-?\d+(\.\d+)?$/.test(value)) { - result[key] = Number(value); - continue; + const yaml = match[1]; + const result: Record = {}; + const lines = yaml.split("\n"); + let i = 0; + + while (i < lines.length) { + const line = lines[i]; + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith("#")) { i++; continue; } + const colonIdx = trimmed.indexOf(":"); + if (colonIdx === -1) { i++; continue; } + const key = trimmed.slice(0, colonIdx).trim(); + const rawValue = trimmed.slice(colonIdx + 1).trim(); + if (!key) { i++; continue; } + if (!rawValue) { + i++; + const items: string[] = []; + while (i < lines.length) { + const nextLine = lines[i]; + const nextTrimmed = nextLine.trim(); + if (!nextTrimmed) { i++; continue; } + if (!nextLine.startsWith(" ") && !nextLine.startsWith("\t")) break; + if (nextTrimmed.startsWith("#")) { i++; continue; } + items.push(nextLine); + i++; + } + if (items.length > 0) { + if (items[0].trim().startsWith("- ")) { + result[key] = fmParseYamlList(items); + } else { + result[key] = fmParseYamlObject(items); + } + } + } else if (rawValue.startsWith("[")) { + result[key] = fmParseInlineArray(rawValue); + i++; + } else { + result[key] = fmParseScalarValue(rawValue); + i++; } + } - // Null - if (value === "null" || value === "~" || value === "") { continue; } + return Object.keys(result).length > 0 ? 
result : null; +} - // String — strip surrounding quotes - if ((value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"))) { - value = value.slice(1, -1); - } +/** + * Parse YAML frontmatter for index-time use, returning FrontmatterResult. + * Delegates to the full parser for consistency across all APIs. + */ +function parseFrontmatter(content: string): FrontmatterResult { + const full = parseFullFrontmatter(content); + if (!full) return {}; - result[key] = value; - } + const result: FrontmatterResult = { ...full }; if (typeof result.tags === "string") { result.tags = [result.tags]; @@ -224,6 +348,20 @@ function hashContent(content: string): string { return Math.abs(hash).toString(36); } +/** + * Extract branch ref from a GitHub URL. + * raw.githubusercontent.com URLs encode the branch as the third path segment; + * all other URLs default to "main". + */ +function extractBranchRef(url: string): string { + const cleanUrl = url.replace(/\.git$/, "").replace(/\/$/, ""); + if (cleanUrl.includes("raw.githubusercontent.com")) { + const parts = cleanUrl.replace("https://raw.githubusercontent.com/", "").split("/"); + if (parts[2]) return parts[2]; + } + return "main"; +} + /** * Convert GitHub repo URL to ZIP download URL */ @@ -602,7 +740,8 @@ export class ZipBaselineFetcher { // Step 1: Resolve current commit SHAs (lightweight) const baselineSha = await this.getLatestCommitSha(baselineRepoUrl); - const canonSha = canonUrl ? await this.getLatestCommitSha(canonUrl) : undefined; + const canonRef = canonUrl ? extractBranchRef(canonUrl) : undefined; + const canonSha = canonUrl ? await this.getLatestCommitSha(canonUrl, canonRef) : undefined; // Step 2: Content-addressed lookup — SHA + version is the cache key. 
// Including INDEX_VERSION ensures code changes invalidate stale indexes @@ -686,7 +825,8 @@ export class ZipBaselineFetcher { const sources: Array<{ url: string; repoKey: string; sha: string }> = []; if (canonUrl) { - const canonSha = await this.getLatestCommitSha(canonUrl); + const canonRef = extractBranchRef(canonUrl); + const canonSha = await this.getLatestCommitSha(canonUrl, canonRef); sources.push({ url: canonUrl, repoKey: getCacheKey(canonUrl), From ea0e8411c2c138ca696fde2b978f37b89fe08140 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 3 Apr 2026 16:50:36 +0000 Subject: [PATCH 10/13] Fix epoch filter strict equality mismatch with numeric frontmatter values fmParseScalarValue converts numeric-looking YAML values to JS numbers, but filter_epoch from the zod schema is always a string. Wrap the frontmatter epoch in String() so strict equality works regardless of the parsed type. --- workers/src/orchestrate.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/src/orchestrate.ts b/workers/src/orchestrate.ts index 59059d6..f07b4c0 100644 --- a/workers/src/orchestrate.ts +++ b/workers/src/orchestrate.ts @@ -665,7 +665,7 @@ async function runCatalog( // Server-side epoch filter — deterministic, cheap, correct if (filter_epoch) { candidates = candidates.filter( - (e) => e.frontmatter && (e.frontmatter as Record).epoch === filter_epoch, + (e) => e.frontmatter && String((e.frontmatter as Record).epoch) === filter_epoch, ); } From 3a98aaac7f3597a1a46b9aea2e9ceb07a5af9ce7 Mon Sep 17 00:00:00 2001 From: Klappy Date: Fri, 3 Apr 2026 16:59:59 +0000 Subject: [PATCH 11/13] =?UTF-8?q?Fix:=20SSE=20test=20timeout=2030s=20?= =?UTF-8?q?=E2=86=92=205s=20=E2=80=94=20only=20needs=20headers,=20not=20th?= =?UTF-8?q?e=20stream?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SSE connections are long-lived by design. curl waited the full 30s max-time for the stream to close (it never does). 
5s is plenty to receive the content-type header. Prevents flaky follow-on tests. --- tests/cloudflare-production.test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudflare-production.test.sh b/tests/cloudflare-production.test.sh index bdb4794..49e2957 100755 --- a/tests/cloudflare-production.test.sh +++ b/tests/cloudflare-production.test.sh @@ -190,7 +190,7 @@ fi # Test 4c: GET /mcp with SSE Accept header returns stream echo "" echo "Test 4c: GET /mcp with SSE Accept returns text/event-stream" -CONTENT_TYPE=$(curl -sf --max-time 30 "$WORKER_URL/mcp" -X GET \ +CONTENT_TYPE=$(curl -sf --max-time 5 "$WORKER_URL/mcp" -X GET \ -H "Accept: text/event-stream" \ -D - -o /dev/null 2>&1 | grep -i "content-type" | head -1 || true) if echo "$CONTENT_TYPE" | grep -qi "text/event-stream"; then From a0062f161c8ec4cb8e43b5e2c3e4cf7d0ece40b8 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 3 Apr 2026 17:08:44 +0000 Subject: [PATCH 12/13] fix: coerce tag array elements to strings in parseFrontmatter fmParseScalarValue can return numbers, booleans, or null for tag values. Downstream scoreEntries calls toLowerCase on each tag, which would throw a TypeError on non-string values. Ensure all array elements are stringified. 
--- workers/src/zip-baseline-fetcher.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/workers/src/zip-baseline-fetcher.ts b/workers/src/zip-baseline-fetcher.ts index d217a61..e764ae1 100644 --- a/workers/src/zip-baseline-fetcher.ts +++ b/workers/src/zip-baseline-fetcher.ts @@ -258,6 +258,8 @@ function parseFrontmatter(content: string): FrontmatterResult { if (typeof result.tags === "string") { result.tags = [result.tags]; + } else if (Array.isArray(result.tags)) { + result.tags = result.tags.map((t) => String(t)); } return result; From f6de07593b86d0477f31bc8374c542464eff58ca Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 3 Apr 2026 17:18:07 +0000 Subject: [PATCH 13/13] fix: remove done/complete from finalization regex and coerce frontmatter scalars to strings - Remove done and complete from isFinalization regex in runValidate so normal validate calls using the documented done: prefix are not blocked by the artifact provenance gate - Coerce title, intent, authority_band, uri, and exposure to String in parseFrontmatter to prevent TypeError when fmParseScalarValue returns non-string types for fields that downstream code expects as strings --- workers/src/orchestrate.ts | 2 +- workers/src/zip-baseline-fetcher.ts | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/workers/src/orchestrate.ts b/workers/src/orchestrate.ts index f07b4c0..57bd20c 100644 --- a/workers/src/orchestrate.ts +++ b/workers/src/orchestrate.ts @@ -589,7 +589,7 @@ async function runValidate(message: string, state?: OddkitState): Promise)[key] = String(result[key]); + } + } + if (typeof result.tags === "string") { result.tags = [result.tags]; } else if (Array.isArray(result.tags)) {