diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f50f38..4abaf49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.19.0] - 2026-04-20 + +### Added + +- **`governance_source` on `oddkit_challenge` envelope** — Challenge response `result` now declares which tier served its governance vocabulary: `"knowledge_base"` (all four governance surfaces parsed from canon) or `"minimal"` (one or more surfaces fell through to hardcoded defaults). Strict aggregation rule: any helper falling through to minimal makes the aggregate `"minimal"`. Two-tier cascade today, not three — `workers/baseline/` is not yet shipped (the bundled tier from `canon/constraints/core-governance-baseline` is a contract aspiration, not in-repo code). When the bundled tier ships later, the union expands additively to include `"bundled"` without breaking consumers. + +- **`governance_uris` (plural array) on `oddkit_challenge` envelope** — Challenge reads four peer governance documents (`odd/challenge/base-prerequisites`, `odd/challenge-types/`, `odd/challenge/normative-vocabulary`, `odd/challenge/stakes-calibration`); the envelope now surfaces all four URIs in alphabetical order by path-tail. **This is an intentional shape divergence from `oddkit_encode`'s singular `governance_uri`** — encode's encoding-type docs sit under a single canonical umbrella (`canon/definitions/dolcheo-vocabulary`), but challenge's four files are peers with no governing hierarchy, so a single anchor would misrepresent where `base-prerequisites` and `normative-vocabulary` live. Consumers reading both tools must handle both field names. A consumer that prefers a singular anchor can read `governance_uris[0]` — alphabetical ordering makes this stable. 
+ +- **`debug.knowledge_base_url` on `oddkit_challenge` envelope** — Challenge now echoes the caller's `knowledge_base_url` override in the debug envelope, matching encode's pattern from 0.18.0. Helps callers verify their override was threaded through, especially when pointing at private or custom canon repos. + +### Changed + +- **`oddkit_challenge` four governance helpers return `{<domain-noun>, source}` tuples** — `discoverChallengeTypes` → `{types, source}`, `fetchBasePrerequisites` → `{prerequisites, source}`, `fetchNormativeVocabulary` → `{vocabulary, source}`, `fetchStakesCalibration` → `{calibration, source}`. Per-helper domain-noun field names preserve readability at the call site; the `source` flag feeds the aggregate envelope signal. Internal refactor; no input-shape change for callers. + +### Fixed + +- **0.17.0 release note correction: `governance_source` on challenge.** The 0.17.0 entry for "`governance_source` on refactored tool envelopes" claimed challenge, encode, and telemetry_policy all declared the tier signal. In practice only telemetry_policy did at that HEAD. 0.18.0 retrofitted encode. This release retrofits challenge and closes the last gap in the original 0.17.0 overstatement. + +### Known limitations + +- **Challenge does not yet implement strict-mode at the index layer** — Same limitation documented in 0.18.0 for encode, inherited through the shared `KnowledgeBaseFetcher.getIndex` merge behavior. Passing `knowledge_base_url` to `oddkit_challenge` echoes the override in `debug.knowledge_base_url` and honors canon overrides when the target repo has challenge-type docs, but `getIndex` merges baseline entries by design (`arbitrateEntries`: canon overrides baseline, baseline is the floor). A custom `knowledge_base_url` pointing at a repo without challenge docs will still return `governance_source: "knowledge_base"` via the default baseline index rather than falling through to `"minimal"`. 
Strict-mode on `getIndex` remains a tracked follow-up for the P1.3 sweep. + ## [0.18.0] - 2026-04-19 ### Added diff --git a/package.json b/package.json index 3978c9f..061a9af 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "oddkit", - "version": "0.18.0", + "version": "0.19.0", "description": "Agent-first CLI for ODD-governed repos. Epistemic terrain rendering with portable baseline.", "type": "module", "bin": { diff --git a/workers/package.json b/workers/package.json index ac9c718..a1e814c 100644 --- a/workers/package.json +++ b/workers/package.json @@ -1,6 +1,6 @@ { "name": "oddkit-mcp-worker", - "version": "0.18.0", + "version": "0.19.0", "private": true, "type": "module", "scripts": { diff --git a/workers/src/index.ts b/workers/src/index.ts index 4e68418..4a503cb 100644 --- a/workers/src/index.ts +++ b/workers/src/index.ts @@ -277,7 +277,7 @@ Use when: }, { name: "oddkit_challenge", - description: "Pressure-test a claim, assumption, or proposal against canon constraints. Surfaces tensions, missing evidence, and contradictions. Challenge proactively before encoding consequential decisions.", + description: "Pressure-test a claim, assumption, or proposal against canon constraints. Surfaces tensions, missing evidence, and contradictions. Reads four peer governance surfaces at runtime — challenge-types (detection patterns + questions), base-prerequisites (universal checks), normative-vocabulary (directive signal and detection noise), and stakes-calibration (9-mode proportional pressure) — and declares `governance_source` in the response envelope (`knowledge_base` when all four resolved from canon, `minimal` when any fell through to hardcoded defaults). 
Challenge proactively before encoding consequential decisions.", action: "challenge", schema: { input: z.string().describe("A claim, assumption, or proposal to challenge."), diff --git a/workers/src/orchestrate.ts b/workers/src/orchestrate.ts index ade3bb6..fd88fef 100644 --- a/workers/src/orchestrate.ts +++ b/workers/src/orchestrate.ts @@ -126,14 +126,18 @@ interface StakesCalibration { let cachedChallengeTypes: ChallengeTypeDef[] | null = null; let cachedChallengeTypesKnowledgeBaseUrl: string | undefined = undefined; +let cachedChallengeTypesSource: "knowledge_base" | "minimal" = "minimal"; let cachedChallengeTypeIndex: BM25Index | null = null; let cachedChallengeTypeIndexKnowledgeBaseUrl: string | undefined = undefined; let cachedBasePrerequisites: BasePrerequisite[] | null = null; let cachedBasePrerequisitesKnowledgeBaseUrl: string | undefined = undefined; +let cachedBasePrerequisitesSource: "knowledge_base" | "minimal" = "minimal"; let cachedNormativeVocabulary: NormativeVocabulary | null = null; let cachedNormativeVocabularyKnowledgeBaseUrl: string | undefined = undefined; +let cachedNormativeVocabularySource: "knowledge_base" | "minimal" = "minimal"; let cachedStakesCalibration: StakesCalibration | null = null; let cachedStakesCalibrationKnowledgeBaseUrl: string | undefined = undefined; +let cachedStakesCalibrationSource: "knowledge_base" | "minimal" = "minimal"; export interface UnifiedParams { action: string; @@ -442,8 +446,10 @@ async function discoverEncodingTypes( async function discoverChallengeTypes( fetcher: KnowledgeBaseFetcher, knowledgeBaseUrl?: string, -): Promise { - if (cachedChallengeTypes && cachedChallengeTypesKnowledgeBaseUrl === knowledgeBaseUrl) return cachedChallengeTypes; +): Promise<{ types: ChallengeTypeDef[]; source: "knowledge_base" | "minimal" }> { + if (cachedChallengeTypes && cachedChallengeTypesKnowledgeBaseUrl === knowledgeBaseUrl) { + return { types: cachedChallengeTypes, source: cachedChallengeTypesSource }; + } const index = 
await fetcher.getIndex(knowledgeBaseUrl); const typeArticles = index.entries.filter( @@ -564,11 +570,16 @@ async function discoverChallengeTypes( cachedChallengeTypes = types; cachedChallengeTypesKnowledgeBaseUrl = knowledgeBaseUrl; + // Source classification per PRD D3: types.length > 0 from canon = "knowledge_base"; + // zero docs parsed = "minimal" (challenge preserves current hollow-response behavior + // rather than inventing a built-in fallback registry — see PRD D7). + const source: "knowledge_base" | "minimal" = types.length > 0 ? "knowledge_base" : "minimal"; + cachedChallengeTypesSource = source; // Index build deferred — needs vocab.stopWords from fetchNormativeVocabulary, // assembled lazily by getOrBuildChallengeTypeIndex below. Both types and the // index are deterministic functions of knowledgeBaseUrl, so caching by knowledgeBaseUrl // remains safe. - return types; + return { types, source }; } /** Lazily build (or return cached) per-knowledgeBaseUrl BM25 index over the per-type @@ -598,9 +609,9 @@ function getOrBuildChallengeTypeIndex( async function fetchBasePrerequisites( fetcher: KnowledgeBaseFetcher, knowledgeBaseUrl?: string, -): Promise { +): Promise<{ prerequisites: BasePrerequisite[]; source: "knowledge_base" | "minimal" }> { if (cachedBasePrerequisites && cachedBasePrerequisitesKnowledgeBaseUrl === knowledgeBaseUrl) - return cachedBasePrerequisites; + return { prerequisites: cachedBasePrerequisites, source: cachedBasePrerequisitesSource }; const result: BasePrerequisite[] = []; try { @@ -628,24 +639,34 @@ async function fetchBasePrerequisites( cachedBasePrerequisites = result; cachedBasePrerequisitesKnowledgeBaseUrl = knowledgeBaseUrl; - return result; + // Source classification per PRD D3: result.length > 0 when the canon article + // parsed at least one overlay row. 
Empty result = canon unreachable OR article + // exists but has no rows — in either case the tool falls back to type overlays + // only, which is the "minimal" tier for this dimension. + const source: "knowledge_base" | "minimal" = result.length > 0 ? "knowledge_base" : "minimal"; + cachedBasePrerequisitesSource = source; + return { prerequisites: result, source }; } async function fetchNormativeVocabulary( fetcher: KnowledgeBaseFetcher, knowledgeBaseUrl?: string, -): Promise { +): Promise<{ vocabulary: NormativeVocabulary; source: "knowledge_base" | "minimal" }> { if (cachedNormativeVocabulary && cachedNormativeVocabularyKnowledgeBaseUrl === knowledgeBaseUrl) - return cachedNormativeVocabulary; + return { vocabulary: cachedNormativeVocabulary, source: cachedNormativeVocabularySource }; const caseSensitiveWords: string[] = []; const caseInsensitiveWords: string[] = []; const directiveTypes = new Map(); const stopWords = new Set(); + // Track whether the canon article returned any content. Falling through to the + // hardcoded RFC 2119 fallback below is the "minimal" tier for this dimension. + let parsedFromCanon = false; try { const content = await fetcher.getFile("odd/challenge/normative-vocabulary.md", knowledgeBaseUrl); if (content) { + parsedFromCanon = true; // ── Surface 1: Normative Vocabulary (signal in canon quotes) ── // Two subsections under "## Normative Vocabulary": one keyed by "RFC 2119" // or "Directive Language" (case-sensitive), one for architectural-writing @@ -723,15 +744,21 @@ async function fetchNormativeVocabulary( }; cachedNormativeVocabulary = vocab; cachedNormativeVocabularyKnowledgeBaseUrl = knowledgeBaseUrl; - return vocab; + // Source classification per PRD D3: parsedFromCanon is true iff the canon article + // returned content; false means the hardcoded RFC 2119 fallback took over. The + // vocab article having content but parsing zero rows is still "knowledge_base" + // (canon authoritatively said the lists are empty), not "minimal". 
+ const source: "knowledge_base" | "minimal" = parsedFromCanon ? "knowledge_base" : "minimal"; + cachedNormativeVocabularySource = source; + return { vocabulary: vocab, source }; } async function fetchStakesCalibration( fetcher: KnowledgeBaseFetcher, knowledgeBaseUrl?: string, -): Promise { +): Promise<{ calibration: StakesCalibration; source: "knowledge_base" | "minimal" }> { if (cachedStakesCalibration && cachedStakesCalibrationKnowledgeBaseUrl === knowledgeBaseUrl) - return cachedStakesCalibration; + return { calibration: cachedStakesCalibration, source: cachedStakesCalibrationSource }; const byMode = new Map(); try { @@ -770,7 +797,12 @@ async function fetchStakesCalibration( cachedStakesCalibration = { byMode }; cachedStakesCalibrationKnowledgeBaseUrl = knowledgeBaseUrl; - return cachedStakesCalibration; + // Source classification per PRD D3: byMode populated from canon = "knowledge_base"; + // zero modes parsed = "minimal" (runChallengeAction falls to "uniformly loud" + // undefined-modeConfig branch at the call site, already handled there). + const source: "knowledge_base" | "minimal" = byMode.size > 0 ? 
"knowledge_base" : "minimal"; + cachedStakesCalibrationSource = source; + return { calibration: cachedStakesCalibration, source }; } function isStructuredInput(input: string): boolean { @@ -1263,14 +1295,18 @@ async function runCleanupStorage( // E0008 — governance-driven challenge caches (mirror PR #96 fix) cachedChallengeTypes = null; cachedChallengeTypesKnowledgeBaseUrl = undefined; + cachedChallengeTypesSource = "minimal"; cachedChallengeTypeIndex = null; cachedChallengeTypeIndexKnowledgeBaseUrl = undefined; cachedBasePrerequisites = null; cachedBasePrerequisitesKnowledgeBaseUrl = undefined; + cachedBasePrerequisitesSource = "minimal"; cachedNormativeVocabulary = null; cachedNormativeVocabularyKnowledgeBaseUrl = undefined; + cachedNormativeVocabularySource = "minimal"; cachedStakesCalibration = null; cachedStakesCalibrationKnowledgeBaseUrl = undefined; + cachedStakesCalibrationSource = "minimal"; return { action: "cleanup_storage", @@ -1715,14 +1751,43 @@ async function runChallengeAction( const startMs = Date.now(); const mode = (modeHint || "planning").toLowerCase(); - // Load governance in parallel - const [types, basePrereqs, vocab, calibration] = await Promise.all([ + // Load governance in parallel. Each helper returns a { , source } + // tuple per PRD D3; aggregate the four source flags into a single envelope + // signal per PRD D1 (strict: any helper minimal → aggregate minimal). + const [ + { types, source: typesSource }, + { prerequisites: basePrereqs, source: basePrereqsSource }, + { vocabulary: vocab, source: vocabSource }, + { calibration, source: calibrationSource }, + ] = await Promise.all([ discoverChallengeTypes(fetcher, knowledgeBaseUrl), fetchBasePrerequisites(fetcher, knowledgeBaseUrl), fetchNormativeVocabulary(fetcher, knowledgeBaseUrl), fetchStakesCalibration(fetcher, knowledgeBaseUrl), ]); + // Aggregate: strict union per canon/constraints/core-governance-baseline. 
+ // Two-tier today (workers/baseline/ not shipped — see PRD §3.2); when the + // bundled tier ships later, this union expands additively to include + // "bundled" without breaking consumers. + const governanceSource: "knowledge_base" | "minimal" = + [typesSource, basePrereqsSource, vocabSource, calibrationSource].some((s) => s === "minimal") + ? "minimal" + : "knowledge_base"; + + // Four peer governance URIs per PRD D4 — shape diverges from encode's + // singular governance_uri by design. Challenge's governance surfaces are + // peers (not a hierarchy), so a single anchor would misrepresent where + // base-prerequisites and normative-vocabulary live. Alphabetical by + // path-tail for stability; consumers that want a singular anchor can read + // governance_uris[0]. + const governanceUris = [ + "klappy://odd/challenge/base-prerequisites", + "klappy://odd/challenge-types", + "klappy://odd/challenge/normative-vocabulary", + "klappy://odd/challenge/stakes-calibration", + ]; + const modeConfig = calibration.byMode.get(mode); // Detect matching types via BM25 over per-type detection text. @@ -1766,6 +1831,8 @@ async function runChallengeAction( name: t.name, description: t.blockquote, })), + governance_source: governanceSource, + governance_uris: governanceUris, tensions: [], missing_prerequisites: [], challenges: [], @@ -1776,7 +1843,11 @@ async function runChallengeAction( }, state: state ? initState(state) : undefined, assistant_text: `Challenge suppressed for mode '${mode}'. 
Raw thought capture protected.`, - debug: { duration_ms: Date.now() - startMs, generated_at: new Date().toISOString() }, + debug: { + duration_ms: Date.now() - startMs, + generated_at: new Date().toISOString(), + knowledge_base_url: knowledgeBaseUrl, + }, }; } @@ -1972,6 +2043,8 @@ async function runChallengeAction( name: t.name, description: t.blockquote, })), + governance_source: governanceSource, + governance_uris: governanceUris, tensions, missing_prerequisites: missing, challenges: surfacedQuestions, @@ -1981,7 +2054,11 @@ async function runChallengeAction( }, state: updatedState, assistant_text: lines.join("\n").trim(), - debug: { duration_ms: Date.now() - startMs, generated_at: new Date().toISOString() }, + debug: { + duration_ms: Date.now() - startMs, + generated_at: new Date().toISOString(), + knowledge_base_url: knowledgeBaseUrl, + }, }; } diff --git a/workers/test/canon-tool-envelope.smoke.mjs b/workers/test/canon-tool-envelope.smoke.mjs index ac9f24c..45f88fe 100644 --- a/workers/test/canon-tool-envelope.smoke.mjs +++ b/workers/test/canon-tool-envelope.smoke.mjs @@ -217,6 +217,110 @@ async function run() { `got: ${encodeOverride.result?.governance_source}`, ); + // Tool 5: oddkit_challenge — canon-driven, four governance surfaces. + // Full envelope + governance_source + governance_uris (plural, per PRD D4 — + // shape diverges from encode by design because challenge reads four peer + // governance files, not a single hierarchy). 
+ console.log(`\n─── oddkit_challenge: envelope + governance_source + governance_uris ───`); + const challengeDefault = await callTool("oddkit_challenge", { + input: "I think we should ship this refactor today", + mode: "planning", + }); + expectFullEnvelope("oddkit_challenge (default knowledge_base)", challengeDefault); + expectGovernanceSource("oddkit_challenge (default knowledge_base)", challengeDefault, "knowledge_base"); + ok( + "oddkit_challenge: result.governance_uris is an array of exactly 4 entries", + Array.isArray(challengeDefault.result?.governance_uris) && + challengeDefault.result?.governance_uris.length === 4, + `got: ${JSON.stringify(challengeDefault.result?.governance_uris)}`, + ); + const expectedUris = [ + "klappy://odd/challenge/base-prerequisites", + "klappy://odd/challenge-types", + "klappy://odd/challenge/normative-vocabulary", + "klappy://odd/challenge/stakes-calibration", + ]; + ok( + "oddkit_challenge: governance_uris matches alphabetical peer set", + JSON.stringify(challengeDefault.result?.governance_uris) === JSON.stringify(expectedUris), + `got: ${JSON.stringify(challengeDefault.result?.governance_uris)}`, + ); + ok( + "oddkit_challenge: result.governance_uri (singular) is NOT emitted on challenge (divergence from encode by design — PRD D4)", + challengeDefault.result?.governance_uri === undefined, + `got: ${challengeDefault.result?.governance_uri}`, + ); + + console.log(`\n─── oddkit_challenge: knowledge_base_url override ───`); + const challengeOverride = await callTool("oddkit_challenge", { + input: "testing override threading", + mode: "planning", + knowledge_base_url: "https://github.com/torvalds/linux", + }); + expectFullEnvelope("oddkit_challenge (knowledge_base_url override)", challengeOverride); + ok( + "oddkit_challenge: debug.knowledge_base_url echoes the override", + challengeOverride.debug?.knowledge_base_url === "https://github.com/torvalds/linux", + `got: ${challengeOverride.debug?.knowledge_base_url}`, + ); + // Same 
getIndex merge caveat as encode (PRD §3.5 + Known Limitations): + // override without challenge docs can still resolve via default baseline + // merge. Accept either valid enum value. + ok( + "oddkit_challenge: override returns valid governance_source enum value", + ["knowledge_base", "minimal"].includes(challengeOverride.result?.governance_source), + `got: ${challengeOverride.result?.governance_source}`, + ); + + // 9-mode parse integrity — PR #100 regression guard. stakes-calibration + // defines 9 modes; every one must return a full envelope with valid + // governance_source. voice-dump additionally asserts SUPPRESSED status + // because that branch has its own early-return envelope that must also + // carry governance_source + governance_uris (see PRD §10 risk register). + console.log(`\n─── oddkit_challenge: 9-mode parse integrity ───`); + const modes = [ + "exploration", + "planning", + "execution", + "voice-dump", + "drafting", + "peer-review-ready", + "canon-tier-2", + "canon-tier-1", + "published-essay", + ]; + for (const m of modes) { + const r = await callTool("oddkit_challenge", { + input: "sample claim under mode pressure — canon defines the rules", + mode: m, + }); + ok(`oddkit_challenge[${m}]: has 'action'`, typeof r.action === "string"); + ok(`oddkit_challenge[${m}]: has 'server_time'`, typeof r.server_time === "string"); + ok( + `oddkit_challenge[${m}]: governance_source valid`, + ["knowledge_base", "bundled", "minimal"].includes(r.result?.governance_source), + `got: ${r.result?.governance_source}`, + ); + ok( + `oddkit_challenge[${m}]: governance_uris present and length 4`, + Array.isArray(r.result?.governance_uris) && r.result?.governance_uris.length === 4, + `got: ${JSON.stringify(r.result?.governance_uris)}`, + ); + if (m === "voice-dump") { + ok( + `oddkit_challenge[voice-dump]: status === SUPPRESSED (SUPPRESSED branch carries governance fields)`, + r.result?.status === "SUPPRESSED", + `got: ${r.result?.status}`, + ); + } else { + ok( + 
`oddkit_challenge[${m}]: status === CHALLENGED`, + r.result?.status === "CHALLENGED", + `got: ${r.result?.status}`, + ); + } + } + console.log(`\n${passed} passed, ${failed} failed`); process.exit(failed === 0 ? 0 : 1); }