diff --git a/CHANGELOG.md b/CHANGELOG.md
index 207d1af..35f1b26 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.21.0] - 2026-04-20
+
+### Changed
+
+- **`oddkit_challenge` prerequisite evaluation migrated from regex-per-check to stemmed set intersection** (per PRD D5 from P1.3.2 — split-by-fit). Each prereq now evaluates via `Array.from(prereq.stemmedTokens).some(s => inputStems.has(s))` over a Set computed once at canon-fetch time, with `tokenize(input)` hoisted out of the per-prereq loop. **Strictly additive**: every input that matched the prior regex still matches, plus stemmed variations now do too — `problems identified` satisfies `evidence-cited` (stems `problem` + `identif`), `considered alternatives` satisfies `alternatives-considered` (stems `consid` + `altern`), `acknowledged the risks` satisfies `risk-acknowledged` (stems `acknowledg` + `risk`). The four structural side-tests (URL / numeric / proper-noun / citation) preserved verbatim from the pre-refactor evaluator because they cover cases the keyword vocabulary cannot — `source-named` inputs like `"here's the URL: https://..."` have no stemmed overlap with the vocab `per / according to / from / source: / who said / where i read` but the URL structural test catches them. The conservative no-keyword-no-flag fallback (pass on `input.trim().length >= 20`) also preserved. Same matcher gate shipped in 0.20.0.
+
+- **`oddkit_challenge` type-detection BM25 index cache removed** (per PRD D9 from P1.3.2 — don't cache microsecond derivations). `cachedChallengeTypeIndex` and `cachedChallengeTypeIndexKnowledgeBaseUrl` module-level fields deleted; `getOrBuildChallengeTypeIndex` function deleted; `cleanup_storage` resets deleted; the call site in `runChallengeAction` rebuilds the BM25 index inline per request via `buildBM25Index(types.map(t => ({id: t.slug, text: t.detectionText})), vocab.stopWords)`. Same pattern gate shipped in 0.20.0. Removes module-level cache state, URL-keyed invalidation logic, cleanup_storage wiring, and drift risk when source data changes — the four hidden costs enumerated in the new canon principle. Parse-product caches (`cachedChallengeTypes`, `cachedBasePrerequisites`, `cachedNormativeVocabulary`, `cachedStakesCalibration`) remain — those are actual parse work.
+
+### Added
+
+- **New canon principle:** `klappy://canon/principles/cache-fetches-and-parses` (klappy.dev#125, merged `3726073`). Graduates the "cache fetches and parses, not microsecond derivations" pattern to canon as a tier-2 principle after its third deciding-argument recurrence across the tool sweep: 0.18.0 encode parse-product caching (implicit), 0.20.0 gate D9 (first explicit), 0.21.0 challenge `cachedChallengeTypeIndex` removal (second explicit). Names the two halves of the principle, enumerates the four-cost plumbing tax, and anchors the threshold to current corpus sizes (6–9 challenge types, 4 gate transitions, 8 base prereqs).
+
+- **New shared interface `PrereqMatchVocab`** in `workers/src/orchestrate.ts` capturing `stemmedTokens: Set<string>` plus four boolean structural-test flags (`hasURLCheck`, `hasNumericCheck`, `hasProperNounCheck`, `hasCitationCheck`). Mixed into both `BasePrerequisite` and the inline type on `ChallengeTypeDef.prerequisiteOverlays[]` to keep per-type and base-prereq structs in sync. Populated by the new `parseCheckColumn(check: string)` helper at canon-fetch time in both `discoverChallengeTypes` and `fetchBasePrerequisites`.
+
+### Known limitations
+
+- Same as 0.20.0 — Porter-style stemmer does not reverse consonant gemination (`shipping` → `shipp`, not `ship`); affected vocabulary is fixed at canon tier per `klappy.dev#122` precedent. `getIndex` strict-mode (`skipBaselineFallback`) still pending across encode/challenge/gate (carry-forward O-open P2).
+
 ## [0.20.0] - 2026-04-20
 
 ### Added
diff --git a/package.json b/package.json
index 0fb596e..ad7066c 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "oddkit",
-  "version": "0.20.0",
+  "version": "0.21.0",
   "description": "Agent-first CLI for ODD-governed repos. Epistemic terrain rendering with portable baseline.",
   "type": "module",
   "bin": {
diff --git a/workers/package-lock.json b/workers/package-lock.json
index 7eaee0e..1afedd1 100644
--- a/workers/package-lock.json
+++ b/workers/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "oddkit-mcp-worker",
-  "version": "0.18.0",
+  "version": "0.21.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "oddkit-mcp-worker",
-      "version": "0.18.0",
+      "version": "0.21.0",
       "dependencies": {
         "agents": "^0.4.1",
         "fflate": "^0.8.2",
diff --git a/workers/package.json b/workers/package.json
index 18a6711..ba97cbc 100644
--- a/workers/package.json
+++ b/workers/package.json
@@ -1,6 +1,6 @@
 {
   "name": "oddkit-mcp-worker",
-  "version": "0.20.0",
+  "version": "0.21.0",
   "private": true,
   "type": "module",
   "scripts": {
diff --git a/workers/src/orchestrate.ts b/workers/src/orchestrate.ts
index cf48a0f..664e177 100644
--- a/workers/src/orchestrate.ts
+++ b/workers/src/orchestrate.ts
@@ -91,7 +91,13 @@ interface ChallengeTypeDef {
   triggerWords: string[];
   detectionText: string; // triggerWords + blockquote, fed to BM25 indexer
   questions: Array<{ question: string; tier: string }>;
-  prerequisiteOverlays: Array<{ prerequisite: string; check: string; gapMessage: string }>;
+  prerequisiteOverlays: Array<
+    {
+      prerequisite: string;
+      check: string;
+      gapMessage: string;
+    } & PrereqMatchVocab
+  >;
   reframings: string[];
   fallback: boolean;
 }
@@ -100,6 +106,26 @@ interface BasePrerequisite {
   prerequisite: string;
   check: string;
   gapMessage: string;
+  // Per PRD D2 (P1.3.3): parse products populated at canon-fetch time.
+  // stemmedTokens is the stemmed form of quoted keywords in `check`;
+  // the four has*Check booleans flag structural-test hints detected in
+  // the check description. See parseCheckColumn below. These are parse
+  // products per klappy://canon/principles/cache-fetches-and-parses.
+  stemmedTokens: Set<string>;
+  hasURLCheck: boolean;
+  hasNumericCheck: boolean;
+  hasProperNounCheck: boolean;
+  hasCitationCheck: boolean;
+}
+
+/** Shared shape for the runtime match vocabulary attached to challenge
+ *  prereqs. Keeps the per-type and base-prereq structs in sync (DRY). */
+interface PrereqMatchVocab {
+  stemmedTokens: Set<string>;
+  hasURLCheck: boolean;
+  hasNumericCheck: boolean;
+  hasProperNounCheck: boolean;
+  hasCitationCheck: boolean;
 }
 
 // Gate governance types — P1.3.2 (0.20.0). Consumed by runGateAction via
@@ -160,8 +186,12 @@ interface StakesCalibration {
 let cachedChallengeTypes: ChallengeTypeDef[] | null = null;
 let cachedChallengeTypesKnowledgeBaseUrl: string | undefined = undefined;
 let cachedChallengeTypesSource: "knowledge_base" | "minimal" = "minimal";
-let cachedChallengeTypeIndex: BM25Index | null = null;
-let cachedChallengeTypeIndexKnowledgeBaseUrl: string | undefined = undefined;
+// Note: challenge's BM25 type-detection index is NOT cached — per
+// klappy://canon/principles/cache-fetches-and-parses, rebuilding a BM25
+// index over challenge's 6–9-type corpus is a microsecond derivation and
+// the plumbing tax (URL-keyed invalidation + cleanup_storage wiring +
+// drift risk) costs more than the rebuild. Inline-built at the call site
+// in runChallengeAction, same pattern as gate's transition index (0.20.0).
 let cachedBasePrerequisites: BasePrerequisite[] | null = null;
 let cachedBasePrerequisitesKnowledgeBaseUrl: string | undefined = undefined;
 let cachedBasePrerequisitesSource: "knowledge_base" | "minimal" = "minimal";
@@ -550,15 +580,19 @@ async function discoverChallengeTypes(
         }
       }
 
-      // Prerequisite Overlays table — rows of (Prerequisite, Check, Gap message)
+      // Prerequisite Overlays table — rows of (Prerequisite, Check, Gap message).
+      // Per P1.3.3 PRD D2: each row is enriched with PrereqMatchVocab (stemmed
+      // tokens + structural-test flags) at parse time; see parseCheckColumn.
       const prereqSection = content.match(
         /## Prerequisite Overlays[\s\S]*?\| Prerequisite[\s\S]*?\|[-|\s]+\|\n([\s\S]*?)(?=\n\n|\n##|$)/,
       );
-      const prerequisiteOverlays: Array<{
-        prerequisite: string;
-        check: string;
-        gapMessage: string;
-      }> = [];
+      const prerequisiteOverlays: Array<
+        {
+          prerequisite: string;
+          check: string;
+          gapMessage: string;
+        } & PrereqMatchVocab
+      > = [];
       if (prereqSection) {
         for (const row of prereqSection[1].split("\n").filter((r: string) => r.includes("|"))) {
           const cols = parseTableRow(row);
@@ -569,6 +603,7 @@ async function discoverChallengeTypes(
               prerequisite: cols[0],
               check: cols[1],
               gapMessage: gap,
+              ...parseCheckColumn(cols[1]),
             });
           }
         }
@@ -620,37 +655,14 @@ async function discoverChallengeTypes(
   // rather than inventing a built-in fallback registry — see PRD D7).
   const source: "knowledge_base" | "minimal" = types.length > 0 ? "knowledge_base" : "minimal";
   cachedChallengeTypesSource = source;
-  // Index build deferred — needs vocab.stopWords from fetchNormativeVocabulary,
-  // assembled lazily by getOrBuildChallengeTypeIndex below. Both types and the
-  // index are deterministic functions of knowledgeBaseUrl, so caching by knowledgeBaseUrl
-  // remains safe.
+  // Note: the BM25 type-detection index over per-type detection text is
+  // NOT cached — it's a microsecond derivation over already-cached parse
+  // products, rebuilt inline per request in runChallengeAction. See
+  // klappy://canon/principles/cache-fetches-and-parses for the principle
+  // and the plumbing-tax argument.
   return { types, source };
 }
 
-/** Lazily build (or return cached) per-knowledgeBaseUrl BM25 index over the per-type
- *  detection text, using governance-sourced stop words from normative-vocabulary.md.
- *  The cache is keyed on knowledgeBaseUrl so different canon sources do not contaminate
- *  each other's indexes. */
-function getOrBuildChallengeTypeIndex(
-  types: ChallengeTypeDef[],
-  vocab: NormativeVocabulary,
-  knowledgeBaseUrl?: string,
-): BM25Index {
-  if (cachedChallengeTypeIndex && cachedChallengeTypeIndexKnowledgeBaseUrl === knowledgeBaseUrl) {
-    return cachedChallengeTypeIndex;
-  }
-  // Build BM25 index over per-type detection text (triggers + blockquote).
-  // Stemming handles morphology; IDF weights distinctive trigger terms above filler.
-  // vocab.stopWords comes from `## Detection Noise` in normative-vocabulary.md;
-  // it deliberately preserves modal verbs and negation as signal. An empty
-  // Set means no filtering (governance opted into IDF-only scoring).
-  const bm25Docs = types.map((t) => ({ id: t.slug, text: t.detectionText }));
-  const bm25Index = buildBM25Index(bm25Docs, vocab.stopWords);
-  cachedChallengeTypeIndex = bm25Index;
-  cachedChallengeTypeIndexKnowledgeBaseUrl = knowledgeBaseUrl;
-  return bm25Index;
-}
-
 // Gate minimal-tier vocabulary — P1.3.2 D6. Used when canon is unreachable
 // or missing required sections. Vocabulary mirrors the pre-0.20.0 hardcoded
 // detectTransition regexes (L306–L324 pre-refactor) and checkPatterns map
@@ -847,6 +859,7 @@ async function fetchBasePrerequisites(
               prerequisite: cols[0],
               check: cols[1],
               gapMessage: cols[2].replace(/^"|"$/g, ""),
+              ...parseCheckColumn(cols[1]),
             });
           }
         }
@@ -1515,8 +1528,6 @@ async function runCleanupStorage(
   cachedChallengeTypes = null;
   cachedChallengeTypesKnowledgeBaseUrl = undefined;
   cachedChallengeTypesSource = "minimal";
-  cachedChallengeTypeIndex = null;
-  cachedChallengeTypeIndexKnowledgeBaseUrl = undefined;
   cachedBasePrerequisites = null;
   cachedBasePrerequisitesKnowledgeBaseUrl = undefined;
   cachedBasePrerequisitesSource = "minimal";
@@ -2023,9 +2034,15 @@ async function runChallengeAction(
   // Detection runs BEFORE the voice-dump suppression check so the SUPPRESSED
   // response can still expose `governance` — the model sees what would have
   // fired without surfacing the pressure-test questions.
+  // Build BM25 type-detection index inline per request (not cached) —
+  // per klappy://canon/principles/cache-fetches-and-parses, a BM25 index
+  // over challenge's 6–9-type corpus is a microsecond derivation and the
+  // plumbing tax is not worth the rebuild cost. Parse products (types,
+  // vocab) are cached upstream; the index is just a reshape.
   // Stop words come from `## Detection Noise` in normative-vocabulary.md
   // (governance), not a hardcoded constant in this file.
-  const typeIndex = getOrBuildChallengeTypeIndex(types, vocab, knowledgeBaseUrl);
+  const bm25Docs = types.map((t) => ({ id: t.slug, text: t.detectionText }));
+  const typeIndex = buildBM25Index(bm25Docs, vocab.stopWords);
   const matchedTypes: ChallengeTypeDef[] = [];
   const hits = searchBM25(typeIndex, input, types.length);
   const typeBySlug = new Map(types.map((t) => [t.slug, t]));
@@ -2124,9 +2141,14 @@ async function runChallengeAction(
   }
 
   const strictness = modeConfig?.prerequisiteStrictness?.toLowerCase() || "required";
+  // Hoist tokenize(input) out of the per-prereq loop — input is constant across
+  // the loop, stemmedTokens differ per prereq. Per PRD D3 (P1.3.3): stemmed
+  // set intersection at runtime, structural tests preserved, no regex compile
+  // per check. This is the fit-to-problem matcher per D5.
+  const inputStems = new Set(tokenize(input));
   const missing: string[] = [];
   for (const p of prereqMap.values()) {
-    const passed = evaluatePrerequisiteCheck(input, p.check);
+    const passed = evaluatePrerequisiteCheck(inputStems, input, p);
     if (!passed) {
       // source-named check is escalated to blocking when strictness says so
       if (strictness.includes("optional") && !p.prerequisite.includes("source-named")) {
@@ -2288,36 +2310,78 @@ async function runChallengeAction(
   };
 }
 
-// Governance-driven check evaluator — interprets natural-language `check` strings
-// from ## Prerequisite Overlays tables. Uses cheap heuristics: substring matching
-// against quoted keywords in the check description, plus a few special-case patterns.
-function evaluatePrerequisiteCheck(input: string, check: string): boolean {
-  // Extract quoted keywords like "evidence", "observed", "alternative"
-  const quotedKeywords: string[] = [];
+// Parse-time helper: extract quoted keywords from a `check` description and
+// detect the four structural-test hints. Called at canon-fetch time from
+// both discoverChallengeTypes (per-type prereqs) and fetchBasePrerequisites
+// (universal prereqs). Produces a PrereqMatchVocab that the runtime consumes
+// via evaluatePrerequisiteCheck. Per klappy://canon/principles/cache-fetches-
+// and-parses, this is a parse product: the Set is the stemmed form of the
+// canon's vocabulary and is cached alongside the rest of the prereq struct.
+function parseCheckColumn(check: string): PrereqMatchVocab {
   const quotedRegex = /"([^"]+)"/g;
+  const stemmedTokens = new Set<string>();
   let m: RegExpExecArray | null;
   while ((m = quotedRegex.exec(check)) !== null) {
-    quotedKeywords.push(m[1]);
-  }
-
-  if (quotedKeywords.length > 0) {
-    // Pass if ANY quoted keyword appears in input (case-insensitive, word-boundary where possible)
-    for (const kw of quotedKeywords) {
-      const escaped = kw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
-      // Use word-boundary for single words, substring for phrases
-      const pattern = /^\w+$/.test(kw) ? new RegExp("\\b" + escaped + "\\b", "i") : new RegExp(escaped, "i");
-      if (pattern.test(input)) return true;
+    // Tokenize each quoted keyword or phrase — multi-word phrases like
+    // "according to" contribute multiple stems; stop-words are dropped
+    // by tokenize(). This preserves semantic coverage while normalizing
+    // morphology (problems → problem, considered → consid, etc.).
+    for (const stem of tokenize(m[1])) {
+      stemmedTokens.add(stem);
     }
-    // Special-case check descriptions that mention URLs, citations, numeric markers
-    if (/\bURL\b/i.test(check) && /https?:\/\//.test(input)) return true;
-    if (/numeric/i.test(check) && /\d/.test(input)) return true;
-    if (/proper-?noun/i.test(check) && /\b[A-Z][a-z]+\s+[A-Z]/.test(input)) return true;
-    if (/citation/i.test(check) && /\[\d+\]|\bper\s+[A-Z]|\baccording to\b/i.test(input)) return true;
-    return false;
   }
+  return {
+    stemmedTokens,
+    hasURLCheck: /\bURL\b/i.test(check),
+    hasNumericCheck: /\bnumeric\b/i.test(check),
+    hasProperNounCheck: /\bproper-?noun\b/i.test(check),
+    hasCitationCheck: /\bcitation\b/i.test(check),
+  };
+}
 
-  // No quoted keywords: conservative fallback — passes if input is non-trivial
-  return input.trim().length >= 20;
+// Governance-driven check evaluator — runtime pairing for parseCheckColumn.
+// Per PRD D5 (split-by-fit): prereq evaluation is independent gap-or-not per
+// prereq, not ranked. Stemmed set intersection is the fit-to-problem matcher
+// and catches morphological variations that the prior regex cascade missed
+// (e.g. "problems identified" now stems to `problem` + `identif` and matches
+// a prereq whose vocab includes `problem`). Structural side-tests (URL,
+// numeric, proper-noun, citation) preserved from the pre-refactor evaluator
+// because they cover cases the keyword vocabulary can't — `source-named`
+// inputs like "here's the URL: https://..." have no stemmed overlap with the
+// vocab `per / according to / from / source: / who said / where i read` but
+// the URL structural test catches them. Strictly additive over the prior
+// regex: every input that matched pre-refactor still matches post-refactor.
+function evaluatePrerequisiteCheck(
+  inputStems: Set<string>,
+  rawInput: string,
+  prereq: PrereqMatchVocab,
+): boolean {
+  // Token match — stemmed set intersection.
+  for (const s of prereq.stemmedTokens) {
+    if (inputStems.has(s)) return true;
+  }
+  // Structural tests — preserved from pre-refactor evaluator. Check against
+  // the raw input because these patterns are inherently case- and shape-
+  // sensitive (URLs, proper-noun capitalization, bracketed citations).
+  if (prereq.hasURLCheck && /https?:\/\//.test(rawInput)) return true;
+  if (prereq.hasNumericCheck && /\d/.test(rawInput)) return true;
+  if (prereq.hasProperNounCheck && /\b[A-Z][a-z]+\s+[A-Z]/.test(rawInput)) return true;
+  if (prereq.hasCitationCheck && /\[\d+\]|\bper\s+[A-Z]|\baccording to\b/i.test(rawInput)) {
+    return true;
+  }
+  // Conservative fallback: prereqs whose check description had NO quoted
+  // keywords AND NO structural hints pass on any non-trivial input. This
+  // preserves the pre-refactor fallback behavior (`input.trim().length >= 20`).
+  if (
+    prereq.stemmedTokens.size === 0 &&
+    !prereq.hasURLCheck &&
+    !prereq.hasNumericCheck &&
+    !prereq.hasProperNounCheck &&
+    !prereq.hasCitationCheck
+  ) {
+    return rawInput.trim().length >= 20;
+  }
+  return false;
 }
 
 async function runGateAction(
diff --git a/workers/test/canon-tool-envelope.smoke.mjs b/workers/test/canon-tool-envelope.smoke.mjs
index 8ca122e..1a2fcf7 100644
--- a/workers/test/canon-tool-envelope.smoke.mjs
+++ b/workers/test/canon-tool-envelope.smoke.mjs
@@ -321,6 +321,139 @@ async function run() {
     }
   }
 
+  // P1.3.3 — stemmed set intersection assertions (challenge prereq evaluator).
+  // Per PRD D5 (split-by-fit): prereq evaluation is independent gap-or-not per
+  // prereq, not ranked; stemmed Set intersection over canon-quoted vocabulary
+  // catches morphological variations the prior regex missed. Strictly additive
+  // over the pre-refactor evaluator. Structural side-tests (URL, numeric,
+  // proper-noun, citation) preserved. See klappy://canon/principles/cache-fetches-and-parses.
+  console.log(`\n─── oddkit_challenge: P1.3.3 stemmed prereq evaluator ───`);
+
+  // Helper: derive the missing-prereq list from a challenge response. The
+  // gap-message strings come from canon (base-prerequisites.md and per-type
+  // articles); we test by substring on canon-stable phrases.
+  const challengeMissing = async (text, mode = "execution") => {
+    const r = await callTool("oddkit_challenge", { input: text, mode });
+    return r.result?.missing_prerequisites || [];
+  };
+  const includesGap = (missing, phrase) =>
+    missing.some((g) => typeof g === "string" && g.toLowerCase().includes(phrase.toLowerCase()));
+
+  // (1) Stemmed match on inflected base-prereq vocab — `observed` is in canon
+  // for evidence-cited; `noticed` and `measured` also. Stemmed Set intersection
+  // means inflected forms (e.g. "I'm noticing") all share the stem `notic`.
+  const stemmedBaseInputs = [
+    "I observed a problem in production today, per the logs at https://example.com/log",
+    "I'm noticing an issue per the reports we collected from the field engineers",
+    "I read about this case in the article from John Smith yesterday",
+  ];
+  for (const txt of stemmedBaseInputs) {
+    const missing = await challengeMissing(txt);
+    ok(
+      `oddkit_challenge: P1.3.3 base-prereq evidence-cited passes for stemmed input "${txt.slice(0, 40)}…"`,
+      !includesGap(missing, "no evidence cited"),
+      `missing: ${JSON.stringify(missing)}`,
+    );
+  }
+
+  // (2) Stemmed per-type prereq match — proposal type's `alternatives-considered`
+  // canon vocab is `alternative, instead, option, considered, rejected`; stemmed
+  // forms of `considered` and `alternatives` should pass.
+  const proposalText =
+    "I propose we ship the new auth flow. I considered alternatives like SSO and OAuth, " +
+    "but rejected those options due to risk and tradeoff cost. The rollout is reversible " +
+    "and we know it succeeded when login latency drops below 200ms per Stripe's data.";
+  const proposalMissing = await challengeMissing(proposalText);
+  ok(
+    `oddkit_challenge: P1.3.3 proposal alternatives-considered passes via stemmed match`,
+    !includesGap(proposalMissing, "no alternatives mentioned"),
+    `missing: ${JSON.stringify(proposalMissing)}`,
+  );
+  ok(
+    `oddkit_challenge: P1.3.3 proposal risk-acknowledged passes via stemmed match`,
+    !includesGap(proposalMissing, "no risks or costs"),
+    `missing: ${JSON.stringify(proposalMissing)}`,
+  );
+
+  // (3) Non-match: input with no keyword overlap and no structural hints
+  // surfaces base prereqs (evidence + source + confidence) in missing list.
+  const noMatchText = "Let me think about this problem space for a while in abstract terms.";
+  const noMatchMissing = await challengeMissing(noMatchText);
+  ok(
+    `oddkit_challenge: P1.3.3 non-matching input surfaces evidence-cited gap`,
+    includesGap(noMatchMissing, "no evidence cited"),
+    `missing: ${JSON.stringify(noMatchMissing)}`,
+  );
+
+  // (4) URL structural test preservation: source-named passes via the URL
+  // structural path even though the input has no quoted-vocab overlap with
+  // `per / according to / from / source: / who said / where i read`.
+  const urlOnlyText = "I think this works, see https://docs.example.com/auth-flow for the design.";
+  const urlMissing = await challengeMissing(urlOnlyText);
+  ok(
+    `oddkit_challenge: P1.3.3 source-named passes via URL structural test (no keyword overlap)`,
+    !includesGap(urlMissing, "no source named"),
+    `missing: ${JSON.stringify(urlMissing)}`,
+  );
+
+  // (5) Proper-noun structural test preservation: source-named passes via
+  // the proper-noun pattern (`per <Capitalized> <Capitalized>`).
+  const properNounText =
+    "I believe the auth flow needs revisiting based on what I observed per Jane Smith yesterday.";
+  const pnMissing = await challengeMissing(properNounText);
+  ok(
+    `oddkit_challenge: P1.3.3 source-named passes via proper-noun structural test`,
+    !includesGap(pnMissing, "no source named"),
+    `missing: ${JSON.stringify(pnMissing)}`,
+  );
+
+  // (6) Citation structural test preservation: `\baccording to\b` triggers
+  // the citation path on a source-named-relevant phrase.
+  const citationText =
+    "I observed a regression in the deploy pipeline according to Tuesday's measurements [3].";
+  const citMissing = await challengeMissing(citationText);
+  ok(
+    `oddkit_challenge: P1.3.3 source-named passes via citation structural test`,
+    !includesGap(citMissing, "no source named"),
+    `missing: ${JSON.stringify(citMissing)}`,
+  );
+
+  // (7) Rebuild stability — Item 2's inline BM25 type index build per request
+  // produces deterministic results across consecutive calls with identical input.
+  // (Proxy: same `prerequisites_missing` list across two calls.)
+  const stabilityText =
+    "I observed a problem with the deploy. According to Jane Smith we should ship the fix.";
+  const run1 = await challengeMissing(stabilityText);
+  const run2 = await challengeMissing(stabilityText);
+  ok(
+    `oddkit_challenge: P1.3.3 inline rebuild produces stable results across consecutive calls`,
+    JSON.stringify(run1) === JSON.stringify(run2),
+    `run1: ${JSON.stringify(run1)}\n           run2: ${JSON.stringify(run2)}`,
+  );
+
+  // (8) Backward-compat: pre-refactor regex evaluator passed on inputs containing
+  // any quoted keyword (case-insensitive word-boundary). Confirm a literal-keyword
+  // input still passes — `"observed"` is in evidence-cited's canon vocab.
+  const literalText = "I observed nothing remarkable here per Alice Johnson.";
+  const literalMissing = await challengeMissing(literalText);
+  ok(
+    `oddkit_challenge: P1.3.3 backward-compat — literal canon keyword "observed" still passes evidence-cited`,
+    !includesGap(literalMissing, "no evidence cited"),
+    `missing: ${JSON.stringify(literalMissing)}`,
+  );
+
+  // (9) Confidence-signaled stemmed match — canon vocab includes `believe,
+  // think, know, suspect, certain, tentative, confident, unsure`. Stemmed
+  // form `believ` matches `I believe` and `believing`.
+  const confidenceText =
+    "I believe we observed a stable pattern per the measurements from Jane Smith last week.";
+  const confMissing = await challengeMissing(confidenceText);
+  ok(
+    `oddkit_challenge: P1.3.3 confidence-signaled passes via stemmed match on "believe"`,
+    !includesGap(confMissing, "confidence level not signaled"),
+    `missing: ${JSON.stringify(confMissing)}`,
+  );
+
   // Tool 6: oddkit_gate — canon-driven, two governance surfaces. Full envelope +
   // governance_source + governance_uris (plural array of 2 — shape diverges
   // from encode's singular governance_uri, matches challenge's plural shape,