From 8f16edfd0e277fc5041dbcd82755a5d2156ce5c1 Mon Sep 17 00:00:00 2001
From: Klappy <klappy@users.noreply.github.com>
Date: Sat, 4 Apr 2026 00:50:00 +0000
Subject: [PATCH 1/5] =?UTF-8?q?Overhaul=20README=20=E2=80=94=20MCP=20URL?=
 =?UTF-8?q?=20first,=20clear=20getting-started=20path?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Joshua's feedback: both READMEs need clear bootstrapping instructions.
Old README led with CLI commands and 'librarian' terminology.
New README:
- Leads with MCP URL and four platform connection paths
- Current tool table (orient, search, challenge, gate, encode, etc.)
- Example prompts to try immediately
- Bootstrap and permissions guidance
- 'Point at your own repo' section
- Links to Getting Started, Journey, and From Passive to Proactive
- Cross-links klappy.dev knowledge base repo
---
 README.md | 237 ++++++++++++++++++++----------------------------------
 1 file changed, 85 insertions(+), 152 deletions(-)
diff --git a/README.md b/README.md
index 25c40c9..f74fb6a 100644
--- a/README.md
+++ b/README.md
@@ -1,207 +1,140 @@
 # oddkit
 
-Agent-first CLI for ODD-governed repos. Portable Librarian + Validation with baseline knowledge.
+An open-source MCP server that gives your AI structured memory, epistemic discipline, and the ability to build on what came before.
 
-> **Authoritative:** [docs/oddkit/CHARTER.md](docs/oddkit/CHARTER.md)
->
-> OddKit is epistemic terrain rendering (map), not epistemic authority (compass).
+> **Your AI forgets everything between sessions. It guesses instead of checking. It can't tell a brainstorm from a decision. oddkit fixes that.**
 
-## Deployment Methods
+oddkit reads markdown files from a GitHub repository — decisions, constraints, learnings, governance — and makes them available to your AI through structured tools. It works with any AI tool that supports MCP: Claude, ChatGPT, Gemini, Cursor, Claude Code, Lovable, Replit, ElevenLabs voice agents, and more.
 
-oddkit runs in three ways:
+**Knowledge base repo:** [klappy/klappy.dev](https://github.com/klappy/klappy.dev) — the content oddkit reads from
 
-| Method           | Use Case               | Setup                                       |
-| ---------------- | ---------------------- | ------------------------------------------- |
-| **CLI**          | Terminal usage         | `npx oddkit <command>`                      |
-| **MCP (local)**  | Cursor, Claude Code    | `npx oddkit init --claude`                  |
-| **MCP (remote)** | Claude.ai iOS/iPad/web | Auto-deploys via Cloudflare Git integration |
-
-### Production Deployment
+---
 
-The Cloudflare Worker deploys automatically via Git integration:
+## Get Started in 30 Seconds
 
-- **`main`** branch → staging preview deploy (auto-generated URL)
-- **`prod`** branch → production (`oddkit.klappy.dev`)
+oddkit is a remote MCP server. You don't install anything — you point your AI tool at a URL.
 
-To promote staging to production: `./scripts/promote.sh`
+### Claude.ai
 
-See [workers/README.md](workers/README.md) for full deployment details.
+Settings → Connectors → Add Custom Integration:
+- **Name:** `oddkit`
+- **URL:** `https://oddkit.klappy.dev/mcp`
 
-## Documentation
+### ChatGPT
 
-| Doc                                                               | What It Covers                  |
-| ----------------------------------------------------------------- | ------------------------------- |
-| [**System Overview**](docs/getting-started/odd-agents-and-mcp.md) | How all the pieces fit together |
-| [**Agents Guide**](docs/getting-started/agents.md)                | Set up Epistemic Guide + Scribe |
-| [**Ledger Guide**](docs/getting-started/ledger.md)                | Learnings and decisions capture |
-| [**QUICKSTART**](docs/QUICKSTART.md)                              | CLI and MCP setup in 60 seconds |
-| [**MCP Reference**](docs/MCP.md)                                  | Full MCP integration details    |
+Settings → Developer Mode → Create App → add MCP server URL:
 
-## Quick Start
+`https://oddkit.klappy.dev/mcp`
 
-```bash
-# Install dependencies
-npm install
+### Claude Code / Cursor / Any MCP Client
 
-# Build index (optional, auto-builds on first query)
-oddkit index
+Add to your `.mcp.json`:
 
-# Ask a policy question
-oddkit librarian --query "What is the definition of done?"
-
-# Validate a completion claim
-oddkit validate --message "Done with the UI update. Screenshot: ui.png"
-
-# Explain the last result in human-readable format
-oddkit explain --last
+```json
+{
+  "mcpServers": {
+    "oddkit": {
+      "type": "http",
+      "url": "https://oddkit.klappy.dev/mcp"
+    }
+  }
+}
 ```
 
-## Commands
+Or in Claude Code: `claude mcp add --transport http oddkit https://oddkit.klappy.dev/mcp`
 
-### `oddkit index`
+### Lovable / Replit / Gemini / ElevenLabs / Others
 
-Build or rebuild the document index.
+Any tool that supports MCP can connect. Look for "MCP server" or "custom integration" in your tool's settings and provide the URL:
 
-```bash
-oddkit index --repo /path/to/repo
-```
+`https://oddkit.klappy.dev/mcp`
 
-### `oddkit librarian`
+---
 
-Ask a policy or lookup question. Returns citations with quotes.
+## What oddkit Does
 
-```bash
-oddkit librarian --query "What is the rule about visual proof?" --format json
-```
+Once connected, your AI gets access to these tools:
 
-Options:
+| Tool | What It Does |
+|------|-------------|
+| **orient** | Assess a situation, surface unresolved questions, identify which mode you're in (exploring, planning, executing) |
+| **search** | Find relevant documents, constraints, and prior decisions by topic |
+| **get** | Fetch a specific document by URI |
+| **challenge** | Pressure-test a claim, assumption, or proposal against existing constraints |
+| **gate** | Check readiness before transitioning between phases |
+| **encode** | Structure a decision, insight, or boundary as a durable record |
+| **preflight** | Pre-implementation check — surfaces constraints, definition of done, and pitfalls |
+| **validate** | Verify completion claims against required artifacts |
+| **catalog** | List available documentation with filtering and sorting |
 
-- `-q, --query <text>` — The question to ask (required)
-- `-r, --repo <path>` — Repository root (default: current directory)
-- `-f, --format <type>` — Output format: `json` or `md` (default: `json`)
+### Try It Right Now
 
-### `oddkit validate`
+After connecting, try these prompts:
 
-Validate a completion claim. Returns verdict + evidence gaps.
+- *"Orient me on [your current project challenge]"*
+- *"Challenge the assumption that [something you believe]"*
+- *"Search for any decisions about [a topic]"*
+- *"Encode this decision: we chose X because Y"*
 
-```bash
-oddkit validate --message "Shipped the new feature" --format json
-```
+---
+
+## Bootstrap Your Project
 
-Options:
+To make oddkit proactive — so the AI uses these tools automatically instead of waiting for you to ask — add a bootstrap prompt to your project instructions. See the [full bootstrap guide](https://klappy.dev/page/docs/oddkit/proactive/proactive-bootstrap) or start with the essentials in [Getting Started with ODD and oddkit](https://klappy.dev/page/writings/getting-started-with-odd-and-oddkit).
 
-- `-m, --message <text>` — The completion claim (required)
-- `-r, --repo <path>` — Repository root (default: current directory)
-- `-a, --artifacts <path>` — Optional JSON file with additional artifacts
-- `-f, --format <type>` — Output format: `json` or `md` (default: `json`)
+---
 
-### `oddkit explain`
+## Point oddkit at Your Own Knowledge Base
 
-Explain the last oddkit result in human-readable format.
+By default, oddkit reads from [klappy.dev](https://github.com/klappy/klappy.dev). You can point it at any GitHub repo using the `canon_url` parameter:
 
-```bash
-oddkit explain --last
-oddkit explain --last --format json
+```
+canon_url: "https://raw.githubusercontent.com/YOUR_ORG/YOUR_REPO/main"
 ```
 
-Options:
-
-- `--last` — Explain the last result (default: true)
-- `-f, --format <type>` — Output format: `md` or `json` (default: `md`)
+oddkit reads markdown files with YAML frontmatter. Start with a few files — decisions, constraints, learnings — and grow from there. No schema required.
 
-The explain command:
+---
 
-- Shows what happened (status/verdict)
-- Explains why it happened (which rules fired)
-- Suggests what to do next
-- Lists evidence used (citations, origin)
-- Includes debug info (baseline ref, timestamp)
+## Architecture
 
-## Baseline Knowledge
+oddkit is a Cloudflare Worker that:
 
-By default, oddkit loads the [klappy.dev](https://github.com/klappy/klappy.dev) repo as baseline knowledge.
+1. Fetches markdown files from a GitHub repository (zip download, cached)
+2. Indexes them with BM25 full-text search
+3. Parses YAML frontmatter for metadata, filtering, and sorting
+4. Exposes structured tools via the MCP protocol
 
-### Resolution Order
+It's stateless, serverless, and framework-agnostic. The knowledge base is your repo. oddkit just makes it searchable and structured.
 
-1. `--baseline <path-or-git-url>` CLI flag (highest priority)
-2. `ODDKIT_BASELINE` environment variable (path or git URL)
-3. Default: `https://github.com/klappy/klappy.dev`
+---
 
-### Configuration
+## Development
 
 ```bash
-# Override baseline via CLI flag
-oddkit librarian -q "What is done?" --baseline /path/to/local/canon
-oddkit librarian -q "What is done?" --baseline https://github.com/yourorg/your-canon.git
-
-# Override baseline via environment variable
-export ODDKIT_BASELINE="https://github.com/yourorg/your-canon.git"
-oddkit librarian -q "What is done?"
-
-# Pin to a specific branch/tag
-export ODDKIT_BASELINE_REF="v1.0.0"
-oddkit librarian -q "What is done?"
+cd workers
+npm install
+npm run dev     # Local development
+npm run deploy  # Deploy to Cloudflare
 ```
 
-### Cache Location
-
-- Git repos are cloned to `~/.oddkit/cache/<repo-name>/<ref>/`
-- Local paths are used directly (no caching)
-- Local docs can override baseline via `supersedes` frontmatter field
+**Branches:**
+- `main` → staging preview
+- `prod` → production (`oddkit.klappy.dev`)
 
-## Supersedes Override
+Promote staging to production: `./scripts/promote.sh`
 
-A local doc can override a baseline doc by declaring:
-
-```yaml
----
-supersedes: klappy://canon/definition-of-done
 ---
-```
 
-The baseline doc with that URI will be suppressed from results.
+## Learn More
 
-## Output Format
+- **[Getting Started with ODD and oddkit](https://klappy.dev/page/writings/getting-started-with-odd-and-oddkit)** — five-minute quickstart with bootstrap instructions
+- **[The Journey from AI Tasks to AI-Augmented Workflows](https://klappy.dev/page/writings/the-journey-from-ai-tasks-to-ai-augmented-workflows)** — the four-step progression
+- **[From Passive to Proactive](https://klappy.dev/page/writings/from-passive-to-proactive)** — the story behind oddkit's proactive design
+- **[klappy.dev repo](https://github.com/klappy/klappy.dev)** — the knowledge base oddkit reads from
 
-### Librarian JSON
-
-```json
-{
-  "status": "SUPPORTED",
-  "answer": "Found 3 relevant document(s)...",
-  "evidence": [
-    {
-      "quote": "MUST provide visual proof...",
-      "citation": "canon/visual-proof.md#Operating Constraints",
-      "origin": "baseline"
-    }
-  ],
-  "read_next": [{ "path": "canon/definition-of-done.md#DoD", "reason": "Primary source" }]
-}
-```
-
-### Validate JSON
-
-```json
-{
-  "verdict": "NEEDS_ARTIFACTS",
-  "claims": ["Done with the UI update"],
-  "required_evidence": ["screenshot", "visual artifact"],
-  "provided_artifacts": [],
-  "gaps": ["screenshot", "visual artifact"]
-}
-```
-
-## For Agents
-
-This CLI is designed to be called by AI agents:
-
-```bash
-# Agent asks a question
-oddkit librarian -q "What evidence is required for UI changes?" -f json
+---
 
-# Agent validates completion
-oddkit validate -m "Implemented search with autocomplete. Screenshot: search.png" -f json
-```
+## License
 
-JSON output is canonical and machine-parseable.
+MIT

From 74b7786251f6f04e9853d570c8ce150c8a927560 Mon Sep 17 00:00:00 2001
From: Klappy <klappy@users.noreply.github.com>
Date: Sat, 4 Apr 2026 02:35:05 +0000
Subject: [PATCH 2/5] =?UTF-8?q?Fix=20README=20examples=20=E2=80=94=20unive?=
 =?UTF-8?q?rsal,=20explicitly=20invoke=20oddkit?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index f74fb6a..1240c36 100644
--- a/README.md
+++ b/README.md
@@ -69,12 +69,12 @@ Once connected, your AI gets access to these tools:
 
 ### Try It Right Now
 
-After connecting, try these prompts:
+After connecting, say "use oddkit" or "ask oddkit" to invoke it:
 
-- *"Orient me on [your current project challenge]"*
-- *"Challenge the assumption that [something you believe]"*
-- *"Search for any decisions about [a topic]"*
-- *"Encode this decision: we chose X because Y"*
+- *"Use oddkit to orient me on whether I should [decision you're facing]"*
+- *"Ask oddkit to challenge my assumption that [something you believe]"*
+- *"Use oddkit to encode this decision: we chose [X] because [Y]"*
+- *"[paste meeting notes] Use oddkit to encode the key decisions from this meeting"*
 
 ---
 

From f40d548fda70dd326ee4dd7f5ba10e34302153eb Mon Sep 17 00:00:00 2001
From: oddkit-agent <agent@oddkit>
Date: Thu, 9 Apr 2026 13:08:25 +0000
Subject: [PATCH 3/5] fix: BM25 phrase boost + KV index freshness verification
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug 1 (workers/src/bm25.ts, src/search/bm25.js):
BM25 scored every query token independently, letting high-frequency
terms like 'pattern' dilute rare-but-precise ones like 'vodka',
pushing exact-title matches down the rankings.

Fix: store originalText on BM25Doc during buildBM25Index, then after
BM25 scoring apply a phrase boost in searchBM25:
  - +5.0 (PHRASE_BOOST_EXACT)   if the full lowercased query appears
    as a substring of the doc's original text
  - +2.0 (PHRASE_BOOST_PARTIAL) if any consecutive word bigram from
    the query appears in the doc text (first hit wins)

These boosts supplement BM25; they never replace it. Applied to both
the Worker TypeScript version and the Node/stdio JS version for
consistency.

Bug 2 (workers/src/zip-baseline-fetcher.ts):
Cloudflare KV is eventually consistent — two requests seconds apart
can hit different edge nodes and return stale cached indexes even
when the SHA-keyed cache key looks valid.

Fix: after a KV cache hit in getIndex(), cross-check the cached
index's embedded commit_sha / canon_commit_sha against the SHAs just
resolved from the GitHub API. If they diverge, the entry is stale;
log a warning, discard it, and rebuild from source.
---
 src/search/bm25.js                  | 32 ++++++++++++++++++++++++++-
 workers/src/bm25.ts                 | 34 ++++++++++++++++++++++++++++-
 workers/src/zip-baseline-fetcher.ts | 18 +++++++++++++--
 3 files changed, 80 insertions(+), 4 deletions(-)

diff --git a/src/search/bm25.js b/src/search/bm25.js
index 5064d25..b8718cd 100644
--- a/src/search/bm25.js
+++ b/src/search/bm25.js
@@ -48,7 +48,7 @@ export function buildBM25Index(documents) {
 
   for (const doc of documents) {
     const terms = tokenize(doc.text);
-    docs.push({ id: doc.id, terms, length: terms.length });
+    docs.push({ id: doc.id, terms, length: terms.length, originalText: doc.text });
     totalLength += terms.length;
 
     const seen = new Set();
@@ -68,11 +68,21 @@ export function buildBM25Index(documents) {
   };
 }
 
+// Phrase boost constants — supplement BM25, never replace it.
+// Exact: full query string found as substring in doc text.
+// Partial: any consecutive two-word query bigram found in doc text.
+const PHRASE_BOOST_EXACT = 5.0;
+const PHRASE_BOOST_PARTIAL = 2.0;
+
 /** Search BM25 index, return sorted {id, score} pairs */
 export function searchBM25(index, query, limit = 5) {
   const queryTerms = tokenize(query);
   if (queryTerms.length === 0) return [];
 
+  // Pre-compute phrase matching inputs once, outside the per-doc loop.
+  const queryLower = query.toLowerCase();
+  const queryWords = queryLower.split(/\s+/).filter((w) => w.length > 1);
+
   const scores = [];
 
   for (const doc of index.docs) {
@@ -96,6 +106,26 @@ export function searchBM25(index, query, limit = 5) {
       score += idf * tfNorm;
     }
 
+    // Phrase boost: BM25 treats every query token independently, which lets
+    // high-frequency terms dilute rare-but-important ones (e.g. "pattern"
+    // drowning out "vodka" in "Vodka Architecture pattern"). Checking whether
+    // the original query phrase appears verbatim — or as a bigram — in the
+    // document's original text rescues those precise title/tag matches.
+    const docLower = doc.originalText.toLowerCase();
+    if (docLower.includes(queryLower)) {
+      // Full query is a substring of the doc text — strong exact match.
+      score += PHRASE_BOOST_EXACT;
+    } else if (queryWords.length >= 2) {
+      // Scan every consecutive word pair in the query; first hit wins.
+      for (let i = 0; i < queryWords.length - 1; i++) {
+        const bigram = queryWords[i] + " " + queryWords[i + 1];
+        if (docLower.includes(bigram)) {
+          score += PHRASE_BOOST_PARTIAL;
+          break;
+        }
+      }
+    }
+
     if (score > 0) scores.push({ id: doc.id, score });
   }
 
diff --git a/workers/src/bm25.ts b/workers/src/bm25.ts
index 68c2d60..c4cb345 100644
--- a/workers/src/bm25.ts
+++ b/workers/src/bm25.ts
@@ -44,6 +44,8 @@ export interface BM25Doc {
   id: string;
   terms: string[];
   length: number;
+  /** Original (pre-tokenization) text, used for phrase-level scoring. */
+  originalText: string;
 }
 
 export interface BM25Index {
@@ -63,7 +65,7 @@ export function buildBM25Index(
 
   for (const doc of documents) {
     const terms = tokenize(doc.text);
-    docs.push({ id: doc.id, terms, length: terms.length });
+    docs.push({ id: doc.id, terms, length: terms.length, originalText: doc.text });
     totalLength += terms.length;
 
     const seen = new Set<string>();
@@ -83,6 +85,12 @@ export function buildBM25Index(
   };
 }
 
+// Phrase boost constants — supplement BM25, never replace it.
+// Exact: full query string found as substring in doc text.
+// Partial: any consecutive two-word query bigram found in doc text.
+const PHRASE_BOOST_EXACT = 5.0;
+const PHRASE_BOOST_PARTIAL = 2.0;
+
 /** Search BM25 index, return sorted {id, score} pairs */
 export function searchBM25(
   index: BM25Index,
@@ -92,6 +100,10 @@ export function searchBM25(
   const queryTerms = tokenize(query);
   if (queryTerms.length === 0) return [];
 
+  // Pre-compute phrase matching inputs once, outside the per-doc loop.
+  const queryLower = query.toLowerCase();
+  const queryWords = queryLower.split(/\s+/).filter((w) => w.length > 1);
+
   const scores: Array<{ id: string; score: number }> = [];
 
   for (const doc of index.docs) {
@@ -119,6 +131,26 @@ export function searchBM25(
       score += idf * tfNorm;
     }
 
+    // Phrase boost: BM25 treats every query token independently, which lets
+    // high-frequency terms dilute rare-but-important ones (e.g. "pattern"
+    // drowning out "vodka" in "Vodka Architecture pattern"). Checking whether
+    // the original query phrase appears verbatim — or as a bigram — in the
+    // document's original text rescues those precise title/tag matches.
+    const docLower = doc.originalText.toLowerCase();
+    if (docLower.includes(queryLower)) {
+      // Full query is a substring of the doc text — strong exact match.
+      score += PHRASE_BOOST_EXACT;
+    } else if (queryWords.length >= 2) {
+      // Scan every consecutive word pair in the query; first hit wins.
+      for (let i = 0; i < queryWords.length - 1; i++) {
+        const bigram = queryWords[i] + " " + queryWords[i + 1];
+        if (docLower.includes(bigram)) {
+          score += PHRASE_BOOST_PARTIAL;
+          break;
+        }
+      }
+    }
+
     if (score > 0) scores.push({ id: doc.id, score });
   }
 
diff --git a/workers/src/zip-baseline-fetcher.ts b/workers/src/zip-baseline-fetcher.ts
index 8fe60ea..a79da72 100644
--- a/workers/src/zip-baseline-fetcher.ts
+++ b/workers/src/zip-baseline-fetcher.ts
@@ -760,8 +760,22 @@ export class ZipBaselineFetcher {
     if (this.env.BASELINE_CACHE) {
       const cached = await this.env.BASELINE_CACHE.get(cacheKey, "json") as BaselineIndex | null;
       if (cached) {
-        // Content-addressed cache hit: SHA matches, content is truthful
-        return cached;
+        // Cloudflare KV is eventually consistent — two requests seconds apart
+        // can hit different edge nodes and return stale data even when the
+        // cache key looks correct. Cross-check the cached index's embedded
+        // commit SHAs against the SHAs we just resolved from the GitHub API.
+        // If they diverge, the cached entry is stale; discard and rebuild.
+        const baselineShaMatch = !baselineSha || cached.commit_sha === baselineSha;
+        const canonShaMatch = !canonSha || cached.canon_commit_sha === canonSha;
+        if (baselineShaMatch && canonShaMatch) {
+          // Content-addressed cache hit: SHA verified, content is truthful.
+          return cached;
+        }
+        console.warn(
+          `KV cache SHA mismatch — discarding stale index. ` +
+          `cached=${cached.commit_sha}/${cached.canon_commit_sha} ` +
+          `resolved=${baselineSha}/${canonSha ?? "none"}`
+        );
       }
     }
 

From 44aa00454bc11e45c23e3815284b1f0cb0baee5e Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Thu, 9 Apr 2026 13:23:05 +0000
Subject: [PATCH 4/5] Fix phrase boost: guard behind positive BM25 score and
 filter stop words from bigrams

---
 src/search/bm25.js  | 31 ++++++++++++++-----------------
 workers/src/bm25.ts | 31 ++++++++++++++-----------------
 2 files changed, 28 insertions(+), 34 deletions(-)

diff --git a/src/search/bm25.js b/src/search/bm25.js
index b8718cd..11338b8 100644
--- a/src/search/bm25.js
+++ b/src/search/bm25.js
@@ -81,7 +81,7 @@ export function searchBM25(index, query, limit = 5) {
 
   // Pre-compute phrase matching inputs once, outside the per-doc loop.
   const queryLower = query.toLowerCase();
-  const queryWords = queryLower.split(/\s+/).filter((w) => w.length > 1);
+  const queryWords = queryLower.split(/\s+/).filter((w) => w.length > 1 && !STOP_WORDS.has(w));
 
   const scores = [];
 
@@ -106,22 +106,19 @@ export function searchBM25(index, query, limit = 5) {
       score += idf * tfNorm;
     }
 
-    // Phrase boost: BM25 treats every query token independently, which lets
-    // high-frequency terms dilute rare-but-important ones (e.g. "pattern"
-    // drowning out "vodka" in "Vodka Architecture pattern"). Checking whether
-    // the original query phrase appears verbatim — or as a bigram — in the
-    // document's original text rescues those precise title/tag matches.
-    const docLower = doc.originalText.toLowerCase();
-    if (docLower.includes(queryLower)) {
-      // Full query is a substring of the doc text — strong exact match.
-      score += PHRASE_BOOST_EXACT;
-    } else if (queryWords.length >= 2) {
-      // Scan every consecutive word pair in the query; first hit wins.
-      for (let i = 0; i < queryWords.length - 1; i++) {
-        const bigram = queryWords[i] + " " + queryWords[i + 1];
-        if (docLower.includes(bigram)) {
-          score += PHRASE_BOOST_PARTIAL;
-          break;
+    // Phrase boost: supplement BM25 — never replace it.
+    // Only apply when the document already has genuine BM25 relevance.
+    if (score > 0) {
+      const docLower = doc.originalText.toLowerCase();
+      if (docLower.includes(queryLower)) {
+        score += PHRASE_BOOST_EXACT;
+      } else if (queryWords.length >= 2) {
+        for (let i = 0; i < queryWords.length - 1; i++) {
+          const bigram = queryWords[i] + " " + queryWords[i + 1];
+          if (docLower.includes(bigram)) {
+            score += PHRASE_BOOST_PARTIAL;
+            break;
+          }
         }
       }
     }
diff --git a/workers/src/bm25.ts b/workers/src/bm25.ts
index c4cb345..d07defb 100644
--- a/workers/src/bm25.ts
+++ b/workers/src/bm25.ts
@@ -102,7 +102,7 @@ export function searchBM25(
 
   // Pre-compute phrase matching inputs once, outside the per-doc loop.
   const queryLower = query.toLowerCase();
-  const queryWords = queryLower.split(/\s+/).filter((w) => w.length > 1);
+  const queryWords = queryLower.split(/\s+/).filter((w) => w.length > 1 && !STOP_WORDS.has(w));
 
   const scores: Array<{ id: string; score: number }> = [];
 
@@ -131,22 +131,19 @@ export function searchBM25(
       score += idf * tfNorm;
     }
 
-    // Phrase boost: BM25 treats every query token independently, which lets
-    // high-frequency terms dilute rare-but-important ones (e.g. "pattern"
-    // drowning out "vodka" in "Vodka Architecture pattern"). Checking whether
-    // the original query phrase appears verbatim — or as a bigram — in the
-    // document's original text rescues those precise title/tag matches.
-    const docLower = doc.originalText.toLowerCase();
-    if (docLower.includes(queryLower)) {
-      // Full query is a substring of the doc text — strong exact match.
-      score += PHRASE_BOOST_EXACT;
-    } else if (queryWords.length >= 2) {
-      // Scan every consecutive word pair in the query; first hit wins.
-      for (let i = 0; i < queryWords.length - 1; i++) {
-        const bigram = queryWords[i] + " " + queryWords[i + 1];
-        if (docLower.includes(bigram)) {
-          score += PHRASE_BOOST_PARTIAL;
-          break;
+    // Phrase boost: supplement BM25 — never replace it.
+    // Only apply when the document already has genuine BM25 relevance.
+    if (score > 0) {
+      const docLower = doc.originalText.toLowerCase();
+      if (docLower.includes(queryLower)) {
+        score += PHRASE_BOOST_EXACT;
+      } else if (queryWords.length >= 2) {
+        for (let i = 0; i < queryWords.length - 1; i++) {
+          const bigram = queryWords[i] + " " + queryWords[i + 1];
+          if (docLower.includes(bigram)) {
+            score += PHRASE_BOOST_PARTIAL;
+            break;
+          }
         }
       }
     }

From 3e0dd25e14a417e79a66a4784c040668adcbb734 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Thu, 9 Apr 2026 13:35:08 +0000
Subject: [PATCH 5/5] fix: normalize queryWords with same punctuation/split
 pipeline as tokenize

queryWords was built by splitting the raw lowercased query on whitespace
only, skipping the punctuation stripping and hyphen/underscore/slash
splitting that tokenize() applies. This caused dirty tokens like
"pattern?" or "what's" to form bigrams that never matched against clean
document text, silently disabling partial phrase boost for punctuated
queries. Apply the same replace/split pipeline as tokenize (minus
stemming) so bigram matching works correctly.
---
 src/search/bm25.js  | 2 +-
 workers/src/bm25.ts | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/search/bm25.js b/src/search/bm25.js
index 11338b8..36786ee 100644
--- a/src/search/bm25.js
+++ b/src/search/bm25.js
@@ -81,7 +81,7 @@ export function searchBM25(index, query, limit = 5) {
 
   // Pre-compute phrase matching inputs once, outside the per-doc loop.
   const queryLower = query.toLowerCase();
-  const queryWords = queryLower.split(/\s+/).filter((w) => w.length > 1 && !STOP_WORDS.has(w));
+  const queryWords = queryLower.replace(/[^\w\s-]/g, " ").split(/[\s\-_/]+/).filter((w) => w.length > 1 && !STOP_WORDS.has(w));
 
   const scores = [];
 
diff --git a/workers/src/bm25.ts b/workers/src/bm25.ts
index d07defb..f1aea92 100644
--- a/workers/src/bm25.ts
+++ b/workers/src/bm25.ts
@@ -102,7 +102,7 @@ export function searchBM25(
 
   // Pre-compute phrase matching inputs once, outside the per-doc loop.
   const queryLower = query.toLowerCase();
-  const queryWords = queryLower.split(/\s+/).filter((w) => w.length > 1 && !STOP_WORDS.has(w));
+  const queryWords = queryLower.replace(/[^\w\s-]/g, " ").split(/[\s\-_/]+/).filter((w) => w.length > 1 && !STOP_WORDS.has(w));
 
   const scores: Array<{ id: string; score: number }> = [];